text_parser 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
data/lib/text_parser.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # -*- encoding : utf-8 -*-
1
2
  module TextParser
2
3
  # Returns a parsed text with the words and its occurrences.
3
4
  # @param [Hash] [args]
@@ -11,11 +12,11 @@ module TextParser
11
12
  :negative_dictionary => []
12
13
  }.merge(args)
13
14
  result = []
14
- text = process_text
15
+ text = self.gsub(/[^A-Za-zÀ-ú0-9\-]/u," ").strip
15
16
  options[:dictionary] = text.split(" ") unless options[:dictionary]
16
17
  return [] if options[:dictionary].count < 1
17
- regex = Regexp.new("(\\b#{options[:dictionary].join('\\b|\\b')}\\b)", Regexp::IGNORECASE)
18
- match_result = text.scan(regex).map{|i| i.shift.downcase}
18
+ regex = Regexp.new(options[:dictionary].join('\\b|\\b'), Regexp::IGNORECASE)
19
+ match_result = text.scan(regex).map{|i| i.downcase}
19
20
  match_result.each do |w|
20
21
  result << {:hits => match_result.count(w), :word => w} unless result.select{|r| r[:word] == w}.shift || options[:negative_dictionary].map{|i| i.downcase}.include?(w)
21
22
  end
@@ -23,16 +24,9 @@ module TextParser
23
24
  result.reverse! if options[:order_direction] == :desc
24
25
  result
25
26
  end
26
-
27
- private
28
-
29
- def process_text
30
- text = self.gsub(/\s{2,}/," ")
31
- text = text.gsub(/[^\w\s\-]/u, "")
32
- end
33
27
  end
34
28
 
35
29
  # Includes module TextParser in the String object
36
30
  class String
37
31
  include TextParser
38
- end
32
+ end
@@ -1,10 +1,11 @@
1
+ # -*- encoding : utf-8 -*-
1
2
  require "test/unit"
2
3
  require "text_parser"
3
4
 
4
5
  class TextParserTest < Test::Unit::TestCase
5
6
 
6
7
  def test_should_have_method_parse
7
- assert "some text".methods.select{|a| a == "parse"}.count > 0
8
+ assert "some text".methods.select{|a| a.to_s == "parse"}.count > 0
8
9
  end
9
10
 
10
11
  def test_should_parse
@@ -13,11 +14,11 @@ class TextParserTest < Test::Unit::TestCase
13
14
  {:word => "dolor", :hits => 1}],
14
15
  text.parse(:dictionary => ["dolor", "consectetur"])
15
16
  end
16
-
17
+
17
18
  def test_should_parse_without_dictionary
18
19
  assert_equal [{:word => "test", :hits => 2}], "test test".parse
19
20
  end
20
-
21
+
21
22
  def test_should_remove_some_characters
22
23
  text = "Test? Test. Yes, test!"
23
24
  assert_equal [{:word => "test", :hits => 3}, {:word => "yes", :hits => 1}], text.parse
@@ -65,7 +66,7 @@ class TextParserTest < Test::Unit::TestCase
65
66
  end
66
67
 
67
68
  def test_should_works_with_special_characters
68
- assert_equal [], "'/[.](\")".parse
69
+ assert_equal [], "*&%?!$#%$@\\'///[.](\")".parse
69
70
  end
70
71
 
71
72
  def test_should_works_hifen
@@ -90,20 +91,18 @@ class TextParserTest < Test::Unit::TestCase
90
91
  {:word => "espacos",:hits => 1},
91
92
  {:word => "eu", :hits => 1},
92
93
  {:word => "se", :hits => 1}], text.parse
93
- end
94
-
94
+ end
95
+
95
96
  def test_should_keep_some_special_character
96
97
  assert_equal [{:word => "espaço", :hits => 1},
97
98
  {:word => "sideral",:hits => 1}], "Espaço sideral".parse
98
99
  assert_equal [{:word => "açúcar", :hits => 1},
99
- {:word => "bom", :hits => 1},
100
- {:word => "de", :hits => 1},
101
- {:word => "pão", :hits => 1}], "Pão de açúcar é bom.".parse
100
+ {:word => "pão", :hits => 1}], "Pão açúcar".parse
101
+ assert_equal [{:word => "ãéç", :hits => 1}], "ãéç".parse
102
102
  end
103
103
  end
104
104
 
105
-
106
-
105
+
107
106
 
108
107
 
109
108
 
data/text_parser.gemspec CHANGED
@@ -1,12 +1,10 @@
1
- require "lib/text_parser/version"
2
-
3
1
  Gem::Specification.new do |s|
4
2
  s.name = "text_parser"
5
- s.version = TextParser::Version.const_get("STRING")
3
+ s.version = "0.1.6"
6
4
  s.author = "Frederico de Paula"
7
5
  s.email = "fpaula@gmail.com"
8
6
  s.summary = "A easy way to parse a text."
9
- s.description = "Using method parse in the String object you can parse any text"
7
+ s.description = "Using method parse in the String object you can parse any text."
10
8
  s.files = Dir["{lib/**/*.rb,README.rdoc,test/**/*.rb,Rakefile,*.gemspec,doc/**/*}"]
11
9
  s.homepage = "http://textparser.heroku.com/"
12
10
  end
metadata CHANGED
@@ -1,38 +1,25 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: text_parser
3
- version: !ruby/object:Gem::Version
4
- hash: 17
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.6
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 1
9
- - 5
10
- version: 0.1.5
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Frederico de Paula
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2011-12-09 00:00:00 -02:00
19
- default_executable:
12
+ date: 2011-12-15 00:00:00.000000000Z
20
13
  dependencies: []
21
-
22
- description: Using method parse in the String object you can parse any text
14
+ description: Using method parse in the String object you can parse any text.
23
15
  email: fpaula@gmail.com
24
16
  executables: []
25
-
26
17
  extensions: []
27
-
28
18
  extra_rdoc_files: []
29
-
30
- files:
31
- - lib/text_parser/version.rb
19
+ files:
32
20
  - lib/text_parser.rb
33
21
  - README.rdoc
34
22
  - test/text_parser_test.rb
35
- - test/version_test.rb
36
23
  - Rakefile
37
24
  - text_parser.gemspec
38
25
  - doc/_index.html
@@ -52,39 +39,28 @@ files:
52
39
  - doc/TextParser/Version.html
53
40
  - doc/TextParser.html
54
41
  - doc/top-level-namespace.html
55
- has_rdoc: true
56
42
  homepage: http://textparser.heroku.com/
57
43
  licenses: []
58
-
59
44
  post_install_message:
60
45
  rdoc_options: []
61
-
62
- require_paths:
46
+ require_paths:
63
47
  - lib
64
- required_ruby_version: !ruby/object:Gem::Requirement
48
+ required_ruby_version: !ruby/object:Gem::Requirement
65
49
  none: false
66
- requirements:
67
- - - ">="
68
- - !ruby/object:Gem::Version
69
- hash: 3
70
- segments:
71
- - 0
72
- version: "0"
73
- required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
55
  none: false
75
- requirements:
76
- - - ">="
77
- - !ruby/object:Gem::Version
78
- hash: 3
79
- segments:
80
- - 0
81
- version: "0"
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
82
60
  requirements: []
83
-
84
61
  rubyforge_project:
85
- rubygems_version: 1.4.2
62
+ rubygems_version: 1.8.10
86
63
  signing_key:
87
64
  specification_version: 3
88
65
  summary: A easy way to parse a text.
89
66
  test_files: []
90
-
@@ -1,8 +0,0 @@
1
- module TextParser
2
- module Version
3
- MAJOR = 0
4
- MINOR = 1
5
- PATCH = 5
6
- STRING = "#{MAJOR}.#{MINOR}.#{PATCH}"
7
- end
8
- end
data/test/version_test.rb DELETED
@@ -1,23 +0,0 @@
1
- require "test/unit"
2
- require "text_parser/version"
3
-
4
- class TextParserTest < Test::Unit::TestCase
5
- def test_version
6
- assert_equal TextParser::Version.const_get("STRING"), "0.1.5"
7
- end
8
- end
9
-
10
-
11
-
12
-
13
-
14
-
15
-
16
-
17
-
18
-
19
-
20
-
21
-
22
-
23
-