text_parser 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/text_parser.rb +5 -11
- data/test/text_parser_test.rb +10 -11
- data/text_parser.gemspec +2 -4
- metadata +19 -43
- data/lib/text_parser/version.rb +0 -8
- data/test/version_test.rb +0 -23
data/lib/text_parser.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
1
2
|
module TextParser
|
2
3
|
# Returns a parsed text with the words and its occurrences.
|
3
4
|
# @param [Hash] [args]
|
@@ -11,11 +12,11 @@ module TextParser
|
|
11
12
|
:negative_dictionary => []
|
12
13
|
}.merge(args)
|
13
14
|
result = []
|
14
|
-
text =
|
15
|
+
text = self.gsub(/[^A-Za-zÀ-ú0-9\-]/u," ").strip
|
15
16
|
options[:dictionary] = text.split(" ") unless options[:dictionary]
|
16
17
|
return [] if options[:dictionary].count < 1
|
17
|
-
regex = Regexp.new(
|
18
|
-
match_result = text.scan(regex).map{|i| i.
|
18
|
+
regex = Regexp.new(options[:dictionary].join('\\b|\\b'), Regexp::IGNORECASE)
|
19
|
+
match_result = text.scan(regex).map{|i| i.downcase}
|
19
20
|
match_result.each do |w|
|
20
21
|
result << {:hits => match_result.count(w), :word => w} unless result.select{|r| r[:word] == w}.shift || options[:negative_dictionary].map{|i| i.downcase}.include?(w)
|
21
22
|
end
|
@@ -23,16 +24,9 @@ module TextParser
|
|
23
24
|
result.reverse! if options[:order_direction] == :desc
|
24
25
|
result
|
25
26
|
end
|
26
|
-
|
27
|
-
private
|
28
|
-
|
29
|
-
def process_text
|
30
|
-
text = self.gsub(/\s{2,}/," ")
|
31
|
-
text = text.gsub(/[^\w\s\-]/u, "")
|
32
|
-
end
|
33
27
|
end
|
34
28
|
|
35
29
|
# Includes module TextParser in the String object
|
36
30
|
class String
|
37
31
|
include TextParser
|
38
|
-
end
|
32
|
+
end
|
data/test/text_parser_test.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
1
2
|
require "test/unit"
|
2
3
|
require "text_parser"
|
3
4
|
|
4
5
|
class TextParserTest < Test::Unit::TestCase
|
5
6
|
|
6
7
|
def test_should_have_method_parse
|
7
|
-
assert "some text".methods.select{|a| a == "parse"}.count > 0
|
8
|
+
assert "some text".methods.select{|a| a.to_s == "parse"}.count > 0
|
8
9
|
end
|
9
10
|
|
10
11
|
def test_should_parse
|
@@ -13,11 +14,11 @@ class TextParserTest < Test::Unit::TestCase
|
|
13
14
|
{:word => "dolor", :hits => 1}],
|
14
15
|
text.parse(:dictionary => ["dolor", "consectetur"])
|
15
16
|
end
|
16
|
-
|
17
|
+
|
17
18
|
def test_should_parse_without_dictionary
|
18
19
|
assert_equal [{:word => "test", :hits => 2}], "test test".parse
|
19
20
|
end
|
20
|
-
|
21
|
+
|
21
22
|
def test_should_remove_some_characters
|
22
23
|
text = "Test? Test. Yes, test!"
|
23
24
|
assert_equal [{:word => "test", :hits => 3}, {:word => "yes", :hits => 1}], text.parse
|
@@ -65,7 +66,7 @@ class TextParserTest < Test::Unit::TestCase
|
|
65
66
|
end
|
66
67
|
|
67
68
|
def test_should_works_with_special_characters
|
68
|
-
assert_equal [], "'
|
69
|
+
assert_equal [], "*&%?!$#%$@\\'///[.](\")".parse
|
69
70
|
end
|
70
71
|
|
71
72
|
def test_should_works_hifen
|
@@ -90,20 +91,18 @@ class TextParserTest < Test::Unit::TestCase
|
|
90
91
|
{:word => "espacos",:hits => 1},
|
91
92
|
{:word => "eu", :hits => 1},
|
92
93
|
{:word => "se", :hits => 1}], text.parse
|
93
|
-
end
|
94
|
-
|
94
|
+
end
|
95
|
+
|
95
96
|
def test_should_keep_some_special_character
|
96
97
|
assert_equal [{:word => "espaço", :hits => 1},
|
97
98
|
{:word => "sideral",:hits => 1}], "Espaço sideral".parse
|
98
99
|
assert_equal [{:word => "açúcar", :hits => 1},
|
99
|
-
{:word => "
|
100
|
-
|
101
|
-
{:word => "pão", :hits => 1}], "Pão de açúcar é bom.".parse
|
100
|
+
{:word => "pão", :hits => 1}], "Pão açúcar".parse
|
101
|
+
assert_equal [{:word => "ãéç", :hits => 1}], "ãéç".parse
|
102
102
|
end
|
103
103
|
end
|
104
104
|
|
105
|
-
|
106
|
-
|
105
|
+
|
107
106
|
|
108
107
|
|
109
108
|
|
data/text_parser.gemspec
CHANGED
@@ -1,12 +1,10 @@
|
|
1
|
-
require "lib/text_parser/version"
|
2
|
-
|
3
1
|
Gem::Specification.new do |s|
|
4
2
|
s.name = "text_parser"
|
5
|
-
s.version =
|
3
|
+
s.version = "0.1.6"
|
6
4
|
s.author = "Frederico de Paula"
|
7
5
|
s.email = "fpaula@gmail.com"
|
8
6
|
s.summary = "A easy way to parse a text."
|
9
|
-
s.description = "Using method parse in the String object you can parse any text"
|
7
|
+
s.description = "Using method parse in the String object you can parse any text."
|
10
8
|
s.files = Dir["{lib/**/*.rb,README.rdoc,test/**/*.rb,Rakefile,*.gemspec,doc/**/*}"]
|
11
9
|
s.homepage = "http://textparser.heroku.com/"
|
12
10
|
end
|
metadata
CHANGED
@@ -1,38 +1,25 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_parser
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.6
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 5
|
10
|
-
version: 0.1.5
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Frederico de Paula
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
date: 2011-12-09 00:00:00 -02:00
|
19
|
-
default_executable:
|
12
|
+
date: 2011-12-15 00:00:00.000000000Z
|
20
13
|
dependencies: []
|
21
|
-
|
22
|
-
description: Using method parse in the String object you can parse any text
|
14
|
+
description: Using method parse in the String object you can parse any text.
|
23
15
|
email: fpaula@gmail.com
|
24
16
|
executables: []
|
25
|
-
|
26
17
|
extensions: []
|
27
|
-
|
28
18
|
extra_rdoc_files: []
|
29
|
-
|
30
|
-
files:
|
31
|
-
- lib/text_parser/version.rb
|
19
|
+
files:
|
32
20
|
- lib/text_parser.rb
|
33
21
|
- README.rdoc
|
34
22
|
- test/text_parser_test.rb
|
35
|
-
- test/version_test.rb
|
36
23
|
- Rakefile
|
37
24
|
- text_parser.gemspec
|
38
25
|
- doc/_index.html
|
@@ -52,39 +39,28 @@ files:
|
|
52
39
|
- doc/TextParser/Version.html
|
53
40
|
- doc/TextParser.html
|
54
41
|
- doc/top-level-namespace.html
|
55
|
-
has_rdoc: true
|
56
42
|
homepage: http://textparser.heroku.com/
|
57
43
|
licenses: []
|
58
|
-
|
59
44
|
post_install_message:
|
60
45
|
rdoc_options: []
|
61
|
-
|
62
|
-
require_paths:
|
46
|
+
require_paths:
|
63
47
|
- lib
|
64
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
49
|
none: false
|
66
|
-
requirements:
|
67
|
-
- -
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
|
70
|
-
|
71
|
-
- 0
|
72
|
-
version: "0"
|
73
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
55
|
none: false
|
75
|
-
requirements:
|
76
|
-
- -
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
|
79
|
-
segments:
|
80
|
-
- 0
|
81
|
-
version: "0"
|
56
|
+
requirements:
|
57
|
+
- - ! '>='
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
82
60
|
requirements: []
|
83
|
-
|
84
61
|
rubyforge_project:
|
85
|
-
rubygems_version: 1.
|
62
|
+
rubygems_version: 1.8.10
|
86
63
|
signing_key:
|
87
64
|
specification_version: 3
|
88
65
|
summary: A easy way to parse a text.
|
89
66
|
test_files: []
|
90
|
-
|
data/lib/text_parser/version.rb
DELETED
data/test/version_test.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
require "test/unit"
|
2
|
-
require "text_parser/version"
|
3
|
-
|
4
|
-
class TextParserTest < Test::Unit::TestCase
|
5
|
-
def test_version
|
6
|
-
assert_equal TextParser::Version.const_get("STRING"), "0.1.5"
|
7
|
-
end
|
8
|
-
end
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|