text_parser 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/text_parser.rb +5 -11
- data/test/text_parser_test.rb +10 -11
- data/text_parser.gemspec +2 -4
- metadata +19 -43
- data/lib/text_parser/version.rb +0 -8
- data/test/version_test.rb +0 -23
data/lib/text_parser.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
1
2
|
module TextParser
|
2
3
|
# Returns a parsed text with the words and its occurrences.
|
3
4
|
# @param [Hash] [args]
|
@@ -11,11 +12,11 @@ module TextParser
|
|
11
12
|
:negative_dictionary => []
|
12
13
|
}.merge(args)
|
13
14
|
result = []
|
14
|
-
text =
|
15
|
+
text = self.gsub(/[^A-Za-zÀ-ú0-9\-]/u," ").strip
|
15
16
|
options[:dictionary] = text.split(" ") unless options[:dictionary]
|
16
17
|
return [] if options[:dictionary].count < 1
|
17
|
-
regex = Regexp.new(
|
18
|
-
match_result = text.scan(regex).map{|i| i.
|
18
|
+
regex = Regexp.new(options[:dictionary].join('\\b|\\b'), Regexp::IGNORECASE)
|
19
|
+
match_result = text.scan(regex).map{|i| i.downcase}
|
19
20
|
match_result.each do |w|
|
20
21
|
result << {:hits => match_result.count(w), :word => w} unless result.select{|r| r[:word] == w}.shift || options[:negative_dictionary].map{|i| i.downcase}.include?(w)
|
21
22
|
end
|
@@ -23,16 +24,9 @@ module TextParser
|
|
23
24
|
result.reverse! if options[:order_direction] == :desc
|
24
25
|
result
|
25
26
|
end
|
26
|
-
|
27
|
-
private
|
28
|
-
|
29
|
-
def process_text
|
30
|
-
text = self.gsub(/\s{2,}/," ")
|
31
|
-
text = text.gsub(/[^\w\s\-]/u, "")
|
32
|
-
end
|
33
27
|
end
|
34
28
|
|
35
29
|
# Includes module TextParser in the String object
|
36
30
|
class String
|
37
31
|
include TextParser
|
38
|
-
end
|
32
|
+
end
|
data/test/text_parser_test.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
1
2
|
require "test/unit"
|
2
3
|
require "text_parser"
|
3
4
|
|
4
5
|
class TextParserTest < Test::Unit::TestCase
|
5
6
|
|
6
7
|
def test_should_have_method_parse
|
7
|
-
assert "some text".methods.select{|a| a == "parse"}.count > 0
|
8
|
+
assert "some text".methods.select{|a| a.to_s == "parse"}.count > 0
|
8
9
|
end
|
9
10
|
|
10
11
|
def test_should_parse
|
@@ -13,11 +14,11 @@ class TextParserTest < Test::Unit::TestCase
|
|
13
14
|
{:word => "dolor", :hits => 1}],
|
14
15
|
text.parse(:dictionary => ["dolor", "consectetur"])
|
15
16
|
end
|
16
|
-
|
17
|
+
|
17
18
|
def test_should_parse_without_dictionary
|
18
19
|
assert_equal [{:word => "test", :hits => 2}], "test test".parse
|
19
20
|
end
|
20
|
-
|
21
|
+
|
21
22
|
def test_should_remove_some_characters
|
22
23
|
text = "Test? Test. Yes, test!"
|
23
24
|
assert_equal [{:word => "test", :hits => 3}, {:word => "yes", :hits => 1}], text.parse
|
@@ -65,7 +66,7 @@ class TextParserTest < Test::Unit::TestCase
|
|
65
66
|
end
|
66
67
|
|
67
68
|
def test_should_works_with_special_characters
|
68
|
-
assert_equal [], "'
|
69
|
+
assert_equal [], "*&%?!$#%$@\\'///[.](\")".parse
|
69
70
|
end
|
70
71
|
|
71
72
|
def test_should_works_hifen
|
@@ -90,20 +91,18 @@ class TextParserTest < Test::Unit::TestCase
|
|
90
91
|
{:word => "espacos",:hits => 1},
|
91
92
|
{:word => "eu", :hits => 1},
|
92
93
|
{:word => "se", :hits => 1}], text.parse
|
93
|
-
end
|
94
|
-
|
94
|
+
end
|
95
|
+
|
95
96
|
def test_should_keep_some_special_character
|
96
97
|
assert_equal [{:word => "espaço", :hits => 1},
|
97
98
|
{:word => "sideral",:hits => 1}], "Espaço sideral".parse
|
98
99
|
assert_equal [{:word => "açúcar", :hits => 1},
|
99
|
-
{:word => "
|
100
|
-
|
101
|
-
{:word => "pão", :hits => 1}], "Pão de açúcar é bom.".parse
|
100
|
+
{:word => "pão", :hits => 1}], "Pão açúcar".parse
|
101
|
+
assert_equal [{:word => "ãéç", :hits => 1}], "ãéç".parse
|
102
102
|
end
|
103
103
|
end
|
104
104
|
|
105
|
-
|
106
|
-
|
105
|
+
|
107
106
|
|
108
107
|
|
109
108
|
|
data/text_parser.gemspec
CHANGED
@@ -1,12 +1,10 @@
|
|
1
|
-
require "lib/text_parser/version"
|
2
|
-
|
3
1
|
Gem::Specification.new do |s|
|
4
2
|
s.name = "text_parser"
|
5
|
-
s.version =
|
3
|
+
s.version = "0.1.6"
|
6
4
|
s.author = "Frederico de Paula"
|
7
5
|
s.email = "fpaula@gmail.com"
|
8
6
|
s.summary = "A easy way to parse a text."
|
9
|
-
s.description = "Using method parse in the String object you can parse any text"
|
7
|
+
s.description = "Using method parse in the String object you can parse any text."
|
10
8
|
s.files = Dir["{lib/**/*.rb,README.rdoc,test/**/*.rb,Rakefile,*.gemspec,doc/**/*}"]
|
11
9
|
s.homepage = "http://textparser.heroku.com/"
|
12
10
|
end
|
metadata
CHANGED
@@ -1,38 +1,25 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_parser
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.6
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 5
|
10
|
-
version: 0.1.5
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Frederico de Paula
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
date: 2011-12-09 00:00:00 -02:00
|
19
|
-
default_executable:
|
12
|
+
date: 2011-12-15 00:00:00.000000000Z
|
20
13
|
dependencies: []
|
21
|
-
|
22
|
-
description: Using method parse in the String object you can parse any text
|
14
|
+
description: Using method parse in the String object you can parse any text.
|
23
15
|
email: fpaula@gmail.com
|
24
16
|
executables: []
|
25
|
-
|
26
17
|
extensions: []
|
27
|
-
|
28
18
|
extra_rdoc_files: []
|
29
|
-
|
30
|
-
files:
|
31
|
-
- lib/text_parser/version.rb
|
19
|
+
files:
|
32
20
|
- lib/text_parser.rb
|
33
21
|
- README.rdoc
|
34
22
|
- test/text_parser_test.rb
|
35
|
-
- test/version_test.rb
|
36
23
|
- Rakefile
|
37
24
|
- text_parser.gemspec
|
38
25
|
- doc/_index.html
|
@@ -52,39 +39,28 @@ files:
|
|
52
39
|
- doc/TextParser/Version.html
|
53
40
|
- doc/TextParser.html
|
54
41
|
- doc/top-level-namespace.html
|
55
|
-
has_rdoc: true
|
56
42
|
homepage: http://textparser.heroku.com/
|
57
43
|
licenses: []
|
58
|
-
|
59
44
|
post_install_message:
|
60
45
|
rdoc_options: []
|
61
|
-
|
62
|
-
require_paths:
|
46
|
+
require_paths:
|
63
47
|
- lib
|
64
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
49
|
none: false
|
66
|
-
requirements:
|
67
|
-
- -
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
|
70
|
-
|
71
|
-
- 0
|
72
|
-
version: "0"
|
73
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
55
|
none: false
|
75
|
-
requirements:
|
76
|
-
- -
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
|
79
|
-
segments:
|
80
|
-
- 0
|
81
|
-
version: "0"
|
56
|
+
requirements:
|
57
|
+
- - ! '>='
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
82
60
|
requirements: []
|
83
|
-
|
84
61
|
rubyforge_project:
|
85
|
-
rubygems_version: 1.
|
62
|
+
rubygems_version: 1.8.10
|
86
63
|
signing_key:
|
87
64
|
specification_version: 3
|
88
65
|
summary: A easy way to parse a text.
|
89
66
|
test_files: []
|
90
|
-
|
data/lib/text_parser/version.rb
DELETED
data/test/version_test.rb
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
require "test/unit"
|
2
|
-
require "text_parser/version"
|
3
|
-
|
4
|
-
class TextParserTest < Test::Unit::TestCase
|
5
|
-
def test_version
|
6
|
-
assert_equal TextParser::Version.const_get("STRING"), "0.1.5"
|
7
|
-
end
|
8
|
-
end
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|