text_parser 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +1 -1
- data/lib/text_parser.rb +1 -2
- data/lib/text_parser/version.rb +2 -2
- data/test/text_parser_test.rb +39 -26
- data/test/version_test.rb +23 -0
- data/text_parser.gemspec +6 -4
- metadata +7 -6
data/README.rdoc
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
| :dictionary | Array | nil |
|
6
6
|
| :order (:word | :hits) | Symbol | :word |
|
7
7
|
| :order_direction (:asc | :desc) | Symbol | :asc |
|
8
|
-
| :negative_dictionary | Array |
|
8
|
+
| :negative_dictionary | Array | nil |
|
9
9
|
+---------------------------------+--------+---------------+
|
10
10
|
|
11
11
|
|
data/lib/text_parser.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'iconv'
|
2
1
|
module TextParser
|
3
2
|
# Returns a parsed text with the words and its occurrences.
|
4
3
|
# @param [Hash] [args]
|
@@ -29,7 +28,7 @@ module TextParser
|
|
29
28
|
|
30
29
|
def process_text
|
31
30
|
text = self.gsub(/\s{2,}/," ")
|
32
|
-
text = text.gsub(/[^\w\s\-]
|
31
|
+
text = text.gsub(/[^\w\s\-]/u, "")
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
data/lib/text_parser/version.rb
CHANGED
data/test/text_parser_test.rb
CHANGED
@@ -2,28 +2,29 @@ require "test/unit"
|
|
2
2
|
require "text_parser"
|
3
3
|
|
4
4
|
class TextParserTest < Test::Unit::TestCase
|
5
|
+
|
5
6
|
def test_should_have_method_parse
|
6
7
|
assert "some text".methods.select{|a| a == "parse"}.count > 0
|
7
8
|
end
|
8
9
|
|
9
10
|
def test_should_parse
|
10
11
|
text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque pretium consectetur."
|
11
|
-
assert_equal
|
12
|
+
assert_equal [{:word => "consectetur", :hits => 2},
|
13
|
+
{:word => "dolor", :hits => 1}],
|
14
|
+
text.parse(:dictionary => ["dolor", "consectetur"])
|
12
15
|
end
|
13
16
|
|
14
17
|
def test_should_parse_without_dictionary
|
15
|
-
|
16
|
-
assert_equal text.parse, [{:word => "test", :hits => 2}]
|
18
|
+
assert_equal [{:word => "test", :hits => 2}], "test test".parse
|
17
19
|
end
|
18
20
|
|
19
21
|
def test_should_remove_some_characters
|
20
22
|
text = "Test? Test. Yes, test!"
|
21
|
-
assert_equal
|
23
|
+
assert_equal [{:word => "test", :hits => 3}, {:word => "yes", :hits => 1}], text.parse
|
22
24
|
end
|
23
25
|
|
24
26
|
def test_should_return_an_empty_array
|
25
|
-
|
26
|
-
assert_equal text.parse(:dictionary => ['abc']), []
|
27
|
+
assert_equal "test".parse(:dictionary => ['abc']), []
|
27
28
|
end
|
28
29
|
|
29
30
|
def test_should_order_by_word_asc
|
@@ -32,13 +33,14 @@ class TextParserTest < Test::Unit::TestCase
|
|
32
33
|
{:word => "beta", :hits => 1},
|
33
34
|
{:word => "gamma", :hits => 2},
|
34
35
|
{:word => "omega", :hits => 1}]
|
35
|
-
assert_equal text.parse
|
36
|
-
assert_equal text.parse(:order => :word)
|
37
|
-
assert_equal text.parse(:order => :word, :order_direction => :asc)
|
36
|
+
assert_equal result, text.parse
|
37
|
+
assert_equal result, text.parse(:order => :word)
|
38
|
+
assert_equal result, text.parse(:order => :word, :order_direction => :asc)
|
38
39
|
end
|
39
40
|
|
40
41
|
def test_should_order_by_word_desc
|
41
|
-
assert_equal
|
42
|
+
assert_equal [{:word => "zzz", :hits => 1},
|
43
|
+
{:word => "aaa", :hits => 1}], "aaa zzz".parse(:order => :word, :order_direction => :desc)
|
42
44
|
end
|
43
45
|
|
44
46
|
def test_should_order_by_hits_asc
|
@@ -46,47 +48,58 @@ class TextParserTest < Test::Unit::TestCase
|
|
46
48
|
result = [{:word => "beta", :hits => 1},
|
47
49
|
{:word => "alpha", :hits => 2},
|
48
50
|
{:word => "gamma", :hits => 3}]
|
49
|
-
assert_equal text.parse(:order => :hits)
|
50
|
-
assert_equal text.parse(:order => :hits, :order_direction => :asc)
|
51
|
+
assert_equal result, text.parse(:order => :hits)
|
52
|
+
assert_equal result, text.parse(:order => :hits, :order_direction => :asc)
|
51
53
|
end
|
52
54
|
|
53
55
|
def test_should_order_by_hits_desc
|
54
56
|
text = "gamma alpha gamma beta alpha gamma"
|
55
|
-
assert_equal
|
56
|
-
|
57
|
-
|
57
|
+
assert_equal [{:word => "gamma", :hits => 3},
|
58
|
+
{:word => "alpha", :hits => 2},
|
59
|
+
{:word => "beta", :hits => 1}],
|
60
|
+
text.parse(:order => :hits, :order_direction => :desc)
|
58
61
|
end
|
59
62
|
|
60
63
|
def test_should_ignore_negative_dictionary
|
61
|
-
assert_equal "This is good".parse(:negative_dictionary => ["is", "this"])
|
64
|
+
assert_equal [{:word => "good", :hits => 1}], "This is good".parse(:negative_dictionary => ["is", "this"])
|
62
65
|
end
|
63
66
|
|
64
67
|
def test_should_works_with_special_characters
|
65
|
-
assert_equal "'/[.](\")".parse
|
68
|
+
assert_equal [], "'/[.](\")".parse
|
66
69
|
end
|
67
70
|
|
68
71
|
def test_should_works_hifen
|
69
|
-
assert_equal
|
72
|
+
assert_equal [{:word => "self-service", :hits => 1}], "self-service".parse
|
70
73
|
end
|
71
74
|
|
72
75
|
def test_should_return_double_words
|
73
|
-
assert_equal
|
76
|
+
assert_equal [{:word => "forrest gump", :hits => 1}],
|
77
|
+
"I like the movie Forrest Gump.".parse(:dictionary => ["Forrest Gump"])
|
74
78
|
end
|
75
79
|
|
76
80
|
def test_should_manage_null_args
|
77
81
|
args = {:dictionary=>nil, :negative_dictionary=>nil, :order=>nil, :order_direction=>nil}
|
78
|
-
assert_equal
|
82
|
+
assert_equal [{:word => "text", :hits => 1}], "text".parse(args)
|
79
83
|
end
|
80
84
|
|
81
85
|
def test_should_work_with_many_spaces
|
82
86
|
text = "e se eu encher de espacos"
|
83
|
-
assert_equal [{:word => "de",
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
87
|
+
assert_equal [{:word => "de", :hits => 1},
|
88
|
+
{:word => "e", :hits => 1},
|
89
|
+
{:word => "encher", :hits => 1},
|
90
|
+
{:word => "espacos",:hits => 1},
|
91
|
+
{:word => "eu", :hits => 1},
|
92
|
+
{:word => "se", :hits => 1}], text.parse
|
89
93
|
end
|
94
|
+
|
95
|
+
def test_should_keep_some_special_character
|
96
|
+
assert_equal [{:word => "espaço", :hits => 1},
|
97
|
+
{:word => "sideral",:hits => 1}], "Espaço sideral".parse
|
98
|
+
assert_equal [{:word => "açúcar", :hits => 1},
|
99
|
+
{:word => "bom", :hits => 1},
|
100
|
+
{:word => "de", :hits => 1},
|
101
|
+
{:word => "pão", :hits => 1}], "Pão de açúcar é bom.".parse
|
102
|
+
end
|
90
103
|
end
|
91
104
|
|
92
105
|
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require "text_parser/version"
|
3
|
+
|
4
|
+
class TextParserTest < Test::Unit::TestCase
|
5
|
+
def test_version
|
6
|
+
assert_equal TextParser::Version.const_get("STRING"), "0.1.5"
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
data/text_parser.gemspec
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
require "lib/text_parser/version"
|
2
|
+
|
1
3
|
Gem::Specification.new do |s|
|
2
4
|
s.name = "text_parser"
|
3
|
-
s.version =
|
5
|
+
s.version = TextParser::Version.const_get("STRING")
|
4
6
|
s.author = "Frederico de Paula"
|
5
7
|
s.email = "fpaula@gmail.com"
|
6
|
-
s.summary = "A easy way to parse text."
|
8
|
+
s.summary = "A easy way to parse a text."
|
7
9
|
s.description = "Using method parse in the String object you can parse any text"
|
8
10
|
s.files = Dir["{lib/**/*.rb,README.rdoc,test/**/*.rb,Rakefile,*.gemspec,doc/**/*}"]
|
9
|
-
s.homepage = "
|
10
|
-
end
|
11
|
+
s.homepage = "http://textparser.heroku.com/"
|
12
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
-
|
10
|
-
version: 0.1.
|
9
|
+
- 5
|
10
|
+
version: 0.1.5
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Frederico de Paula
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-12-
|
18
|
+
date: 2011-12-09 00:00:00 -02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies: []
|
21
21
|
|
@@ -32,6 +32,7 @@ files:
|
|
32
32
|
- lib/text_parser.rb
|
33
33
|
- README.rdoc
|
34
34
|
- test/text_parser_test.rb
|
35
|
+
- test/version_test.rb
|
35
36
|
- Rakefile
|
36
37
|
- text_parser.gemspec
|
37
38
|
- doc/_index.html
|
@@ -52,7 +53,7 @@ files:
|
|
52
53
|
- doc/TextParser.html
|
53
54
|
- doc/top-level-namespace.html
|
54
55
|
has_rdoc: true
|
55
|
-
homepage:
|
56
|
+
homepage: http://textparser.heroku.com/
|
56
57
|
licenses: []
|
57
58
|
|
58
59
|
post_install_message:
|
@@ -84,6 +85,6 @@ rubyforge_project:
|
|
84
85
|
rubygems_version: 1.4.2
|
85
86
|
signing_key:
|
86
87
|
specification_version: 3
|
87
|
-
summary: A easy way to parse text.
|
88
|
+
summary: A easy way to parse a text.
|
88
89
|
test_files: []
|
89
90
|
|