text_parser 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +1 -1
- data/lib/text_parser.rb +1 -2
- data/lib/text_parser/version.rb +2 -2
- data/test/text_parser_test.rb +39 -26
- data/test/version_test.rb +23 -0
- data/text_parser.gemspec +6 -4
- metadata +7 -6
data/README.rdoc
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
| :dictionary | Array | nil |
|
6
6
|
| :order (:word | :hits) | Symbol | :word |
|
7
7
|
| :order_direction (:asc | :desc) | Symbol | :asc |
|
8
|
-
| :negative_dictionary | Array |
|
8
|
+
| :negative_dictionary | Array | nil |
|
9
9
|
+---------------------------------+--------+---------------+
|
10
10
|
|
11
11
|
|
data/lib/text_parser.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'iconv'
|
2
1
|
module TextParser
|
3
2
|
# Returns a parsed text with the words and its occurrences.
|
4
3
|
# @param [Hash] [args]
|
@@ -29,7 +28,7 @@ module TextParser
|
|
29
28
|
|
30
29
|
def process_text
|
31
30
|
text = self.gsub(/\s{2,}/," ")
|
32
|
-
text = text.gsub(/[^\w\s\-]
|
31
|
+
text = text.gsub(/[^\w\s\-]/u, "")
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
data/lib/text_parser/version.rb
CHANGED
data/test/text_parser_test.rb
CHANGED
@@ -2,28 +2,29 @@ require "test/unit"
|
|
2
2
|
require "text_parser"
|
3
3
|
|
4
4
|
class TextParserTest < Test::Unit::TestCase
|
5
|
+
|
5
6
|
def test_should_have_method_parse
|
6
7
|
assert "some text".methods.select{|a| a == "parse"}.count > 0
|
7
8
|
end
|
8
9
|
|
9
10
|
def test_should_parse
|
10
11
|
text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque pretium consectetur."
|
11
|
-
assert_equal
|
12
|
+
assert_equal [{:word => "consectetur", :hits => 2},
|
13
|
+
{:word => "dolor", :hits => 1}],
|
14
|
+
text.parse(:dictionary => ["dolor", "consectetur"])
|
12
15
|
end
|
13
16
|
|
14
17
|
def test_should_parse_without_dictionary
|
15
|
-
|
16
|
-
assert_equal text.parse, [{:word => "test", :hits => 2}]
|
18
|
+
assert_equal [{:word => "test", :hits => 2}], "test test".parse
|
17
19
|
end
|
18
20
|
|
19
21
|
def test_should_remove_some_characters
|
20
22
|
text = "Test? Test. Yes, test!"
|
21
|
-
assert_equal
|
23
|
+
assert_equal [{:word => "test", :hits => 3}, {:word => "yes", :hits => 1}], text.parse
|
22
24
|
end
|
23
25
|
|
24
26
|
def test_should_return_an_empty_array
|
25
|
-
|
26
|
-
assert_equal text.parse(:dictionary => ['abc']), []
|
27
|
+
assert_equal "test".parse(:dictionary => ['abc']), []
|
27
28
|
end
|
28
29
|
|
29
30
|
def test_should_order_by_word_asc
|
@@ -32,13 +33,14 @@ class TextParserTest < Test::Unit::TestCase
|
|
32
33
|
{:word => "beta", :hits => 1},
|
33
34
|
{:word => "gamma", :hits => 2},
|
34
35
|
{:word => "omega", :hits => 1}]
|
35
|
-
assert_equal text.parse
|
36
|
-
assert_equal text.parse(:order => :word)
|
37
|
-
assert_equal text.parse(:order => :word, :order_direction => :asc)
|
36
|
+
assert_equal result, text.parse
|
37
|
+
assert_equal result, text.parse(:order => :word)
|
38
|
+
assert_equal result, text.parse(:order => :word, :order_direction => :asc)
|
38
39
|
end
|
39
40
|
|
40
41
|
def test_should_order_by_word_desc
|
41
|
-
assert_equal
|
42
|
+
assert_equal [{:word => "zzz", :hits => 1},
|
43
|
+
{:word => "aaa", :hits => 1}], "aaa zzz".parse(:order => :word, :order_direction => :desc)
|
42
44
|
end
|
43
45
|
|
44
46
|
def test_should_order_by_hits_asc
|
@@ -46,47 +48,58 @@ class TextParserTest < Test::Unit::TestCase
|
|
46
48
|
result = [{:word => "beta", :hits => 1},
|
47
49
|
{:word => "alpha", :hits => 2},
|
48
50
|
{:word => "gamma", :hits => 3}]
|
49
|
-
assert_equal text.parse(:order => :hits)
|
50
|
-
assert_equal text.parse(:order => :hits, :order_direction => :asc)
|
51
|
+
assert_equal result, text.parse(:order => :hits)
|
52
|
+
assert_equal result, text.parse(:order => :hits, :order_direction => :asc)
|
51
53
|
end
|
52
54
|
|
53
55
|
def test_should_order_by_hits_desc
|
54
56
|
text = "gamma alpha gamma beta alpha gamma"
|
55
|
-
assert_equal
|
56
|
-
|
57
|
-
|
57
|
+
assert_equal [{:word => "gamma", :hits => 3},
|
58
|
+
{:word => "alpha", :hits => 2},
|
59
|
+
{:word => "beta", :hits => 1}],
|
60
|
+
text.parse(:order => :hits, :order_direction => :desc)
|
58
61
|
end
|
59
62
|
|
60
63
|
def test_should_ignore_negative_dictionary
|
61
|
-
assert_equal "This is good".parse(:negative_dictionary => ["is", "this"])
|
64
|
+
assert_equal [{:word => "good", :hits => 1}], "This is good".parse(:negative_dictionary => ["is", "this"])
|
62
65
|
end
|
63
66
|
|
64
67
|
def test_should_works_with_special_characters
|
65
|
-
assert_equal "'/[.](\")".parse
|
68
|
+
assert_equal [], "'/[.](\")".parse
|
66
69
|
end
|
67
70
|
|
68
71
|
def test_should_works_hifen
|
69
|
-
assert_equal
|
72
|
+
assert_equal [{:word => "self-service", :hits => 1}], "self-service".parse
|
70
73
|
end
|
71
74
|
|
72
75
|
def test_should_return_double_words
|
73
|
-
assert_equal
|
76
|
+
assert_equal [{:word => "forrest gump", :hits => 1}],
|
77
|
+
"I like the movie Forrest Gump.".parse(:dictionary => ["Forrest Gump"])
|
74
78
|
end
|
75
79
|
|
76
80
|
def test_should_manage_null_args
|
77
81
|
args = {:dictionary=>nil, :negative_dictionary=>nil, :order=>nil, :order_direction=>nil}
|
78
|
-
assert_equal
|
82
|
+
assert_equal [{:word => "text", :hits => 1}], "text".parse(args)
|
79
83
|
end
|
80
84
|
|
81
85
|
def test_should_work_with_many_spaces
|
82
86
|
text = "e se eu encher de espacos"
|
83
|
-
assert_equal [{:word => "de",
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
87
|
+
assert_equal [{:word => "de", :hits => 1},
|
88
|
+
{:word => "e", :hits => 1},
|
89
|
+
{:word => "encher", :hits => 1},
|
90
|
+
{:word => "espacos",:hits => 1},
|
91
|
+
{:word => "eu", :hits => 1},
|
92
|
+
{:word => "se", :hits => 1}], text.parse
|
89
93
|
end
|
94
|
+
|
95
|
+
def test_should_keep_some_special_character
|
96
|
+
assert_equal [{:word => "espaço", :hits => 1},
|
97
|
+
{:word => "sideral",:hits => 1}], "Espaço sideral".parse
|
98
|
+
assert_equal [{:word => "açúcar", :hits => 1},
|
99
|
+
{:word => "bom", :hits => 1},
|
100
|
+
{:word => "de", :hits => 1},
|
101
|
+
{:word => "pão", :hits => 1}], "Pão de açúcar é bom.".parse
|
102
|
+
end
|
90
103
|
end
|
91
104
|
|
92
105
|
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require "text_parser/version"
|
3
|
+
|
4
|
+
class TextParserTest < Test::Unit::TestCase
|
5
|
+
def test_version
|
6
|
+
assert_equal TextParser::Version.const_get("STRING"), "0.1.5"
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
data/text_parser.gemspec
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
require "lib/text_parser/version"
|
2
|
+
|
1
3
|
Gem::Specification.new do |s|
|
2
4
|
s.name = "text_parser"
|
3
|
-
s.version =
|
5
|
+
s.version = TextParser::Version.const_get("STRING")
|
4
6
|
s.author = "Frederico de Paula"
|
5
7
|
s.email = "fpaula@gmail.com"
|
6
|
-
s.summary = "A easy way to parse text."
|
8
|
+
s.summary = "A easy way to parse a text."
|
7
9
|
s.description = "Using method parse in the String object you can parse any text"
|
8
10
|
s.files = Dir["{lib/**/*.rb,README.rdoc,test/**/*.rb,Rakefile,*.gemspec,doc/**/*}"]
|
9
|
-
s.homepage = "
|
10
|
-
end
|
11
|
+
s.homepage = "http://textparser.heroku.com/"
|
12
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
- 4
|
10
|
-
version: 0.1.4
|
9
|
+
- 5
|
10
|
+
version: 0.1.5
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Frederico de Paula
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-12-
|
18
|
+
date: 2011-12-09 00:00:00 -02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies: []
|
21
21
|
|
@@ -32,6 +32,7 @@ files:
|
|
32
32
|
- lib/text_parser.rb
|
33
33
|
- README.rdoc
|
34
34
|
- test/text_parser_test.rb
|
35
|
+
- test/version_test.rb
|
35
36
|
- Rakefile
|
36
37
|
- text_parser.gemspec
|
37
38
|
- doc/_index.html
|
@@ -52,7 +53,7 @@ files:
|
|
52
53
|
- doc/TextParser.html
|
53
54
|
- doc/top-level-namespace.html
|
54
55
|
has_rdoc: true
|
55
|
-
homepage:
|
56
|
+
homepage: http://textparser.heroku.com/
|
56
57
|
licenses: []
|
57
58
|
|
58
59
|
post_install_message:
|
@@ -84,6 +85,6 @@ rubyforge_project:
|
|
84
85
|
rubygems_version: 1.4.2
|
85
86
|
signing_key:
|
86
87
|
specification_version: 3
|
87
|
-
summary: A easy way to parse text.
|
88
|
+
summary: A easy way to parse a text.
|
88
89
|
test_files: []
|
89
90
|
|