text_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc ADDED
@@ -0,0 +1,20 @@
1
+ :title: Text Parser Ruby Gem
2
+ =Arguments
3
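+ * :dictionary => nil (default) - list of words to count; when nil, every word in the text is counted
4
+ * :order => :word (default) - result key to sort by, :word or :hits
5
+ * :order_direction => :asc (default) - :asc or :desc
6
+ * :negative_dictionary => [] (default) - list of words to leave out of the result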
7
+
8
+ =Usage
9
+ "Simple, simple test".parse # => [{:word => "simple", :hits => 2}, {:word => "test", :hits => 1}]
10
+
11
+ my_text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque pretium consectetur."
12
+ my_text.parse(:dictionary => ["dolor", "consectetur"])
13
+ # => [{:word => "consectetur", :hits => 2}, {:word => "dolor", :hits => 1}]
14
+
15
+ my_text.parse(:dictionary => ["dolor", "consectetur"], :order => :word, :order_direction => :desc)
16
+ # => [{:word => "dolor", :hits => 1}, {:word => "consectetur", :hits => 2}]
17
+
18
+
19
+
20
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
require "rake/testtask"

# Registers a Rake test task that puts "lib" on the load path and runs
# every file matching test/**/*_test.rb.
Rake::TestTask.new do |task|
  task.libs.push("lib")
  task.test_files = Dir.glob("test/**/*_test.rb")
end
data/lib/string.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'text_parser'
2
+
3
# Mixes TextParser into String so that every string gains TextParser's
# public instance methods.
String.class_eval do
  include TextParser
end
@@ -0,0 +1,27 @@
1
# Word-frequency counting for string-like objects.
#
# The module calls +gsub+ on +self+, so it is meant to be mixed into String
# (or another class that responds to +gsub+ the same way).
module TextParser
  # Counts how often words occur in the receiver.
  #
  # Punctuation is stripped first (see #process_text), matching is
  # case-insensitive, and every reported word is downcased.
  #
  # args - Hash of options, merged over these defaults:
  #        :dictionary          - Array of words to look for. When nil
  #                               (default), every whitespace-separated word
  #                               of the cleaned text is used.
  #        :order               - result key to sort by, :word (default) or
  #                               :hits.
  #        :order_direction     - :asc (default); :desc reverses the order.
  #        :negative_dictionary - Array of words to leave out of the result
  #                               (compared case-insensitively), default [].
  #
  # Dictionary entries are matched literally and as whole words: an entry is
  # not counted when it only occurs inside a longer word.
  #
  # Returns an Array of Hashes shaped like {:hits => Integer, :word => String}.
  # Entries with equal sort keys are ordered by word so the result is
  # deterministic.
  def parse(args = {})
    options = {
      :dictionary => nil,
      :order => :word,
      :order_direction => :asc,
      :negative_dictionary => []
    }.merge(args)

    text = process_text
    dictionary = (options[:dictionary] || text.split(" ")).map { |word| word.to_s }
    dictionary = dictionary.reject { |word| word.empty? }.uniq
    # An empty alternation would compile to // and "match" the empty string
    # at every position, producing a bogus "" word.
    return [] if dictionary.empty?

    # Escape entries so regexp metacharacters in a word cannot break or alter
    # the pattern; longest first so a phrase wins over its own first word.
    alternatives = dictionary.sort_by { |word| -word.length }.map { |word| Regexp.escape(word) }
    # The lookarounds require the match not to touch another word character
    # or hyphen, i.e. to be a whole token of the cleaned text.
    regex = Regexp.new(
      "(?<![\\p{Word}\\-])(?:#{alternatives.join('|')})(?![\\p{Word}\\-])",
      Regexp::IGNORECASE
    )

    excluded = options[:negative_dictionary].map { |word| word.to_s.downcase }

    # Single pass over the matches instead of re-counting per occurrence.
    counts = Hash.new(0)
    text.scan(regex) { |match| counts[match.downcase] += 1 }

    result = counts.reject { |word, _hits| excluded.include?(word) }.
                    map { |word, hits| {:hits => hits, :word => word} }

    # Secondary key :word breaks ties, because sort_by alone is not stable.
    result = result.sort_by { |entry| [entry[options[:order]], entry[:word]] }
    result.reverse! if options[:order_direction] == :desc
    result
  end

  private

  # Returns a copy of the receiver with every character removed that is not
  # a word character, whitespace or a hyphen. \p{Word} is used instead of \w
  # because \w is ASCII-only in Ruby and would strip accented letters.
  def process_text
    gsub(/[^\p{Word}\s\-]/, "")
  end
end
@@ -0,0 +1,8 @@
1
module TextParser
  # Version number of the gem, split into its components.
  module Version
    MAJOR = 0
    MINOR = 1
    PATCH = 0
    # Dotted version string built from the components above. Frozen because
    # an interpolated string is otherwise a mutable object that any caller
    # could modify in place.
    STRING = "#{MAJOR}.#{MINOR}.#{PATCH}".freeze
  end
end
@@ -0,0 +1,66 @@
1
+ require "test/unit"
2
+ require "string"
3
+ require "text_parser"
4
+
5
# Unit tests for String#parse as provided by the TextParser mixin.
#
# Every assert_equal passes the expected value first and the actual value
# second, which is the order Test::Unit uses when it formats failure messages.
class TextParserTest < Test::Unit::TestCase

  def test_should_have_method_parse
    # Checks for the exact method instead of any method whose name merely
    # contains "parse".
    assert_respond_to "some text", :parse
  end

  def test_should_parse
    text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque pretium consectetur."
    expected = [{:word => "consectetur", :hits => 2}, {:word => "dolor", :hits => 1}]
    assert_equal expected, text.parse(:dictionary => ["dolor", "consectetur"])
  end

  def test_should_parse_without_dictionary
    text = "test test"
    assert_equal [{:word => "test", :hits => 2}], text.parse
  end

  def test_should_remove_some_characters
    text = "Test? Test. Yes, test!"
    assert_equal [{:word => "test", :hits => 3}, {:word => "yes", :hits => 1}], text.parse
  end

  def test_should_return_an_empty_array
    text = "test"
    assert_equal [], text.parse(:dictionary => ['abc'])
  end

  def test_should_order_by_word_asc
    text = " beta omega gamma alpha gamma"
    expected = [{:word => "alpha", :hits => 1},
                {:word => "beta", :hits => 1},
                {:word => "gamma", :hits => 2},
                {:word => "omega", :hits => 1}]
    # Word/ascending is the default, so all three spellings must agree.
    assert_equal expected, text.parse
    assert_equal expected, text.parse(:order => :word)
    assert_equal expected, text.parse(:order => :word, :order_direction => :asc)
  end

  def test_should_order_by_word_desc
    expected = [{:word => "zzz", :hits => 1}, {:word => "aaa", :hits => 1}]
    assert_equal expected, "aaa zzz".parse(:order => :word, :order_direction => :desc)
  end

  def test_should_order_by_hits_asc
    text = "gamma alpha gamma beta alpha gamma"
    expected = [{:word => "beta", :hits => 1},
                {:word => "alpha", :hits => 2},
                {:word => "gamma", :hits => 3}]
    assert_equal expected, text.parse(:order => :hits)
    assert_equal expected, text.parse(:order => :hits, :order_direction => :asc)
  end

  def test_should_order_by_hits_desc
    text = "gamma alpha gamma beta alpha gamma"
    expected = [{:word => "gamma", :hits => 3},
                {:word => "alpha", :hits => 2},
                {:word => "beta", :hits => 1}]
    assert_equal expected, text.parse(:order => :hits, :order_direction => :desc)
  end

  def test_should_ignore_negative_dictionary
    text = "This is good"
    assert_equal [{:word => "good", :hits => 1}], text.parse(:negative_dictionary => ["is", "this"])
  end
end
@@ -0,0 +1,7 @@
1
# Packaging manifest for the text_parser gem: identity, author, summary and
# the files to ship (library sources, README, tests, Rakefile and gemspec).
Gem::Specification.new do |spec|
  spec.name    = "text_parser"
  spec.version = "0.1.0"
  spec.author  = "Frederico de Paula"
  spec.summary = "A easy way to parse text."
  spec.files   = Dir.glob("{lib/**/*.rb,README.rdoc,test/**/*.rb,Rakefile,*.gemspec}")
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: text_parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Frederico de Paula
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-12-01 00:00:00 -02:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description:
23
+ email:
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - lib/string.rb
32
+ - lib/text_parser/version.rb
33
+ - lib/text_parser.rb
34
+ - README.rdoc
35
+ - test/text_parser_test.rb
36
+ - Rakefile
37
+ - text_parser.gemspec
38
+ has_rdoc: true
39
+ homepage:
40
+ licenses: []
41
+
42
+ post_install_message:
43
+ rdoc_options: []
44
+
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ hash: 3
53
+ segments:
54
+ - 0
55
+ version: "0"
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ hash: 3
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ requirements: []
66
+
67
+ rubyforge_project:
68
+ rubygems_version: 1.4.2
69
+ signing_key:
70
+ specification_version: 3
71
+ summary: A easy way to parse text.
72
+ test_files: []
73
+