text_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc ADDED
@@ -0,0 +1,20 @@
1
+ :title: Text Parser Ruby Gem
2
+ =Arguments
3
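+ * :dictionary => nil (words to count; when nil, every word in the text is counted)
4
+ * :order => :word (result key to sort by: :word or :hits)
5
+ * :order_direction => :asc (sort direction: :asc or :desc)
6
+ * :negative_dictionary => [] (words to leave out of the result)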
7
+
8
+ =Usage
9
+ "Simple, simple test".parse # => [{:word => "simple", :hits => 2}, {:word => "test", :hits => 1}]
10
+
11
+ my_text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque pretium consectetur."
12
+ my_text.parse(:dictionary => ["dolor", "consectetur"])
13
+ # => [{:word => "consectetur", :hits => 2}, {:word => "dolor", :hits => 1}]
14
+
15
+ my_text.parse(:dictionary => ["dolor", "consectetur"], :order => :word, :order_direction => :desc)
16
+ # => [{:word => "dolor", :hits => 1}, {:word => "consectetur", :hits => 2}]
17
+
18
+
19
+
20
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "rake/testtask"
2
+
3
# Test task: puts lib/ on the load path and runs every *_test.rb file under test/.
Rake::TestTask.new do |t|
  t.libs << "lib"
  t.test_files = Dir["test/**/*_test.rb"]
end

# Run the test suite when rake is invoked without a task name.
task :default => :test
data/lib/string.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'text_parser'
2
+
3
# Mix TextParser into String so that every string responds to #parse.
# Module#include is invoked through send so this also works on Rubies where
# include is a private method.
String.send(:include, TextParser)
@@ -0,0 +1,27 @@
1
# Mixin that adds word-frequency counting to string-like objects. The including
# class must provide +gsub+ (String does).
module TextParser
  # Counts how often each dictionary term occurs in the receiver.
  #
  # Punctuation is removed from the text first (see #process_text). Matching is
  # case-insensitive and every reported word is lower-cased.
  #
  # Options (all optional):
  # * :dictionary          - terms to look for. When nil, every
  #                          whitespace-separated word of the text is counted.
  #                          Caller-supplied terms are matched literally and may
  #                          match inside longer words.
  # * :order               - result key to sort by, :word or :hits.
  # * :order_direction     - :asc or :desc.
  # * :negative_dictionary - terms to leave out of the result.
  #
  # Returns an Array of Hashes such as {:word => "simple", :hits => 2}; it is
  # empty when there is nothing to match.
  def parse(args = {})
    options = {
      :dictionary => nil,
      :order => :word,
      :order_direction => :asc,
      :negative_dictionary => []
    }.merge(args)

    text = process_text
    terms = Array(options[:dictionary] || text.split).map(&:to_s).reject(&:empty?).uniq
    # Without terms the pattern would be empty and match between every character.
    return [] if terms.empty?

    # Longest alternatives go first so a short term ("is") cannot shadow a longer
    # one that starts with it ("island"). Terms are escaped so that characters
    # such as "(" or "." are taken literally instead of as regex syntax.
    pattern = terms.sort_by { |term| -term.length }.map { |term| Regexp.escape(term) }.join("|")
    regex = Regexp.new(pattern, Regexp::IGNORECASE)

    # Single pass over the matches; the Hash keeps first-seen order.
    counts = Hash.new(0)
    text.scan(regex) { |match| counts[match.downcase] += 1 }

    excluded = Array(options[:negative_dictionary]).map { |term| term.to_s.downcase }
    result = counts.reject { |word, _| excluded.include?(word) }
                   .map { |word, hits| {:hits => hits, :word => word} }

    # The word is a secondary key so that equal hit counts come back in a
    # deterministic order.
    result = result.sort_by { |entry| [entry[options[:order]], entry[:word]] }
    result.reverse! if options[:order_direction] == :desc
    result
  end

  private

  # Returns the receiver without any character that is not a word character,
  # whitespace or a hyphen. [[:word:]] is used instead of \w because \w only
  # covers ASCII, which would strip accented letters out of words.
  def process_text
    gsub(/[^[:word:]\s\-]/, "")
  end
end
@@ -0,0 +1,8 @@
1
module TextParser
  # Version number of the gem, split into its numeric components.
  module Version
    MAJOR = 0
    MINOR = 1
    PATCH = 0
    # Dotted "MAJOR.MINOR.PATCH" form; frozen so the constant cannot be mutated in place.
    STRING = [MAJOR, MINOR, PATCH].join(".").freeze
  end
end
@@ -0,0 +1,66 @@
1
+ require "test/unit"
2
+ require "string"
3
+ require "text_parser"
4
+
5
# Unit tests for String#parse. Every assert_equal passes the expected value
# first and the actual value second, so failure messages label them correctly.
class TextParserTest < Test::Unit::TestCase

  def test_should_have_method_parse
    assert_respond_to "some text", :parse
  end

  def test_should_parse
    text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque pretium consectetur."
    expected = [{:word => "consectetur", :hits => 2}, {:word => "dolor", :hits => 1}]
    assert_equal expected, text.parse(:dictionary => ["dolor", "consectetur"])
  end

  def test_should_parse_without_dictionary
    text = "test test"
    assert_equal [{:word => "test", :hits => 2}], text.parse
  end

  def test_should_remove_some_characters
    text = "Test? Test. Yes, test!"
    assert_equal [{:word => "test", :hits => 3}, {:word => "yes", :hits => 1}], text.parse
  end

  def test_should_return_an_empty_array
    text = "test"
    assert_equal [], text.parse(:dictionary => ['abc'])
  end

  def test_should_order_by_word_asc
    text = " beta omega gamma alpha gamma"
    expected = [{:word => "alpha", :hits => 1},
                {:word => "beta", :hits => 1},
                {:word => "gamma", :hits => 2},
                {:word => "omega", :hits => 1}]
    # Ascending word order is the default, so all three calls must agree.
    assert_equal expected, text.parse
    assert_equal expected, text.parse(:order => :word)
    assert_equal expected, text.parse(:order => :word, :order_direction => :asc)
  end

  def test_should_order_by_word_desc
    expected = [{:word => "zzz", :hits => 1}, {:word => "aaa", :hits => 1}]
    assert_equal expected, "aaa zzz".parse(:order => :word, :order_direction => :desc)
  end

  def test_should_order_by_hits_asc
    text = "gamma alpha gamma beta alpha gamma"
    expected = [{:word => "beta", :hits => 1},
                {:word => "alpha", :hits => 2},
                {:word => "gamma", :hits => 3}]
    assert_equal expected, text.parse(:order => :hits)
    assert_equal expected, text.parse(:order => :hits, :order_direction => :asc)
  end

  def test_should_order_by_hits_desc
    text = "gamma alpha gamma beta alpha gamma"
    expected = [{:word => "gamma", :hits => 3},
                {:word => "alpha", :hits => 2},
                {:word => "beta", :hits => 1}]
    assert_equal expected, text.parse(:order => :hits, :order_direction => :desc)
  end

  def test_should_ignore_negative_dictionary
    text = "This is good"
    assert_equal [{:word => "good", :hits => 1}], text.parse(:negative_dictionary => ["is", "this"])
  end
end
@@ -0,0 +1,7 @@
1
# Gem specification for text_parser: package metadata plus the files to ship.
Gem::Specification.new do |s|
  s.name = "text_parser"
  s.version = "0.1.0"
  s.author = "Frederico de Paula"
  s.summary = "An easy way to parse text."
  # Ship the library, its tests and the top-level project files.
  s.files = Dir["{lib/**/*.rb,README.rdoc,test/**/*.rb,Rakefile,*.gemspec}"]
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: text_parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Frederico de Paula
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-12-01 00:00:00 -02:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description:
23
+ email:
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - lib/string.rb
32
+ - lib/text_parser/version.rb
33
+ - lib/text_parser.rb
34
+ - README.rdoc
35
+ - test/text_parser_test.rb
36
+ - Rakefile
37
+ - text_parser.gemspec
38
+ has_rdoc: true
39
+ homepage:
40
+ licenses: []
41
+
42
+ post_install_message:
43
+ rdoc_options: []
44
+
45
+ require_paths:
46
+ - lib
47
+ required_ruby_version: !ruby/object:Gem::Requirement
48
+ none: false
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ hash: 3
53
+ segments:
54
+ - 0
55
+ version: "0"
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ hash: 3
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ requirements: []
66
+
67
+ rubyforge_project:
68
+ rubygems_version: 1.4.2
69
+ signing_key:
70
+ specification_version: 3
71
+ summary: An easy way to parse text.
72
+ test_files: []
73
+