RubyGems - text_parser - Versions diffs - 0.1.0 - Mend

text_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

data/README.rdoc ADDED Viewed

@@ -0,0 +1,20 @@
+:title: Text Parser Ruby Gem
+=Arguments
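+* :dictionary => nil - words to count; when nil, every word found in the text is counted
+* :order => :word - key the results are sorted by (:word or :hits)
+* :order_direction => :asc - sort direction (:asc or :desc)
+* :negative_dictionary => [] - words to leave out of the results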
+=Usage
+  "Simple, simple test".parse # => [{:word => "simple", :hits => 2}, {:word => "test", :hits => 1}]
+  my_text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque pretium consectetur."
+  my_text.parse(:dictionary => ["dolor", "consectetur"])
+  # => [{:word => "consectetur", :hits => 2}, {:word => "dolor", :hits => 1}]
+  my_text.parse(:dictionary => ["dolor", "consectetur"], :order => :word, :order_direction => :desc)
+  # => [{:word => "dolor", :hits => 1}, {:word => "consectetur", :hits => 2}]

data/Rakefile ADDED Viewed

@@ -0,0 +1,6 @@
+require "rake/testtask"
# Defines the `test` task: puts lib/ on the load path and runs every file
# matching test/**/*_test.rb.
Rake::TestTask.new do |t|
  t.libs << "lib"
  t.test_files = Dir["test/**/*_test.rb"]
end

# Make a bare `rake` run the test suite instead of aborting because no
# default task exists.
task :default => :test

data/lib/string.rb ADDED Viewed

@@ -0,0 +1,5 @@
+require 'text_parser'
# Mix TextParser into the core String class so that every string gains the
# module's instance methods. `send` is used because Module#include is private
# on older Rubies.
String.send(:include, TextParser)

data/lib/text_parser.rb ADDED Viewed

@@ -0,0 +1,27 @@
# Mixin that adds word-frequency counting to a string-like receiver.
#
# The receiver must respond to +gsub+; the gem includes this module into
# String.
module TextParser
  # Counts how often each dictionary word occurs in the receiver.
  #
  # Punctuation is stripped first (see #process_text), matching is
  # case-insensitive and whole-word only, and reported words are downcased.
  #
  # Options (all optional):
  # * <tt>:dictionary</tt> - word or list of words to look for. When +nil+
  #   (the default) every word of the text is counted. Entries are matched
  #   literally; regex metacharacters in them have no special meaning.
  # * <tt>:order</tt> - key of the result hashes to sort by, <tt>:word</tt>
  #   (default) or <tt>:hits</tt>.
  # * <tt>:order_direction</tt> - <tt>:asc</tt> (default) or <tt>:desc</tt>.
  # * <tt>:negative_dictionary</tt> - words to drop from the result
  #   (compared case-insensitively).
  #
  # Returns an Array of Hashes of the form
  # <tt>{:word => "simple", :hits => 2}</tt>; empty when nothing matches.
  def parse(args = {})
    options = {
      :dictionary => nil,
      :order => :word,
      :order_direction => :asc,
      :negative_dictionary => []
    }.merge(args)

    text = process_text

    # Without an explicit dictionary, every word of the text is its own entry.
    dictionary = Array(options[:dictionary] || text.split(" "))
    dictionary = dictionary.map { |entry| entry.to_s }.reject { |entry| entry.empty? }
    # An empty alternation would match the empty string at every position and
    # report a bogus "" word, so there is nothing to count here.
    return [] if dictionary.empty?

    excluded = Array(options[:negative_dictionary]).map { |entry| entry.to_s.downcase }

    # Escape entries so they are matched literally, and anchor the alternation
    # on both sides so "a" is not counted inside "an" or "test" inside "testing".
    # Hyphens count as part of a word because process_text keeps them.
    alternatives = dictionary.uniq.map { |entry| Regexp.escape(entry) }.join("|")
    regex = /(?<![[:word:]\-])(?:#{alternatives})(?![[:word:]\-])/i

    # Single pass over the matches; the hash keeps first-appearance order.
    counts = Hash.new(0)
    text.scan(regex) { |match| counts[match.downcase] += 1 }

    result = counts.reject { |word, _| excluded.include?(word) }
                   .map { |word, hits| {:hits => hits, :word => word} }
    result = result.sort_by { |entry| entry[options[:order]] }
    result.reverse! if options[:order_direction] == :desc
    result
  end

  private

  # Returns the receiver without punctuation: only word characters,
  # whitespace and hyphens are kept. [[:word:]] is used instead of \w so
  # that non-ASCII letters (e.g. accented characters) survive.
  def process_text
    gsub(/[^[:word:]\s\-]/, "")
  end
end

data/lib/text_parser/version.rb ADDED Viewed

@@ -0,0 +1,8 @@
module TextParser
  # Version number of the text_parser gem, split into its components.
  module Version
    MAJOR = 0
    MINOR = 1
    PATCH = 0
    # Dotted "MAJOR.MINOR.PATCH" form. Frozen so the shared constant cannot be
    # mutated in place by callers.
    STRING = "#{MAJOR}.#{MINOR}.#{PATCH}".freeze
  end
end

data/test/text_parser_test.rb ADDED Viewed

@@ -0,0 +1,66 @@
+require "test/unit"
+require "string"
+require "text_parser"
# Unit tests for String#parse as provided by the TextParser mixin.
#
# assert_equal takes the expected value first and the actual value second;
# keeping that order makes failure messages report the two correctly.
class TextParserTest < Test::Unit::TestCase
  def test_should_have_method_parse
    assert_respond_to "some text", :parse
  end

  def test_should_parse
    text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque pretium consectetur."
    expected = [{:word => "consectetur", :hits => 2}, {:word => "dolor", :hits => 1}]
    assert_equal expected, text.parse(:dictionary => ["dolor", "consectetur"])
  end

  def test_should_parse_without_dictionary
    text = "test test"
    assert_equal [{:word => "test", :hits => 2}], text.parse
  end

  def test_should_remove_some_characters
    text = "Test? Test. Yes, test!"
    assert_equal [{:word => "test", :hits => 3}, {:word => "yes", :hits => 1}], text.parse
  end

  def test_should_return_an_empty_array
    text = "test"
    assert_equal [], text.parse(:dictionary => ['abc'])
  end

  def test_should_order_by_word_asc
    text = " beta omega gamma alpha gamma"
    expected = [{:word => "alpha",  :hits => 1},
                {:word => "beta",   :hits => 1},
                {:word => "gamma",  :hits => 2},
                {:word => "omega",  :hits => 1}]
    assert_equal expected, text.parse
    assert_equal expected, text.parse(:order => :word)
    assert_equal expected, text.parse(:order => :word, :order_direction => :asc)
  end

  def test_should_order_by_word_desc
    expected = [{:word => "zzz",  :hits => 1}, {:word => "aaa",  :hits => 1}]
    assert_equal expected, "aaa zzz".parse(:order => :word, :order_direction => :desc)
  end

  def test_should_order_by_hits_asc
    text = "gamma alpha gamma beta alpha gamma"
    expected = [{:word => "beta",  :hits => 1},
                {:word => "alpha", :hits => 2},
                {:word => "gamma", :hits => 3}]
    assert_equal expected, text.parse(:order => :hits)
    assert_equal expected, text.parse(:order => :hits, :order_direction => :asc)
  end

  def test_should_order_by_hits_desc
    text = "gamma alpha gamma beta alpha gamma"
    expected = [{:word => "gamma",  :hits => 3},
                {:word => "alpha",  :hits => 2},
                {:word => "beta",   :hits => 1}]
    assert_equal expected, text.parse(:order => :hits, :order_direction => :desc)
  end

  def test_should_ignore_negative_dictionary
    text = "This is good"
    assert_equal [{:word => "good",  :hits => 1}], text.parse(:negative_dictionary => ["is", "this"])
  end
end

data/text_parser.gemspec ADDED Viewed

@@ -0,0 +1,7 @@
# Gem specification for text_parser.
Gem::Specification.new do |s|
  s.name        = "text_parser"
  s.version     = "0.1.0"
  s.author      = "Frederico de Paula"
  s.summary     = "An easy way to parse text."
  # Package the library, README, tests, Rakefile and this gemspec.
  s.files       = Dir["{lib/**/*.rb,README.rdoc,test/**/*.rb,Rakefile,*.gemspec}"]
end

metadata ADDED Viewed

@@ -0,0 +1,73 @@
+--- !ruby/object:Gem::Specification
+name: text_parser
+version: !ruby/object:Gem::Version
+  hash: 27
+  prerelease:
+  segments:
+  - 0
+  - 1
+  - 0
+  version: 0.1.0
+platform: ruby
+authors:
+- Frederico de Paula
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-12-01 00:00:00 -02:00
+default_executable:
+dependencies: []
+description:
+email:
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/string.rb
+- lib/text_parser/version.rb
+- lib/text_parser.rb
+- README.rdoc
+- test/text_parser_test.rb
+- Rakefile
+- text_parser.gemspec
+has_rdoc: true
+homepage:
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      hash: 3
+      segments:
+      - 0
+      version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.4.2
+signing_key:
+specification_version: 3
+summary: A easy way to parse text.
+test_files: []