RubyGems - regularfry-celerity_parser - Versions diffs - 0.1.1-java - Mend

regularfry-celerity_parser 0.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

data/.document +5 -0
data/.gitignore +5 -0
data/LICENSE +20 -0
data/README.rdoc +57 -0
data/Rakefile +60 -0
data/VERSION +1 -0
data/celerity_parser.gemspec +57 -0
data/lib/celerity_parser/html_node.rb +47 -0
data/lib/celerity_parser/html_parser.rb +24 -0
data/lib/celerity_parser.rb +56 -0
data/test/helper.rb +5 -0
data/test/test_celerity_parser.rb +0 -0
data/test/test_html_node.rb +39 -0
data/test/test_html_parser.rb +55 -0
metadata +79 -0

data/.document ADDED Viewed

@@ -0,0 +1,5 @@
+README.rdoc
+lib/**/*.rb
+bin/*
+features/**/*.feature
+LICENSE

data/.gitignore ADDED Viewed

@@ -0,0 +1,5 @@
+*.sw?
+.DS_Store
+coverage
+rdoc
+pkg

data/LICENSE ADDED Viewed

@@ -0,0 +1,20 @@
+Copyright (c) 2009 Alex Young
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.rdoc ADDED Viewed

@@ -0,0 +1,57 @@
+celerity_parser
+    by Alex Young
+    http://github.com/regularfry/celerity_parser
+== DESCRIPTION:
+A very thin wrapper around HtmlUnit's HTML parser to allow xpath searches of
+HTML documents for JRuby. At the moment there's not much here. I don't know yet
+whether it's worth expanding the API to a full nokogiri/hpricot-style
+implementation.  What's here works for what I need for now.
+== FEATURES/PROBLEMS:
+* Reuses HtmlUnit's parser as wrapped by celerity to provide a stable HTML
+  parsing capability.
+== SYNOPSIS:
+Basic use:
+    root_node = CelerityParser.parse(html_content)
+    found_elements = root_node.search("//html/head/title")
+    found_elements.first.text # => "Html page title"
+== REQUIREMENTS:
+    $ jruby -S gem install jarib-celerity --source=http://gems.github.com
+== INSTALL:
+    $ jruby -S gem install regularfry-celerity_parser --source=http://gems.github.com
+== LICENSE:
+(The MIT License)
+Copyright (c) 2009 Alex Young
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/Rakefile ADDED Viewed

@@ -0,0 +1,60 @@
+require 'rubygems'
+require 'rake'
+begin
+  require 'jeweler'
+  Jeweler::Tasks.new do |gem|
+    gem.name = "celerity_parser"
+    gem.summary = %Q{Thin wrapper around a native Java HTML parser}
+    gem.email = "alex@blackkettle.org"
+    gem.homepage = "http://github.com/regularfry/celerity_parser"
+    gem.authors = ["Alex Young"]
+    gem.rubyforge_project = "celerity_parser"
+    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
+    gem.add_dependency 'jarib-celerity', '>= 0.0.6.10'
+    gem.platform = "java"
+  end
+  Jeweler::RubyforgeTasks.new
+rescue LoadError
+  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
+end
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+end
+begin
+  require 'rcov/rcovtask'
+  Rcov::RcovTask.new do |test|
+    test.libs << 'test'
+    test.pattern = 'test/**/*_test.rb'
+    test.verbose = true
+  end
+rescue LoadError
+  task :rcov do
+    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
+  end
+end
+task :default => :test
+require 'rake/rdoctask'
+Rake::RDocTask.new do |rdoc|
+  if File.exist?('VERSION.yml')
+    config = YAML.load(File.read('VERSION.yml'))
+    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
+  else
+    version = ""
+  end
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "celerity_parser #{version}"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end

data/VERSION ADDED Viewed

@@ -0,0 +1 @@
+0.1.1

data/celerity_parser.gemspec ADDED Viewed

@@ -0,0 +1,57 @@
+# -*- encoding: utf-8 -*-
+Gem::Specification.new do |s| # Gem metadata, packaged file list and dependency declaration for celerity_parser.
+  s.name = %q{celerity_parser}
+  s.version = "0.1.1"
+  s.platform = %q{java} # platform-specific build: java
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= # set only when this RubyGems exposes the setter
+  s.authors = ["Alex Young"]
+  s.date = %q{2009-06-22}
+  s.email = %q{alex@blackkettle.org}
+  s.extra_rdoc_files = [
+    "LICENSE",
+     "README.rdoc"
+  ]
+  s.files = [
+    ".document",
+     ".gitignore",
+     "LICENSE",
+     "README.rdoc",
+     "Rakefile",
+     "VERSION",
+     "celerity_parser.gemspec",
+     "lib/celerity_parser.rb",
+     "lib/celerity_parser/html_node.rb",
+     "lib/celerity_parser/html_parser.rb",
+     "test/helper.rb",
+     "test/test_celerity_parser.rb",
+     "test/test_html_node.rb",
+     "test/test_html_parser.rb"
+  ]
+  s.homepage = %q{http://github.com/regularfry/celerity_parser}
+  s.rdoc_options = ["--charset=UTF-8"]
+  s.require_paths = ["lib"]
+  s.rubyforge_project = %q{celerity_parser}
+  s.rubygems_version = %q{1.3.3}
+  s.summary = %q{Thin wrapper around a native Java HTML parser}
+  s.test_files = [
+    "test/helper.rb",
+     "test/test_html_node.rb",
+     "test/test_html_parser.rb",
+     "test/test_celerity_parser.rb"
+  ]
+  if s.respond_to? :specification_version then # RubyGems that understands specification versions
+    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION # NOTE(review): assigned but never read in this block
+    s.specification_version = 3
+    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then # RubyGems >= 1.2.0: declare an explicit runtime dependency
+      s.add_runtime_dependency(%q<jarib-celerity>, [">= 0.0.6.10"])
+    else # older RubyGems: fall back to add_dependency
+      s.add_dependency(%q<jarib-celerity>, [">= 0.0.6.10"])
+    end
+  else # no specification_version support: same requirement via add_dependency
+    s.add_dependency(%q<jarib-celerity>, [">= 0.0.6.10"])
+  end
+end

data/lib/celerity_parser/html_node.rb ADDED Viewed

@@ -0,0 +1,47 @@
+module CelerityParser
+  class HtmlNode# {{{
+    # This extra entry point will allow me to insert different Node
+    # classes in the future if necessary.
+    def self.wrap(java_node)
+      new(java_node)
+    end
+    # Not called by user code.
+    def initialize(java_node)
+      @java_node = java_node
+    end
+    # Perform an xpath search from this node and return an array
+    # of HtmlNodes representing the results.
+    def search(xpath)
+      wrap_java_nodes(@java_node.get_by_xpath(xpath))
+    end
+    # The innerText of the node.
+    def text
+      @java_node.as_text
+    end
+    # HtmlNodes represent both tags and attributes, but attributes
+    # have value rather than text.
+    def value
+      @java_node.value
+    end
+    # The XHTML representation of this node.
+    def html
+      @java_node.as_xml
+    end
+    def wrap_java_nodes(java_results)
+      java_results.map do |java_node|
+        HtmlNode.wrap(java_node)
+      end
+    end
+    private :wrap_java_nodes
+  end # }}}
+end

data/lib/celerity_parser/html_parser.rb ADDED Viewed

@@ -0,0 +1,24 @@
+require 'celerity_parser/html_node'
+require 'celerity'
+module CelerityParser
+  class HtmlParser# {{{
+    def self.get_window(title)
+      client = Java.com.gargoylesoftware.htmlunit.WebClient.new()
+      return Java.com.gargoylesoftware.htmlunit.TopLevelWindow.new(title, client)
+    end
+    def self.get_wrapped_content(content, href=nil)
+      url = java.net.URL.new(href || "http://example.com")
+      return Java.com.gargoylesoftware.htmlunit.StringWebResponse.new(content, url)
+    end
+    def self.parse(content, href=nil)
+      raise ArgumentError.new("Nil content!") if !content
+      window = self.get_window("window")
+      wrapped_content = self.get_wrapped_content(content, href)
+      return HtmlNode.wrap(Java.com.gargoylesoftware.htmlunit.html.HTMLParser.parse(wrapped_content, window))
+    end
+  end# }}}
+end

data/lib/celerity_parser.rb ADDED Viewed

@@ -0,0 +1,56 @@
+require 'celerity'
+module CelerityParser
+  # :stopdoc:
+  VERSION = '1.0.0'
+  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
+  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
+  # :startdoc:
+  # Returns the version string for the library.
+  #
+  def self.version
+    VERSION
+  end
+  # Returns the library path for the module. If any arguments are given,
+  # they will be joined to the end of the libray path using
+  # <tt>File.join</tt>.
+  #
+  def self.libpath( *args )
+    args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten)
+  end
+  # Returns the lpath for the module. If any arguments are given,
+  # they will be joined to the end of the path using
+  # <tt>File.join</tt>.
+  #
+  def self.path( *args )
+    args.empty? ? PATH : ::File.join(PATH, args.flatten)
+  end
+  # Utility method used to require all files ending in .rb that lie in the
+  # directory below this file that has the same name as the filename passed
+  # in. Optionally, a specific _directory_ name can be passed in such that
+  # the _filename_ does not have to be equivalent to the directory.
+  #
+  def self.require_all_libs_relative_to( fname, dir = nil )
+    dir ||= ::File.basename(fname, '.*')
+    search_me = ::File.expand_path(
+        ::File.join(::File.dirname(fname), dir, '**', '*.rb'))
+    Dir.glob(search_me).sort.each {|rb| require rb}
+  end
+  # The entry point. Pass the html string as content, with an optional
+  # href to use for resolving relative URLs.
+  def self.parse(content, href=nil)
+    return CelerityParser::HtmlParser.parse(content, href)
+  end
+end  # module CelerityParser
+CelerityParser.require_all_libs_relative_to(__FILE__)
+# EOF

data/test/helper.rb ADDED Viewed

@@ -0,0 +1,5 @@
+$LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
+require 'rubygems'
+require 'test/unit'
+require 'mocha'
+gem 'jarib-celerity'

data/test/test_celerity_parser.rb ADDED Viewed

File without changes

data/test/test_html_node.rb ADDED Viewed

@@ -0,0 +1,39 @@
+require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
+require 'celerity_parser/html_node'
+class TestHtmlNode < Test::Unit::TestCase
+  include CelerityParser
+  def setup
+    @java_node = stub("(java_node)")
+    @node = HtmlNode.new(@java_node)
+  end
+  def test_search
+    xpath = "//example/xpath"
+    @java_node.expects(:get_by_xpath).with(xpath).returns([])
+    @node.search(xpath)
+  end
+  def test_wrap
+    HtmlNode.any_instance.expects(:initialize).with(@java_node)
+    HtmlNode.wrap(@java_node)
+  end
+  def test_text
+    @java_node.expects(:as_text).returns("")
+    @node.text
+  end
+  def test_value
+    @java_node.expects(:value).returns(nil)
+    @node.value
+  end
+  def test_html
+    @java_node.expects(:as_xml).returns("")
+    @node.html
+  end
+end

data/test/test_html_parser.rb ADDED Viewed

@@ -0,0 +1,55 @@
+require File.expand_path(File.join(File.dirname(__FILE__), 'helper'))
+require 'celerity_parser/html_parser'
+class TestHtmlParser < Test::Unit::TestCase
+  include CelerityParser
+  def test_nil_content_raises
+    assert_raises(ArgumentError) do
+      HtmlParser.parse(nil)
+    end
+  end
+  def test_get_window
+    assert_not_nil HtmlParser.get_window("foo title")
+  end
+  def test_get_wrapped_content
+    content = "<html></html>"
+    assert_not_nil HtmlParser.get_wrapped_content(content)
+  end
+  def test_parse_simple_content
+    content = "<html><head></head><body></body></html>"
+    assert_not_nil HtmlParser.parse(content)
+  end
+  ###
+  # This is something of an integration test
+  def test_parse_to_xpath
+    content = <<-HTML
+    <html>
+      <head>
+        <title>HTML Page Title</title>
+      </head>
+      <body>
+        <ul>
+          <li>Foo</li>
+          <li>Bar</li>
+        </ul>
+      </body>
+    </html>
+    HTML
+    root_node = HtmlParser.parse(content)
+    flunk "Bad root node" if root_node.nil?
+    xpath = "//li"
+    search_results = root_node.search(xpath)
+    assert_equal 2, search_results.length
+    assert_equal ["Foo", "Bar"], search_results.map{|r| r.text}
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,79 @@
+--- !ruby/object:Gem::Specification
+name: regularfry-celerity_parser
+version: !ruby/object:Gem::Version
+  version: 0.1.1
+platform: java
+authors:
+- Alex Young
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2009-06-22 00:00:00 -07:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: jarib-celerity
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.0.6.10
+    version:
+description:
+email: alex@blackkettle.org
+executables: []
+extensions: []
+extra_rdoc_files:
+- LICENSE
+- README.rdoc
+files:
+- .document
+- .gitignore
+- LICENSE
+- README.rdoc
+- Rakefile
+- VERSION
+- celerity_parser.gemspec
+- lib/celerity_parser.rb
+- lib/celerity_parser/html_node.rb
+- lib/celerity_parser/html_parser.rb
+- test/helper.rb
+- test/test_celerity_parser.rb
+- test/test_html_node.rb
+- test/test_html_parser.rb
+has_rdoc: false
+homepage: http://github.com/regularfry/celerity_parser
+post_install_message:
+rdoc_options:
+- --charset=UTF-8
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project: celerity_parser
+rubygems_version: 1.2.0
+signing_key:
+specification_version: 3
+summary: Thin wrapper around a native Java HTML parser
+test_files:
+- test/helper.rb
+- test/test_html_node.rb
+- test/test_html_parser.rb
+- test/test_celerity_parser.rb