nddrylliog_pismo 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
 - data/.gitignore +29 -0
 - data/Gemfile +4 -0
 - data/LICENSE +23 -0
 - data/NOTICE +4 -0
 - data/README.markdown +131 -0
 - data/Rakefile +72 -0
 - data/bin/pismo +45 -0
 - data/lib/pismo.rb +82 -0
 - data/lib/pismo/document.rb +67 -0
 - data/lib/pismo/external_attributes.rb +14 -0
 - data/lib/pismo/internal_attributes.rb +316 -0
 - data/lib/pismo/reader.rb +19 -0
 - data/lib/pismo/reader/base.rb +259 -0
 - data/lib/pismo/reader/cluster.rb +171 -0
 - data/lib/pismo/reader/tree.rb +154 -0
 - data/lib/pismo/stopwords.txt +1002 -0
 - data/lib/pismo/version.rb +3 -0
 - data/pismo.gemspec +30 -0
 - data/test/corpus/bbcnews.html +2131 -0
 - data/test/corpus/bbcnews2.html +1575 -0
 - data/test/corpus/briancray.html +269 -0
 - data/test/corpus/cant_read.html +426 -0
 - data/test/corpus/factor.html +1362 -0
 - data/test/corpus/gmane.html +138 -0
 - data/test/corpus/huffington.html +2932 -0
 - data/test/corpus/metadata_expected.yaml +72 -0
 - data/test/corpus/metadata_expected.yaml.old +122 -0
 - data/test/corpus/queness.html +919 -0
 - data/test/corpus/reader_expected.yaml +39 -0
 - data/test/corpus/readers/cluster_expected.yaml +45 -0
 - data/test/corpus/readers/tree_expected.yaml +55 -0
 - data/test/corpus/rubyinside.html +318 -0
 - data/test/corpus/rww.html +1351 -0
 - data/test/corpus/spolsky.html +298 -0
 - data/test/corpus/techcrunch.html +1285 -0
 - data/test/corpus/tweet.html +360 -0
 - data/test/corpus/youtube.html +2348 -0
 - data/test/corpus/zefrank.html +535 -0
 - data/test/helper.rb +15 -0
 - data/test/test_corpus.rb +54 -0
 - data/test/test_pismo_document.rb +34 -0
 - metadata +156 -0
 
    
        data/test/helper.rb
    ADDED
    
    | 
         @@ -0,0 +1,15 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'rubygems'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'test/unit'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'shoulda'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'open-uri'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'yaml'
         
     | 
| 
      
 6 
     | 
    
         
            +
            begin; require 'turn'; rescue LoadError; end
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
         
     | 
| 
      
 9 
     | 
    
         
            +
            $LOAD_PATH.unshift(File.dirname(__FILE__))
         
     | 
| 
      
 10 
     | 
    
         
            +
            require 'pismo'
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
            # Reopens the base test case so that every test class in the suite
            # shares the same mixin and fixture location.
            class Test::Unit::TestCase
              # Mix Pismo in so tests can refer to its constants (Document,
              # Reader, ...) without the "Pismo::" prefix.
              include Pismo

              # Directory holding the saved HTML pages and expected-result YAML
              # fixtures, resolved relative to this helper file.
              HTML_DIRECTORY = "#{File.dirname(__FILE__)}/corpus"
            end
         
     | 
    
        data/test/test_corpus.rb
    ADDED
    
    | 
         @@ -0,0 +1,54 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # Regression tests that run Pismo over the saved HTML pages found in
            # HTML_DIRECTORY and compare the results with the expectations kept
            # in the YAML fixtures stored alongside them.
            class TestCorpus < Test::Unit::TestCase

              context "A corpus of HTML documents" do
                setup do
                  # Map each corpus file's basename (as a symbol) to its raw HTML.
                  @corpus = {}
                  Dir[HTML_DIRECTORY + "/*.html"].each do |filename|
                    @corpus[File.basename(filename).sub(/\.html$/, '').to_sym] = File.read(filename)
                  end

                  # Load the "expected metadata" ready for tests.
                  # YAML.load_file opens and closes the fixture itself; the
                  # earlier YAML.load(open(...)) left a file handle open after
                  # every setup run.
                  @metadata = YAML.load_file(HTML_DIRECTORY + "/metadata_expected.yaml")
                  @reader_metadata = YAML.load_file(HTML_DIRECTORY + "/reader_expected.yaml")

                  # Map each reader name (as a symbol) to the raw YAML text of
                  # its expectations; the text is parsed by the test using it.
                  @readers = {}
                  Dir[HTML_DIRECTORY + "/readers/*_expected.yaml"].each do |filename|
                    @readers[File.basename(filename).sub(/_expected\.yaml$/, '').to_sym] = File.read(filename)
                  end
                end

                should "pass basic sanitization and result in Nokogiri documents" do
                  @corpus.each do |file, html|
                    doc = Document.new(html)
                    assert doc.html.length > 1000, "#{file}: sanitized HTML is unexpectedly short"
                    assert_kind_of Nokogiri::HTML::Document, doc.doc, "#{file}: not parsed into a Nokogiri document"
                  end
                end

                should "pass metadata extraction tests" do
                  @metadata.each do |file, expected|
                    # Fail with the fixture name rather than an obscure error
                    # from Document when a corpus page is missing.
                    html = @corpus[file]
                    refute_nil_html(file, html)
                    doc = Document.new(html)

                    # Each key names a Document attribute; each value is the
                    # result that attribute is expected to return.
                    expected.each do |attribute, value|
                      assert_equal value, doc.send(attribute), "#{file}: unexpected #{attribute}"
                    end
                  end
                end

                should "pass base reader content extraction tests" do
                  @reader_metadata.each do |file, expected|
                    html = @corpus[file]
                    refute_nil_html(file, html)
                    doc = Reader::Document.create(html)
                    assert_equal expected, doc.sentences(2), "#{file}: unexpected leading sentences"
                  end
                end

                should "pass reader content extraction tests" do
                  @readers.each do |reader, expected|
                    # Expectations map a corpus file name to the body text the
                    # given reader should extract from it.
                    YAML.load(expected).each do |file, body|
                      html = @corpus[file]
                      refute_nil_html(file, html)
                      doc = Document.new(html, :reader => reader)
                      assert_equal body, doc.body, "#{reader} reader, #{file}: unexpected body"
                    end
                  end
                end

              end

              private

              # Asserts that a corpus page referenced by a fixture was loaded,
              # so a missing file is reported by name.
              def refute_nil_html(file, html)
                assert !html.nil?, "no corpus HTML found for #{file.inspect}"
              end
            end
         
     | 
| 
        data/test/test_pismo_document.rb
    ADDED
         @@ -0,0 +1,34 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # Unit tests for Pismo::Document covering the kinds of input it
            # accepts (IO objects and HTML strings) and basic attribute access.
            class TestPismoDocument < Test::Unit::TestCase
              context "Pismo::Document" do
                should "process an IO/File object" do
                  # The block form of File.open closes the handle when the block
                  # exits; the bare open(...) used before never closed it.
                  File.open(HTML_DIRECTORY + "/rubyinside.html") do |io|
                    doc = Document.new(io)
                    assert_kind_of Nokogiri::HTML::Document, doc.doc
                  end
                end
              end

              context "A very basic Pismo document" do
                setup do
                  @doc = Document.new(%{<html><body><h1>Hello</h1></body></html>})
                end

                should "pass sanitization" do
                  assert_equal %{<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">\n<html><body><h1>Hello</h1></body></html>\n}, @doc.html
                end

                should "result in a Nokogiri document" do
                  assert_kind_of Nokogiri::HTML::Document, @doc.doc
                end
              end

              context "A basic real world blog post" do
                setup do
                  # Keep the handle so teardown can release it; the document is
                  # used after setup returns, so it is not closed here.
                  @file = File.open(HTML_DIRECTORY + "/rubyinside.html")
                  @doc = Document.new(@file)
                end

                teardown do
                  @file.close if @file && !@file.closed?
                end

                should "provide a title" do
                  assert_equal "CoffeeScript: A New Language With A Pure Ruby Compiler", @doc.title
                end
              end
            end
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,156 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: nddrylliog_pismo
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.7.3
         
     | 
| 
      
 5 
     | 
    
         
            +
              prerelease: 
         
     | 
| 
      
 6 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 7 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 8 
     | 
    
         
            +
            - Peter Cooper
         
     | 
| 
      
 9 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 10 
     | 
    
         
            +
            bindir: bin
         
     | 
| 
      
 11 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2010-12-19 00:00:00.000000000 Z
         
     | 
| 
      
 13 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 14 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 15 
     | 
    
         
            +
              name: shoulda
         
     | 
| 
      
 16 
     | 
    
         
            +
              requirement: &14086660 !ruby/object:Gem::Requirement
         
     | 
| 
      
 17 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 18 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 19 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 20 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 21 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 22 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 23 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 24 
     | 
    
         
            +
              version_requirements: *14086660
         
     | 
| 
      
 25 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 26 
     | 
    
         
            +
              name: awesome_print
         
     | 
| 
      
 27 
     | 
    
         
            +
              requirement: &14085980 !ruby/object:Gem::Requirement
         
     | 
| 
      
 28 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 29 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 30 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 31 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 32 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 33 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 34 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 35 
     | 
    
         
            +
              version_requirements: *14085980
         
     | 
| 
      
 36 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 37 
     | 
    
         
            +
              name: nokogiri
         
     | 
| 
      
 38 
     | 
    
         
            +
              requirement: &14085420 !ruby/object:Gem::Requirement
         
     | 
| 
      
 39 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 40 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 41 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 42 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 43 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 44 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 45 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 46 
     | 
    
         
            +
              version_requirements: *14085420
         
     | 
| 
      
 47 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 48 
     | 
    
         
            +
              name: sanitize
         
     | 
| 
      
 49 
     | 
    
         
            +
              requirement: &14084820 !ruby/object:Gem::Requirement
         
     | 
| 
      
 50 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 51 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 52 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 53 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 54 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 55 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 56 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 57 
     | 
    
         
            +
              version_requirements: *14084820
         
     | 
| 
      
 58 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 59 
     | 
    
         
            +
              name: fast-stemmer
         
     | 
| 
      
 60 
     | 
    
         
            +
              requirement: &14084200 !ruby/object:Gem::Requirement
         
     | 
| 
      
 61 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 62 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 63 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 64 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 65 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 66 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 67 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 68 
     | 
    
         
            +
              version_requirements: *14084200
         
     | 
| 
      
 69 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 70 
     | 
    
         
            +
              name: chronic
         
     | 
| 
      
 71 
     | 
    
         
            +
              requirement: &14100720 !ruby/object:Gem::Requirement
         
     | 
| 
      
 72 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 73 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 74 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 75 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 76 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 77 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 78 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 79 
     | 
    
         
            +
              version_requirements: *14100720
         
     | 
| 
      
 80 
     | 
    
         
            +
            description: Pismo extracts and retrieves content-related metadata from HTML pages
         
     | 
| 
      
 81 
     | 
    
         
            +
              - you can use the resulting data in an organized way, such as a summary/first paragraph,
         
     | 
| 
      
 82 
     | 
    
         
            +
              body text, keywords, RSS feed URL, favicon, etc.
         
     | 
| 
      
 83 
     | 
    
         
            +
            email:
         
     | 
| 
      
 84 
     | 
    
         
            +
            - git@peterc.org
         
     | 
| 
      
 85 
     | 
    
         
            +
            executables:
         
     | 
| 
      
 86 
     | 
    
         
            +
            - pismo
         
     | 
| 
      
 87 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 88 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 89 
     | 
    
         
            +
            files:
         
     | 
| 
      
 90 
     | 
    
         
            +
            - .document
         
     | 
| 
      
 91 
     | 
    
         
            +
            - .gitignore
         
     | 
| 
      
 92 
     | 
    
         
            +
            - Gemfile
         
     | 
| 
      
 93 
     | 
    
         
            +
            - LICENSE
         
     | 
| 
      
 94 
     | 
    
         
            +
            - NOTICE
         
     | 
| 
      
 95 
     | 
    
         
            +
            - README.markdown
         
     | 
| 
      
 96 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 97 
     | 
    
         
            +
            - bin/pismo
         
     | 
| 
      
 98 
     | 
    
         
            +
            - lib/pismo.rb
         
     | 
| 
      
 99 
     | 
    
         
            +
            - lib/pismo/document.rb
         
     | 
| 
      
 100 
     | 
    
         
            +
            - lib/pismo/external_attributes.rb
         
     | 
| 
      
 101 
     | 
    
         
            +
            - lib/pismo/internal_attributes.rb
         
     | 
| 
      
 102 
     | 
    
         
            +
            - lib/pismo/reader.rb
         
     | 
| 
      
 103 
     | 
    
         
            +
            - lib/pismo/reader/base.rb
         
     | 
| 
      
 104 
     | 
    
         
            +
            - lib/pismo/reader/cluster.rb
         
     | 
| 
      
 105 
     | 
    
         
            +
            - lib/pismo/reader/tree.rb
         
     | 
| 
      
 106 
     | 
    
         
            +
            - lib/pismo/stopwords.txt
         
     | 
| 
      
 107 
     | 
    
         
            +
            - lib/pismo/version.rb
         
     | 
| 
      
 108 
     | 
    
         
            +
            - pismo.gemspec
         
     | 
| 
      
 109 
     | 
    
         
            +
            - test/corpus/bbcnews.html
         
     | 
| 
      
 110 
     | 
    
         
            +
            - test/corpus/bbcnews2.html
         
     | 
| 
      
 111 
     | 
    
         
            +
            - test/corpus/briancray.html
         
     | 
| 
      
 112 
     | 
    
         
            +
            - test/corpus/cant_read.html
         
     | 
| 
      
 113 
     | 
    
         
            +
            - test/corpus/factor.html
         
     | 
| 
      
 114 
     | 
    
         
            +
            - test/corpus/gmane.html
         
     | 
| 
      
 115 
     | 
    
         
            +
            - test/corpus/huffington.html
         
     | 
| 
      
 116 
     | 
    
         
            +
            - test/corpus/metadata_expected.yaml
         
     | 
| 
      
 117 
     | 
    
         
            +
            - test/corpus/metadata_expected.yaml.old
         
     | 
| 
      
 118 
     | 
    
         
            +
            - test/corpus/queness.html
         
     | 
| 
      
 119 
     | 
    
         
            +
            - test/corpus/reader_expected.yaml
         
     | 
| 
      
 120 
     | 
    
         
            +
            - test/corpus/readers/cluster_expected.yaml
         
     | 
| 
      
 121 
     | 
    
         
            +
            - test/corpus/readers/tree_expected.yaml
         
     | 
| 
      
 122 
     | 
    
         
            +
            - test/corpus/rubyinside.html
         
     | 
| 
      
 123 
     | 
    
         
            +
            - test/corpus/rww.html
         
     | 
| 
      
 124 
     | 
    
         
            +
            - test/corpus/spolsky.html
         
     | 
| 
      
 125 
     | 
    
         
            +
            - test/corpus/techcrunch.html
         
     | 
| 
      
 126 
     | 
    
         
            +
            - test/corpus/tweet.html
         
     | 
| 
      
 127 
     | 
    
         
            +
            - test/corpus/youtube.html
         
     | 
| 
      
 128 
     | 
    
         
            +
            - test/corpus/zefrank.html
         
     | 
| 
      
 129 
     | 
    
         
            +
            - test/helper.rb
         
     | 
| 
      
 130 
     | 
    
         
            +
            - test/test_corpus.rb
         
     | 
| 
      
 131 
     | 
    
         
            +
            - test/test_pismo_document.rb
         
     | 
| 
      
 132 
     | 
    
         
            +
            homepage: http://github.com/peterc/pismo
         
     | 
| 
      
 133 
     | 
    
         
            +
            licenses: []
         
     | 
| 
      
 134 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 135 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 136 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 137 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 138 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 139 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 140 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 141 
     | 
    
         
            +
              - - ! '>='
         
     | 
| 
      
 142 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 143 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 144 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 145 
     | 
    
         
            +
              none: false
         
     | 
| 
      
 146 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 147 
     | 
    
         
            +
              - - ! '>='
         
     | 
| 
      
 148 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 149 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 150 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 151 
     | 
    
         
            +
            rubyforge_project: nddrylliog_pismo
         
     | 
| 
      
 152 
     | 
    
         
            +
            rubygems_version: 1.8.17
         
     | 
| 
      
 153 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 154 
     | 
    
         
            +
            specification_version: 3
         
     | 
| 
      
 155 
     | 
    
         
            +
            summary: Extracts or retrieves content-related metadata from HTML pages
         
     | 
| 
      
 156 
     | 
    
         
            +
            test_files: []
         
     |