hypermicrodata 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +17 -0
 - data/.travis.yml +8 -0
 - data/Gemfile +8 -0
 - data/LICENSE.txt +22 -0
 - data/README.md +100 -0
 - data/Rakefile +10 -0
 - data/bin/hypermicrodata.rb +25 -0
 - data/hypermicrodata.gemspec +28 -0
 - data/lib/hypermicrodata.rb +37 -0
 - data/lib/hypermicrodata/document.rb +27 -0
 - data/lib/hypermicrodata/extract.rb +22 -0
 - data/lib/hypermicrodata/item.rb +113 -0
 - data/lib/hypermicrodata/itemprop_parser.rb +114 -0
 - data/lib/hypermicrodata/link.rb +7 -0
 - data/lib/hypermicrodata/property.rb +27 -0
 - data/lib/hypermicrodata/rails/html_based_json_renderer.rb +35 -0
 - data/lib/hypermicrodata/serializer/base.rb +24 -0
 - data/lib/hypermicrodata/serializer/hal.rb +47 -0
 - data/lib/hypermicrodata/serializer/jsonld.rb +44 -0
 - data/lib/hypermicrodata/serializer/uber.rb +100 -0
 - data/lib/hypermicrodata/submit_button.rb +105 -0
 - data/lib/hypermicrodata/version.rb +3 -0
 - data/lib/uberous/uber.rb +104 -0
 - data/test/data/example.html +22 -0
 - data/test/data/example_itemref.html +16 -0
 - data/test/data/example_with_no_itemscope.html +22 -0
 - data/test/test_helper.rb +3 -0
 - data/test/test_itemref.rb +19 -0
 - data/test/test_json.rb +15 -0
 - data/test/test_parse.rb +36 -0
 - metadata +139 -0
 
    
        checksums.yaml
    ADDED
    
    | 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- 
         
     | 
| 
      
 2 
     | 
    
         
            +
            SHA1: 
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 6aa222d1d9f2fd94e7eabda85a111de9b63b17ba
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 624be0e7d6c825c69ed224508f6286da2911cd8e
         
     | 
| 
      
 5 
     | 
    
         
            +
            SHA512: 
         
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 094a2d0285349d16ff74308ce8756d5a2510f67c0ab564bd93112823c488bc0eeee030725feb87fa6b2f89e2ab4805407a2a536ded47262c3125f81ea1cd9901
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 6753e62b18ea5b2e4b0550b5fcaaf2eeb5f3101efbb61c5af745901285a3d4621761e2b75a8201e670274350cab0a5a2f7bebac8570e4e87357a6e581626b700
         
     | 
    
        data/.gitignore
    ADDED
    
    
    
        data/.travis.yml
    ADDED
    
    
    
        data/Gemfile
    ADDED
    
    
    
        data/LICENSE.txt
    ADDED
    
    | 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            Copyright (c) 2013 Jason Ronallo, Toru KAWAMURA
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            MIT License
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            Permission is hereby granted, free of charge, to any person obtaining
         
     | 
| 
      
 6 
     | 
    
         
            +
            a copy of this software and associated documentation files (the
         
     | 
| 
      
 7 
     | 
    
         
            +
            "Software"), to deal in the Software without restriction, including
         
     | 
| 
      
 8 
     | 
    
         
            +
            without limitation the rights to use, copy, modify, merge, publish,
         
     | 
| 
      
 9 
     | 
    
         
            +
            distribute, sublicense, and/or sell copies of the Software, and to
         
     | 
| 
      
 10 
     | 
    
         
            +
            permit persons to whom the Software is furnished to do so, subject to
         
     | 
| 
      
 11 
     | 
    
         
            +
            the following conditions:
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            The above copyright notice and this permission notice shall be
         
     | 
| 
      
 14 
     | 
    
         
            +
            included in all copies or substantial portions of the Software.
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
         
     | 
| 
      
 17 
     | 
    
         
            +
            EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         
     | 
| 
      
 18 
     | 
    
         
            +
            MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
         
     | 
| 
      
 19 
     | 
    
         
            +
            NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
         
     | 
| 
      
 20 
     | 
    
         
            +
            LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
         
     | 
| 
      
 21 
     | 
    
         
            +
            OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
         
     | 
| 
      
 22 
     | 
    
         
            +
            WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
         
     | 
    
        data/README.md
    ADDED
    
    | 
         @@ -0,0 +1,100 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Hypermicrodata
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            Ruby library for extracting HTML5 Microdata with Hypermedia
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            [Build Status](https://travis-ci.org/tkawa/hypermicrodata)
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            ## Story 
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            Most of the code here was extracted from [Mida](https://github.com/LawrenceWoodman/mida) by Lawrence Woodman. This was done in order to have a simpler, more generic Microdata parser without all the vocabulary awareness and other features. This gem is also tested under Ruby 1.9.3 and Ruby 2.0.0, though it could be better tested.
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            ## Installation
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            This library has not been released to RubyGems.org yet; once it is, you will be able to install it as follows.
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            Add this line to your application's Gemfile:
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                gem 'hypermicrodata'
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
            And then execute:
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                $ bundle
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            Or install it yourself as:
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                $ gem install hypermicrodata
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            ## Usage
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            ### Basic
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
            ```
         
     | 
| 
      
 32 
     | 
    
         
            +
            json = Hypermicrodata::Extract.new(html).to_json(:uber)
         
     | 
| 
      
 33 
     | 
    
         
            +
            ```
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
            Supported formats are
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
            - application/vnd.amundsen-uber+json (:uber)
         
     | 
| 
      
 38 
     | 
    
         
            +
            - application/hal+json (:hal)
         
     | 
| 
      
 39 
     | 
    
         
            +
            - application/json (:plain)
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
            ### Rails Integration
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
            When you use this in Rails, you don't need to extract data manually.
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
            /app/controllers/people_controller.rb
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
            ```
         
     | 
| 
      
 48 
     | 
    
         
            +
            class PeopleController < ApplicationController
         
     | 
| 
      
 49 
     | 
    
         
            +
              before_action :set_message, only: %i(show edit update destroy)
         
     | 
| 
      
 50 
     | 
    
         
            +
              include Hypermicrodata::Rails::HtmlBasedJsonRenderer
         
     | 
| 
      
 51 
     | 
    
         
            +
              ...
         
     | 
| 
      
 52 
     | 
    
         
            +
            end
         
     | 
| 
      
 53 
     | 
    
         
            +
            ```
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
            /app/views/people/show.html.haml
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
            ```
         
     | 
| 
      
 58 
     | 
    
         
            +
            .person{itemscope: true, itemtype: 'http://schema.org/Person',
         
     | 
| 
      
 59 
     | 
    
         
            +
                    itemid: person_url(@person), data: {main_item: true}}
         
     | 
| 
      
 60 
     | 
    
         
            +
              .media
         
     | 
| 
      
 61 
     | 
    
         
            +
                .media-image.pull-left
         
     | 
| 
      
 62 
     | 
    
         
            +
                  = image_tag @person.picture_path, alt: '', itemprop: 'image'
         
     | 
| 
      
 63 
     | 
    
         
            +
                .media-body
         
     | 
| 
      
 64 
     | 
    
         
            +
                  %h1.media-heading
         
     | 
| 
      
 65 
     | 
    
         
            +
                    %span{itemprop: 'name'}= @person.name
         
     | 
| 
      
 66 
     | 
    
         
            +
              = link_to 'collection', people_path, rel: 'collection', itemprop: 'isPartOf'
         
     | 
| 
      
 67 
     | 
    
         
            +
            ```
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
            You can then serve the following JSON:
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
            ```
         
     | 
| 
      
 72 
     | 
    
         
            +
            GET /people/1 HTTP/1.1
         
     | 
| 
      
 73 
     | 
    
         
            +
            Host: www.example.com
         
     | 
| 
      
 74 
     | 
    
         
            +
            Accept: application/vnd.amundsen-uber+json
         
     | 
| 
      
 75 
     | 
    
         
            +
            ```
         
     | 
| 
      
 76 
     | 
    
         
            +
             
     | 
| 
      
 77 
     | 
    
         
            +
            ```
         
     | 
| 
      
 78 
     | 
    
         
            +
            {
         
     | 
| 
      
 79 
     | 
    
         
            +
              "uber": {
         
     | 
| 
      
 80 
     | 
    
         
            +
                "version": "1.0",
         
     | 
| 
      
 81 
     | 
    
         
            +
                "data": [{
         
     | 
| 
      
 82 
     | 
    
         
            +
                  "url": "http://www.example.com/people/1",
         
     | 
| 
      
 83 
     | 
    
         
            +
                  "name": "Person",
         
     | 
| 
      
 84 
     | 
    
         
            +
                  "data": [
         
     | 
| 
      
 85 
     | 
    
         
            +
                    { "name": "image", "value": "/assets/bob.png" },
         
     | 
| 
      
 86 
     | 
    
         
            +
                    { "name": "name", "value": "Bob Smith" },
         
     | 
| 
      
 87 
     | 
    
         
            +
                    { "name": "isPartOf", "rel": "collection", "url": "/people" }
         
     | 
| 
      
 88 
     | 
    
         
            +
                  ]
         
     | 
| 
      
 89 
     | 
    
         
            +
                }]
         
     | 
| 
      
 90 
     | 
    
         
            +
              }
         
     | 
| 
      
 91 
     | 
    
         
            +
            }
         
     | 
| 
      
 92 
     | 
    
         
            +
            ```
         
     | 
| 
      
 93 
     | 
    
         
            +
             
     | 
| 
      
 94 
     | 
    
         
            +
            ## Contributing
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
            1. Fork it
         
     | 
| 
      
 97 
     | 
    
         
            +
            2. Create your feature branch (`git checkout -b my-new-feature`)
         
     | 
| 
      
 98 
     | 
    
         
            +
            3. Commit your changes (`git commit -am 'Add some feature'`)
         
     | 
| 
      
 99 
     | 
    
         
            +
            4. Push to the branch (`git push origin my-new-feature`)
         
     | 
| 
      
 100 
     | 
    
         
            +
            5. Create new Pull Request
         
     | 
    
        data/Rakefile
    ADDED
    
    
| 
         @@ -0,0 +1,25 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #! /usr/bin/env ruby

            # hypermicrodata.rb
            # Extract HTML5 Microdata and output JSON
            #
            # Usage: hypermicrodata.rb LOCATION
            #
            # LOCATION is opened through open-uri, so it may be a local path or a URL.
            # NOTE(review): on Rubies where Kernel#open still accepts "| command"
            # strings, LOCATION can spawn a subprocess -- only pass trusted values.
            $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
            require 'hypermicrodata'

            location = ARGV[0]
            abort "Usage: #{File.basename($0)} LOCATION" if location.nil?

            # open-uri stopped overriding Kernel#open in Ruby 3.0, so URLs must go
            # through URI.open where it exists; older Rubies only have Kernel#open.
            opener = URI.respond_to?(:open) ? URI : Kernel

            # Document parses the content inside its constructor, so the handle can be
            # closed (block form) as soon as the document has been built.
            document = opener.open(location) do |content|
              Hypermicrodata::Document.new(content, location)
            end
            items = document.extract_items

            # Check for nil before calling empty? so a nil result cannot raise.
            if items.nil? || items.empty?
              puts "No Microdata items found."
              itemprops = document.doc.search('//*[@itemprop]')
              unless itemprops.empty?
                puts "There are some itemprops, which means no top level items with an itemscope have been found."
              end
            else
              puts JSON.pretty_generate({ :items => items.map(&:to_hash) })
            end
         
     | 
| 
         @@ -0,0 +1,28 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # coding: utf-8
            # Gem specification for hypermicrodata. The lib directory is put on the load
            # path first so the version constant can be required from the source tree.
            lib_dir = File.expand_path('../lib', __FILE__)
            $LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
            require 'hypermicrodata/version'

            Gem::Specification.new do |gem|
              # Identity
              gem.name        = "hypermicrodata"
              gem.version     = Hypermicrodata::VERSION
              gem.authors     = ["Jason Ronallo", "Toru KAWAMURA"]
              gem.email       = ["jronallo@gmail.com", "tkawa@4bit.net"]
              gem.description = %q{HTML5 Microdata extractor with Hypermedia}
              gem.summary     = %q{Ruby library for extracting HTML5 Microdata with Hypermedia}
              gem.homepage    = "https://github.com/tkawa/hypermicrodata"
              gem.license     = "MIT"

              # Packaged files come from whatever git tracks; executables and test
              # files are derived from that list by path prefix.
              tracked_files     = `git ls-files`.split($/)
              gem.files         = tracked_files
              gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
              gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
              gem.require_paths = ["lib"]

              # Runtime dependencies
              ["nokogiri", "mechanize", "halibut", "multi_json"].each do |dependency|
                gem.add_dependency dependency
              end

              # Development-only dependencies
              gem.add_development_dependency "bundler", "~> 1.3"
              gem.add_development_dependency "rake"
            end
         
     | 
| 
         @@ -0,0 +1,37 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "hypermicrodata/version"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "uberous/uber"
         
     | 
| 
      
 3 
     | 
    
         
            +
            require "nokogiri"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require "mechanize"
         
     | 
| 
      
 5 
     | 
    
         
            +
            require "hypermicrodata/item"
         
     | 
| 
      
 6 
     | 
    
         
            +
            require "hypermicrodata/document"
         
     | 
| 
      
 7 
     | 
    
         
            +
            require "hypermicrodata/property"
         
     | 
| 
      
 8 
     | 
    
         
            +
            require "hypermicrodata/link"
         
     | 
| 
      
 9 
     | 
    
         
            +
            require "hypermicrodata/itemprop_parser"
         
     | 
| 
      
 10 
     | 
    
         
            +
            require "hypermicrodata/submit_button"
         
     | 
| 
      
 11 
     | 
    
         
            +
            require "hypermicrodata/serializer/base"
         
     | 
| 
      
 12 
     | 
    
         
            +
            require "hypermicrodata/serializer/hal"
         
     | 
| 
      
 13 
     | 
    
         
            +
            require "hypermicrodata/serializer/uber"
         
     | 
| 
      
 14 
     | 
    
         
            +
            require "hypermicrodata/extract"
         
     | 
| 
      
 15 
     | 
    
         
            +
            require "hypermicrodata/rails/html_based_json_renderer"
         
     | 
| 
      
 16 
     | 
    
         
            +
            require 'open-uri'
         
     | 
| 
      
 17 
     | 
    
         
            +
            require 'json'
         
     | 
| 
      
 18 
     | 
    
         
            +
            require 'uri'
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            module Hypermicrodata

              # Opens +location+ (a local path or a URL, via open-uri), parses it and
              # returns the Microdata items found in it.
              #
              # location - String path or URL. It is also passed to Document as the
              #            page URL.
              #
              # Returns the Array of items collected by Hypermicrodata::Document.
              #
              # NOTE(review): on Rubies where Kernel#open still accepts "| command"
              # strings, +location+ can spawn a subprocess -- only pass trusted values.
              def self.get_items(location)
                # open-uri stopped overriding Kernel#open in Ruby 3.0, so URLs must go
                # through URI.open where it exists; older Rubies only have Kernel#open.
                opener = URI.respond_to?(:open) ? URI : Kernel

                # Block form closes the handle once the document has been parsed.
                # Document extracts its items in the constructor, so read the stored
                # result instead of running the extraction a second time.
                opener.open(location) do |content|
                  Hypermicrodata::Document.new(content, location).items
                end
              end

              # Returns a pretty-printed JSON String of the form {"items": [...]} where
              # each entry is the +to_hash+ of an item found at +location+.
              def self.to_json(location)
                JSON.pretty_generate({ :items => get_items(location).map(&:to_hash) })
              end

            end
         
     | 
| 
         @@ -0,0 +1,27 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Hypermicrodata
              # Parses HTML with Nokogiri and collects its Microdata items.
              class Document

                # items - Array of Item objects built when the document was created.
                # doc   - the parsed Nokogiri document.
                attr_reader :items, :doc

                # content           - HTML handed to Nokogiri::HTML.
                # page_url          - passed unchanged to every Item (may be nil).
                # filter_xpath_attr - optional XPath predicate fragment such as
                #                     "@data-main-item"; when given, only itemscope
                #                     elements matching it are used, if any exist.
                def initialize(content, page_url=nil, filter_xpath_attr=nil)
                  @doc = Nokogiri::HTML(content)
                  @page_url = page_url
                  @filter_xpath_attr = filter_xpath_attr
                  @items = extract_items
                end

                # Builds one Item per selected itemscope element.
                #
                # With a filter, elements matching both the filter and @itemscope are
                # preferred. When none match (or no filter is set) the selection falls
                # back to itemscope elements that do not carry an itemprop attribute.
                #
                # Returns an Array of Item.
                def extract_items
                  itemscopes = []
                  if @filter_xpath_attr
                    itemscopes = @doc.xpath("//*[#{@filter_xpath_attr} and @itemscope]")
                    # Diagnostic goes to stderr so it cannot mix into output (e.g. JSON)
                    # that callers write to stdout.
                    warn "XPath //*[#{@filter_xpath_attr}] is not found. root node is used." if itemscopes.empty?
                  end
                  itemscopes = @doc.xpath('self::*[@itemscope] | .//*[@itemscope and not(@itemprop)]') if itemscopes.empty?

                  itemscopes.map do |itemscope|
                    Item.new(itemscope, @page_url)
                  end
                end

              end
            end
         
     | 
| 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Hypermicrodata
              # Entry point that turns an HTML string into serialized JSON.
              class Extract
                # html    - HTML source handed to Hypermicrodata::Document.
                # options - Hash of settings:
                #           :location       - page URL given to the document and serializers
                #           :profile_path   - passed through to the serializer
                #           :data_attr_name - name (without the "data-" prefix) of the data
                #                             attribute marking the main item;
                #                             defaults to 'main-item'
                def initialize(html, options = {})
                  default_data_attr_name = 'main-item'
                  @location = options[:location]
                  @profile_path = options[:profile_path]
                  filter_xpath_attr = "@data-#{options[:data_attr_name] || default_data_attr_name}"
                  @document = Hypermicrodata::Document.new(html, @location, filter_xpath_attr)
                end

                # Serializes the document.
                #
                # format  - :hal, :uber, or anything else for the base serializer.
                #           String names ('hal', 'uber') are accepted as well.
                # options - forwarded to the serializer's to_json.
                #
                # Returns whatever the selected serializer's to_json returns.
                def to_json(format = :plain, options = {})
                  serializer_class(format).new(@document, @location, @profile_path).to_json(options)
                end

                private

                # Maps a format name to its serializer class; unknown or
                # non-symbolizable values select the base serializer.
                def serializer_class(format)
                  name = format.respond_to?(:to_sym) ? format.to_sym : format
                  case name
                  when :hal  then Hypermicrodata::Serializer::Hal
                  when :uber then Hypermicrodata::Serializer::Uber
                  else            Hypermicrodata::Serializer::Base
                  end
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,113 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Hypermicrodata
              # One microdata item rooted at a single node.
              #
              # On construction the item reads its 'itemtype' and 'itemid' attributes,
              # then walks the elements referenced by 'itemref' and its own child
              # elements, collecting what it finds into two hashes:
              #
              #   properties - name => Array of property objects
              #   links      - rel  => Array of link-like objects
              #
              # The collected objects come from ItempropParser.parse and
              # FormParser.parse; this class only relies on them responding to
              # #names, #rels, #link?, #item and #value.
              # NOTE(review): top_node is presumably a Nokogiri node (it must respond
              # to #attribute, #search, #xpath and #name) - confirm against the caller.
              class Item
                attr_reader :type, :properties, :links, :id

                # top_node - the element that roots this item.
                # page_url - handed to the parsers unchanged.
                def initialize(top_node, page_url)
                  @top_node = top_node
                  @type = extract_itemtype
                  @id   = extract_itemid
                  @properties = {}
                  @links = {}
                  @page_url = page_url
                  add_itemref_properties(@top_node)
                  parse_elements(extract_elements(@top_node))
                end

                # Returns a Hash with :id and :type (each only when present) followed by
                # :properties and :links. Entries that carry a nested item are expanded
                # through that item's own #to_hash; all others contribute their #value.
                def to_hash
                  hash = {}
                  hash[:id] = id if id
                  hash[:type] = type if type
                  hash[:properties] = serialize_entries(properties)
                  hash[:links] = serialize_entries(links)
                  hash
                end

                # Returns every collected property and link object in one flat Array,
                # without duplicates (an object registered under both a name and a rel
                # appears once).
                def all_properties_and_links
                  properties.values.flatten | links.values.flatten
                end

                private

                # Converts a key => [entries] hash into key => [serialized values].
                def serialize_entries(grouped)
                  grouped.each_with_object({}) do |(key, entries), result|
                    result[key] = entries.map do |entry|
                      entry.item ? entry.item.to_hash : entry.value
                    end
                  end
                end

                # Direct child elements of the given node.
                def extract_elements(node)
                  node.search('./*')
                end

                # The 'itemid' attribute value, or nil when the attribute is absent.
                def extract_itemid
                  (value = @top_node.attribute('itemid')) ? value.value : nil
                end

                # The 'itemtype' attribute split on whitespace into an Array, or nil
                # when the attribute is absent.
                def extract_itemtype
                  (value = @top_node.attribute('itemtype')) ? value.value.split(' ') : nil
                end

                def parse_elements(elements)
                  elements.each { |element| parse_element(element) }
                end

                # Registers whatever the element contributes (itemprop / link element /
                # form) and then walks its children.
                def parse_element(element)
                  itemscope = element.attribute('itemscope')
                  itemprop = element.attribute('itemprop')
                  add_itemprop(element) if itemscope || itemprop || ItempropParser::LINK_ELEMENTS.include?(element.name)
                  add_form(element) if element.name == 'form'
                  # Elements carrying 'itemscope' are not descended into; presumably
                  # their children belong to the nested item - verify in ItempropParser.
                  parse_elements(extract_elements(element)) unless itemscope
                end

                # Add an 'itemprop' to the properties
                def add_itemprop(element)
                  property = ItempropParser.parse(element, @page_url)
                  if property.link? && property.names.empty? && property.rels.empty?
                    # A link with neither a name nor a rel is filed under the generic 'link' key.
                    (@links['link'] ||= []) << property
                  else
                    property.names.each { |name| (@properties[name] ||= []) << property }
                    property.rels.each { |rel| (@links[rel] ||= []) << property }
                  end
                end

                # Add any properties referred to by 'itemref'
                def add_itemref_properties(element)
                  itemref = element.attribute('itemref')
                  return unless itemref

                  itemref.value.split(' ').each { |id| parse_elements(find_with_id(id)) }
                end

                # Registers every submit button found in a form, both under its names
                # (as properties) and under its rels (as links). Buttons without any rel
                # are filed under the generic 'submit' key.
                def add_form(element)
                  submit_buttons = FormParser.parse(element, @page_url)
                  submit_buttons.each do |submit_button|
                    submit_button.names.each { |name| (@properties[name] ||= []) << submit_button }
                    if submit_button.rels.empty?
                      (@links['submit'] ||= []) << submit_button
                    else
                      submit_button.rels.each { |rel| (@links[rel] ||= []) << submit_button }
                    end
                  end
                end

                # Find an element with a matching id
                #
                # The id comes straight from the document's 'itemref' attribute, so it
                # is passed as an XPath variable rather than spliced into the expression;
                # a quote character in the id can then neither break nor alter the query.
                def find_with_id(id)
                  @top_node.xpath('//*[@id=$id]', nil, { 'id' => id })
                end

              end
            end
         
     |