hypermicrodata 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6aa222d1d9f2fd94e7eabda85a111de9b63b17ba
4
+ data.tar.gz: 624be0e7d6c825c69ed224508f6286da2911cd8e
5
+ SHA512:
6
+ metadata.gz: 094a2d0285349d16ff74308ce8756d5a2510f67c0ab564bd93112823c488bc0eeee030725feb87fa6b2f89e2ab4805407a2a536ded47262c3125f81ea1cd9901
7
+ data.tar.gz: 6753e62b18ea5b2e4b0550b5fcaaf2eeb5f3101efbb61c5af745901285a3d4621761e2b75a8201e670274350cab0a5a2f7bebac8570e4e87357a6e581626b700
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ rvm:
3
+ - "1.9.2"
4
+ - "1.9.3"
5
+ - "2.0.0"
6
+ - jruby-19mode # JRuby in 1.9 mode
7
+ # uncomment this line if your project needs to run something other than `rake`:
8
+ script: rake test
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
source "https://rubygems.org"

# Runtime and development dependencies are declared in hypermicrodata.gemspec.
gemspec

# Only needed while running the test suite.
gem "pry", group: :test
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Jason Ronallo, Toru KAWAMURA
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,100 @@
1
+ # Hypermicrodata
2
+
3
+ Ruby library for extracting HTML5 Microdata with Hypermedia
4
+
5
+ [![Build Status](https://travis-ci.org/tkawa/hypermicrodata.png)](https://travis-ci.org/tkawa/hypermicrodata)
6
+
7
+ ## Story
8
+
9
+ Most of the code here was extracted from [Mida](https://github.com/LawrenceWoodman/mida) by Lawrence Woodman. This was done in order to have a simpler, more generic Microdata parser without all the vocabulary awareness and other features. This gem is also tested under Ruby 1.9.3 and Ruby 2.0.0, though it could be better tested.
10
+
11
+ ## Installation
12
+
13
+ This library has not been released to RubyGems.org yet; once it is, it can be installed as follows.
14
+
15
+ Add this line to your application's Gemfile:
16
+
17
+ gem 'hypermicrodata'
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install hypermicrodata
26
+
27
+ ## Usage
28
+
29
+ ### Basic
30
+
31
+ ```
32
+ json = Hypermicrodata::Extract.new(html).to_json(:uber)
33
+ ```
34
+
35
+ Supported formats are
36
+
37
+ - application/vnd.amundsen-uber+json (:uber)
38
+ - application/hal+json (:hal)
39
+ - application/json (:plain)
40
+
41
+ ### Rails Integration
42
+
43
+ When you use this in Rails, you don't need to extract data manually.
44
+
45
+ /app/controllers/people_controller.rb
46
+
47
+ ```
48
+ class PeopleController < ApplicationController
49
+ before_action :set_message, only: %i(show edit update destroy)
50
+ include Hypermicrodata::Rails::HtmlBasedJsonRenderer
51
+ ...
52
+ end
53
+ ```
54
+
55
+ /app/views/people/show.html.haml
56
+
57
+ ```
58
+ .person{itemscope: true, itemtype: 'http://schema.org/Person',
59
+ itemid: person_url(@person), data: {main_item: true}}
60
+ .media
61
+ .media-image.pull-left
62
+ = image_tag @person.picture_path, alt: '', itemprop: 'image'
63
+ .media-body
64
+ %h1.media-heading
65
+ %span{itemprop: 'name'}= @person.name
66
+ = link_to 'collection', people_path, rel: 'collection', itemprop: 'isPartOf'
67
+ ```
68
+
69
+ And you can serve the following JSON:
70
+
71
+ ```
72
+ GET /people/1 HTTP/1.1
73
+ Host: www.example.com
74
+ Accept: application/vnd.amundsen-uber+json
75
+ ```
76
+
77
+ ```
78
+ {
79
+ "uber": {
80
+ "version": "1.0",
81
+ "data": [{
82
+ "url": "http://www.example.com/people/1",
83
+ "name": "Person",
84
+ "data": [
85
+ { "name": "image", "value": "/assets/bob.png" },
86
+ { "name": "name", "value": "Bob Smith" },
87
+ { "name": "isPartOf", "rel": "collection", "url": "/people" }
88
+ ]
89
+ }]
90
+ }
91
+ }
92
+ ```
93
+
94
+ ## Contributing
95
+
96
+ 1. Fork it
97
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
98
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
99
+ 4. Push to the branch (`git push origin my-new-feature`)
100
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
require 'bundler/gem_tasks'
require 'rake/testtask'

# Defines the `test` task with the test directory added to its load path.
Rake::TestTask.new { |test_task| test_task.libs.push('test') }

desc 'Run tests'
task default: :test
@@ -0,0 +1,25 @@
1
#! /usr/bin/env ruby

# hypermicrodata.rb
# Extract HTML5 Microdata and output JSON
#
# Usage: hypermicrodata.rb LOCATION
# LOCATION is opened through open-uri (loaded by the hypermicrodata library)
# and is also used as the page URL of the parsed document.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
require 'hypermicrodata'

location = ARGV[0]
abort "Usage: #{File.basename($0)} LOCATION" unless location

# URI.open is public from Ruby 2.5 on and is the only open-uri entry point on
# Ruby 3.0+; older rubies only provide the Kernel#open override. The block
# form closes the handle as soon as the document has been parsed.
document =
  if URI.respond_to?(:open)
    URI.open(location) { |content| Hypermicrodata::Document.new(content, location) }
  else
    open(location) { |content| Hypermicrodata::Document.new(content, location) }
  end

# Document#initialize has already extracted the items; reuse them instead of
# running the extraction a second time.
items = document.items

if items.empty?
  puts "No Microdata items found."
  itemprops = document.doc.search('//*[@itemprop]')
  unless itemprops.empty?
    puts "There are some itemprops, which means no top level items with an itemscope have been found."
  end
else
  puts JSON.pretty_generate(:items => items.map { |item| item.to_hash })
end
@@ -0,0 +1,28 @@
1
# coding: utf-8
# Make lib/ loadable so the version constant can be read below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'hypermicrodata/version'

Gem::Specification.new do |gem|
  gem.name          = 'hypermicrodata'
  gem.version       = Hypermicrodata::VERSION
  gem.license       = 'MIT'
  gem.homepage      = 'https://github.com/tkawa/hypermicrodata'
  gem.authors       = ['Jason Ronallo', 'Toru KAWAMURA']
  gem.email         = ['jronallo@gmail.com', 'tkawa@4bit.net']
  gem.summary       = 'Ruby library for extracting HTML5 Microdata with Hypermedia'
  gem.description   = 'HTML5 Microdata extractor with Hypermedia'

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  tracked_files = `git ls-files`.split($/)
  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Runtime dependencies (no version constraints).
  ['nokogiri', 'mechanize', 'halibut', 'multi_json'].each do |dependency|
    gem.add_dependency dependency
  end

  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
end
@@ -0,0 +1,37 @@
1
+ require "hypermicrodata/version"
2
+ require "uberous/uber"
3
+ require "nokogiri"
4
+ require "mechanize"
5
+ require "hypermicrodata/item"
6
+ require "hypermicrodata/document"
7
+ require "hypermicrodata/property"
8
+ require "hypermicrodata/link"
9
+ require "hypermicrodata/itemprop_parser"
10
+ require "hypermicrodata/submit_button"
11
+ require "hypermicrodata/serializer/base"
12
+ require "hypermicrodata/serializer/hal"
13
+ require "hypermicrodata/serializer/uber"
14
+ require "hypermicrodata/extract"
15
+ require "hypermicrodata/rails/html_based_json_renderer"
16
+ require 'open-uri'
17
+ require 'json'
18
+ require 'uri'
19
+
20
module Hypermicrodata

  # Opens +location+, parses it and returns the extracted top-level items.
  #
  # location - anything open-uri can open; it is also used as the page URL
  #            of the parsed document.
  #
  # Returns the Array of items built by Hypermicrodata::Document.
  #
  # NOTE(review): +location+ ends up in Kernel#open for non-URL strings, which
  # on many Ruby versions treats a leading "|" as a command to run. Do not
  # pass untrusted input here.
  def self.get_items(location)
    open_location(location) do |content|
      # Document#initialize already runs the extraction; read the stored
      # result instead of extracting a second time.
      Hypermicrodata::Document.new(content, location).items
    end
  end

  # Returns a pretty-printed JSON String of the form {"items": [...]}, with
  # one entry (item.to_hash) per item found at +location+.
  def self.to_json(location)
    JSON.pretty_generate(:items => get_items(location).map { |item| item.to_hash })
  end

  # Yields an open handle for +location+ and closes it when the block returns.
  # URI.open is public from Ruby 2.5 on and is the only open-uri entry point
  # on Ruby 3.0+; older rubies only provide the Kernel#open override.
  def self.open_location(location, &block)
    if URI.respond_to?(:open)
      URI.open(location, &block)
    else
      open(location, &block)
    end
  end
  private_class_method :open_location

end
@@ -0,0 +1,27 @@
1
module Hypermicrodata
  # An HTML document parsed with Nokogiri, together with the Microdata items
  # found in it.
  class Document

    # XPath used when no filter is given or the filter matches nothing:
    # every itemscope element that is not itself a property of another item.
    TOP_LEVEL_ITEMS_XPATH = 'self::*[@itemscope] | .//*[@itemscope and not(@itemprop)]'

    attr_reader :items, :doc

    # content           - HTML handed to Nokogiri::HTML
    # page_url          - passed unchanged to every Item that is built
    # filter_xpath_attr - optional XPath predicate (e.g. "@data-main-item");
    #                     when given, only itemscope elements matching it are
    #                     used, provided at least one matches
    def initialize(content, page_url=nil, filter_xpath_attr=nil)
      @doc = Nokogiri::HTML(content)
      @page_url = page_url
      @filter_xpath_attr = filter_xpath_attr
      @items = extract_items
    end

    # Builds a fresh Array of Item objects. The same extraction runs once in
    # the constructor; its result is available through #items.
    def extract_items
      itemscopes = filtered_itemscopes
      itemscopes = @doc.xpath(TOP_LEVEL_ITEMS_XPATH) if itemscopes.empty?

      itemscopes.map { |itemscope| Item.new(itemscope, @page_url) }
    end

    private

    # Returns the itemscope elements selected by the filter, or an empty list
    # when no filter was configured.
    def filtered_itemscopes
      return [] unless @filter_xpath_attr

      xpath = "//*[#{@filter_xpath_attr} and @itemscope]"
      matches = @doc.xpath(xpath)
      # Diagnostics go to stderr so that JSON written to stdout stays clean;
      # the message names the expression that was actually evaluated.
      warn "XPath #{xpath} is not found. root node is used." if matches.empty?
      matches
    end

  end
end
@@ -0,0 +1,22 @@
1
module Hypermicrodata
  # Builds a Document from an HTML string and serializes it to JSON in one of
  # the supported formats.
  class Extract
    # html    - HTML source handed to Hypermicrodata::Document
    # options - :location       page URL forwarded to the document/serializer
    #           :profile_path   forwarded to the serializer
    #           :data_attr_name name of the data-* attribute used as the item
    #                           filter (defaults to 'main-item')
    def initialize(html, options = {})
      @location = options[:location]
      @profile_path = options[:profile_path]
      attr_name = options[:data_attr_name] || 'main-item'
      @document = Hypermicrodata::Document.new(html, @location, "@data-#{attr_name}")
    end

    # format  - :hal, :uber, or anything else for the base serializer
    # options - passed through to the serializer's to_json
    def to_json(format = :plain, options = {})
      serializer_class =
        case format
        when :hal then Hypermicrodata::Serializer::Hal
        when :uber then Hypermicrodata::Serializer::Uber
        else Hypermicrodata::Serializer::Base
        end

      serializer_class.new(@document, @location, @profile_path).to_json(options)
    end
  end
end
@@ -0,0 +1,113 @@
1
module Hypermicrodata
  # One Microdata item: an element together with the properties and links
  # collected from its descendants and from elements named by its itemref.
  class Item
    attr_reader :type, :properties, :links, :id

    # top_node - the element the item is rooted at
    # page_url - passed unchanged to ItempropParser and FormParser
    def initialize(top_node, page_url)
      @top_node = top_node
      @type = extract_itemtype
      @id = extract_itemid
      @properties = {}
      @links = {}
      @page_url = page_url
      add_itemref_properties(@top_node)
      parse_elements(extract_elements(@top_node))
    end

    # Returns a Hash with :id and :type (only when present) plus :properties
    # and :links, each mapping a name/rel to an Array of values. Entries that
    # carry a nested item are expanded through that item's own to_hash.
    def to_hash
      hash = {}
      hash[:id] = id if id
      hash[:type] = type if type
      hash[:properties] = serialize_groups(properties)
      hash[:links] = serialize_groups(links)
      hash
    end

    # Every property and link object of this item, without duplicates.
    def all_properties_and_links
      properties.values.flatten | links.values.flatten
    end

    private

    # Turns {key => [entry, ...]} into {key => [value or nested hash, ...]}.
    def serialize_groups(groups)
      groups.each_with_object({}) do |(key, entries), result|
        result[key] = entries.map do |entry|
          entry.item ? entry.item.to_hash : entry.value
        end
      end
    end

    # Direct child elements of +node+.
    def extract_elements(node)
      node.search('./*')
    end

    # Value of the itemid attribute, or nil when it is absent.
    def extract_itemid
      attribute = @top_node.attribute('itemid')
      attribute ? attribute.value : nil
    end

    # Space-separated itemtype values as an Array, or nil when absent.
    def extract_itemtype
      attribute = @top_node.attribute('itemtype')
      attribute ? attribute.value.split(' ') : nil
    end

    def parse_elements(elements)
      elements.each { |element| parse_element(element) }
    end

    # Records +element+ when it is a property, a nested item, a link element
    # or a form, then descends into its children. Children of a nested
    # itemscope are skipped here.
    def parse_element(element)
      itemscope = element.attribute('itemscope')
      itemprop = element.attribute('itemprop')
      internal_elements = extract_elements(element)
      add_itemprop(element) if itemscope || itemprop || ItempropParser::LINK_ELEMENTS.include?(element.name)
      add_form(element) if element.name == 'form'
      parse_elements(internal_elements) if internal_elements && !itemscope
    end

    # Add an 'itemprop' to the properties
    def add_itemprop(element)
      property = ItempropParser.parse(element, @page_url)
      if property.link? && property.names.empty? && property.rels.empty?
        # A link with neither a name nor a rel is filed under 'link'.
        (@links['link'] ||= []) << property
      else
        property.names.each { |name| (@properties[name] ||= []) << property }
        property.rels.each { |rel| (@links[rel] ||= []) << property }
      end
    end

    # Add any properties referred to by 'itemref'
    def add_itemref_properties(element)
      itemref = element.attribute('itemref')
      return unless itemref

      itemref.value.split(' ').each { |id| parse_elements(find_with_id(id)) }
    end

    # Registers every submit button FormParser finds in +element+ under its
    # names and rels; buttons without a rel are filed under 'submit'.
    def add_form(element)
      submit_buttons = FormParser.parse(element, @page_url)
      submit_buttons.each do |submit_button|
        submit_button.names.each { |name| (@properties[name] ||= []) << submit_button }
        if submit_button.rels.empty?
          (@links['submit'] ||= []) << submit_button
        else
          submit_button.rels.each { |rel| (@links[rel] ||= []) << submit_button }
        end
      end
    end

    # Find the elements with a matching id
    #
    # The id is compared in Ruby rather than interpolated into the XPath
    # expression: an itemref token containing a quote character would
    # otherwise produce an invalid expression and abort the extraction.
    def find_with_id(id)
      @top_node.search('//*[@id]').select { |element| element['id'] == id }
    end

  end
end