hypermicrodata 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6aa222d1d9f2fd94e7eabda85a111de9b63b17ba
4
+ data.tar.gz: 624be0e7d6c825c69ed224508f6286da2911cd8e
5
+ SHA512:
6
+ metadata.gz: 094a2d0285349d16ff74308ce8756d5a2510f67c0ab564bd93112823c488bc0eeee030725feb87fa6b2f89e2ab4805407a2a536ded47262c3125f81ea1cd9901
7
+ data.tar.gz: 6753e62b18ea5b2e4b0550b5fcaaf2eeb5f3101efbb61c5af745901285a3d4621761e2b75a8201e670274350cab0a5a2f7bebac8570e4e87357a6e581626b700
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ rvm:
3
+ - "1.9.2"
4
+ - "1.9.3"
5
+ - "2.0.0"
6
+ - jruby-19mode # JRuby in 1.9 mode
7
+ # run the test suite with `rake test` instead of the default `rake`:
8
+ script: rake test
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
source 'https://rubygems.org'

# Dependencies are declared in hypermicrodata.gemspec and pulled in from there.
gemspec

# pry is installed only as part of the :test group.
gem 'pry', :group => :test
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Jason Ronallo, Toru KAWAMURA
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,100 @@
1
+ # Hypermicrodata
2
+
3
+ Ruby library for extracting HTML5 Microdata with Hypermedia
4
+
5
+ [![Build Status](https://travis-ci.org/tkawa/hypermicrodata.png)](https://travis-ci.org/tkawa/hypermicrodata)
6
+
7
+ ## Story
8
+
9
+ Most of the code here was extracted from [Mida](https://github.com/LawrenceWoodman/mida) by Lawrence Woodman. This was done in order to have a simpler, more generic Microdata parser without all the vocabulary awareness and other features. This gem is also tested under Ruby 1.9.3 and Ruby 2.0.0, though it could be better tested.
10
+
11
+ ## Installation
12
+
13
+ This library has not been released to RubyGems.org yet; once it is, you will be able to install it as follows.
14
+
15
+ Add this line to your application's Gemfile:
16
+
17
+ gem 'hypermicrodata'
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install hypermicrodata
26
+
27
+ ## Usage
28
+
29
+ ### Basic
30
+
31
+ ```
32
+ json = Hypermicrodata::Extract.new(html).to_json(:uber)
33
+ ```
34
+
35
+ Supported formats are
36
+
37
+ - application/vnd.amundsen-uber+json (:uber)
38
+ - application/hal+json (:hal)
39
+ - application/json (:plain)
40
+
41
+ ### Rails Integration
42
+
43
+ When you use this in Rails, you don't need to extract data manually.
44
+
45
+ /app/controllers/people_controller.rb
46
+
47
+ ```
48
+ class PeopleController < ApplicationController
49
+ before_action :set_message, only: %i(show edit update destroy)
50
+ include Hypermicrodata::Rails::HtmlBasedJsonRenderer
51
+ ...
52
+ end
53
+ ```
54
+
55
+ /app/views/people/show.html.haml
56
+
57
+ ```
58
+ .person{itemscope: true, itemtype: 'http://schema.org/Person',
59
+ itemid: person_url(@person), data: {main_item: true}}
60
+ .media
61
+ .media-image.pull-left
62
+ = image_tag @person.picture_path, alt: '', itemprop: 'image'
63
+ .media-body
64
+ %h1.media-heading
65
+ %span{itemprop: 'name'}= @person.name
66
+ = link_to 'collection', people_path, rel: 'collection', itemprop: 'isPartOf'
67
+ ```
68
+
69
+ And you can serve the following JSON:
70
+
71
+ ```
72
+ GET /people/1 HTTP/1.1
73
+ Host: www.example.com
74
+ Accept: application/vnd.amundsen-uber+json
75
+ ```
76
+
77
+ ```
78
+ {
79
+ "uber": {
80
+ "version": "1.0",
81
+ "data": [{
82
+ "url": "http://www.example.com/people/1",
83
+ "name": "Person",
84
+ "data": [
85
+ { "name": "image", "value": "/assets/bob.png" },
86
+ { "name": "name", "value": "Bob Smith" },
87
+ { "name": "isPartOf", "rel": "collection", "url": "/people" }
88
+ ]
89
+ }]
90
+ }
91
+ }
92
+ ```
93
+
94
+ ## Contributing
95
+
96
+ 1. Fork it
97
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
98
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
99
+ 4. Push to the branch (`git push origin my-new-feature`)
100
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+
5
# Define the test task and put the 'test' directory on its load path.
Rake::TestTask.new { |test_task| test_task.libs << 'test' }

# Running plain `rake` runs the test task.
desc "Run tests"
task default: :test
@@ -0,0 +1,25 @@
1
#! /usr/bin/env ruby

# hypermicrodata.rb
# Extract HTML5 Microdata and output JSON
#
# Usage: hypermicrodata <url-or-file>
#
# Prints the items found at the given location as pretty-printed JSON, or a
# short diagnostic when no top-level item is found.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
require 'hypermicrodata'

location = ARGV[0]
abort "Usage: #{File.basename($PROGRAM_NAME)} <url-or-file>" if location.nil? || location.empty?

# Parse the document while the underlying IO is open; the block form closes it.
build_document = lambda { |io| Hypermicrodata::Document.new(io, location) }

# Do not hand the raw argument to Kernel#open: a value starting with "|" would
# be run as a subprocess, and Kernel#open no longer opens URLs on Ruby 3+.
# Strings with a URI scheme go through open-uri; everything else is a local path.
document =
  if location =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://} && (uri = URI.parse(location)).respond_to?(:open)
    uri.open(&build_document)
  else
    File.open(location, &build_document)
  end

# Document#initialize has already extracted the items; reuse them.
items = document.items

if items.nil? || items.empty?
  puts "No Microdata items found."
  itemprops = document.doc.search('//*[@itemprop]')
  unless itemprops.empty?
    puts "There are some itemprops, which means no top level items with an itemscope have been found."
  end
else
  hash = { :items => items.map { |item| item.to_hash } }
  puts JSON.pretty_generate(hash)
end
@@ -0,0 +1,28 @@
1
# coding: utf-8
# Put this gem's lib directory on the load path so the version file can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'hypermicrodata/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name     = 'hypermicrodata'
  spec.version  = Hypermicrodata::VERSION
  spec.authors  = ['Jason Ronallo', 'Toru KAWAMURA']
  spec.email    = ['jronallo@gmail.com', 'tkawa@4bit.net']
  spec.homepage = 'https://github.com/tkawa/hypermicrodata'
  spec.license  = 'MIT'

  # Descriptions
  spec.description = 'HTML5 Microdata extractor with Hypermedia'
  spec.summary     = 'Ruby library for extracting HTML5 Microdata with Hypermedia'

  # Packaged files: everything tracked by git.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Runtime dependencies
  %w[nokogiri mechanize halibut multi_json].each do |gem_name|
    spec.add_dependency gem_name
  end

  # Development dependencies
  spec.add_development_dependency 'bundler', '~> 1.3'
  spec.add_development_dependency 'rake'
end
@@ -0,0 +1,37 @@
1
+ require "hypermicrodata/version"
2
+ require "uberous/uber"
3
+ require "nokogiri"
4
+ require "mechanize"
5
+ require "hypermicrodata/item"
6
+ require "hypermicrodata/document"
7
+ require "hypermicrodata/property"
8
+ require "hypermicrodata/link"
9
+ require "hypermicrodata/itemprop_parser"
10
+ require "hypermicrodata/submit_button"
11
+ require "hypermicrodata/serializer/base"
12
+ require "hypermicrodata/serializer/hal"
13
+ require "hypermicrodata/serializer/uber"
14
+ require "hypermicrodata/extract"
15
+ require "hypermicrodata/rails/html_based_json_renderer"
16
+ require 'open-uri'
17
+ require 'json'
18
+ require 'uri'
19
+
20
module Hypermicrodata

  # Extract the Microdata items found at +location+.
  #
  # location - a URL string, a local file path, or an object that responds to
  #            #open (for example a URI or Pathname).
  #
  # Returns the Array of items built by Hypermicrodata::Document.
  def self.get_items(location)
    open_location(location) do |content|
      # Document#initialize already extracts the items; read them from the
      # accessor instead of extracting a second time.
      Hypermicrodata::Document.new(content, location).items
    end
  end

  # Extract the items at +location+ and serialize them.
  #
  # Returns a pretty-printed JSON String of the form {"items": [...]}, where
  # each entry is the item's #to_hash.
  def self.to_json(location)
    hash = { :items => get_items(location).map { |item| item.to_hash } }
    JSON.pretty_generate hash
  end

  # Open +location+ for reading, yield the IO to the block, and close it
  # afterwards. Returns the block's value.
  #
  # Kernel#open is deliberately avoided: given a String starting with "|" it
  # spawns a subprocess, and on Ruby 3+ it no longer opens URLs. Strings with
  # a URI scheme are opened through open-uri; other strings are treated as
  # local file paths.
  def self.open_location(location, &block)
    return location.open(&block) if location.respond_to?(:open)

    if location.respond_to?(:to_str) && location =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
      uri = URI.parse(location)
      return uri.open(&block) if uri.respond_to?(:open)
    end

    File.open(location, &block)
  end
  private_class_method :open_location

end
@@ -0,0 +1,27 @@
1
module Hypermicrodata
  # Wraps an HTML document parsed with Nokogiri and exposes the Microdata
  # items found in it.
  class Document

    # items - the Array of Item objects extracted at construction time.
    # doc   - the parsed Nokogiri document.
    attr_reader :items, :doc

    # content           - HTML source handed to Nokogiri::HTML.
    # page_url          - passed through to every Item (may be nil).
    # filter_xpath_attr - optional XPath predicate (e.g. "@data-main-item")
    #                     used to pick the item nodes; when nil, or when it
    #                     matches nothing, the top-level items are used.
    def initialize(content, page_url=nil, filter_xpath_attr=nil)
      @doc = Nokogiri::HTML(content)
      @page_url = page_url
      @filter_xpath_attr = filter_xpath_attr
      @items = extract_items
    end

    # Build an Item for each selected itemscope node.
    #
    # Returns a new Array of Item objects on every call.
    def extract_items
      itemscopes = filtered_itemscopes
      if itemscopes.empty?
        # Fall back to top-level items: itemscope nodes that are not
        # themselves a property of another item.
        itemscopes = @doc.xpath('self::*[@itemscope] | .//*[@itemscope and not(@itemprop)]')
      end

      itemscopes.map { |itemscope| Item.new(itemscope, @page_url) }
    end

    private

    # Nodes matching the configured filter that also carry @itemscope, or an
    # empty Array when no filter is configured.
    def filtered_itemscopes
      return [] unless @filter_xpath_attr

      matches = @doc.xpath("//*[#{@filter_xpath_attr} and @itemscope]")
      # Diagnostics go to stderr (warn) so they never mix with data a caller
      # writes to stdout, such as the generated JSON.
      warn "XPath //*[#{@filter_xpath_attr}] is not found. root node is used." if matches.empty?
      matches
    end

  end
end
@@ -0,0 +1,22 @@
1
module Hypermicrodata
  # Entry point that parses an HTML string and serializes the extracted
  # Microdata as JSON in one of the supported formats.
  class Extract
    # html    - HTML source to parse.
    # options - Hash with optional keys:
    #           :location       - page URL handed to the document and serializer
    #           :profile_path   - handed to the serializer
    #           :data_attr_name - name (without the "data-" prefix) of the
    #                             attribute marking the main item;
    #                             defaults to 'main-item'
    def initialize(html, options = {})
      @location = options[:location]
      @profile_path = options[:profile_path]
      attr_name = options[:data_attr_name] || 'main-item'
      @document = Hypermicrodata::Document.new(html, @location, "@data-#{attr_name}")
    end

    # format  - :hal, :uber, or anything else for the base serializer.
    # options - passed through to the serializer's #to_json.
    #
    # Returns whatever the chosen serializer's #to_json returns.
    def to_json(format = :plain, options = {})
      serializer_class =
        if format == :hal
          Serializer::Hal
        elsif format == :uber
          Serializer::Uber
        else
          Serializer::Base
        end

      serializer = serializer_class.new(@document, @location, @profile_path)
      serializer.to_json(options)
    end
  end
end
@@ -0,0 +1,113 @@
1
module Hypermicrodata
  # A Microdata item rooted at an element carrying +itemscope+.
  #
  # type       - Array of itemtype strings, or nil when the attribute is absent.
  # id         - the itemid string, or nil when the attribute is absent.
  # properties - Hash mapping a property name to the Array of entries with that name.
  # links      - Hash mapping a rel to the Array of entries with that rel.
  class Item
    attr_reader :type, :properties, :links, :id

    # top_node - the element (Nokogiri node) that declares the item.
    # page_url - passed through to ItempropParser and FormParser.
    def initialize(top_node, page_url)
      @top_node = top_node
      @type = extract_itemtype
      @id = extract_itemid
      @properties = {}
      @links = {}
      @page_url = page_url
      add_itemref_properties(@top_node)
      parse_elements(extract_elements(@top_node))
    end

    # Returns a Hash with :id and :type (each only when present) followed by
    # :properties and :links. Entries holding a nested item are expanded with
    # that item's #to_hash; other entries contribute their #value.
    def to_hash
      hash = {}
      hash[:id] = id if id
      hash[:type] = type if type
      hash[:properties] = expand_entries(properties)
      hash[:links] = expand_entries(links)
      hash
    end

    # Returns every entry stored under properties or links, without duplicates.
    def all_properties_and_links
      properties.values.flatten | links.values.flatten
    end

    private

    # Convert a {key => [entry, ...]} Hash into {key => [value, ...]}.
    def expand_entries(grouped)
      grouped.each_with_object({}) do |(key, entries), result|
        result[key] = entries.map do |entry|
          entry.item ? entry.item.to_hash : entry.value
        end
      end
    end

    # Direct child elements of +node+.
    def extract_elements(node)
      node.search('./*')
    end

    def extract_itemid
      (value = @top_node.attribute('itemid')) ? value.value : nil
    end

    # itemtype may list several space-separated types.
    def extract_itemtype
      (value = @top_node.attribute('itemtype')) ? value.value.split(' ') : nil
    end

    def parse_elements(elements)
      elements.each {|element| parse_element(element)}
    end

    # Record +element+ if it is a property, nested item, or link element,
    # record its submit buttons if it is a form, then descend into its
    # children unless it starts a nested item of its own.
    def parse_element(element)
      itemscope = element.attribute('itemscope')
      itemprop = element.attribute('itemprop')
      add_itemprop(element) if itemscope || itemprop || ItempropParser::LINK_ELEMENTS.include?(element.name)
      add_form(element) if element.name == 'form'
      parse_elements(extract_elements(element)) unless itemscope
    end

    # Add an 'itemprop' to the properties
    def add_itemprop(element)
      property = ItempropParser.parse(element, @page_url)
      if property.link? && property.names.empty? && property.rels.empty?
        # A link with neither a name nor a rel is filed under the generic 'link' key.
        (@links['link'] ||= []) << property
      else
        property.names.each { |name| (@properties[name] ||= []) << property }
        property.rels.each { |rel| (@links[rel] ||= []) << property }
      end
    end

    # Add any properties referred to by 'itemref'
    def add_itemref_properties(element)
      itemref = element.attribute('itemref')
      if itemref
        itemref.value.split(' ').each {|id| parse_elements(find_with_id(id))}
      end
    end

    # Record the submit buttons FormParser returns for a form element. Buttons
    # without a rel are filed under the generic 'submit' key.
    def add_form(element)
      submit_buttons = FormParser.parse(element, @page_url)
      submit_buttons.each do |submit_button|
        submit_button.names.each { |name| (@properties[name] ||= []) << submit_button }
        if submit_button.rels.empty?
          (@links['submit'] ||= []) << submit_button
        else
          submit_button.rels.each { |rel| (@links[rel] ||= []) << submit_button }
        end
      end
    end

    # Find an element with a matching id
    #
    # The id comes from the document being parsed, so it is quoted as an
    # XPath string literal rather than interpolated verbatim; an id containing
    # a quote would otherwise produce a malformed or altered expression.
    def find_with_id(id)
      @top_node.search("//*[@id=#{xpath_literal(id)}]")
    end

    # Quote +string+ as an XPath 1.0 string literal. XPath has no escape
    # character, so a value containing both quote kinds is built with concat().
    def xpath_literal(string)
      return "'#{string}'" unless string.include?("'")
      return "\"#{string}\"" unless string.include?('"')

      pieces = string.split("'", -1).map { |piece| "'#{piece}'" }
      "concat(#{pieces.join(%q{, "'", })})"
    end

  end
end