otg-microformat 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in microformat.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 James Gregory
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Microformat
2
+
3
+ A basic parser for schema.org-style HTML microdata (`itemscope`/`itemprop` markup). At the moment only [Product](http://schema.org/Product) items have a dedicated accessor.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'otg-microformat', :require => 'microformat'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install otg-microformat
18
+
19
+ ## Usage
20
+
21
+ Microformat::SchemaOrg.parse '<your><html>'
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,101 @@
1
+ require 'microformat/version'
2
+ require 'nokogiri'
3
+ require 'andand'
4
+
5
module Microformat
  # Resolves the value of a single element that carries an +itemprop+
  # attribute, following the per-element rules of the HTML microdata
  # specification.
  class ItemProp
    # Elements whose property value reflects their +src+ attribute.
    SRC_ELEMENTS = %w[audio embed iframe img source track video].freeze
    # Elements whose property value reflects their +href+ attribute.
    HREF_ELEMENTS = %w[a area link].freeze

    # Returns the property value carried by +node+.
    #
    # node - an element responding to +attribute+, +name+ and +text+
    #        (and +search+ when it is itself an item).
    #
    # Returns nil when the element has no +itemprop+ attribute, an ItemScope
    # when the element is itself an item, the reflected attribute value (or
    # nil when that attribute is absent) for meta/media/link/object/time
    # elements, and the whitespace-stripped text content otherwise.
    def self.parse(node)
      return nil unless node.attribute('itemprop')

      # A property that is itself an item is represented by the nested item.
      return ItemScope.new(node) if node.attribute('itemscope')

      tag = node.name
      return attribute_value(node, 'content') if tag == 'meta'
      return attribute_value(node, 'src') if SRC_ELEMENTS.include?(tag)
      return attribute_value(node, 'href') if HREF_ELEMENTS.include?(tag)
      return attribute_value(node, 'data') if tag == 'object'
      # A <time> without datetime falls through to its text content.
      return attribute_value(node, 'datetime') if tag == 'time' && node.attribute('datetime')

      # strip already removes a trailing newline, so no separate chomp.
      node.text.strip
    end

    # Returns the value of the named attribute on +node+, or nil if the
    # attribute is not present.
    def self.attribute_value(node, name)
      attribute = node.attribute(name)
      attribute && attribute.value
    end
    private_class_method :attribute_value
  end

  # A microdata item: an element with +itemscope+ together with the
  # properties found beneath it.
  class ItemScope
    # type - value of the element's +itemtype+ attribute, or nil.
    # id   - value of the element's +itemid+ attribute, or nil.
    attr_reader :type, :id

    # Builds the item from +node+ and collects its properties from the
    # element's descendants.
    def initialize(node)
      @type = attr 'itemtype', node
      @id = attr 'itemid', node
      @properties = {}

      parse_elements node.search('./*')
    end

    # Returns the Array of values recorded for property +name+, or nil when
    # the item has no such property.
    def [](name)
      @properties[name]
    end

    private

    # Returns the value of attribute +name+ on +node+, or nil if absent.
    def attr(name, node)
      val = node.attribute name
      val ? val.value : nil
    end

    # Walks +elements+, recording every property value found and descending
    # into elements that did not yield a value.
    def parse_elements(elements)
      elements.each do |el|
        prop = ItemProp.parse el

        if prop
          # itemprop is a space-separated list of names; the value is
          # recorded under each of them.
          attr('itemprop', el).split.each do |name|
            (@properties[name] ||= []) << prop
          end
        elsif !el.attribute('itemscope')
          # An itemscope without itemprop starts an unrelated item, so its
          # descendants must not be attributed to this one.
          parse_elements el.search('./*')
        end
      end
    end
  end

  # The collection of top-level items found in one document.
  class ItemDocument
    # scopes - Array of ItemScope instances.
    def initialize(scopes)
      @scopes = scopes
    end

    # Returns the items whose type is exactly 'http://schema.org/Product'.
    def products
      @scopes.select { |scope| scope.type == 'http://schema.org/Product' }
    end
  end

  # Entry point for parsing microdata out of HTML.
  class SchemaOrg
    # Parses +html+ and returns an ItemDocument of its top-level items.
    #
    # html - an HTML String, or an already parsed document (anything that
    #        responds to +search+).
    #
    # Returns nil when the document contains no top-level items.
    def self.parse(html)
      html = Nokogiri::HTML.parse html unless html.respond_to? :search

      # Top-level items are itemscope elements that are not themselves a
      # property of another item.
      roots = html.search('//*[@itemscope and not(@itemprop)]')
      scopes = roots.map { |node| ItemScope.new node }

      scopes.empty? ? nil : ItemDocument.new(scopes)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Microformat
  # Release version of the gem. Frozen so the shared constant cannot be
  # mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
# Put lib/ on the load path so the version constant can be required from the
# working tree.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'microformat/version'

Gem::Specification.new do |spec|
  spec.name          = 'otg-microformat'
  spec.version       = Microformat::VERSION
  # One array entry per author rather than a single comma-joined string.
  spec.authors       = ['On the Game', 'James Gregory']
  spec.email         = ['james@onthegame.com.au']
  spec.description   = 'A basic microformat parser for Ruby.'
  spec.summary       = 'Simple Microformat parser'
  spec.homepage      = 'http://www.onthegame.com.au/about/opensource'
  spec.license       = 'MIT'

  # Package exactly the files tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_dependency 'nokogiri'
  spec.add_dependency 'andand'

  spec.add_development_dependency 'bundler', '~> 1.3'
  spec.add_development_dependency 'rake'
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: otg-microformat
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - On the Game, James Gregory
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-05-05 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: andand
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '1.3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: A basic microformat parser for Ruby.
79
+ email:
80
+ - james@onthegame.com.au
81
+ executables: []
82
+ extensions: []
83
+ extra_rdoc_files: []
84
+ files:
85
+ - .gitignore
86
+ - Gemfile
87
+ - LICENSE.txt
88
+ - README.md
89
+ - Rakefile
90
+ - lib/microformat.rb
91
+ - lib/microformat/version.rb
92
+ - microformat.gemspec
93
+ homepage: http://www.onthegame.com.au/about/opensource
94
+ licenses:
95
+ - MIT
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ! '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ segments:
107
+ - 0
108
+ hash: -164261226516133318
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ none: false
111
+ requirements:
112
+ - - ! '>='
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ segments:
116
+ - 0
117
+ hash: -164261226516133318
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 1.8.25
121
+ signing_key:
122
+ specification_version: 3
123
+ summary: Simple Microformat parser
124
+ test_files: []