otg-microformat 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in microformat.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 James Gregory
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Microformat
2
+
3
+ A basic microformat parser for HTML microdata markup. At the moment it only exposes [Product](http://schema.org/Product) items.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'otg-microformat'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install otg-microformat
18
+
19
+ ## Usage
20
+
21
+ Microformat::SchemaOrg.parse '<your><html>'
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,101 @@
1
+ require 'microformat/version'
2
+ require 'nokogiri'
3
+ require 'andand'
4
+
5
+ module Microformat
6
# Computes the microdata value of a single element, following the
# "property values" rules of the HTML microdata specification.
class ItemProp
  # Elements whose value reflects their src attribute.
  SRC_ELEMENTS = %w[audio embed iframe img source track video].freeze
  # Elements whose value reflects their href attribute.
  HREF_ELEMENTS = %w[a area link].freeze

  # Returns the property value carried by +node+.
  #
  # node - an element responding to #attribute(name) (returning an object
  #        with #value, or nil), #name and #text.
  #
  # Returns nil when the element has no itemprop attribute, an ItemScope
  # when it also has an itemscope attribute, and a String otherwise. nil is
  # reserved for "not a property": when the reflected attribute is missing
  # the value is the empty string, as the specification requires.
  def self.parse(node)
    # No itemprop attribute: the element carries no property at all.
    return nil unless node.attribute('itemprop')

    # An itemscope attribute makes the value a nested item.
    return ItemScope.new(node) if node.attribute('itemscope')

    attribute_name = reflected_attribute(node)
    # Every other element uses its text content (surrounding whitespace removed).
    return node.text.strip unless attribute_name

    attribute = node.attribute(attribute_name)
    attribute ? attribute.value : ''
  end

  # Name of the content attribute that holds the value for this element,
  # or nil when the value is the element's text content.
  def self.reflected_attribute(node)
    case node.name
    when 'meta' then 'content'
    when *SRC_ELEMENTS then 'src'
    when *HREF_ELEMENTS then 'href'
    when 'object' then 'data'
    when 'time'
      # A time element only reflects datetime when the attribute is present.
      'datetime' if node.attribute('datetime')
    end
  end
  private_class_method :reflected_attribute
end
41
+
42
# A microdata item: an element with an itemscope attribute together with the
# properties found beneath it.
class ItemScope
  # type - value of the itemtype attribute, or nil when absent.
  # id   - value of the itemid attribute, or nil when absent.
  attr_reader :type, :id

  # Builds the item from +node+ by reading its itemtype/itemid attributes
  # and collecting the properties of its descendants.
  #
  # node - an element responding to #attribute(name) and #search(xpath).
  def initialize(node)
    @type = attr 'itemtype', node
    @id = attr 'itemid', node
    @properties = {}

    parse_elements node.search './*'
  end

  # Returns the Array of values recorded for the property +name+, or nil
  # when the item has no such property.
  def [](name)
    @properties[name]
  end

  private

  # Value of the attribute +name+ on +node+, or nil when it is not present.
  def attr(name, node)
    val = node.attribute name
    val ? val.value : nil
  end

  # Walks +elements+ and their descendants, recording every property that
  # belongs to this item.
  def parse_elements(elements)
    elements.each do |el|
      value = ItemProp.parse el
      record attr('itemprop', el), value if value

      # An element with itemscope starts its own item, so its descendants
      # belong to that item and must not be attributed to this one. Any
      # other element, property or not, may still contain properties.
      parse_elements el.search('./*') unless el.attribute('itemscope')
    end
  end

  # Stores +value+ under each property name in +names+; itemprop holds a
  # space-separated list of names, so one element can feed several keys.
  def record(names, value)
    names.to_s.split.each do |name|
      (@properties[name] ||= []) << value
    end
  end
end
77
+
78
# The collection of top-level items found in a parsed document.
class ItemDocument
  # itemtype URLs that identify a schema.org Product (both URL schemes).
  PRODUCT_TYPES = %w[http://schema.org/Product https://schema.org/Product].freeze

  # scopes - the items of the document; each must respond to #type.
  def initialize(scopes)
    @scopes = scopes
  end

  # Returns the items whose type marks them as a schema.org Product.
  #
  # itemtype may list several space-separated URLs, so each token is
  # checked rather than comparing the raw attribute string. Items without
  # a type never match.
  def products
    @scopes.select { |scope| (scope.type.to_s.split & PRODUCT_TYPES).any? }
  end
end
87
+
88
# Entry point for extracting schema.org microdata from HTML.
class SchemaOrg
  # Parses +html+ and returns an ItemDocument holding its top-level items.
  #
  # html - either markup to be parsed with Nokogiri, or any object that
  #        already responds to #search (used as-is).
  #
  # Returns nil when the document contains no top-level item.
  def self.parse(html)
    document = html.respond_to?(:search) ? html : Nokogiri::HTML.parse(html)

    # Top-level items have itemscope but are not a property of another item.
    top_level_nodes = document.search('//*[@itemscope and not(@itemprop)]')
    scopes = top_level_nodes.map { |node| ItemScope.new node }

    scopes.any? ? ItemDocument.new(scopes) : nil
  end
end
101
+ end
@@ -0,0 +1,3 @@
1
module Microformat
  # Version of the gem; frozen so the shared string cannot be mutated by
  # code that reads it.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
# Make the gem's lib directory loadable so the version constant can be
# required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'microformat/version'

Gem::Specification.new do |gem|
  # Identity and descriptive metadata.
  gem.name        = 'otg-microformat'
  gem.version     = Microformat::VERSION
  gem.authors     = ['On the Game, James Gregory']
  gem.email       = ['james@onthegame.com.au']
  gem.description = 'A basic microformat parser for Ruby.'
  gem.summary     = 'Simple Microformat parser'
  gem.homepage    = 'http://www.onthegame.com.au/about/opensource'
  gem.license     = 'MIT'

  # Package every file tracked by git; executables and test files are the
  # subsets living under the conventional directories.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Runtime dependencies.
  %w[nokogiri andand].each { |dependency| gem.add_dependency dependency }

  # Development-only dependencies.
  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: otg-microformat
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - On the Game, James Gregory
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-05-05 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: andand
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: bundler
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '1.3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rake
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: A basic microformat parser for Ruby.
79
+ email:
80
+ - james@onthegame.com.au
81
+ executables: []
82
+ extensions: []
83
+ extra_rdoc_files: []
84
+ files:
85
+ - .gitignore
86
+ - Gemfile
87
+ - LICENSE.txt
88
+ - README.md
89
+ - Rakefile
90
+ - lib/microformat.rb
91
+ - lib/microformat/version.rb
92
+ - microformat.gemspec
93
+ homepage: http://www.onthegame.com.au/about/opensource
94
+ licenses:
95
+ - MIT
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ! '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ segments:
107
+ - 0
108
+ hash: -164261226516133318
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ none: false
111
+ requirements:
112
+ - - ! '>='
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ segments:
116
+ - 0
117
+ hash: -164261226516133318
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 1.8.25
121
+ signing_key:
122
+ specification_version: 3
123
+ summary: Simple Microformat parser
124
+ test_files: []