hmachine 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -2
- data/Gemfile +6 -4
- data/Gemfile.lock +51 -0
- data/README.md +123 -9
- data/Rakefile +12 -3
- data/bin/hmachine +99 -0
- data/hmachine.gemspec +132 -0
- data/lib/hmachine.rb +121 -12
- data/lib/hmachine/microformat.rb +39 -20
- data/lib/hmachine/microformat/adr.rb +22 -0
- data/lib/hmachine/microformat/geo.rb +48 -0
- data/lib/hmachine/microformat/hcard.rb +169 -11
- data/lib/hmachine/microformat/rellicense.rb +20 -0
- data/lib/hmachine/microformat/reltag.rb +38 -0
- data/lib/hmachine/microformat/votelinks.rb +42 -0
- data/lib/hmachine/microformat/xfn.rb +54 -0
- data/lib/hmachine/microformat/xmdp.rb +14 -0
- data/lib/hmachine/microformat/xoxo.rb +69 -0
- data/lib/hmachine/pattern.rb +26 -0
- data/lib/hmachine/pattern/abbr.rb +21 -0
- data/lib/hmachine/pattern/datetime.rb +75 -0
- data/lib/hmachine/pattern/typevalue.rb +32 -0
- data/lib/hmachine/pattern/url.rb +32 -0
- data/lib/hmachine/pattern/valueclass.rb +51 -0
- data/lib/hmachine/posh.rb +3 -0
- data/lib/hmachine/posh/anchor.rb +40 -0
- data/lib/hmachine/posh/base.rb +204 -0
- data/lib/hmachine/posh/definition_list.rb +41 -0
- data/test/fixtures/huffduffer.html +466 -0
- data/test/fixtures/likeorhate.html +48 -0
- data/test/fixtures/rel_license.html +4 -0
- data/test/fixtures/test-fixture/hcard/hcard1.html +147 -0
- data/test/fixtures/test-fixture/hcard/hcard11.html +123 -0
- data/test/fixtures/test-fixture/hcard/hcard12.html +178 -0
- data/test/fixtures/test-fixture/hcard/hcard17.html +165 -0
- data/test/fixtures/test-fixture/hcard/hcard2.html +264 -0
- data/test/fixtures/test-fixture/hcard/hcard3.html +144 -0
- data/test/fixtures/test-fixture/hcard/hcard4.html +117 -0
- data/test/fixtures/test-fixture/hcard/hcard5.html +119 -0
- data/test/fixtures/test-fixture/hcard/hcard6.html +188 -0
- data/test/fixtures/test-fixture/hcard/hcard7.html +188 -0
- data/test/fixtures/test-fixture/hcard/hcard8.html +130 -0
- data/test/fixtures/test-fixture/hcard/hcard9.html +111 -0
- data/test/fixtures/test-fixture/hcard/hcard99.html +215 -0
- data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-YYYY-MM-DD--HH-MM.html +9 -0
- data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-abbr-YYYY-MM-DD--HH-MM.html +4 -0
- data/test/fixtures/xfn.html +198 -0
- data/test/fixtures/xmdp.html +32 -0
- data/test/fixtures/xoxo.html +51 -0
- data/test/hmachine_test.rb +122 -6
- data/test/microformat/adr_test.rb +47 -0
- data/test/microformat/geo_test.rb +66 -0
- data/test/microformat/hcard_test.rb +487 -20
- data/test/microformat/rellicense_test.rb +36 -0
- data/test/microformat/reltag_test.rb +61 -0
- data/test/microformat/votelinks_test.rb +44 -0
- data/test/microformat/xfn_test.rb +28 -0
- data/test/microformat/xmdp_test.rb +16 -0
- data/test/microformat/xoxo_test.rb +51 -0
- data/test/microformat_test.rb +12 -34
- data/test/pattern/date_time_test.rb +55 -0
- data/test/pattern/value_class_test.rb +33 -0
- data/test/pattern_test.rb +132 -0
- data/test/posh/anchor_test.rb +41 -0
- data/test/posh/base_test.rb +150 -0
- data/test/posh/definition_list_test.rb +38 -0
- data/test/test_helper.rb +24 -6
- metadata +93 -15
- data/lib/hmachine/microformat/base.rb +0 -17
@@ -0,0 +1,20 @@
|
|
1
|
+
module HMachine
  module Microformat
    # rel-license microformat: an <a> or <link> element whose rel attribute
    # lists the token "license".
    class RelLicense < POSH::Anchor
      FRIENDLY_NAME = "rel-license"
      WIKI_URL = 'http://microformats.org/wiki/rel-license'
      XMDP = 'http://microformats.org/profile/rel-license'

      selector 'a[rel~="license"], link[rel~="license"]'

      # A node passes when it carries a rel attribute and "license" is one of
      # its whitespace-separated tokens. Yields nil (not false) when the
      # attribute is missing.
      validate do |a|
        rel = a['rel']
        rel && rel.split.include?('license')
      end

      # #license is another name for #url (inherited; defined outside this file).
      alias_method :license, :url

      # String form of this microformat: whatever #license returns.
      def to_s
        license
      end

    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module HMachine
  module Microformat
    # rel-tag microformat: an <a> element whose rel attribute lists the token
    # "tag". The tag name is the last "/"-separated segment of the href.
    class RelTag < POSH::Anchor
      FRIENDLY_NAME = "rel-tag"
      WIKI_URL = "http://microformats.org/wiki/rel-tag"
      XMDP = 'http://microformats.org/profile/rel-tag'

      selector 'a[rel~="tag"]'

      # Valid when the node has a rel attribute containing the "tag" token.
      validate {|a| a['rel'] && a['rel'].split.include?('tag') }

      # Single-entry Hash of { tag name => href }, memoized after first use.
      # The name is the final path segment of the href, taken verbatim
      # (no URL-decoding is applied).
      def tag
        @tag ||= { node['href'].split('/').last => node['href'] }
      end

      # The tag name as a String.
      #
      # Reads the first (only) key rather than calling Array#to_s: from
      # Ruby 1.9 on, Array#to_s returns the inspect form (e.g. '["ruby"]'),
      # whereas Ruby 1.8 joined the elements. Taking the first element yields
      # the plain name on every Ruby version.
      def name
        tag.keys.first.to_s
      end

      # String form of the tag: its name.
      def to_s
        name
      end

      # The href the tag points at, as a String (same Array#to_s caveat as #name).
      def url
        tag.values.first.to_s
      end

      # Hash form of the tag; same object as #tag.
      def to_h
        tag
      end

      def inspect
        "<#{self.class}:#{hash}: '#{tag}'>"
      end

    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module HMachine
  module Microformat
    # VoteLinks microformat: an <a> element whose rev attribute lists one of
    # the tokens vote-for, vote-against or vote-abstain.
    class VoteLinks < POSH::Anchor
      FRIENDLY_NAME = "VoteLinks"
      WIKI_URL = 'http://microformats.org/wiki/vote-links'
      XMDP = 'http://microformats.org/profile/vote-links'

      selector 'a[rev~="vote-for"], a[rev~="vote-against"], a[rev~="vote-abstain"]'

      # Valid only when the rev attribute contains at least one vote token.
      #
      # The previous predicate rejected the tokens that WERE present and then
      # tested the remainder for non-emptiness, so it accepted any rev value
      # that did not contain all three vote tokens at once.
      #
      # Written without `return`: in a block created in a class body, `return`
      # raises LocalJumpError if the block is later invoked as a plain proc
      # (how `validate` invokes it is not visible from this file).
      validate do |a|
        rev = a['rev']
        if rev
          tokens = rev.split
          %w(vote-for vote-against vote-abstain).any? { |vote| tokens.include?(vote) }
        else
          false
        end
      end

      # Hash of { vote type => [url, title] }, with nil entries dropped from
      # the array. Memoized. #url and #title come from the superclass.
      def vote
        @vote ||= { type => [url, title].compact }
      end

      # First rev token that starts with "vote-", or nil when there is none.
      def type
        node['rev'].split(' ').find { |token| token.index('vote-') == 0 }
      end

      # True when this link is a vote in favour.
      def for?
        type == 'vote-for'
      end

      # True when this link is a vote against.
      def against?
        type == 'vote-against'
      end

      # True when this link is an abstention.
      def abstain?
        type == 'vote-abstain'
      end

    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module HMachine
  module Microformat
    # XFN (XHTML Friends Network): an <a> element whose rel attribute lists
    # one or more of the relationship values grouped below.
    class XFN < POSH::Anchor
      FRIENDLY_NAME = "XFN"
      WIKI_URL = 'http://microformats.org/wiki/XFN'
      XMDP = 'http://gmpg.org/xfn/11'

      # Relationship values, grouped by category.
      @@friendship = %w( contact acquaintance friend)
      @@physical = %w( met )
      @@professional = %w( co-worker colleague )
      @@geographical = %w( co-resident neighbor )
      @@family = %w( child parent sibling spouse kin )
      @@romantic = %w( muse crush date sweetheart )
      @@identity = %w( me )

      # Every recognised relationship value.
      @@relationships = @@friendship + @@physical + @@professional + @@geographical + @@family + @@romantic + @@identity

      # Find anchors whose rel attribute contains any recognised value.
      search do |doc|
        doc.css(@@relationships.collect { |rel| "a[rel~='#{rel}']" }.join(', '))
      end

      # Valid only when the rel attribute contains at least one recognised
      # relationship value.
      #
      # The previous predicate rejected the values that WERE present and then
      # tested the remainder for non-emptiness, which is true for practically
      # every rel attribute (it only failed when all values were listed).
      #
      # Written without `return`: in a block created in a class body, `return`
      # raises LocalJumpError if the block is later invoked as a plain proc
      # (how `validate` invokes it is not visible from this file).
      validate do |a|
        rel = a['rel']
        if rel
          values = rel.split
          @@relationships.any? { |relationship| values.include?(relationship) }
        else
          false
        end
      end

      # Performant way to parse identity relationships.
      #
      # Looks only for a[rel~='me'] anchors and runs extract_from (defined
      # outside this file) on each. Returns the single result when exactly
      # one anchor matched, an Array of results when several matched, and nil
      # when none matched.
      def self.parse_me(document)
        nodes = document.css("a[rel~='me']")
        if !nodes.empty?
          contents = nodes.collect do |node|
            extract_from(node)
          end
          (contents.length == 1) ? contents.first : contents
        end
      end

      # Define one predicate per category (friendship?, physical?, ...): true
      # when the category's values and this anchor's #rel share an element.
      # #rel is provided outside this file; it is used here as an Array.
      %w(friendship physical professional geographical family romantic identity).each do |type|
        class_eval %Q{
          def #{type}?
            !(@@#{type} & rel).empty?
          end
        }
      end
      alias me? identity?
      alias met? physical?

      def inspect
        "<#{self.class}:#{hash}: '#{rel.join(', ')}'>"
      end

    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module HMachine
  module Microformat
    # XMDP (XHTML Meta Data Profile): a definition list marked up as
    # <dl class="profile">.
    class XMDP < POSH::DefinitionList
      FRIENDLY_NAME = "XMDP"
      WIKI_URL = 'http://microformats.org/wiki/XMDP'
      XMDP = 'http://gmpg.org/xmdp/1'

      # Collect every dl.profile element in the document.
      search do |doc|
        doc.css('dl.profile')
      end

      # A node qualifies when it matches the same dl.profile selector.
      validate do |dl|
        dl.matches?('dl.profile')
      end

    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module HMachine
  module Microformat
    # XOXO outline microformat: an <ol> or <ul> with class "xoxo" or
    # "blogroll", converted into nested Ruby Arrays, Hashes and Strings.
    class XOXO < POSH::Base
      FRIENDLY_NAME = "XOXO"
      WIKI_URL = 'http://microformats.org/wiki/xoxo'
      XMDP = 'http://microformats.org/profile/xoxo'

      selector 'ol.xoxo, ul.xoxo, ol.blogroll, ul.blogroll'

      # Recursively convert the children of +node+ into an outline Array.
      #
      # * <li> without element children: its entries are appended inline
      # * <li> with element children, <ol>, <ul>: a nested Array
      # * <dl>: a Hash of term => definition (see build_definition_list)
      # * <a>:  a Hash describing the link (see build_link)
      # * any other element: its stripped text content
      # * non-blank text nodes: their stripped text
      #
      # Entries of a text-only <li> are appended with Array#concat. The
      # previous set union (Array#|) silently dropped list items whose text
      # duplicated an earlier entry.
      def self.build_outline(node)
        tree = []
        node.children.each do |child|
          if child.elem?
            case child.node_name
            when 'li'
              items = build_outline(child)
              if child.children.select { |li| li.elem? }.empty?
                tree.concat(items)
              else
                tree << items
              end
            when 'ol', 'ul'
              tree << build_outline(child)
            when 'dl'
              tree << build_definition_list(child)
            when 'a'
              tree << build_link(child)
            else
              tree << child.content.strip
            end
          elsif child.text? && !child.content.strip.empty?
            tree << child.content.strip
          end
        end
        tree
      end

      # Convert a <dl> into a Hash keyed by each <dt>'s stripped text.
      #
      # The value is the raw text of the <dd> that immediately follows the
      # <dt>, or a nested outline when that <dd> has element children. A <dt>
      # that is not directly followed by a <dd> maps to nil; previously this
      # case raised NoMethodError on nil.
      #
      # NOTE(review): css('dt') also matches <dt> elements of definition
      # lists nested inside this one - confirm whether that is intended.
      def self.build_definition_list(dl)
        definitions = {}
        dl.css('dt').each do |term|
          definition = term.next_element
          definition = nil unless definition && definition.node_name.eql?('dd')
          definitions[term.content.strip] =
            if definition.nil?
              nil
            elsif definition.children.select { |dd| dd.elem? }.empty?
              definition.content.to_s
            else
              build_outline(definition)
            end
        end
        definitions
      end
      private_class_method :build_definition_list

      # Describe an <a> element as a Hash with :url and :text, plus :rel
      # (Array of tokens), :type and :title when those attributes are present.
      def self.build_link(anchor)
        link = { :url => anchor['href'], :text => anchor.content.strip }
        link[:rel] = anchor['rel'].split(' ') if anchor['rel']
        link[:type] = anchor['type'] if anchor['type']
        link[:title] = anchor['title'] if anchor['title']
        link
      end
      private_class_method :build_link

      # The outline built from this microformat's node, memoized.
      def outline
        @outline ||= self.class.build_outline(node)
      end

      # Array form of the microformat; same object as #outline.
      def to_a
        outline
      end

      # Index into the outline.
      def [](index)
        outline[index]
      end

      # True when the node's class attribute includes the "blogroll" token.
      def blogroll?
        node['class'].split.include?('blogroll')
      end

    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'hmachine/pattern/url'
|
2
|
+
require 'hmachine/pattern/datetime'
|
3
|
+
require 'hmachine/pattern/abbr'
|
4
|
+
require 'hmachine/pattern/valueclass'
|
5
|
+
require 'hmachine/pattern/typevalue'
|
6
|
+
|
7
|
+
module HMachine
  module Pattern

    # Resolve a design-pattern name to the module that implements it.
    #
    # +name+ is first passed through HMachine.normalize (defined outside this
    # file); the result is compared against the symbols below.
    #
    # Returns one of ValueClass, Abbr, URL or TypeValue.
    # Raises RuntimeError when the normalized name matches none of them.
    def self.map(name)
      key = HMachine.normalize(name)

      return HMachine::Pattern::ValueClass if [:value_class, :valueclass].include?(key)
      return HMachine::Pattern::Abbr       if key == :abbr
      return HMachine::Pattern::URL        if [:uri, :url].include?(key)
      return HMachine::Pattern::TypeValue  if key == :typevalue

      raise "#{name} is not a recognized markup design pattern."
    end

  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module HMachine
  module Pattern
    # abbr design pattern: the machine-readable value is carried in the
    # title attribute of an <abbr> element.
    module Abbr
      extend HMachine
      WIKI_URL = 'http://microformats.org/wiki/abbr-design-pattern'

      # Every <abbr> that has a title attribute.
      search do |element|
        element.css('abbr[title]')
      end

      # Truthy (the title string itself) for an <abbr> with a title.
      validate do |abbr|
        abbr.node_name.eql?('abbr') && abbr['title']
      end

      # For a valid <abbr>: the title, handed to DateTime.extract_from when
      # DateTime.valid? accepts it, otherwise returned as the raw string.
      # For any other node: its stripped text content.
      extract do |node|
        if valid?(node)
          title = node['title']
          if DateTime.valid?(title)
            DateTime.extract_from(title)
          else
            title
          end
        else
          node.content.strip
        end
      end

    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
module HMachine
  module Pattern
    # Date/time design pattern: recognises date and time strings and turns
    # them into Time objects.
    module DateTime
      extend HMachine

      # Is this string a simple date?
      def self.date?(string)
        !date(string).nil?
      end

      # Is this string a simple time?
      def self.time?(string)
        !time(string).nil?
      end

      # Normalize ISO8601 dates.
      #
      # Returns "year-month-day" (components are not zero-padded) when the
      # string contains a year together with a month or an ordinal day of
      # year; a missing day of month defaults to 1. Returns nil otherwise,
      # including when the ordinal day does not exist in that year.
      def self.date(datestring)
        parsed = Date._parse(datestring)
        year = parsed[:year]
        return nil unless year && (parsed[:mon] || parsed[:yday])

        if parsed[:yday]
          begin
            ordinal = Date.ordinal(year, parsed[:yday])
          rescue ArgumentError
            # Out-of-range ordinal day: not a usable date.
            return nil
          end
          month = ordinal.month
          day = ordinal.day
        else
          month = parsed[:mon]
          day = parsed[:mday] || 1
        end

        "#{year}-#{month}-#{day}"
      end

      # Normalize ISO8601 times.
      #
      # Returns "Thour:min:sec<zone>" (components are not zero-padded) when
      # the string contains an hour; minutes and seconds default to 0.
      # Returns nil when no hour is found.
      #
      # When the string carries no zone, the machine's current UTC offset is
      # appended as "+HH:MM" / "-HH:MM". Previously the raw offset in seconds
      # was appended (e.g. "-18000", or "0" fused onto the seconds field),
      # which does not denote the local offset.
      def self.time(timestring)
        parsed = Date._parse(timestring)
        hour = parsed[:hour]
        return nil unless hour

        min = parsed[:min] || 0
        sec = parsed[:sec] || 0
        zone = parsed[:zone] || utc_offset_string(Time.now.utc_offset)
        "T#{hour}:#{min}:#{sec}#{zone}"
      end

      # Format an offset from UTC given in seconds as "+HH:MM" or "-HH:MM".
      def self.utc_offset_string(seconds)
        sign = seconds < 0 ? '-' : '+'
        hours, remainder = seconds.abs.divmod(3600)
        "%s%02d:%02d" % [sign, hours, remainder / 60]
      end
      private_class_method :utc_offset_string

      # Build a normalized iso8601 datetime string: the date part followed by
      # the time part, either of which may be empty.
      def self.iso8601(datetime)
        datestamp = date(datetime) || ''
        timestamp = time(datetime) || ''
        datestamp + timestamp
      end

      # nil when nothing date- or time-like was found; otherwise whether
      # Time.parse accepts the normalized string.
      validate do |datetime|
        stamp = iso8601(datetime)
        unless stamp.empty?
          begin
            Time.parse(stamp).respond_to?(:iso8601)
          rescue ArgumentError
            # An out-of-bounds error means a false positive
            false
          end
        end
      end

      # Parse the normalized string into a Time.
      extract do |datetime|
        Time.parse(iso8601(datetime))
      end

    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module HMachine
  module Pattern
    # Type/value pattern: a property that may carry one or more .type
    # children alongside its value.
    module TypeValue
      extend HMachine

      # Outermost .type elements only (a .type whose parent is itself a
      # .type is skipped).
      search do |doc|
        doc.css('.type').reject {|type| type.parent.matches?('.type') }
      end

      # When .type elements are found in +node+, returns
      # { :type => <type or Array of types>, :value => <value> };
      # otherwise returns just the value.
      extract do |node|
        if found_in?(node)
          # Each type node is unlinked (detached from the document) before
          # the value is read - presumably so the type text does not leak
          # into the value; the statement order matters.
          types = find_in(node).collect do |type|
            HMachine.normalize Pattern::ValueClass.extract_from(type.unlink)
          end
          types = types.first if types.length == 1
          { :type => types, :value => get_value(node) }
        else
          get_value(node)
        end
      end

      # The node's value: via the URL pattern when the node is valid for it,
      # otherwise via the value-class pattern.
      def self.get_value(node)
        if Pattern::URL.valid?(node)
          Pattern::URL.extract_from(node)
        else
          Pattern::ValueClass.extract_from(node)
        end
      end

    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module HMachine
  module Pattern
    # URL pattern: reads the address carried by a link-like element.
    module URL
      extend HMachine

      # Elements that carry a URL: a/area with href, img with src, object
      # with data.
      validate {|node| node.matches?("a[href] ,area[href], img[src], object[data]") }

      # The normalized URL of a valid element; nil for any other node.
      extract do |url|
        if valid?(url)
          value = case url.node_name
                  when 'a', 'area' then url['href']
                  when 'img'       then url['src']
                  when 'object'    then url['data']
                  end
          normalize(value) if value
        end
      end

      # Normalize a URL string with URI#normalize.
      #
      # A mailto: URI is reduced to the address alone (the scheme and any
      # "?query" are removed); a bare "mailto:" yields "" instead of raising
      # NoMethodError on nil as it did before.
      #
      # A string that URI.parse rejects is returned unchanged, so a single
      # malformed attribute in scraped markup no longer aborts extraction
      # with URI::InvalidURIError.
      def self.normalize(url)
        uri = URI.parse(url).normalize.to_s
        if uri.index('mailto:').eql?(0)
          uri.sub('mailto:', '').split('?').first.to_s
        else
          uri
        end
      rescue URI::InvalidURIError
        url
      end

    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module HMachine
  module Pattern
    # Value-class pattern: a property's value is assembled from its .value
    # and .value-title[title] descendants.
    module ValueClass
      extend HMachine
      WIKI_URL = 'http://microformats.org/wiki/value-class-pattern'

      # Outermost value nodes only (a match whose parent is a .value is
      # skipped).
      search do |element|
        candidates = element.css('.value, .value-title[title]')
        candidates.reject {|val| val.parent.matches?('.value') }
      end

      validate {|value| value.matches?('.value, .value-title[title]') }

      # With value nodes present: each piece that DateTime.valid? accepts is
      # replaced by DateTime.iso8601 of it, the pieces are joined, and the
      # joined string is handed to DateTime.extract_from when it is itself
      # valid (otherwise returned as a string).
      # Without value nodes: the abbr pattern when the node is valid for it,
      # else the node's plain text.
      extract do |node|
        if found_in?(node)
          pieces = get_values(node).collect do |val|
            DateTime.valid?(val) ? DateTime.iso8601(val) : val
          end
          joined = pieces.join
          if DateTime.valid?(joined)
            DateTime.extract_from(joined)
          else
            joined
          end
        elsif Abbr.valid?(node)
          Abbr.extract_from(node)
        else
          get_text(node)
        end
      end

      # Stripped text of every value node found in +node+, in order.
      # Checked in this order: alt of img/area, data of object, title of a
      # valid abbr or a .value-title, and finally the text content.
      def self.get_values(node)
        find_in(node).collect do |val|
          tag = val.node_name
          next val['alt'].strip   if (tag.eql?('img') || tag.eql?('area')) && val['alt']
          next val['data'].strip  if tag.eql?('object') && val['data']
          next val['title'].strip if Abbr.valid?(val) || val.matches?('.value-title')
          val.content.strip
        end
      end

      # Stripped text of a single node: alt of img/area, data of object,
      # otherwise the text content.
      def self.get_text(node)
        tag = node.node_name
        return node['alt'].strip  if (tag.eql?('img') || tag.eql?('area')) && node['alt']
        return node['data'].strip if tag.eql?('object') && node['data']
        node.content.strip
      end

    end
  end
end
|