hmachine 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69)
  1. data/.gitignore +2 -2
  2. data/Gemfile +6 -4
  3. data/Gemfile.lock +51 -0
  4. data/README.md +123 -9
  5. data/Rakefile +12 -3
  6. data/bin/hmachine +99 -0
  7. data/hmachine.gemspec +132 -0
  8. data/lib/hmachine.rb +121 -12
  9. data/lib/hmachine/microformat.rb +39 -20
  10. data/lib/hmachine/microformat/adr.rb +22 -0
  11. data/lib/hmachine/microformat/geo.rb +48 -0
  12. data/lib/hmachine/microformat/hcard.rb +169 -11
  13. data/lib/hmachine/microformat/rellicense.rb +20 -0
  14. data/lib/hmachine/microformat/reltag.rb +38 -0
  15. data/lib/hmachine/microformat/votelinks.rb +42 -0
  16. data/lib/hmachine/microformat/xfn.rb +54 -0
  17. data/lib/hmachine/microformat/xmdp.rb +14 -0
  18. data/lib/hmachine/microformat/xoxo.rb +69 -0
  19. data/lib/hmachine/pattern.rb +26 -0
  20. data/lib/hmachine/pattern/abbr.rb +21 -0
  21. data/lib/hmachine/pattern/datetime.rb +75 -0
  22. data/lib/hmachine/pattern/typevalue.rb +32 -0
  23. data/lib/hmachine/pattern/url.rb +32 -0
  24. data/lib/hmachine/pattern/valueclass.rb +51 -0
  25. data/lib/hmachine/posh.rb +3 -0
  26. data/lib/hmachine/posh/anchor.rb +40 -0
  27. data/lib/hmachine/posh/base.rb +204 -0
  28. data/lib/hmachine/posh/definition_list.rb +41 -0
  29. data/test/fixtures/huffduffer.html +466 -0
  30. data/test/fixtures/likeorhate.html +48 -0
  31. data/test/fixtures/rel_license.html +4 -0
  32. data/test/fixtures/test-fixture/hcard/hcard1.html +147 -0
  33. data/test/fixtures/test-fixture/hcard/hcard11.html +123 -0
  34. data/test/fixtures/test-fixture/hcard/hcard12.html +178 -0
  35. data/test/fixtures/test-fixture/hcard/hcard17.html +165 -0
  36. data/test/fixtures/test-fixture/hcard/hcard2.html +264 -0
  37. data/test/fixtures/test-fixture/hcard/hcard3.html +144 -0
  38. data/test/fixtures/test-fixture/hcard/hcard4.html +117 -0
  39. data/test/fixtures/test-fixture/hcard/hcard5.html +119 -0
  40. data/test/fixtures/test-fixture/hcard/hcard6.html +188 -0
  41. data/test/fixtures/test-fixture/hcard/hcard7.html +188 -0
  42. data/test/fixtures/test-fixture/hcard/hcard8.html +130 -0
  43. data/test/fixtures/test-fixture/hcard/hcard9.html +111 -0
  44. data/test/fixtures/test-fixture/hcard/hcard99.html +215 -0
  45. data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-YYYY-MM-DD--HH-MM.html +9 -0
  46. data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-abbr-YYYY-MM-DD--HH-MM.html +4 -0
  47. data/test/fixtures/xfn.html +198 -0
  48. data/test/fixtures/xmdp.html +32 -0
  49. data/test/fixtures/xoxo.html +51 -0
  50. data/test/hmachine_test.rb +122 -6
  51. data/test/microformat/adr_test.rb +47 -0
  52. data/test/microformat/geo_test.rb +66 -0
  53. data/test/microformat/hcard_test.rb +487 -20
  54. data/test/microformat/rellicense_test.rb +36 -0
  55. data/test/microformat/reltag_test.rb +61 -0
  56. data/test/microformat/votelinks_test.rb +44 -0
  57. data/test/microformat/xfn_test.rb +28 -0
  58. data/test/microformat/xmdp_test.rb +16 -0
  59. data/test/microformat/xoxo_test.rb +51 -0
  60. data/test/microformat_test.rb +12 -34
  61. data/test/pattern/date_time_test.rb +55 -0
  62. data/test/pattern/value_class_test.rb +33 -0
  63. data/test/pattern_test.rb +132 -0
  64. data/test/posh/anchor_test.rb +41 -0
  65. data/test/posh/base_test.rb +150 -0
  66. data/test/posh/definition_list_test.rb +38 -0
  67. data/test/test_helper.rb +24 -6
  68. metadata +93 -15
  69. data/lib/hmachine/microformat/base.rb +0 -17
@@ -0,0 +1,20 @@
module HMachine
  module Microformat
    # rel-license microformat: an <a> or <link> element whose rel attribute
    # lists the "license" token.
    class RelLicense < POSH::Anchor
      FRIENDLY_NAME = "rel-license"
      WIKI_URL = 'http://microformats.org/wiki/rel-license'
      XMDP = 'http://microformats.org/profile/rel-license'

      selector 'a[rel~="license"], link[rel~="license"]'

      # Accept a node only when "license" is one of its whitespace-separated
      # rel tokens.
      validate do |a|
        a['rel'] && a['rel'].split.include?('license')
      end

      # +license+ is another name for +url+ (not defined in this class;
      # presumably inherited from POSH::Anchor).
      alias_method :license, :url

      # The string form of a rel-license is its license value.
      def to_s
        license
      end
    end
  end
end
@@ -0,0 +1,38 @@
module HMachine
  module Microformat
    # rel-tag microformat: an <a> element whose rel attribute lists the "tag"
    # token. The tag name is the last "/"-separated segment of the href.
    class RelTag < POSH::Anchor
      FRIENDLY_NAME = "rel-tag"
      WIKI_URL = "http://microformats.org/wiki/rel-tag"
      XMDP = 'http://microformats.org/profile/rel-tag'

      selector 'a[rel~="tag"]'

      # Accept a node only when "tag" is one of its rel tokens.
      validate {|a| a['rel'] && a['rel'].split.include?('tag') }

      # Single-entry Hash mapping the tag name to the href it was read from.
      # The selector does not require an href, so a missing attribute is
      # treated as an empty string instead of raising NoMethodError.
      def tag
        @tag ||= begin
          href = node['href']
          { href.to_s.split('/').last => href }
        end
      end

      # The tag name as a String.
      #
      # Array#to_s only concatenates its elements on Ruby 1.8; from 1.9 on it
      # is an alias for #inspect and would return '["name"]'. Reading the
      # sole key directly yields the bare name on every Ruby version.
      def name
        tag.keys.first.to_s
      end

      # The string form of a tag is its name.
      def to_s
        name
      end

      # The href the tag points at, as a String (same Array#to_s caveat as
      # in #name).
      def url
        tag.values.first.to_s
      end

      # Hash form: { name => href }.
      def to_h
        tag
      end

      def inspect
        "<#{self.class}:#{hash}: '#{tag}'>"
      end
    end
  end
end
@@ -0,0 +1,42 @@
module HMachine
  module Microformat
    # VoteLinks microformat: an <a> element whose rev attribute lists one of
    # "vote-for", "vote-against" or "vote-abstain".
    class VoteLinks < POSH::Anchor
      FRIENDLY_NAME = "VoteLinks"
      WIKI_URL = 'http://microformats.org/wiki/vote-links'
      XMDP = 'http://microformats.org/profile/vote-links'

      selector 'a[rev~="vote-for"], a[rev~="vote-against"], a[rev~="vote-abstain"]'

      # Accept a node only when at least one of its rev tokens is a vote type.
      #
      # The previous implementation rejected the vote types that WERE present
      # and then tested the remainder for non-emptiness, which is true for
      # nearly any rev value (including ones with no vote token at all).
      # It also used `return` inside the block, which can raise LocalJumpError
      # when the block is invoked as a plain proc; the boolean expression
      # below needs no early exit.
      validate do |a|
        tokens = a['rev'].to_s.split
        %w(vote-for vote-against vote-abstain).any? { |vote| tokens.include?(vote) }
      end

      # Hash of { vote type => [url, title] } with nil entries removed.
      # +url+ and +title+ are not defined in this class (presumably inherited
      # from POSH::Anchor).
      def vote
        @vote ||= { type => [url, title].compact }
      end

      # The first rev token beginning with "vote-", or nil when there is none.
      def type
        node['rev'].to_s.split(' ').find { |token| token.index('vote-') == 0 }
      end

      # True when this link is a vote in favour.
      def for?
        type == 'vote-for'
      end

      # True when this link is a vote against.
      def against?
        type == 'vote-against'
      end

      # True when this link is an abstention.
      def abstain?
        type == 'vote-abstain'
      end
    end
  end
end
@@ -0,0 +1,54 @@
module HMachine
  module Microformat
    # XFN (XHTML Friends Network): an <a> element whose rel attribute lists
    # one or more of the relationship values enumerated below.
    class XFN < POSH::Anchor
      FRIENDLY_NAME = "XFN"
      WIKI_URL = 'http://microformats.org/wiki/XFN'
      XMDP = 'http://gmpg.org/xfn/11'

      # Relationship values, grouped by category.
      @@friendship = %w( contact acquaintance friend)
      @@physical = %w( met )
      @@professional = %w( co-worker colleague )
      @@geographical = %w( co-resident neighbor )
      @@family = %w( child parent sibling spouse kin )
      @@romantic = %w( muse crush date sweetheart )
      @@identity = %w( me )

      @@relationships = @@friendship + @@physical + @@professional + @@geographical + @@family + @@romantic + @@identity

      # Find every anchor whose rel contains any known relationship value.
      search do |doc|
        doc.css(@@relationships.collect {|rel| "a[rel~='#{rel}']" }.join(', '))
      end

      # Accept a node only when at least one rel token is a known relationship.
      #
      # The previous implementation rejected the relationships that WERE
      # present and tested the remainder for non-emptiness, so any node with
      # a rel attribute passed. It also used `return` inside the block, which
      # can raise LocalJumpError when the block is invoked as a plain proc.
      validate do |a|
        tokens = a['rel'].to_s.split
        !(@@relationships & tokens).empty?
      end

      # Performant way to parse identity relationships: looks only at
      # a[rel~='me'] anchors. Returns nil when there are none, the single
      # extracted value when there is exactly one, otherwise an Array.
      def self.parse_me(document)
        nodes = document.css("a[rel~='me']")
        return nil if nodes.empty?
        contents = nodes.collect { |node| extract_from(node) }
        (contents.length == 1) ? contents.first : contents
      end

      # Define one predicate per category (friendship?, physical?, ...) that
      # is true when +rel+ shares a value with that category's list.
      # +rel+ is not defined in this class (presumably an Array supplied by
      # POSH::Anchor, since #inspect joins it).
      %w(friendship physical professional geographical family romantic identity).each do |type|
        class_eval %Q{
          def #{type}?
            !(@@#{type} & rel).empty?
          end
        }
      end
      alias me? identity?
      alias met? physical?

      def inspect
        "<#{self.class}:#{hash}: '#{rel.join(', ')}'>"
      end
    end
  end
end
@@ -0,0 +1,14 @@
module HMachine
  module Microformat
    # XMDP profile: a definition list carrying the "profile" class.
    class XMDP < POSH::DefinitionList
      FRIENDLY_NAME = "XMDP"
      WIKI_URL = 'http://microformats.org/wiki/XMDP'
      XMDP = 'http://gmpg.org/xmdp/1'

      # Candidates are every dl.profile element in the document.
      search do |doc|
        doc.css('dl.profile')
      end

      # A node is accepted only if it matches dl.profile itself.
      validate do |dl|
        dl.matches?('dl.profile')
      end
    end
  end
end
@@ -0,0 +1,69 @@
module HMachine
  module Microformat
    # XOXO outline microformat: an ordered or unordered list carrying the
    # "xoxo" or "blogroll" class, exposed as a nested Ruby structure.
    class XOXO < POSH::Base
      FRIENDLY_NAME = "XOXO"
      WIKI_URL = 'http://microformats.org/wiki/xoxo'
      XMDP = 'http://microformats.org/profile/xoxo'

      selector 'ol.xoxo, ul.xoxo, ol.blogroll, ul.blogroll'

      # Recursively convert +node+'s children into an outline Array.
      #
      # * <li> with no element children: its text entries are spliced into
      #   the current level.
      # * <li> with element children, <ol>, <ul>: appended as a nested Array.
      # * <dl>: appended as a Hash of term => definition.
      # * <a>: appended as a Hash with :url, :text and, when present,
      #   :rel, :type and :title.
      # * any other element: its stripped text content.
      # * non-blank text nodes: their stripped content.
      #
      # Fixes relative to the earlier version:
      # * a stray trailing `&&` made the whole `case` part of the `if`
      #   condition; it only worked because every branch was truthy.
      # * text-only items were merged with Array#| (set union), which
      #   silently dropped repeated entries; they are now concatenated.
      # * a <dt> without a following <dd> raised NoMethodError; such terms
      #   are now skipped.
      def self.build_outline(node)
        tree = []
        node.children.each do |child|
          if child.elem?
            case child.node_name
            when 'li'
              if child.children.any? { |li| li.elem? }
                tree << build_outline(child)
              else
                tree.concat(build_outline(child))
              end
            when 'ol', 'ul'
              tree << build_outline(child)
            when 'dl'
              tree << build_definition_list(child)
            when 'a'
              tree << build_link(child)
            else
              tree << child.content.strip
            end
          elsif child.text? && !child.content.strip.empty?
            tree << child.content.strip
          end
        end
        tree
      end

      # Convert a <dl> into a Hash keyed by each <dt>'s stripped text. The
      # value is the following <dd>'s text, or a nested outline when the
      # <dd> contains elements.
      def self.build_definition_list(list)
        definitions = {}
        list.css('dt').each do |term|
          definition = term.next_element
          # Skip terms that are not immediately followed by a definition.
          next unless definition && definition.node_name.eql?('dd')
          has_markup = definition.children.any? { |dd| dd.elem? }
          definitions[term.content.strip] = has_markup ? build_outline(definition) : definition.content.to_s
        end
        definitions
      end

      # Convert an <a> into a Hash of its href, stripped text and any of
      # rel (split into tokens), type and title that are present.
      def self.build_link(anchor)
        link = { :url => anchor['href'], :text => anchor.content.strip }
        link[:rel] = anchor['rel'].split(' ') if anchor['rel']
        link[:type] = anchor['type'] if anchor['type']
        link[:title] = anchor['title'] if anchor['title']
        link
      end

      private_class_method :build_definition_list, :build_link

      # The outline for this node, built once and memoized.
      def outline
        @outline ||= self.class.build_outline(node)
      end

      # Array form is the outline itself.
      def to_a
        outline
      end

      # Index into the top level of the outline.
      def [](index)
        outline[index]
      end

      # True when the list carries the "blogroll" class. A missing class
      # attribute is treated as empty rather than raising.
      def blogroll?
        node['class'].to_s.split.include?('blogroll')
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'hmachine/pattern/url'
2
+ require 'hmachine/pattern/datetime'
3
+ require 'hmachine/pattern/abbr'
4
+ require 'hmachine/pattern/valueclass'
5
+ require 'hmachine/pattern/typevalue'
6
+
module HMachine
  module Pattern
    # Look up the design-pattern module registered under +name+.
    #
    # +name+ is first passed through HMachine.normalize and the result is
    # compared against the symbols below. Raises a RuntimeError naming the
    # original +name+ when nothing matches.
    def self.map(name)
      pattern = case HMachine.normalize(name)
                when :value_class, :valueclass then HMachine::Pattern::ValueClass
                when :abbr then HMachine::Pattern::Abbr
                when :uri, :url then HMachine::Pattern::URL
                when :typevalue then HMachine::Pattern::TypeValue
                end
      raise "#{name} is not a recognized markup design pattern." unless pattern
      pattern
    end
  end
end
@@ -0,0 +1,21 @@
module HMachine
  module Pattern
    # abbr design pattern: the machine-readable value lives in the title
    # attribute of an <abbr> element.
    module Abbr
      extend HMachine
      WIKI_URL = 'http://microformats.org/wiki/abbr-design-pattern'

      # Candidates are every <abbr> that has a title attribute.
      search do |element|
        element.css('abbr[title]')
      end

      # A node qualifies when it is an <abbr> and carries a title.
      validate do |abbr|
        abbr.node_name.eql?('abbr') && abbr['title']
      end

      # For a valid <abbr>, return its title, converted through
      # DateTime.extract_from when DateTime.valid? accepts it. Any other
      # node yields its stripped text content.
      extract do |node|
        next node.content.strip unless valid?(node)

        title = node['title']
        DateTime.valid?(title) ? DateTime.extract_from(title) : title
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ require 'time'
2
+
module HMachine
  module Pattern
    # Recognises date/time strings and normalises them to ISO 8601 text
    # that Time.parse can consume.
    module DateTime
      extend HMachine

      # Is this string a simple date?
      def self.date?(string)
        !date(string).nil?
      end

      # Is this string a simple time?
      def self.time?(string)
        !time(string).nil?
      end

      # Normalize ISO8601 Dates.
      #
      # Returns "YYYY-MM-DD" (zero-padded, as ISO 8601 requires) when the
      # string contains a year plus either a month or an ordinal day of the
      # year; returns nil otherwise, including when the ordinal day is out
      # of range for that year. A missing day of month defaults to 1.
      def self.date(datestring)
        parsed = Date._parse(datestring)
        year = parsed[:year]
        return nil unless year && (parsed[:mon] || parsed[:yday])

        if parsed[:yday]
          ordinal = begin
            Date.ordinal(year, parsed[:yday])
          rescue ArgumentError
            # Day-of-year does not exist in this year.
            nil
          end
          return nil unless ordinal
          month = ordinal.month
          day = ordinal.day
        else
          month = parsed[:mon]
          day = parsed[:mday] || 1
        end
        format('%04d-%02d-%02d', year, month, day)
      end

      # Normalize ISO8601 Times.
      #
      # Returns "THH:MM:SS<zone>" when the string contains an hour, nil
      # otherwise. Missing minutes and seconds default to 0. When the string
      # names no zone, the local UTC offset is used. Time#utc_offset is an
      # Integer number of seconds, so it is rendered as "+HH:MM"/"-HH:MM"
      # rather than appended verbatim.
      def self.time(timestring)
        parsed = Date._parse(timestring)
        return nil unless parsed[:hour]

        zone = parsed[:zone] || utc_offset_string(Time.now.utc_offset)
        format('T%02d:%02d:%02d%s', parsed[:hour], parsed[:min] || 0, parsed[:sec] || 0, zone)
      end

      # Render a UTC offset given in seconds as "+HH:MM" or "-HH:MM".
      def self.utc_offset_string(seconds)
        sign = seconds < 0 ? '-' : '+'
        hours, minutes = (seconds.abs / 60).divmod(60)
        format('%s%02d:%02d', sign, hours, minutes)
      end
      private_class_method :utc_offset_string

      # Build a normalized iso8601 datetime string: the date part followed
      # by the time part, either of which may be absent. Returns an empty
      # string when neither is found.
      def self.iso8601(datetime)
        "#{date(datetime)}#{time(datetime)}"
      end

      # A string is valid when it normalizes to something non-empty that
      # Time.parse accepts. Yields nil for strings with no date or time.
      validate do |datetime|
        stamp = iso8601(datetime)
        unless stamp.empty?
          begin
            Time.parse(stamp).respond_to?(:iso8601)
          rescue ArgumentError
            # An out-of-bounds error means a false positive
            false
          end
        end
      end

      # Convert the string into a Time via its normalized form.
      extract do |datetime|
        Time.parse(iso8601(datetime))
      end
    end
  end
end
@@ -0,0 +1,32 @@
module HMachine
  module Pattern
    # type/value pattern: a property whose markup may contain one or more
    # ".type" sub-elements alongside its value.
    module TypeValue
      extend HMachine

      # Find the outermost ".type" elements: a match whose parent is itself
      # ".type" is discarded.
      search do |doc|
        doc.css('.type').reject {|type| type.parent.matches?('.type') }
      end

      # When +node+ contains ".type" elements, return
      # { :type => type(s), :value => value }, where a single type is
      # returned bare and several as an Array. Otherwise return the value
      # alone.
      #
      # Each type element is unlinked before its text is read and before the
      # value is extracted -- presumably so the type text does not leak into
      # the value; confirm against Nokogiri's Node#unlink. Note this mutates
      # the parsed document.
      #
      # (The unused +types_and_values+ local from the earlier version has
      # been removed.)
      extract do |node|
        if found_in?(node)
          types = find_in(node).collect do |type|
            HMachine.normalize(Pattern::ValueClass.extract_from(type.unlink))
          end
          types = types.first if types.length == 1
          { :type => types, :value => get_value(node) }
        else
          get_value(node)
        end
      end

      # Extract +node+ as a URL when the URL pattern accepts it, otherwise
      # through the value-class pattern.
      def self.get_value(node)
        if Pattern::URL.valid?(node)
          Pattern::URL.extract_from(node)
        else
          Pattern::ValueClass.extract_from(node)
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
module HMachine
  module Pattern
    # URL pattern: reads the address carried by link-like elements.
    module URL
      extend HMachine

      # A node qualifies when it is an <a>/<area> with href, an <img> with
      # src, or an <object> with data.
      validate {|node| node.matches?("a[href] ,area[href], img[src], object[data]") }

      # Return the normalized address of a valid node, or nil when the node
      # is not valid.
      extract do |url|
        if valid?(url)
          value = case url.node_name
                  when 'a', 'area' then url['href']
                  when 'img' then url['src']
                  when 'object' then url['data']
                  end
          normalize(value) if value
        end
      end

      # Normalize +url+ through URI and return it as a String. For mailto:
      # links, return only the address part (scheme and any "?query"
      # removed).
      #
      # Markup in the wild often contains addresses URI cannot parse (for
      # example, ones with unescaped spaces). Instead of raising, such
      # values are returned stripped but otherwise unnormalized. A bare
      # "mailto:" yields an empty string rather than raising NoMethodError.
      def self.normalize(url)
        uri = begin
          URI.parse(url).normalize.to_s
        rescue URI::Error
          url.to_s.strip
        end

        if uri =~ /\Amailto:/i
          uri.sub(/\Amailto:/i, '').split('?').first.to_s
        else
          uri
        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
module HMachine
  module Pattern
    # value-class pattern: a property's value is assembled from nested
    # ".value" / ".value-title" elements when present.
    module ValueClass
      extend HMachine
      WIKI_URL = 'http://microformats.org/wiki/value-class-pattern'

      # Find the outermost value elements: a match whose parent is itself
      # ".value" is discarded.
      search do |element|
        element.css('.value, .value-title[title]').reject {|val| val.parent.matches?('.value') }
      end

      # A node qualifies when it is ".value" or a ".value-title" with a title.
      validate {|value| value.matches?('.value, .value-title[title]') }

      # When +node+ contains value elements, each piece that DateTime
      # accepts is normalized to ISO 8601, the pieces are concatenated, and
      # the result is converted through DateTime.extract_from if DateTime
      # accepts the whole; otherwise the concatenated string is returned.
      # Without value elements, a valid <abbr> is read through the Abbr
      # pattern and anything else through get_text.
      extract do |node|
        if found_in?(node)
          joined = get_values(node).collect { |val| DateTime.valid?(val) ? DateTime.iso8601(val) : val }.join
          DateTime.valid?(joined) ? DateTime.extract_from(joined) : joined
        elsif Abbr.valid?(node)
          Abbr.extract_from(node)
        else
          get_text(node)
        end
      end

      # Collect the stripped text of every value element found in +node+.
      # Per element, in order of precedence: alt of <img>/<area>, data of
      # <object>, title of a valid <abbr> or of a ".value-title" that has a
      # title, and finally the text content.
      #
      # The title branch now requires the attribute to exist; previously an
      # element with class "value-title" but no title raised NoMethodError.
      def self.get_values(node)
        find_in(node).collect do |val|
          alt_or_data_text(val) ||
            if Abbr.valid?(val) || val.matches?('.value-title[title]')
              val['title'].strip
            else
              val.content.strip
            end
        end
      end

      # Stripped text for a single node: alt of <img>/<area>, data of
      # <object>, otherwise the text content.
      def self.get_text(node)
        alt_or_data_text(node) || node.content.strip
      end

      # Stripped alt (for <img>/<area>) or data (for <object>) attribute,
      # or nil when the node is another element or lacks that attribute.
      def self.alt_or_data_text(node)
        if (node.node_name.eql?('img') || node.node_name.eql?('area')) && node['alt']
          node['alt'].strip
        elsif node.node_name.eql?('object') && node['data']
          node['data'].strip
        end
      end
      private_class_method :alt_or_data_text
    end
  end
end