hmachine 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. data/.gitignore +2 -2
  2. data/Gemfile +6 -4
  3. data/Gemfile.lock +51 -0
  4. data/README.md +123 -9
  5. data/Rakefile +12 -3
  6. data/bin/hmachine +99 -0
  7. data/hmachine.gemspec +132 -0
  8. data/lib/hmachine.rb +121 -12
  9. data/lib/hmachine/microformat.rb +39 -20
  10. data/lib/hmachine/microformat/adr.rb +22 -0
  11. data/lib/hmachine/microformat/geo.rb +48 -0
  12. data/lib/hmachine/microformat/hcard.rb +169 -11
  13. data/lib/hmachine/microformat/rellicense.rb +20 -0
  14. data/lib/hmachine/microformat/reltag.rb +38 -0
  15. data/lib/hmachine/microformat/votelinks.rb +42 -0
  16. data/lib/hmachine/microformat/xfn.rb +54 -0
  17. data/lib/hmachine/microformat/xmdp.rb +14 -0
  18. data/lib/hmachine/microformat/xoxo.rb +69 -0
  19. data/lib/hmachine/pattern.rb +26 -0
  20. data/lib/hmachine/pattern/abbr.rb +21 -0
  21. data/lib/hmachine/pattern/datetime.rb +75 -0
  22. data/lib/hmachine/pattern/typevalue.rb +32 -0
  23. data/lib/hmachine/pattern/url.rb +32 -0
  24. data/lib/hmachine/pattern/valueclass.rb +51 -0
  25. data/lib/hmachine/posh.rb +3 -0
  26. data/lib/hmachine/posh/anchor.rb +40 -0
  27. data/lib/hmachine/posh/base.rb +204 -0
  28. data/lib/hmachine/posh/definition_list.rb +41 -0
  29. data/test/fixtures/huffduffer.html +466 -0
  30. data/test/fixtures/likeorhate.html +48 -0
  31. data/test/fixtures/rel_license.html +4 -0
  32. data/test/fixtures/test-fixture/hcard/hcard1.html +147 -0
  33. data/test/fixtures/test-fixture/hcard/hcard11.html +123 -0
  34. data/test/fixtures/test-fixture/hcard/hcard12.html +178 -0
  35. data/test/fixtures/test-fixture/hcard/hcard17.html +165 -0
  36. data/test/fixtures/test-fixture/hcard/hcard2.html +264 -0
  37. data/test/fixtures/test-fixture/hcard/hcard3.html +144 -0
  38. data/test/fixtures/test-fixture/hcard/hcard4.html +117 -0
  39. data/test/fixtures/test-fixture/hcard/hcard5.html +119 -0
  40. data/test/fixtures/test-fixture/hcard/hcard6.html +188 -0
  41. data/test/fixtures/test-fixture/hcard/hcard7.html +188 -0
  42. data/test/fixtures/test-fixture/hcard/hcard8.html +130 -0
  43. data/test/fixtures/test-fixture/hcard/hcard9.html +111 -0
  44. data/test/fixtures/test-fixture/hcard/hcard99.html +215 -0
  45. data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-YYYY-MM-DD--HH-MM.html +9 -0
  46. data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-abbr-YYYY-MM-DD--HH-MM.html +4 -0
  47. data/test/fixtures/xfn.html +198 -0
  48. data/test/fixtures/xmdp.html +32 -0
  49. data/test/fixtures/xoxo.html +51 -0
  50. data/test/hmachine_test.rb +122 -6
  51. data/test/microformat/adr_test.rb +47 -0
  52. data/test/microformat/geo_test.rb +66 -0
  53. data/test/microformat/hcard_test.rb +487 -20
  54. data/test/microformat/rellicense_test.rb +36 -0
  55. data/test/microformat/reltag_test.rb +61 -0
  56. data/test/microformat/votelinks_test.rb +44 -0
  57. data/test/microformat/xfn_test.rb +28 -0
  58. data/test/microformat/xmdp_test.rb +16 -0
  59. data/test/microformat/xoxo_test.rb +51 -0
  60. data/test/microformat_test.rb +12 -34
  61. data/test/pattern/date_time_test.rb +55 -0
  62. data/test/pattern/value_class_test.rb +33 -0
  63. data/test/pattern_test.rb +132 -0
  64. data/test/posh/anchor_test.rb +41 -0
  65. data/test/posh/base_test.rb +150 -0
  66. data/test/posh/definition_list_test.rb +38 -0
  67. data/test/test_helper.rb +24 -6
  68. metadata +93 -15
  69. data/lib/hmachine/microformat/base.rb +0 -17
@@ -0,0 +1,20 @@
1
module HMachine
  module Microformat
    # rel-license microformat: an anchor or link element whose +rel+
    # attribute contains the token "license".
    #
    # +selector+, +validate+ and +url+ come from POSH::Anchor, which is
    # defined outside this file.
    class RelLicense < POSH::Anchor
      FRIENDLY_NAME = "rel-license"
      WIKI_URL = 'http://microformats.org/wiki/rel-license'
      XMDP = 'http://microformats.org/profile/rel-license'

      selector 'a[rel~="license"], link[rel~="license"]'

      # A node qualifies when its rel attribute lists "license" as one of
      # its whitespace-separated tokens. Yields nil when rel is absent.
      validate do |anchor|
        rel = anchor['rel']
        rel && rel.split.include?('license')
      end

      # The license is simply the URL the element points at.
      alias_method :license, :url

      # String form of this microformat: the license URL.
      def to_s
        license
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
module HMachine
  module Microformat
    # rel-tag microformat: an anchor whose +rel+ attribute contains the
    # token "tag". The tag name is taken from the last path segment of the
    # anchor's href.
    #
    # +selector+, +validate+ and +node+ come from POSH::Anchor, which is
    # defined outside this file.
    class RelTag < POSH::Anchor
      FRIENDLY_NAME = "rel-tag"
      WIKI_URL = "http://microformats.org/wiki/rel-tag"
      XMDP = 'http://microformats.org/profile/rel-tag'

      selector 'a[rel~="tag"]'

      # A node qualifies when "tag" is one of the whitespace-separated
      # tokens of its rel attribute.
      validate {|a| a['rel'] && a['rel'].split.include?('tag') }

      # Single-entry hash mapping the tag name (last path segment of the
      # href) to the full href. Memoized.
      def tag
        @tag ||= { node['href'].split('/').last => node['href'] }
      end

      # The tag name as a String.
      #
      # Uses +first+ rather than Array#to_s: since Ruby 1.9 Array#to_s
      # returns the inspect form (e.g. '["tech"]') instead of joining the
      # elements, which would corrupt the name.
      def name
        tag.keys.first
      end

      # String form of this microformat: the tag name.
      def to_s
        name
      end

      # The URL the tag links to (see #name for why +first+ is used).
      def url
        tag.values.first
      end

      # Hash form of this microformat: { name => url }.
      def to_h
        tag
      end

      def inspect
        "<#{self.class}:#{hash}: '#{tag}'>"
      end

    end
  end
end
@@ -0,0 +1,42 @@
1
module HMachine
  module Microformat
    # VoteLinks microformat: an anchor whose +rev+ attribute contains one
    # of the tokens vote-for, vote-against or vote-abstain.
    #
    # +selector+, +validate+, +node+, +url+ and +title+ come from
    # POSH::Anchor, which is defined outside this file.
    class VoteLinks < POSH::Anchor
      FRIENDLY_NAME = "VoteLinks"
      WIKI_URL = 'http://microformats.org/wiki/vote-links'
      XMDP = 'http://microformats.org/profile/vote-links'

      selector 'a[rev~="vote-for"], a[rev~="vote-against"], a[rev~="vote-abstain"]'

      # A node qualifies when its rev attribute contains at least one of
      # the three vote tokens; false when rev is absent.
      #
      # The check is an intersection test. The previous reject/empty? form
      # was inverted: it accepted any rev value unless all three tokens
      # were present. No +return+ is used, because +return+ inside a block
      # can raise LocalJumpError depending on how the block is invoked.
      validate do |a|
        rev = a['rev']
        rev ? !(%w(vote-for vote-against vote-abstain) & rev.split).empty? : false
      end

      # Single-entry hash mapping the vote type to [url, title], with nil
      # entries dropped. Memoized.
      def vote
        @vote ||= { type => [url, title].compact }
      end

      # The first rev token that starts with "vote-", or nil if none does.
      def type
        node['rev'].split(' ').find { |token| token.index('vote-') == 0 }
      end

      # True when this is a vote in favour.
      def for?
        type == 'vote-for'
      end

      # True when this is a vote against.
      def against?
        type == 'vote-against'
      end

      # True when this is an abstention.
      def abstain?
        type == 'vote-abstain'
      end

    end
  end
end
@@ -0,0 +1,54 @@
1
module HMachine
  module Microformat
    # XFN (XHTML Friends Network): an anchor whose +rel+ attribute contains
    # one or more of the relationship tokens listed below.
    #
    # +search+, +validate+, +extract_from+ and +rel+ come from
    # POSH::Anchor / HMachine, which are defined outside this file.
    class XFN < POSH::Anchor
      FRIENDLY_NAME = "XFN"
      WIKI_URL = 'http://microformats.org/wiki/XFN'
      XMDP = 'http://gmpg.org/xfn/11'

      # Relationship tokens grouped by category.
      @@friendship = %w( contact acquaintance friend)
      @@physical = %w( met )
      @@professional = %w( co-worker colleague )
      @@geographical = %w( co-resident neighbor )
      @@family = %w( child parent sibling spouse kin )
      @@romantic = %w( muse crush date sweetheart )
      @@identity = %w( me )

      @@relationships = @@friendship + @@physical + @@professional + @@geographical + @@family + @@romantic + @@identity

      # Find every anchor carrying at least one known relationship token.
      search do |doc|
        doc.css @@relationships.collect {|rel| "a[rel~='#{rel}']" }.join(', ')
      end

      # A node qualifies when its rel attribute contains at least one known
      # relationship token; false when rel is absent.
      #
      # The check is an intersection test. The previous reject/empty? form
      # was inverted: it accepted any rel value unless every known token
      # was present. No +return+ is used, because +return+ inside a block
      # can raise LocalJumpError depending on how the block is invoked.
      validate do |a|
        rel = a['rel']
        rel ? !(@@relationships & rel.split).empty? : false
      end

      # Performant way to parse identity relationships.
      #
      # Looks only at rel~="me" anchors. Returns nil when there are none,
      # the single extracted value when there is exactly one, and an Array
      # of extracted values otherwise.
      def self.parse_me(document)
        nodes = document.css("a[rel~='me']")
        if !nodes.empty?
          contents = nodes.collect do |node|
            extract_from(node)
          end
          (contents.length == 1) ? contents.first : contents
        end
      end

      # Define one predicate per category (friendship?, family?, ...) that
      # is true when this link's rel tokens intersect that category.
      %w(friendship physical professional geographical family romantic identity).each do |type|
        class_eval %Q{
          def #{type}?
            !(@@#{type} & rel).empty?
          end
        }
      end
      alias me? identity?
      alias met? physical?

      def inspect
        "<#{self.class}:#{hash}: '#{rel.join(', ')}'>"
      end

    end
  end
end
@@ -0,0 +1,14 @@
1
module HMachine
  module Microformat
    # XMDP (XHTML Meta Data Profile): a definition list marked up as
    # <dl class="profile">.
    #
    # +search+ and +validate+ come from POSH::DefinitionList / HMachine,
    # which are defined outside this file.
    class XMDP < POSH::DefinitionList
      FRIENDLY_NAME = "XMDP"
      WIKI_URL = 'http://microformats.org/wiki/XMDP'
      XMDP = 'http://gmpg.org/xmdp/1'

      # Locate every profile definition list under the given document.
      search do |doc|
        doc.css('dl.profile')
      end

      # A node qualifies when it is itself a dl with the "profile" class.
      validate do |dl|
        dl.matches?('dl.profile')
      end

    end
  end
end
@@ -0,0 +1,69 @@
1
module HMachine
  module Microformat
    # XOXO outline microformat: an ordered or unordered list with the class
    # "xoxo" or "blogroll", exposed as a nested Ruby structure.
    #
    # +selector+ and +node+ come from POSH::Base, which is defined outside
    # this file.
    class XOXO < POSH::Base
      FRIENDLY_NAME = "XOXO"
      WIKI_URL = 'http://microformats.org/wiki/xoxo'
      XMDP = 'http://microformats.org/profile/xoxo'

      selector 'ol.xoxo, ul.xoxo, ol.blogroll, ul.blogroll'

      # Recursively convert +node+'s children into an outline Array.
      #
      # * li with no element children -> its text entries are merged into
      #   the current level
      # * li with element children, ol, ul -> nested Array
      # * dl -> Hash of term => definition
      # * a  -> Hash with :url, :text and optional :rel, :type, :title
      # * any other element -> its stripped text content
      # * non-blank text node -> its stripped text
      def self.build_outline(node)
        tree = []
        node.children.each do |child|
          if child.elem?
            case child.node_name
            when 'li'
              if child.children.any? {|li| li.elem? }
                tree << build_outline(child)
              else
                # NOTE(review): Array#| also removes duplicate entries, so
                # two text-only items with identical text collapse into
                # one. Kept as-is; confirm whether that is intended.
                tree = tree | build_outline(child)
              end
            when 'ol', 'ul'
              tree << build_outline(child)
            when 'dl'
              tree << build_definition_list(child)
            when 'a'
              tree << build_link(child)
            else
              tree << child.content.strip
            end
          elsif child.text? && !child.content.strip.empty?
            tree << child.content.strip
          end
        end
        tree
      end

      # Convert a dl element into a Hash keyed by stripped dt text. The
      # value is the dd's text when the dd has no element children, or a
      # nested outline otherwise.
      #
      # A dt that is not immediately followed by a dd is skipped; it used
      # to raise NoMethodError on nil.
      def self.build_definition_list(dl)
        definition_list = {}
        dl.css('dt').each do |key|
          definition = key.next_element
          next unless definition && definition.node_name.eql?('dd')
          definition_contents = definition.children.select {|dd| dd.elem? }
          definition_list[key.content.strip] =
            definition_contents.empty? ? definition.content.to_s : build_outline(definition)
        end
        definition_list
      end
      private_class_method :build_definition_list

      # Convert an anchor element into a Hash describing the link. Optional
      # attributes are only included when present on the element.
      def self.build_link(anchor)
        link = { :url => anchor['href'], :text => anchor.content.strip }
        link[:rel] = anchor['rel'].split(' ') if anchor['rel']
        link[:type] = anchor['type'] if anchor['type']
        link[:title] = anchor['title'] if anchor['title']
        link
      end
      private_class_method :build_link

      # The outline for this list. Memoized.
      def outline
        @outline ||= self.class.build_outline(node)
      end

      # Array form of this microformat: the outline.
      def to_a
        outline
      end

      # Index into the top level of the outline.
      def [](index)
        outline[index]
      end

      # True when the list carries the "blogroll" class. +to_s+ guards
      # against a node without a class attribute.
      def blogroll?
        node['class'].to_s.split.include?('blogroll')
      end

    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'hmachine/pattern/url'
2
+ require 'hmachine/pattern/datetime'
3
+ require 'hmachine/pattern/abbr'
4
+ require 'hmachine/pattern/valueclass'
5
+ require 'hmachine/pattern/typevalue'
6
+
7
module HMachine
  module Pattern

    # Look up the design-pattern module registered under +name+.
    #
    # +name+ is first passed through HMachine.normalize (defined outside
    # this file); the result is matched against the known pattern symbols.
    # Raises a RuntimeError when the name matches no pattern.
    def self.map(name)
      pattern =
        case HMachine.normalize(name)
        when :value_class, :valueclass then ValueClass
        when :abbr                     then Abbr
        when :uri, :url                then URL
        when :typevalue                then TypeValue
        end
      return pattern if pattern

      raise "#{name} is not a recognized markup design pattern."
    end

  end
end
@@ -0,0 +1,21 @@
1
module HMachine
  module Pattern
    # abbr design pattern: the machine-readable value lives in the title
    # attribute of an <abbr> element.
    #
    # +search+, +validate+, +extract+ and +valid?+ come from HMachine,
    # which is defined outside this file.
    module Abbr
      extend HMachine
      WIKI_URL = 'http://microformats.org/wiki/abbr-design-pattern'

      # Every abbr element that carries a title.
      search do |element|
        element.css('abbr[title]')
      end

      # A node qualifies when it is an abbr element with a title attribute.
      validate do |abbr|
        abbr.node_name.eql?('abbr') && abbr['title']
      end

      # For a valid abbr, yield its title, converted through
      # Pattern::DateTime when the title is a valid datetime. For any other
      # node, yield the stripped text content.
      extract do |node|
        if valid?(node)
          title = node['title']
          if DateTime.valid?(title)
            DateTime.extract_from(title)
          else
            title
          end
        else
          node.content.strip
        end
      end

    end
  end
end
@@ -0,0 +1,75 @@
1
+ require 'time'
2
+
3
module HMachine
  module Pattern
    # Date/time design pattern: recognises date and time strings and
    # rebuilds them into a single string that Time.parse can read.
    #
    # +validate+ and +extract+ come from HMachine, which is defined
    # outside this file. Inside this module +Date+ and +Time+ refer to the
    # top-level Ruby classes.
    module DateTime
      extend HMachine

      # Is this string a simple date?
      def self.date?(string)
        !date(string).nil?
      end

      # Is this string a simple time?
      def self.time?(string)
        !time(string).nil?
      end

      # Normalize ISO8601 Dates
      #
      # Returns "year-month-day" when +datestring+ contains a year and
      # either a month or an ordinal day-of-year, otherwise nil. A missing
      # day of month defaults to 1. Components are not zero-padded.
      def self.date(datestring)
        datetime = Date._parse(datestring)
        if !datetime.empty? && datetime[:year] && (datetime[:mon] || datetime[:yday])
          year = datetime[:year]
          if datetime[:yday]
            # An out-of-range ordinal day yields no date at all.
            ordinal = Date.ordinal(year, datetime[:yday]) rescue nil
            if ordinal
              month = ordinal.month
              day = ordinal.day
            end
          else
            month = datetime[:mon]
            day = datetime[:mday] || 1
          end
          "#{year}-#{month}-#{day}" if (month && day)
        end
      end

      # Normalize ISO8601 Times
      #
      # Returns "Thour:min:sec<zone>" when +timestring+ contains an hour,
      # otherwise nil. Missing minutes and seconds default to 0. A missing
      # zone defaults to the local UTC offset.
      def self.time(timestring)
        datetime = Date._parse(timestring)
        if !datetime.empty? && datetime[:hour]
          hour = datetime[:hour]
          min = datetime[:min] || 0
          sec = datetime[:sec] || 0
          zone = datetime[:zone] || local_zone
          "T#{hour}:#{min}:#{sec}#{zone}"
        end
      end

      # The local UTC offset formatted as "+HH:MM" / "-HH:MM".
      #
      # Time#utc_offset is an Integer number of seconds (e.g. -28800).
      # Appending that number directly to a time string produces a zone
      # suffix that is not a valid offset, so it is converted here.
      def self.local_zone
        offset = Time.now.utc_offset
        sign = offset < 0 ? '-' : '+'
        hours, remainder = offset.abs.divmod(3600)
        '%s%02d:%02d' % [sign, hours, remainder / 60]
      end

      # Build a normalized iso8601 datetime string
      #
      # Concatenates the date part and the time part; either may be empty.
      def self.iso8601(datetime)
        datestamp = date(datetime) || ''
        timestamp = time(datetime) || ''
        datestamp + timestamp
      end

      # A string qualifies when it normalizes to something non-empty that
      # Time.parse accepts. Yields nil when nothing could be normalized.
      validate do |datetime|
        if !iso8601(datetime).empty?
          begin
            Time.parse(iso8601(datetime)).respond_to?(:iso8601)
          rescue ArgumentError
            # An out-of-bounds error means a false positive
            false
          end
        end
      end

      # Convert the string into a Time via its normalized form.
      extract do |datetime|
        Time.parse(iso8601(datetime))
      end

    end
  end
end
@@ -0,0 +1,32 @@
1
module HMachine
  module Pattern
    # type/value design pattern: a property that carries one or more
    # ".type" sub-elements alongside its value.
    #
    # +search+, +extract+, +found_in?+ and +find_in+ come from HMachine,
    # which is defined outside this file.
    module TypeValue
      extend HMachine

      # Every ".type" element whose parent is not itself a ".type".
      search do |doc|
        doc.css('.type').reject {|type| type.parent.matches?('.type') }
      end

      # When the node contains type elements, yield
      # { :type => type-or-types, :value => value }; otherwise yield just
      # the value. A single type is returned bare rather than in an Array.
      extract do |node|
        if found_in?(node)
          # Each type element is unlinked as it is read, and this happens
          # before get_value(node) runs. Presumably this keeps the type
          # text out of the value; confirm against Nokogiri's Node#unlink.
          types = find_in(node).collect do |type|
            HMachine.normalize Pattern::ValueClass.extract_from(type.unlink)
          end
          types = types.first if types.length == 1
          {:type => types, :value => get_value(node)}
        else
          get_value(node)
        end
      end

      # The node's value: its URL when the node is a valid URL-bearing
      # element, otherwise whatever the value-class pattern extracts.
      def self.get_value(node)
        if Pattern::URL.valid?(node)
          Pattern::URL.extract_from(node)
        else
          Pattern::ValueClass.extract_from(node)
        end
      end

    end
  end
end
@@ -0,0 +1,32 @@
1
require 'uri'

module HMachine
  module Pattern
    # URL design pattern: pulls the URL out of an element's href, src or
    # data attribute.
    #
    # +validate+, +extract+ and +valid?+ come from HMachine, which is
    # defined outside this file.
    module URL
      extend HMachine

      # A node qualifies when it is an a/area with href, an img with src,
      # or an object with data.
      validate {|node| node.matches?("a[href] ,area[href], img[src], object[data]") }

      # Yield the normalized URL for a valid node, or nil otherwise.
      extract do |url|
        if valid?(url)
          value = case url.node_name
                  when 'a', 'area' then url['href']
                  when 'img'       then url['src']
                  when 'object'    then url['data']
                  end
          normalize(value) if value
        end
      end

      # Normalize a URL string.
      #
      # Surrounding whitespace is stripped and the URL is run through
      # URI#normalize. A mailto: URL is reduced to its address part (the
      # text between "mailto:" and any "?" query).
      #
      # Attribute values that URI cannot parse are returned stripped but
      # otherwise unchanged, so one malformed link does not abort parsing
      # of the whole document.
      def self.normalize(url)
        raw = url.strip
        uri = URI.parse(raw).normalize.to_s
        if uri.index('mailto:').eql?(0)
          uri.sub(/\Amailto:/, '').split('?').first
        else
          uri
        end
      rescue URI::Error
        raw
      end

    end
  end
end
@@ -0,0 +1,51 @@
1
module HMachine
  module Pattern
    # value-class design pattern: a property's value is assembled from its
    # ".value" / ".value-title" descendants.
    #
    # +search+, +validate+, +extract+, +found_in?+ and +find_in+ come from
    # HMachine, which is defined outside this file.
    module ValueClass
      extend HMachine
      WIKI_URL = 'http://microformats.org/wiki/value-class-pattern'

      # Every value element whose parent is not itself a ".value".
      search do |element|
        candidates = element.css('.value, .value-title[title]')
        candidates.reject { |val| val.parent.matches?('.value') }
      end

      # A node qualifies when it is a ".value" or a titled ".value-title".
      validate do |value|
        value.matches?('.value, .value-title[title]')
      end

      # With value elements present: normalize each datetime-looking piece,
      # concatenate all pieces, and convert the result to a Time when the
      # whole is a valid datetime. Otherwise fall back to the abbr pattern,
      # then to the node's plain text.
      extract do |node|
        if found_in?(node)
          pieces = get_values(node).map do |val|
            DateTime.valid?(val) ? DateTime.iso8601(val) : val
          end
          combined = pieces.join
          if DateTime.valid?(combined)
            DateTime.extract_from(combined)
          else
            combined
          end
        elsif Abbr.valid?(node)
          Abbr.extract_from(node)
        else
          get_text(node)
        end
      end

      # The stripped text of each value element found in +node+. The source
      # of the text depends on the element: alt for img/area, data for
      # object, title for abbr and ".value-title", text content otherwise.
      def self.get_values(node)
        find_in(node).map do |val|
          tag = val.node_name
          if %w(img area).include?(tag) && val['alt']
            val['alt'].strip
          elsif tag.eql?('object') && val['data']
            val['data'].strip
          elsif Abbr.valid?(val) || val.matches?('.value-title')
            val['title'].strip
          else
            val.content.strip
          end
        end
      end

      # The stripped text of +node+ itself: alt for img/area, data for
      # object, text content otherwise.
      def self.get_text(node)
        tag = node.node_name
        if %w(img area).include?(tag) && node['alt']
          node['alt'].strip
        elsif tag.eql?('object') && node['data']
          node['data'].strip
        else
          node.content.strip
        end
      end

    end
  end
end