hmachine 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. data/.gitignore +2 -2
  2. data/Gemfile +6 -4
  3. data/Gemfile.lock +51 -0
  4. data/README.md +123 -9
  5. data/Rakefile +12 -3
  6. data/bin/hmachine +99 -0
  7. data/hmachine.gemspec +132 -0
  8. data/lib/hmachine.rb +121 -12
  9. data/lib/hmachine/microformat.rb +39 -20
  10. data/lib/hmachine/microformat/adr.rb +22 -0
  11. data/lib/hmachine/microformat/geo.rb +48 -0
  12. data/lib/hmachine/microformat/hcard.rb +169 -11
  13. data/lib/hmachine/microformat/rellicense.rb +20 -0
  14. data/lib/hmachine/microformat/reltag.rb +38 -0
  15. data/lib/hmachine/microformat/votelinks.rb +42 -0
  16. data/lib/hmachine/microformat/xfn.rb +54 -0
  17. data/lib/hmachine/microformat/xmdp.rb +14 -0
  18. data/lib/hmachine/microformat/xoxo.rb +69 -0
  19. data/lib/hmachine/pattern.rb +26 -0
  20. data/lib/hmachine/pattern/abbr.rb +21 -0
  21. data/lib/hmachine/pattern/datetime.rb +75 -0
  22. data/lib/hmachine/pattern/typevalue.rb +32 -0
  23. data/lib/hmachine/pattern/url.rb +32 -0
  24. data/lib/hmachine/pattern/valueclass.rb +51 -0
  25. data/lib/hmachine/posh.rb +3 -0
  26. data/lib/hmachine/posh/anchor.rb +40 -0
  27. data/lib/hmachine/posh/base.rb +204 -0
  28. data/lib/hmachine/posh/definition_list.rb +41 -0
  29. data/test/fixtures/huffduffer.html +466 -0
  30. data/test/fixtures/likeorhate.html +48 -0
  31. data/test/fixtures/rel_license.html +4 -0
  32. data/test/fixtures/test-fixture/hcard/hcard1.html +147 -0
  33. data/test/fixtures/test-fixture/hcard/hcard11.html +123 -0
  34. data/test/fixtures/test-fixture/hcard/hcard12.html +178 -0
  35. data/test/fixtures/test-fixture/hcard/hcard17.html +165 -0
  36. data/test/fixtures/test-fixture/hcard/hcard2.html +264 -0
  37. data/test/fixtures/test-fixture/hcard/hcard3.html +144 -0
  38. data/test/fixtures/test-fixture/hcard/hcard4.html +117 -0
  39. data/test/fixtures/test-fixture/hcard/hcard5.html +119 -0
  40. data/test/fixtures/test-fixture/hcard/hcard6.html +188 -0
  41. data/test/fixtures/test-fixture/hcard/hcard7.html +188 -0
  42. data/test/fixtures/test-fixture/hcard/hcard8.html +130 -0
  43. data/test/fixtures/test-fixture/hcard/hcard9.html +111 -0
  44. data/test/fixtures/test-fixture/hcard/hcard99.html +215 -0
  45. data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-YYYY-MM-DD--HH-MM.html +9 -0
  46. data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-abbr-YYYY-MM-DD--HH-MM.html +4 -0
  47. data/test/fixtures/xfn.html +198 -0
  48. data/test/fixtures/xmdp.html +32 -0
  49. data/test/fixtures/xoxo.html +51 -0
  50. data/test/hmachine_test.rb +122 -6
  51. data/test/microformat/adr_test.rb +47 -0
  52. data/test/microformat/geo_test.rb +66 -0
  53. data/test/microformat/hcard_test.rb +487 -20
  54. data/test/microformat/rellicense_test.rb +36 -0
  55. data/test/microformat/reltag_test.rb +61 -0
  56. data/test/microformat/votelinks_test.rb +44 -0
  57. data/test/microformat/xfn_test.rb +28 -0
  58. data/test/microformat/xmdp_test.rb +16 -0
  59. data/test/microformat/xoxo_test.rb +51 -0
  60. data/test/microformat_test.rb +12 -34
  61. data/test/pattern/date_time_test.rb +55 -0
  62. data/test/pattern/value_class_test.rb +33 -0
  63. data/test/pattern_test.rb +132 -0
  64. data/test/posh/anchor_test.rb +41 -0
  65. data/test/posh/base_test.rb +150 -0
  66. data/test/posh/definition_list_test.rb +38 -0
  67. data/test/test_helper.rb +24 -6
  68. metadata +93 -15
  69. data/lib/hmachine/microformat/base.rb +0 -17
@@ -1,21 +1,130 @@
1
- require 'uri'
1
+ require 'open-uri'
2
2
  require 'nokogiri'
3
3
 
4
- require 'hmachine/microformat'
5
-
6
4
  module HMachine
7
- VERSION = "0.0.1"
5
+ VERSION = "0.1.0"
6
+ PRODID = "-//markwunsch.com//hMachine #{VERSION}//EN"
7
+
8
+ # Convenience wrapper around HMachine::Microformat.find
9
+ def self.find(document, format=nil)
10
+ HMachine::Microformat.find(document, format)
11
+ end
12
+
13
+ # Get a string of html or a url and convert it to a Nokogiri Document
14
+ def self.get(html)
15
+ return html if html.is_a?(Nokogiri::XML::Node)
16
+ begin
17
+ url = URI.parse(html)
18
+ doc = url.is_a?(URI::HTTP) ? get_url(url.normalize.to_s) : get_document(html)
19
+ rescue URI::InvalidURIError
20
+ doc = get_document(html)
21
+ end
22
+ doc
23
+ end
24
+
25
+ # Open a URL and convert the contents to a Nokogiri Document
26
+ def self.get_url(url)
27
+ uri = URI.parse(url)
28
+ doc = ''
29
+ uri.open do |web|
30
+ web.each_line {|line| doc += line }
31
+ end
32
+ get_document(doc, url)
33
+ end
34
+
35
+ # Convert HTML to a Nokogiri Document
36
+ def self.get_document(html, url=nil)
37
+ html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri::HTML.parse(html, url)
38
+ end
39
+
40
+ def self.normalize(name)
41
+ name.to_s.strip.downcase.intern
42
+ end
43
+
44
+ # Map a key to an element or design pattern
45
+ def self.map(key)
46
+ case normalize(key)
47
+ when :value_class, :valueclass, :abbr, :uri, :url, :typevalue
48
+ HMachine::Pattern.map(key)
49
+ when :hcard, :geo, :rellicense, :reltag, :votelinks, :xfn, :xmdp, :xoxo, :adr
50
+ HMachine::Microformat.map(key)
51
+ when :base
52
+ HMachine::POSH::Base
53
+ else
54
+ raise "#{key} is not a recognized parser."
55
+ end
56
+ end
8
57
 
9
- def self.find(document)
10
- html = get_document(document)
11
- Microformat.find_all html
58
+ # Get/Set a function that defines how to find an element in a node.
59
+ # The Search function should return a Nokogiri::XML::NodeSet.
60
+ # eg. <tt>search {|node| node.css(element) }</tt>
61
+ def search(&block)
62
+ @search = block if block_given?
63
+ @search || lambda {|node| node }
12
64
  end
13
65
 
14
- def self.find_with_url(url)
15
- # open url and call find method on resulting document
66
+ # Search for the element in a document
67
+ def find_in(document)
68
+ search.call(document)
16
69
  end
17
70
 
18
- def self.get_document(html)
19
- html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri::HTML.parse(html)
71
+ # Is the element found in node?
72
+ def found_in?(node)
73
+ find_in(node).eql?(node) || !find_in(node).empty?
20
74
  end
21
- end
75
+
76
+ # Get/Set a function that tests to make sure a given node is
77
+ # the element we want. Should return truthy.
78
+ # The default checks that the node is among the children of whatever the search function finds in the node's parent.
79
+ def validate(&block)
80
+ @validate = block if block_given?
81
+ @validate || lambda { |node| find_in(node.parent).children.include?(node) }
82
+ end
83
+
84
+ # Is this a valid node?
85
+ def valid?(node)
86
+ validate.call(node)
87
+ end
88
+
89
+ # Define the pattern used to extract contents from node
90
+ # Can be a symbol that maps to an Element parser, or a block
91
+ def extract(pattern = nil, &block)
92
+ if block_given?
93
+ @extract = block
94
+ else
95
+ @extract = HMachine.map(pattern).extract if pattern
96
+ end
97
+ @extract || lambda{|node| node.content.strip }
98
+ end
99
+
100
+ # Extract the content from the node
101
+ def extract_from(node)
102
+ extract.call(node)
103
+ end
104
+
105
+ # Parse the document, finding every instance of the desired element and extracting its contents
106
+ def parse(document)
107
+ if found_in?(document)
108
+ contents = if find_in(document).respond_to?(:collect)
109
+ find_in(document).collect { |element| extract_from(element) }
110
+ else
111
+ extract_from(document)
112
+ end
113
+ return contents.first if contents.respond_to?(:length) && (contents.length == 1)
114
+ contents
115
+ end
116
+ end
117
+
118
+ # Parse the document, extracting the content for the first instance of the element
119
+ def parse_first(document)
120
+ if found_in?(document)
121
+ elements = find_in(document)
122
+ extract_from elements.respond_to?(:first) ? elements.first : elements
123
+ end
124
+ end
125
+
126
+ end
127
+
128
+ require 'hmachine/pattern'
129
+ require 'hmachine/posh'
130
+ require 'hmachine/microformat'
@@ -1,30 +1,49 @@
1
- require 'hmachine/microformat/base'
2
- require 'hmachine/microformat/hcard'
3
-
4
1
  module HMachine
5
2
  module Microformat
6
-
7
- def self.find_hcard(html)
8
- doc = HMachine.get_document(html)
9
- find_in_node(HCard, doc)
3
+
4
+ def self.map(name)
5
+ map = microformats[HMachine.normalize(name)]
6
+ raise "#{name} is not a recognized microformat." unless map
7
+ map
10
8
  end
11
-
12
- def self.find_all(html)
13
- find_hcard html
9
+
10
+ def self.microformats
11
+ { :hcard => HMachine::Microformat::HCard,
12
+ :geo => HMachine::Microformat::Geo,
13
+ :adr => HMachine::Microformat::Adr,
14
+ :rellicense => HMachine::Microformat::RelLicense,
15
+ :reltag => HMachine::Microformat::RelTag,
16
+ :votelinks => HMachine::Microformat::VoteLinks,
17
+ :xfn => HMachine::Microformat::XFN,
18
+ :xmdp => HMachine::Microformat::XMDP,
19
+ :xoxo => HMachine::Microformat::XOXO }
14
20
  end
15
-
16
- def self.find_in_node(microformat, node)
17
- microformats = []
18
- node.css(microformat::ROOT_SELECTOR).each do |node|
19
- microformats << create_for_node(microformat, node) if microformat.validate(node)
21
+
22
+ def self.find(html, uformat = nil)
23
+ if uformat
24
+ map(uformat).parse HMachine.get(html)
25
+ else
26
+ find_all(html)
20
27
  end
21
- microformats
22
28
  end
23
29
 
24
- def self.create_for_node(microformat, node)
25
- return unless microformat.validate(node)
26
- microformat.new node
30
+ def self.find_all(html)
31
+ doc = HMachine.get(html)
32
+ uformats = microformats.values.collect do |uf|
33
+ uf.parse(doc)
34
+ end
35
+ uformats.compact.flatten
27
36
  end
28
37
 
29
38
  end
30
- end
39
+ end
40
+
41
+ require 'hmachine/microformat/reltag'
42
+ require 'hmachine/microformat/rellicense'
43
+ require 'hmachine/microformat/votelinks'
44
+ require 'hmachine/microformat/xoxo'
45
+ require 'hmachine/microformat/xmdp'
46
+ require 'hmachine/microformat/xfn'
47
+ require 'hmachine/microformat/geo'
48
+ require 'hmachine/microformat/adr'
49
+ require 'hmachine/microformat/hcard'
@@ -0,0 +1,22 @@
1
+ module HMachine
2
+ module Microformat
3
+ class Adr < POSH::Base
4
+ FRIENDLY_NAME = "adr"
5
+ WIKI_URL = 'http://microformats.org/wiki/adr'
6
+
7
+ name :adr
8
+
9
+ # http://microformats.org/wiki/adr-singular-properties
10
+ has_one :post_office_box, :postal_code
11
+ has_many :street_address, :locality, :region, :extended_address, :country_name
12
+
13
+ has_many :type do
14
+ extract do |node|
15
+ value = Pattern::ValueClass.extract_from(node)
16
+ HMachine.normalize(value) if value
17
+ end
18
+ end
19
+
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,48 @@
1
+ module HMachine
2
+ module Microformat
3
+ class Geo < POSH::Base
4
+ FRIENDLY_NAME = "geo"
5
+ WIKI_URL = 'http://microformats.org/wiki/geo'
6
+
7
+ name :geo
8
+
9
+ has_one :latitude do
10
+ search do |geo|
11
+ lat = geo.css(".#{name}")
12
+ !lat.empty? ? lat : geo
13
+ end
14
+
15
+ extract do |geo|
16
+ if geo['class'] && geo['class'].split.include?("#{name}")
17
+ HMachine::Pattern::ValueClass.extract_from(geo)
18
+ else
19
+ HMachine::Pattern::ValueClass.extract_from(geo).split(';')[0]
20
+ end
21
+ end
22
+ end
23
+
24
+ has_one :longitude do
25
+ search do |geo|
26
+ long = geo.css(".#{name}")
27
+ !long.empty? ? long : geo
28
+ end
29
+
30
+ extract do |geo|
31
+ if geo['class'] && geo['class'].split.include?("#{name}")
32
+ HMachine::Pattern::ValueClass.extract_from(geo)
33
+ else
34
+ HMachine::Pattern::ValueClass.extract_from(geo).split(';')[1]
35
+ end
36
+ end
37
+ end
38
+
39
+ alias lat latitude
40
+ alias long longitude
41
+
42
+ def to_google_maps
43
+ "http://maps.google.com/?q=#{latitude},#{longitude}"
44
+ end
45
+
46
+ end
47
+ end
48
+ end
@@ -1,24 +1,182 @@
1
1
  module HMachine
2
2
  module Microformat
3
- class HCard < Base
4
-
5
- ROOT_CLASS = "vcard"
6
- ROOT_SELECTOR = ".#{ROOT_CLASS}"
3
+ class HCard < POSH::Base
4
+ FRIENDLY_NAME = "hCard"
7
5
  WIKI_URL = "http://microformats.org/wiki/hcard"
6
+ XMDP = 'http://microformats.org/profile/hcard'
7
+
8
+ name :vcard
9
+
10
+ has_one :fn, :bday, :tz, :sort_string, :uid, :rev
11
+ alias birthday bday
8
12
 
9
- def initialize(node)
10
- raise "hCard not found in node" unless self.class.validate(node)
11
- @node = node
13
+ has_many :agent, :category, :key, :label,
14
+ :mailer, :nickname, :note, :role, :sound,
15
+ :title
16
+
17
+ has_many :logo, :photo, :url do
18
+ extract :url
12
19
  end
13
20
 
14
- def to_vcard
15
- # convert to vcard
21
+ has_one :geo do
22
+ extract :geo
23
+ end
24
+
25
+ has_many :email, :tel do
26
+ extract :typevalue
27
+ end
28
+
29
+ has_many :adr do
30
+ extract :adr
31
+ end
32
+ alias address adr
33
+
34
+ has_many :org do
35
+ has_one :organization_unit
36
+ has_one :organization_name do
37
+ search do |org|
38
+ org_name = org.css(".organization-name")
39
+ !org_name.empty? ? org_name : org
40
+ end
41
+ end
16
42
  end
17
43
 
18
- def self.infer_n_from_fn(fn)
19
- # ...
44
+ has_one :n do
45
+ search do |doc|
46
+ n = doc.css(".#{name}")
47
+ !n.empty? ? n : parent.properties[:fn].find_in(doc)
48
+ end
49
+
50
+ # N Optimization from Sumo:
51
+ # http://www.danwebb.net/2007/2/9/sumo-a-generic-microformats-parser-for-javascript
52
+ # See: http://microformats.org/wiki/hcard#Implied_.22n.22_Optimization
53
+ has_many :family_name, :given_name do
54
+ search do |node|
55
+ name_parts = node.css(".#{name}".gsub('_','-'))
56
+ !name_parts.empty? ? name_parts : node
57
+ end
58
+
59
+ extract do |node|
60
+ if node['class'].split.include?("#{name}".gsub('_','-'))
61
+ HMachine::Pattern::ValueClass.extract_from(node)
62
+ else
63
+ fn = parent.parent[:fn].extract_from(node)
64
+ if (fn =~ /^(\w+) (\w+)$/)
65
+ if name.eql? :given_name
66
+ Regexp.last_match[1]
67
+ elsif name.eql? :family_name
68
+ Regexp.last_match[2]
69
+ end
70
+ elsif (fn =~ /^(\w+), (\w+)\.?$/)
71
+ if name.eql? :given_name
72
+ Regexp.last_match[2]
73
+ elsif name.eql? :family_name
74
+ Regexp.last_match[1]
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
80
+
81
+ has_many :additional_name, :honorific_prefix, :honorific_suffix
20
82
  end
21
83
 
84
+ has_one! :class
85
+
86
+ def organization?
87
+ if org
88
+ fn == org[:organization_name]
89
+ end
90
+ end
91
+ alias company? organization?
92
+
93
+ # http://tools.ietf.org/html/rfc2426
94
+ # TODO: Make this less ugly
95
+ def to_vcard
96
+ @vcard = "BEGIN:VCARD\x0D\x0AVERSION:3.0\x0D\x0APRODID:#{HMachine::PRODID}"
97
+ @vcard += "\x0D\x0ANAME:#{node.document.css('title').first.content}" if node.document.css('title').first
98
+ @vcard += "\x0D\x0ASOURCE:#{@source}" if @source
99
+ @vcard += "\x0D\x0AFN:#{fn}" if fn
100
+ @vcard += n_vcard if n
101
+ if nickname
102
+ @vcard += "\x0D\x0ANICKNAME" + (nickname.respond_to?(:join) ? nickname.join(',') : nickname)
103
+ end
104
+ @vcard += "\x0D\x0APHOTO;VALUE=uri:#{photo.first}" if photo
105
+ @vcard += "\x0D\x0ABDAY:#{bday.strftime('%Y-%m-%d')}" if bday
106
+ if adr
107
+ if adr.respond_to?(:each)
108
+ adr.each { |address| @vcard += adr_vcard(address) }
109
+ else
110
+ @vcard += adr_vcard(adr)
111
+ end
112
+ end
113
+ if tel
114
+ if tel.respond_to?(:join)
115
+ tel.each { |phone| @vcard += "\x0D\x0ATEL#{type_value_vcard(phone)}" }
116
+ else
117
+ @vcard += "\x0D\x0ATEL#{type_value_vcard(phone)}"
118
+ end
119
+ end
120
+ if email
121
+ if email.respond_to?(:join)
122
+ email.each { |mail| @vcard += "\x0D\x0AEMAIL#{type_value_vcard(mail)}" }
123
+ else
124
+ @vcard += "\x0D\x0AEMAIL#{type_value_vcard(email)}"
125
+ end
126
+ end
127
+ mailer.each {|software| @vcard += "\x0D\x0AMAILER:#{software}" } if mailer
128
+ @vcard += "\x0D\x0ATZ:#{tz}" if tz
129
+ @vcard += "\x0D\x0AGEO:#{geo[:latitude]};#{geo[:longitude]}" if geo
130
+ title.each {|titl| @vcard += "\x0D\x0ATITLE:#{titl}" } if title
131
+ role.each {|roll| @vcard += "\x0D\x0AROLE:#{roll}" } if role
132
+ logo.each { |log| @vcard += "\x0D\x0ALOGO;VALUE=uri:#{log}" } if logo
133
+ agent.each {|mrsmith| @vcard += "\x0D\x0AAGENT:#{mrsmith}" } if agent
134
+ @vcard += "\x0D\x0AORG:#{org[:organization_name]};#{org[:organization_unit]}" if org
135
+ @vcard += "\x0D\x0ACATEGORIES:#{join_vcard_values(category).upcase}" if category
136
+ note.each {|notes| @vcard += "\x0D\x0ANOTE:#{notes}" } if note
137
+ @vcard += "\x0D\x0AREV:#{rev.iso8601}" if rev
138
+ @vcard += "\x0D\x0ASORT-STRING:#{sort_string}" if sort_string
139
+ sound.each {|audio| @vcard += "\x0D\x0ASOUND;VALUE=uri:#{audio}" } if sound
140
+ @vcard += "\x0D\x0AUID:#{uid}" if uid
141
+ url.each {|web| @vcard += "\x0D\x0AURL:#{web}" } if url
142
+ @vcard += "\x0D\x0ACLASS:#{to_h[:class]}" if has_property?(:class)
143
+ key.each {|auth| @vcard += "\x0D\x0AKEY:#{key}" }if key
144
+ @vcard += "\x0D\x0AEND:VCARD\x0D\x0A\x0D\x0A"
145
+ end
146
+
147
+ private
148
+
149
+ def join_vcard_values(values)
150
+ values.respond_to?(:join) ? values.join(',') : values.to_s.strip
151
+ end
152
+
153
+ def type_value_vcard(communication)
154
+ if communication.respond_to?(:keys)
155
+ _comm = ";TYPE=#{join_vcard_values(communication[:type])}:#{communication[:value]}"
156
+ else
157
+ _comm = ":#{communication}"
158
+ end
159
+ _comm
160
+ end
161
+
162
+ def adr_vcard(address)
163
+ addresses = "\x0D\x0AADR"
164
+ adr_vcard = {}
165
+ adr.to_h.each_pair { |key,value| adr_vcard[key] = join_vcard_values(value) }
166
+ if address[:type]
167
+ addresses += ";TYPE=" + (address[:type].respond_to?(:join) ? address[:type].join(',') : address[:type].to_s)
168
+ end
169
+ addresses += ":#{address[:post_office_box]};#{adr_vcard[:extended_address]};#{adr_vcard[:street_address]};"
170
+ addresses += "#{adr_vcard[:locality]};#{adr_vcard[:region]};#{address[:postal_code]};#{adr_vcard[:country_name]}"
171
+ end
172
+
173
+ def n_vcard
174
+ n_vcard = {}
175
+ n.each_pair { |key,value| n_vcard[key] = join_vcard_values(value) }
176
+ "\x0D\x0AN:#{n_vcard[:family_name]};#{n_vcard[:given_name]};#{n_vcard[:additional_name]};"+\
177
+ "#{n_vcard[:honorific_prefix]};#{n_vcard[:honorific_suffix]}"
178
+ end
179
+
22
180
  end
23
181
  end
24
182
  end