hmachine 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. data/.gitignore +2 -2
  2. data/Gemfile +6 -4
  3. data/Gemfile.lock +51 -0
  4. data/README.md +123 -9
  5. data/Rakefile +12 -3
  6. data/bin/hmachine +99 -0
  7. data/hmachine.gemspec +132 -0
  8. data/lib/hmachine.rb +121 -12
  9. data/lib/hmachine/microformat.rb +39 -20
  10. data/lib/hmachine/microformat/adr.rb +22 -0
  11. data/lib/hmachine/microformat/geo.rb +48 -0
  12. data/lib/hmachine/microformat/hcard.rb +169 -11
  13. data/lib/hmachine/microformat/rellicense.rb +20 -0
  14. data/lib/hmachine/microformat/reltag.rb +38 -0
  15. data/lib/hmachine/microformat/votelinks.rb +42 -0
  16. data/lib/hmachine/microformat/xfn.rb +54 -0
  17. data/lib/hmachine/microformat/xmdp.rb +14 -0
  18. data/lib/hmachine/microformat/xoxo.rb +69 -0
  19. data/lib/hmachine/pattern.rb +26 -0
  20. data/lib/hmachine/pattern/abbr.rb +21 -0
  21. data/lib/hmachine/pattern/datetime.rb +75 -0
  22. data/lib/hmachine/pattern/typevalue.rb +32 -0
  23. data/lib/hmachine/pattern/url.rb +32 -0
  24. data/lib/hmachine/pattern/valueclass.rb +51 -0
  25. data/lib/hmachine/posh.rb +3 -0
  26. data/lib/hmachine/posh/anchor.rb +40 -0
  27. data/lib/hmachine/posh/base.rb +204 -0
  28. data/lib/hmachine/posh/definition_list.rb +41 -0
  29. data/test/fixtures/huffduffer.html +466 -0
  30. data/test/fixtures/likeorhate.html +48 -0
  31. data/test/fixtures/rel_license.html +4 -0
  32. data/test/fixtures/test-fixture/hcard/hcard1.html +147 -0
  33. data/test/fixtures/test-fixture/hcard/hcard11.html +123 -0
  34. data/test/fixtures/test-fixture/hcard/hcard12.html +178 -0
  35. data/test/fixtures/test-fixture/hcard/hcard17.html +165 -0
  36. data/test/fixtures/test-fixture/hcard/hcard2.html +264 -0
  37. data/test/fixtures/test-fixture/hcard/hcard3.html +144 -0
  38. data/test/fixtures/test-fixture/hcard/hcard4.html +117 -0
  39. data/test/fixtures/test-fixture/hcard/hcard5.html +119 -0
  40. data/test/fixtures/test-fixture/hcard/hcard6.html +188 -0
  41. data/test/fixtures/test-fixture/hcard/hcard7.html +188 -0
  42. data/test/fixtures/test-fixture/hcard/hcard8.html +130 -0
  43. data/test/fixtures/test-fixture/hcard/hcard9.html +111 -0
  44. data/test/fixtures/test-fixture/hcard/hcard99.html +215 -0
  45. data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-YYYY-MM-DD--HH-MM.html +9 -0
  46. data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-abbr-YYYY-MM-DD--HH-MM.html +4 -0
  47. data/test/fixtures/xfn.html +198 -0
  48. data/test/fixtures/xmdp.html +32 -0
  49. data/test/fixtures/xoxo.html +51 -0
  50. data/test/hmachine_test.rb +122 -6
  51. data/test/microformat/adr_test.rb +47 -0
  52. data/test/microformat/geo_test.rb +66 -0
  53. data/test/microformat/hcard_test.rb +487 -20
  54. data/test/microformat/rellicense_test.rb +36 -0
  55. data/test/microformat/reltag_test.rb +61 -0
  56. data/test/microformat/votelinks_test.rb +44 -0
  57. data/test/microformat/xfn_test.rb +28 -0
  58. data/test/microformat/xmdp_test.rb +16 -0
  59. data/test/microformat/xoxo_test.rb +51 -0
  60. data/test/microformat_test.rb +12 -34
  61. data/test/pattern/date_time_test.rb +55 -0
  62. data/test/pattern/value_class_test.rb +33 -0
  63. data/test/pattern_test.rb +132 -0
  64. data/test/posh/anchor_test.rb +41 -0
  65. data/test/posh/base_test.rb +150 -0
  66. data/test/posh/definition_list_test.rb +38 -0
  67. data/test/test_helper.rb +24 -6
  68. metadata +93 -15
  69. data/lib/hmachine/microformat/base.rb +0 -17
@@ -1,21 +1,130 @@
1
- require 'uri'
1
+ require 'open-uri'
2
2
  require 'nokogiri'
3
3
 
4
- require 'hmachine/microformat'
5
-
6
4
  module HMachine
7
- VERSION = "0.0.1"
5
+ VERSION = "0.1.0"
6
+ PRODID = "-//markwunsch.com//hMachine #{VERSION}//EN"
7
+
8
+ # Convenience method for HMachine::Microformat.find method
9
+ def self.find(document, format=nil)
10
+ HMachine::Microformat.find(document, format)
11
+ end
12
+
13
+ # Get a string of html or a url and convert it to a Nokogiri Document
14
+ def self.get(html)
15
+ return html if html.is_a?(Nokogiri::XML::Node)
16
+ begin
17
+ url = URI.parse(html)
18
+ doc = url.is_a?(URI::HTTP) ? get_url(url.normalize.to_s) : get_document(html)
19
+ rescue URI::InvalidURIError
20
+ doc = get_document(html)
21
+ end
22
+ doc
23
+ end
24
+
25
+ # Open a URL and convert the contents to a Nokogiri Document
26
+ def self.get_url(url)
27
+ uri = URI.parse(url)
28
+ doc = ''
29
+ uri.open do |web|
30
+ web.each_line {|line| doc += line }
31
+ end
32
+ get_document(doc, url)
33
+ end
34
+
35
+ # Convert HTML to a Nokogiri Document
36
+ def self.get_document(html, url=nil)
37
+ html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri::HTML.parse(html, url)
38
+ end
39
+
40
+ def self.normalize(name)
41
+ name.to_s.strip.downcase.intern
42
+ end
43
+
44
+ # Map a key to an element or design pattern
45
+ def self.map(key)
46
+ case normalize(key)
47
+ when :value_class, :valueclass, :abbr, :uri, :url, :typevalue
48
+ HMachine::Pattern.map(key)
49
+ when :hcard, :geo, :rellicense, :reltag, :votelinks, :xfn, :xmdp, :xoxo, :adr
50
+ HMachine::Microformat.map(key)
51
+ when :base
52
+ HMachine::POSH::Base
53
+ else
54
+ raise "#{key} is not a recognized parser."
55
+ end
56
+ end
8
57
 
9
- def self.find(document)
10
- html = get_document(document)
11
- Microformat.find_all html
58
+ # Get/Set a function that defines how to find an element in a node.
59
+ # The Search function should return a Nokogiri::XML::NodeSet.
60
+ # eg. <tt>search {|node| node.css(element) }
61
+ def search(&block)
62
+ @search = block if block_given?
63
+ @search || lambda {|node| node }
12
64
  end
13
65
 
14
- def self.find_with_url(url)
15
- # open url and call find method on resulting document
66
+ # Search for the element in a document
67
+ def find_in(document)
68
+ search.call(document)
16
69
  end
17
70
 
18
- def self.get_document(html)
19
- html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri::HTML.parse(html)
71
+ # Is the element found in node?
72
+ def found_in?(node)
73
+ find_in(node).eql?(node) || !find_in(node).empty?
20
74
  end
21
- end
75
+
76
+ # Get/Set a function that tests to make sure a given node is
77
+ # the element we want. Should return truthy.
78
+ # Default just tests to see if the node passed is a child of its parent node.
79
+ def validate(&block)
80
+ @validate = block if block_given?
81
+ @validate || lambda { |node| find_in(node.parent).children.include?(node) }
82
+ end
83
+
84
+ # Is this a valid node?
85
+ def valid?(node)
86
+ validate.call(node)
87
+ end
88
+
89
+ # Define the pattern used to extract contents from node
90
+ # Can be a symbols that match to an Element parser, or a block
91
+ def extract(pattern = nil, &block)
92
+ if block_given?
93
+ @extract = block
94
+ else
95
+ @extract = HMachine.map(pattern).extract if pattern
96
+ end
97
+ @extract || lambda{|node| node.content.strip }
98
+ end
99
+
100
+ # Extract the content from the node
101
+ def extract_from(node)
102
+ extract.call(node)
103
+ end
104
+
105
+ # Parse the document, finding every instance of the desired element, and extract their contents
106
+ def parse(document)
107
+ if found_in?(document)
108
+ contents = if find_in(document).respond_to?(:collect)
109
+ find_in(document).collect { |element| extract_from(element) }
110
+ else
111
+ extract_from(document)
112
+ end
113
+ return contents.first if contents.respond_to?(:length) && (contents.length == 1)
114
+ contents
115
+ end
116
+ end
117
+
118
+ # Parse the document, extracting the content for the first instance of the element
119
+ def parse_first(document)
120
+ if found_in?(document)
121
+ elements = find_in(document)
122
+ extract_from elements.respond_to?(:first) ? elements.first : elements
123
+ end
124
+ end
125
+
126
+ end
127
+
128
+ require 'hmachine/pattern'
129
+ require 'hmachine/posh'
130
+ require 'hmachine/microformat'
@@ -1,30 +1,49 @@
1
- require 'hmachine/microformat/base'
2
- require 'hmachine/microformat/hcard'
3
-
4
1
  module HMachine
5
2
  module Microformat
6
-
7
- def self.find_hcard(html)
8
- doc = HMachine.get_document(html)
9
- find_in_node(HCard, doc)
3
+
4
+ def self.map(name)
5
+ map = microformats[HMachine.normalize(name)]
6
+ raise "#{name} is not a recognized microformat." unless map
7
+ map
10
8
  end
11
-
12
- def self.find_all(html)
13
- find_hcard html
9
+
10
+ def self.microformats
11
+ { :hcard => HMachine::Microformat::HCard,
12
+ :geo => HMachine::Microformat::Geo,
13
+ :adr => HMachine::Microformat::Adr,
14
+ :rellicense => HMachine::Microformat::RelLicense,
15
+ :reltag => HMachine::Microformat::RelTag,
16
+ :votelinks => HMachine::Microformat::VoteLinks,
17
+ :xfn => HMachine::Microformat::XFN,
18
+ :xmdp => HMachine::Microformat::XMDP,
19
+ :xoxo => HMachine::Microformat::XOXO }
14
20
  end
15
-
16
- def self.find_in_node(microformat, node)
17
- microformats = []
18
- node.css(microformat::ROOT_SELECTOR).each do |node|
19
- microformats << create_for_node(microformat, node) if microformat.validate(node)
21
+
22
+ def self.find(html, uformat = nil)
23
+ if uformat
24
+ map(uformat).parse HMachine.get(html)
25
+ else
26
+ find_all(html)
20
27
  end
21
- microformats
22
28
  end
23
29
 
24
- def self.create_for_node(microformat, node)
25
- return unless microformat.validate(node)
26
- microformat.new node
30
+ def self.find_all(html)
31
+ doc = HMachine.get(html)
32
+ uformats = microformats.values.collect do |uf|
33
+ uf.parse(doc)
34
+ end
35
+ uformats.compact.flatten
27
36
  end
28
37
 
29
38
  end
30
- end
39
+ end
40
+
41
+ require 'hmachine/microformat/reltag'
42
+ require 'hmachine/microformat/rellicense'
43
+ require 'hmachine/microformat/votelinks'
44
+ require 'hmachine/microformat/xoxo'
45
+ require 'hmachine/microformat/xmdp'
46
+ require 'hmachine/microformat/xfn'
47
+ require 'hmachine/microformat/geo'
48
+ require 'hmachine/microformat/adr'
49
+ require 'hmachine/microformat/hcard'
@@ -0,0 +1,22 @@
1
module HMachine
  module Microformat
    # Parser definition for the adr microformat.
    class Adr < POSH::Base
      FRIENDLY_NAME = "adr"
      WIKI_URL = 'http://microformats.org/wiki/adr'

      name :adr

      # Singular properties, see
      # http://microformats.org/wiki/adr-singular-properties
      has_one :post_office_box, :postal_code

      has_many :street_address, :locality, :region, :extended_address, :country_name

      # The address type is read via the value-class pattern and, when
      # present, normalized to a symbol.
      has_many :type do
        extract do |node|
          raw_type = Pattern::ValueClass.extract_from(node)
          raw_type ? HMachine.normalize(raw_type) : nil
        end
      end

    end
  end
end
@@ -0,0 +1,48 @@
1
module HMachine
  module Microformat
    # Parser definition for the geo microformat.
    class Geo < POSH::Base
      FRIENDLY_NAME = "geo"
      WIKI_URL = 'http://microformats.org/wiki/geo'

      name :geo

      # Latitude: taken from a descendant carrying the property's class
      # when there is one; otherwise it is the first ';'-separated part of
      # the geo node's own value.
      has_one :latitude do
        search do |root|
          matches = root.css(".#{name}")
          matches.empty? ? root : matches
        end

        extract do |element|
          value = HMachine::Pattern::ValueClass.extract_from(element)
          if element['class'] && element['class'].split.include?(name.to_s)
            value
          else
            value.split(';').first
          end
        end
      end

      # Longitude: same lookup as latitude, but the fallback is the second
      # ';'-separated part of the geo node's value.
      has_one :longitude do
        search do |root|
          matches = root.css(".#{name}")
          matches.empty? ? root : matches
        end

        extract do |element|
          value = HMachine::Pattern::ValueClass.extract_from(element)
          if element['class'] && element['class'].split.include?(name.to_s)
            value
          else
            value.split(';')[1]
          end
        end
      end

      alias lat latitude
      alias long longitude

      # Google Maps query URL for this coordinate pair.
      def to_google_maps
        "http://maps.google.com/?q=#{latitude},#{longitude}"
      end

    end
  end
end
@@ -1,24 +1,182 @@
1
1
  module HMachine
2
2
  module Microformat
3
- class HCard < Base
4
-
5
- ROOT_CLASS = "vcard"
6
- ROOT_SELECTOR = ".#{ROOT_CLASS}"
3
+ class HCard < POSH::Base
4
+ FRIENDLY_NAME = "hCard"
7
5
  WIKI_URL = "http://microformats.org/wiki/hcard"
6
+ XMDP = 'http://microformats.org/profile/hcard'
7
+
8
+ name :vcard
9
+
10
+ has_one :fn, :bday, :tz, :sort_string, :uid, :rev
11
+ alias birthday bday
8
12
 
9
- def initialize(node)
10
- raise "hCard not found in node" unless self.class.validate(node)
11
- @node = node
13
+ has_many :agent, :category, :key, :label,
14
+ :mailer, :nickname, :note, :role, :sound,
15
+ :title
16
+
17
+ has_many :logo, :photo, :url do
18
+ extract :url
12
19
  end
13
20
 
14
- def to_vcard
15
- # convert to vcard
21
+ has_one :geo do
22
+ extract :geo
23
+ end
24
+
25
+ has_many :email, :tel do
26
+ extract :typevalue
27
+ end
28
+
29
+ has_many :adr do
30
+ extract :adr
31
+ end
32
+ alias address adr
33
+
34
+ has_many :org do
35
+ has_one :organization_unit
36
+ has_one :organization_name do
37
+ search do |org|
38
+ org_name = org.css(".organization-name")
39
+ !org_name.empty? ? org_name : org
40
+ end
41
+ end
16
42
  end
17
43
 
18
- def self.infer_n_from_fn(fn)
19
- # ...
44
+ has_one :n do
45
+ search do |doc|
46
+ n = doc.css(".#{name}")
47
+ !n.empty? ? n : parent.properties[:fn].find_in(doc)
48
+ end
49
+
50
+ # N Optimization from Sumo:
51
+ # http://www.danwebb.net/2007/2/9/sumo-a-generic-microformats-parser-for-javascript
52
+ # See: http://microformats.org/wiki/hcard#Implied_.22n.22_Optimization
53
+ has_many :family_name, :given_name do
54
+ search do |node|
55
+ name_parts = node.css(".#{name}".gsub('_','-'))
56
+ !name_parts.empty? ? name_parts : node
57
+ end
58
+
59
+ extract do |node|
60
+ if node['class'].split.include?("#{name}".gsub('_','-'))
61
+ HMachine::Pattern::ValueClass.extract_from(node)
62
+ else
63
+ fn = parent.parent[:fn].extract_from(node)
64
+ if (fn =~ /^(\w+) (\w+)$/)
65
+ if name.eql? :given_name
66
+ Regexp.last_match[1]
67
+ elsif name.eql? :family_name
68
+ Regexp.last_match[2]
69
+ end
70
+ elsif (fn =~ /^(\w+), (\w+)\.?$/)
71
+ if name.eql? :given_name
72
+ Regexp.last_match[2]
73
+ elsif name.eql? :family_name
74
+ Regexp.last_match[1]
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
80
+
81
+ has_many :additional_name, :honorific_prefix, :honorific_suffix
20
82
  end
21
83
 
84
+ has_one! :class
85
+
86
+ def organization?
87
+ if org
88
+ fn == org[:organization_name]
89
+ end
90
+ end
91
+ alias company? organization?
92
+
93
+ # http://tools.ietf.org/html/rfc2426
94
+ # TODO: Make this less ugly
95
+ def to_vcard
96
+ @vcard = "BEGIN:VCARD\x0D\x0AVERSION:3.0\x0D\x0APRODID:#{HMachine::PRODID}"
97
+ @vcard += "\x0D\x0ANAME:#{node.document.css('title').first.content}" if node.document.css('title').first
98
+ @vcard += "\x0D\x0ASOURCE:#{@source}" if @source
99
+ @vcard += "\x0D\x0AFN:#{fn}" if fn
100
+ @vcard += n_vcard if n
101
+ if nickname
102
+ @vcard += "\x0D\x0ANICKNAME" + (nickname.respond_to?(:join) ? nickname.join(',') : nickname)
103
+ end
104
+ @vcard += "\x0D\x0APHOTO;VALUE=uri:#{photo.first}" if photo
105
+ @vcard += "\x0D\x0ABDAY:#{bday.strftime('%Y-%m-%d')}" if bday
106
+ if adr
107
+ if adr.respond_to?(:each)
108
+ adr.each { |address| @vcard += adr_vcard(address) }
109
+ else
110
+ @vcard += adr_vcard(adr)
111
+ end
112
+ end
113
+ if tel
114
+ if tel.respond_to?(:join)
115
+ tel.each { |phone| @vcard += "\x0D\x0ATEL#{type_value_vcard(phone)}" }
116
+ else
117
+ @vcard += "\x0D\x0ATEL#{type_value_vcard(phone)}"
118
+ end
119
+ end
120
+ if email
121
+ if email.respond_to?(:join)
122
+ email.each { |mail| @vcard += "\x0D\x0AEMAIL#{type_value_vcard(mail)}" }
123
+ else
124
+ @vcard += "\x0D\x0AEMAIL#{type_value_vcard(email)}"
125
+ end
126
+ end
127
+ mailer.each {|software| @vcard += "\x0D\x0AMAILER:#{software}" } if mailer
128
+ @vcard += "\x0D\x0ATZ:#{tz}" if tz
129
+ @vcard += "\x0D\x0AGEO:#{geo[:latitude]};#{geo[:longitude]}" if geo
130
+ title.each {|titl| @vcard += "\x0D\x0ATITLE:#{titl}" } if title
131
+ role.each {|roll| @vcard += "\x0D\x0AROLE:#{roll}" } if role
132
+ logo.each { |log| @vcard += "\x0D\x0ALOGO;VALUE=uri:#{log}" } if logo
133
+ agent.each {|mrsmith| @vcard += "\x0D\x0AAGENT:#{mrsmith}" } if agent
134
+ @vcard += "\x0D\x0AORG:#{org[:organization_name]};#{org[:organization_unit]}" if org
135
+ @vcard += "\x0D\x0ACATEGORIES:#{join_vcard_values(category).upcase}" if category
136
+ note.each {|notes| @vcard += "\x0D\x0ANOTE:#{notes}" } if note
137
+ @vcard += "\x0D\x0AREV:#{rev.iso8601}" if rev
138
+ @vcard += "\x0D\x0ASORT-STRING:#{sort_string}" if sort_string
139
+ sound.each {|audio| @vcard += "\x0D\x0ASOUND;VALUE=uri:#{audio}" } if sound
140
+ @vcard += "\x0D\x0AUID:#{uid}" if uid
141
+ url.each {|web| @vcard += "\x0D\x0AURL:#{web}" } if url
142
+ @vcard += "\x0D\x0ACLASS:#{to_h[:class]}" if has_property?(:class)
143
+ key.each {|auth| @vcard += "\x0D\x0AKEY:#{key}" }if key
144
+ @vcard += "\x0D\x0AEND:VCARD\x0D\x0A\x0D\x0A"
145
+ end
146
+
147
+ private
148
+
149
+ def join_vcard_values(values)
150
+ values.respond_to?(:join) ? values.join(',') : values.to_s.strip
151
+ end
152
+
153
+ def type_value_vcard(communication)
154
+ if communication.respond_to?(:keys)
155
+ _comm = ";TYPE=#{join_vcard_values(communication[:type])}:#{communication[:value]}"
156
+ else
157
+ _comm = ":#{communication}"
158
+ end
159
+ _comm
160
+ end
161
+
162
+ def adr_vcard(address)
163
+ addresses = "\x0D\x0AADR"
164
+ adr_vcard = {}
165
+ adr.to_h.each_pair { |key,value| adr_vcard[key] = join_vcard_values(value) }
166
+ if address[:type]
167
+ addresses += ";TYPE=" + (address[:type].respond_to?(:join) ? address[:type].join(',') : address[:type].to_s)
168
+ end
169
+ addresses += ":#{address[:post_office_box]};#{adr_vcard[:extended_address]};#{adr_vcard[:street_address]};"
170
+ addresses += "#{adr_vcard[:locality]};#{adr_vcard[:region]};#{address[:postal_code]};#{adr_vcard[:country_name]}"
171
+ end
172
+
173
+ def n_vcard
174
+ n_vcard = {}
175
+ n.each_pair { |key,value| n_vcard[key] = join_vcard_values(value) }
176
+ "\x0D\x0AN:#{n_vcard[:family_name]};#{n_vcard[:given_name]};#{n_vcard[:additional_name]};"+\
177
+ "#{n_vcard[:honorific_prefix]};#{n_vcard[:honorific_suffix]}"
178
+ end
179
+
22
180
  end
23
181
  end
24
182
  end