microformats2 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.autotest ADDED
@@ -0,0 +1,23 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'autotest/restart'
4
+
5
+ # Autotest.add_hook :initialize do |at|
6
+ # at.extra_files << "../some/external/dependency.rb"
7
+ #
8
+ # at.libs << ":../some/external"
9
+ #
10
+ # at.add_exception 'vendor'
11
+ #
12
+ # at.add_mapping(/dependency.rb/) do |f, _|
13
+ # at.files_matching(/test_.*rb$/)
14
+ # end
15
+ #
16
+ # %w(TestA TestB).each do |klass|
17
+ # at.extra_class_map[klass] = "test/test_misc.rb"
18
+ # end
19
+ # end
20
+
21
+ # Autotest.add_hook :run_command do |at|
22
+ # system "rake build"
23
+ # end
data/.gemtest ADDED
File without changes
data/History.txt ADDED
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2011-06-14
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
data/Manifest.txt ADDED
@@ -0,0 +1,8 @@
1
+ .autotest
2
+ History.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ bin/microformats2
7
+ lib/microformats2.rb
8
+ test/test_microformats2.rb
data/README.txt ADDED
@@ -0,0 +1,41 @@
1
+ # Microformats
2
+
3
+ http://github.com/veganstraightedge/microformats2
4
+
5
+ ## DESCRIPTION
6
+
7
+ Generic Microformats 2 Extractor
8
+
9
+ ## FEATURES/PROBLEMS
10
+
11
+ * parses and extracts [Microformats 2](http://microformats.org/wiki/microformats-2) syntax
12
+ * needs more test cases
13
+ * needs better docs
14
+
15
+ ## SYNOPSIS
16
+
17
+ Microformats2.parse(File.open("iamshane.html"))
18
+
19
+ ## REQUIREMENTS
20
+
21
+ * Hoe (development only)
22
+ * Nokogiri
23
+
24
+ ## INSTALL
25
+
26
+ sudo gem install microformats2
27
+
28
+ ## DEVELOPERS
29
+
30
+ After checking out the source, run:
31
+
32
+ rake newb
33
+
34
+ This task will install any missing dependencies,
35
+ run the tests/specs, and generate the RDoc.
36
+
37
+ ## LICENSE
38
+
39
+ PUBLIC DOMAIN.
40
+ Your heart is as free as the air you breathe.
41
+ The ground you stand on is liberated territory.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
# Gem specification for microformats2, declared through Hoe: records the
# author and adds nokogiri (any version) to the gem's extra dependencies.
Hoe.spec('microformats2') do
  developer 'Shane Becker', 'veganstraightedge@gmail.com'
  extra_deps.push(['nokogiri', '>= 0'])
end
10
+
11
+
12
+ # vim: syntax=ruby
data/bin/microformats2 ADDED
@@ -0,0 +1,3 @@
1
#!/usr/bin/env ruby

# Placeholder executable: the command line tool has not been written yet, so
# report that on standard error and exit with a failure status.
$stderr.puts "you need to write me"
exit false
@@ -0,0 +1,165 @@
1
+ require 'nokogiri'
2
+ require 'time'
3
+ require 'date'
4
+
5
# Generic Microformats 2 extractor.
#
# Microformats2.parse walks an HTML document, finds every element carrying a
# root class (h-*), and builds one plain Ruby object per root class. The
# prefixed property classes found below that element (p-/n-/e-/i- text,
# u- URLs, d- dates, t- times) become attr_accessor style attributes on it.
module Microformats2
  VERSION = "1.0.0"

  # Raised by Microformats2.parse when the argument is not a String or an
  # IO-like object.
  class LoadError < StandardError; end

  # Shape a converted root class must have before it is used as a constant
  # name; anything else would make Object.const_defined? raise NameError.
  CONSTANT_NAME = /\A[A-Z][A-Za-z0-9_]*\z/

  # Shape a converted property name must have before it is handed to
  # attr_accessor; anything else would raise NameError.
  PROPERTY_NAME = /\A[[:alpha:]_][[:alnum:]_]*\z/

  # Parses +html+ and returns a Hash mapping a downcased format name
  # (e.g. :hcard) to the Array of objects extracted for that format.
  #
  # html - a String of HTML, or an IO-like object (anything responding to
  #        both +read+ and +close+, such as File, StringIO or Tempfile).
  #
  # The returned Hash creates an empty Array for keys that are looked up but
  # missing. The class of each object is a top-level constant (HCard for
  # "h-card"), created on first use and reused afterwards.
  #
  # Raises Microformats2::LoadError for any other kind of argument.
  def self.parse(html)
    unless html.is_a?(String) || (html.respond_to?(:read) && html.respond_to?(:close))
      raise LoadError, "argument must be a String or File"
    end

    doc = Nokogiri::HTML(html)
    microformats = Hash.new { |hash, key| hash[key] = [] }

    # The selector is only a coarse filter; the real decision is made per
    # class token so that class="h-card vcard" or class="vcard h-card" work.
    doc.css('*[class*="h-"]').each do |microformat|
      microformat.attribute("class").to_s.split.each do |css_class|
        next unless css_class =~ /\Ah-/

        constant_name = classify(css_class.gsub("-", "_"))
        next unless constant_name =~ CONSTANT_NAME

        obj = microformat_class(constant_name).new

        # Add any properties to the object
        add_properties(microformat, obj)
        add_urls(microformat, obj)
        add_dates(microformat, obj)
        add_times(microformat, obj)

        microformats[constant_name.downcase.to_sym] << obj
      end
    end

    microformats
  end

  # Defines an attr_accessor named +method_name+ on the class of +obj+ unless
  # the object already responds to that name. Returns +obj+.
  def self.add_method(obj, method_name)
    unless obj.respond_to?(method_name)
      obj.class.class_eval { attr_accessor method_name }
    end

    obj
  end

  # Copies the whitespace-normalised text of every p-, n-, e- and i- property
  # found below +mf+ onto +obj+.
  def self.add_properties(mf, obj)
    each_property(mf, %w(p n e i)) do |property, method_name|
      store_value(obj, method_name, property.text.gsub(/\s+/, " ").strip)
    end
  end

  # Copies the href attribute of every u- property found below +mf+ onto
  # +obj+ (an empty String when the element has no href).
  def self.add_urls(mf, obj)
    each_property(mf, %w(u)) do |property, method_name|
      store_value(obj, method_name, property.attribute("href").to_s)
    end
  end

  # Parses every d- property found below +mf+ with DateTime.parse and stores
  # it on +obj+. The title attribute wins over the element text.
  #
  # Raises whatever DateTime.parse raises for unparseable input.
  def self.add_dates(mf, obj)
    each_property(mf, %w(d)) do |property, method_name|
      raw = (property.attribute("title") || property.text).to_s
      store_value(obj, method_name, DateTime.parse(raw))
    end
  end

  # Parses every t- property found below +mf+ with Time.parse and stores it
  # on +obj+. The title attribute wins over the element text.
  #
  # Raises whatever Time.parse raises for unparseable input.
  def self.add_times(mf, obj)
    each_property(mf, %w(t)) do |property, method_name|
      raw = (property.attribute("title") || property.text).to_s
      store_value(obj, method_name, Time.parse(raw))
    end
  end

  # Thank you Rails Developers for your unintentional contribution to this project
  # File activesupport/lib/active_support/inflector/inflections.rb, line 206
  #
  # Turns an underscored name into a constant-style name ("h_card" => "HCard").
  def self.classify(str)
    # strip out any leading schema name
    camelize(singularize(str.to_s.sub(/.*\./, '')))
  end

  # File activesupport/lib/active_support/inflector/inflections.rb, line 148
  #
  # No inflection rules are applied here: returns a copy of +word+ as a String.
  def self.singularize(word)
    word.to_s.dup
  end

  # File activesupport/lib/active_support/inflector/methods.rb, line 28
  #
  # Converts "lower_case_and_underscored" to "LowerCaseAndUnderscored", or to
  # "lowerCaseAndUnderscored" when +first_letter_in_uppercase+ is false.
  # "/" is turned into "::". An empty word yields an empty String.
  def self.camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
    word = lower_case_and_underscored_word.to_s

    if first_letter_in_uppercase
      word.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
    else
      # [0, 1] and the trailing to_s keep this safe for the empty string.
      word[0, 1].downcase + camelize(word)[1..-1].to_s
    end
  end

  # Returns the top-level class named +constant_name+, creating an empty one
  # when no such constant exists yet.
  def self.microformat_class(constant_name)
    if Object.const_defined?(constant_name)
      Object.const_get(constant_name)
    else
      Object.const_set(constant_name, Class.new)
    end
  end

  # Yields [element, method_name] for every class token below +mf+ whose
  # prefix (the part before the first "-") is listed in +prefixes+.
  # Tokens are split in Ruby rather than matched by CSS selector so that a
  # property is found wherever it sits in the class attribute, and so that
  # ordinary classes such as "person" or "user" are never mistaken for one.
  def self.each_property(mf, prefixes)
    mf.css("*[class]").each do |property|
      property.attribute("class").to_s.split.each do |css_class|
        prefix, name = css_class.split("-", 2)
        next unless name && prefixes.include?(prefix)

        method_name = name.gsub("-", "_")
        next unless method_name =~ PROPERTY_NAME

        yield property, method_name
      end
    end
  end

  # Stores +value+ under +method_name+ on +obj+. A first value is stored as
  # is; further values for the same name are collected into an Array.
  # Returns +obj+.
  def self.store_value(obj, method_name, value)
    # A reader without a writer is a method the object had all along (class,
    # freeze, display, ...). Calling it would not read a property, so the
    # property is skipped instead of crashing on markup we do not control.
    return obj if obj.respond_to?(method_name) && !obj.respond_to?("#{method_name}=")

    add_method(obj, method_name)
    current = obj.send(method_name)

    if current.kind_of?(Array)
      current << value
    elsif current
      obj.send("#{method_name}=", [current, value])
    else
      obj.send("#{method_name}=", value)
    end

    obj
  end

  private_class_method :microformat_class, :each_property, :store_value
end
@@ -0,0 +1,163 @@
1
+ require "test/unit"
2
+ require "microformats2"
3
+
4
# Unit tests for Microformats2.parse: argument checking, detection of the
# h-card root class, and extraction of text, URL, date and time properties.
class TestMicroformats2 < Test::Unit::TestCase
  def test_throw_exception_on_non_string_params
    assert_raise Microformats2::LoadError do
      Microformats2.parse(nil)
    end
  end

  def test_returns_hash_of_microformat_objects
    result = Microformats2.parse("A String")
    assert_equal Hash, result.class
  end

  def test_only_parse_microformats
    result = Microformats2.parse("<html><body><p>Something</p></body></html>")
    assert_equal 0, result.size
  end

  def test_extracts_hcard_from_an_html_file
    # Loaded here because this is the only test that needs a file on disk.
    require "tempfile"

    # The gem ships no HTML fixture, so the page is written to a temporary
    # file and handed to the parser as a real File object.
    Tempfile.open("hcard") do |tmp|
      tmp.write(simple_hcard_page('<h1 class="h-card">Chris</h1>'))
      tmp.flush

      File.open(tmp.path) do |file|
        result = Microformats2.parse(file)
        assert_equal HCard, result[:hcard].first.class
      end
    end
  end

  def test_extracts_hcard_from_html
    result = Microformats2.parse(simple_hcard_page('<h1 class="h-card">Chris</h1>'))
    assert_equal HCard, result[:hcard].first.class
  end

  def test_constructs_properties_from_hcard
    mycard = first_hcard(<<-END)
      <h1 class="h-card">
        <a class="p-fn u-url" href="http://factoryjoe.com/">
          <span class="p-given-name">Chris</span>
          <abbr class="p-additional-name">R.</abbr>
          <span class="p-family-name">Messina</span>
        </a>
      </h1>
    END

    assert_equal "Chris", mycard.given_name
    assert_equal "R.", mycard.additional_name
    assert_equal "Messina", mycard.family_name
    assert_equal "Chris R. Messina", mycard.fn
  end

  def test_constructs_dates
    mycard = first_hcard(<<-END)
      <h1 class="h-card">
        <span class="d-bday">1979-09-18</span>
        <span class="d-epoch" title="1970-01-01">EPOCH!</span>
      </h1>
    END

    assert_equal DateTime.parse("1979-09-18"), mycard.bday
    assert_equal DateTime.parse("1970-01-01"), mycard.epoch
  end

  def test_constructs_times
    mycard = first_hcard(<<-END)
      <h1 class="h-card">
        <span class="t-start">09:30</span>
        <span class="t-end" title="6:00">Leaving time</span>
      </h1>
    END

    assert_equal Time.parse("09:30"), mycard.start
    assert_equal Time.parse("06:00"), mycard.end
  end

  def test_ignores_pattern_matches_not_at_the_beginning_of_class
    mycard = first_hcard(<<-END)
      <h1 class="h-card">
        <span class="p-n-x">Chris</span>
      </h1>
    END

    assert_equal "Chris", mycard.n_x
    assert mycard.n_x.is_a?(String)
  end

  def test_constructs_urls_from_hcard
    mycard = first_hcard(<<-END)
      <h1 class="h-card">
        <a class="p-fn u-url" href="http://factoryjoe.com/">Chris</a>
      </h1>
    END

    assert_equal "http://factoryjoe.com/", mycard.url
  end

  private

  # Wraps +body+ in the minimal HTML page shared by all hCard tests.
  def simple_hcard_page(body)
    <<-END
      <html>
        <head>
          <title>Simple hCard</title>
        </head>

        <body>
          #{body}
        </body>
      </html>
    END
  end

  # Parses a page containing +body+ and returns the first hCard found in it.
  def first_hcard(body)
    Microformats2.parse(simple_hcard_page(body))[:hcard].first
  end
end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: microformats2
3
+ version: !ruby/object:Gem::Version
4
+ hash: 23
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 0
10
+ version: 1.0.0
11
+ platform: ruby
12
+ authors:
13
+ - Shane Becker
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-06-25 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: nokogiri
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: hoe
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ hash: 17
43
+ segments:
44
+ - 2
45
+ - 9
46
+ version: "2.9"
47
+ type: :development
48
+ version_requirements: *id002
49
+ description: Generic Microformats 2 Extractor
50
+ email:
51
+ - veganstraightedge@gmail.com
52
+ executables:
53
+ - microformats2
54
+ extensions: []
55
+
56
+ extra_rdoc_files:
57
+ - History.txt
58
+ - Manifest.txt
59
+ - README.txt
60
+ files:
61
+ - .autotest
62
+ - History.txt
63
+ - Manifest.txt
64
+ - README.txt
65
+ - Rakefile
66
+ - bin/microformats2
67
+ - lib/microformats2.rb
68
+ - test/test_microformats2.rb
69
+ - .gemtest
70
+ homepage: http://github.com/veganstraightedge/microformats2
71
+ licenses: []
72
+
73
+ post_install_message:
74
+ rdoc_options:
75
+ - --main
76
+ - README.txt
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ requirements: []
98
+
99
+ rubyforge_project: microformats2
100
+ rubygems_version: 1.8.5
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: Generic Microformats 2 Extractor
104
+ test_files:
105
+ - test/test_microformats2.rb