microformats2 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.autotest ADDED
@@ -0,0 +1,23 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'autotest/restart'
4
+
5
+ # Autotest.add_hook :initialize do |at|
6
+ # at.extra_files << "../some/external/dependency.rb"
7
+ #
8
+ # at.libs << ":../some/external"
9
+ #
10
+ # at.add_exception 'vendor'
11
+ #
12
+ # at.add_mapping(/dependency.rb/) do |f, _|
13
+ # at.files_matching(/test_.*rb$/)
14
+ # end
15
+ #
16
+ # %w(TestA TestB).each do |klass|
17
+ # at.extra_class_map[klass] = "test/test_misc.rb"
18
+ # end
19
+ # end
20
+
21
+ # Autotest.add_hook :run_command do |at|
22
+ # system "rake build"
23
+ # end
data/.gemtest ADDED
File without changes
data/History.txt ADDED
@@ -0,0 +1,6 @@
1
+ === 1.0.0 / 2011-06-14
2
+
3
+ * 1 major enhancement
4
+
5
+ * Birthday!
6
+
data/Manifest.txt ADDED
@@ -0,0 +1,8 @@
1
+ .autotest
2
+ History.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ bin/microformats2
7
+ lib/microformats2.rb
8
+ test/test_microformats2.rb
data/README.txt ADDED
@@ -0,0 +1,41 @@
1
+ # Microformats
2
+
3
+ http://github.com/veganstraightedge/microformats2
4
+
5
+ ## DESCRIPTION
6
+
7
+ Generic Microformats 2 Extractor
8
+
9
+ ## FEATURES/PROBLEMS
10
+
11
+ * parses and extracts [Microformats 2](http://microformats.org/wiki/microformats-2) syntax
12
+ * needs more test cases
13
+ * needs better docs
14
+
15
+ ## SYNOPSIS
16
+
17
+ Microformats2.parse(File.open("iamshane.html")) # a local file; a String of HTML is accepted too
18
+
19
+ ## REQUIREMENTS
20
+
21
+ * Hoe
22
+ * Nokogiri
23
+
24
+ ## INSTALL
25
+
26
+ sudo gem install microformats2
27
+
28
+ ## DEVELOPERS
29
+
30
+ After checking out the source, run:
31
+
32
+ rake newb
33
+
34
+ This task will install any missing dependencies,
35
+ run the tests/specs, and generate the RDoc.
36
+
37
+ ## LICENSE
38
+
39
+ PUBLIC DOMAIN.
40
+ Your heart is as free as the air you breathe.
41
+ The ground you stand on is liberated territory.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
# Gem definition for 'microformats2', built through Hoe: registers the
# developer contact and appends nokogiri (any version) to the extra
# dependencies.
Hoe.spec('microformats2') do
  developer 'Shane Becker', 'veganstraightedge@gmail.com'
  extra_deps.push(['nokogiri', ">= 0"])
end
10
+
11
+
12
+ # vim: syntax=ruby
data/bin/microformats2 ADDED
@@ -0,0 +1,3 @@
1
#!/usr/bin/env ruby

# Placeholder executable: the command-line interface is not implemented yet,
# so the script terminates immediately with a message.
Kernel.abort("you need to write me")
@@ -0,0 +1,165 @@
1
+ require 'nokogiri'
2
+ require 'time'
3
+ require 'date'
4
+
5
# Generic Microformats 2 extractor.
#
# Microformats2.parse scans an HTML document for elements carrying an "h-*"
# root class, builds one plain Ruby object per root, and fills it with
# attributes derived from prefixed property classes found below that element:
#
#   p-, n-, e-, i-   whitespace-normalised text content
#   u-               value of the href attribute
#   d-               DateTime parsed from the title attribute, or the text
#   t-               Time parsed from the title attribute, or the text
module Microformats2
  VERSION = "1.0.0"

  # Raised by Microformats2.parse when the argument is neither a String nor a File.
  class LoadError < StandardError; end

  # A single class token naming a microformat root, e.g. "h-card".
  ROOT_CLASS = /\Ah-[A-Za-z0-9]+(?:-[A-Za-z0-9]+)*\z/

  # Class tokens naming a property. The hyphen after the prefix letter is
  # required, so ordinary classes such as "photo", "url" or "date" never
  # match. Capture 1 is the property name.
  TEXT_PROPERTY_CLASS = /\A[pnei]-(.+)\z/
  URL_PROPERTY_CLASS  = /\Au-(.+)\z/
  DATE_PROPERTY_CLASS = /\Ad-(.+)\z/
  TIME_PROPERTY_CLASS = /\At-(.+)\z/

  # Property names are turned into accessors, so they must be plain identifiers.
  ATTRIBUTE_NAME = /\A[A-Za-z_][A-Za-z0-9_]*\z/

  # Parses +html+ and returns the microformats found in it.
  #
  # html:: a String of markup or an open File (subclasses are accepted).
  #
  # Returns a Hash mapping a downcased symbol of the generated class name
  # (e.g. :hcard for "h-card") to an Array of extracted objects. Unknown keys
  # yield an empty Array.
  #
  # Raises Microformats2::LoadError for any other argument type. Date and
  # time properties that cannot be parsed raise ArgumentError from
  # DateTime.parse / Time.parse.
  #
  # NOTE: for every root class encountered a top-level constant (e.g. HCard)
  # is looked up and, if missing, defined on Object. Class names therefore
  # come from the parsed markup; only tokens matching ROOT_CLASS are used.
  def self.parse(html)
    unless html.is_a?(String) || html.is_a?(File)
      raise LoadError, "argument must be a String or File"
    end

    doc = Nokogiri::HTML(html)
    microformats = Hash.new { |hash, key| hash[key] = [] }

    doc.css("*[class]").each do |microformat|
      # Look at each class token on its own so that extra classes next to the
      # root class ("h-card vcard", "vcard h-card") neither hide the
      # microformat nor leak into the constant name.
      root_class_names(microformat).each do |root_name|
        constant_name = classify(root_name.gsub("-", "_"))
        obj = microformat_class(constant_name).new

        add_properties(microformat, obj)
        add_urls(microformat, obj)
        add_dates(microformat, obj)
        add_times(microformat, obj)

        microformats[constant_name.downcase.to_sym] << obj
      end
    end

    microformats
  end

  # Ensures +obj+ responds to +method_name+ by defining an attr_accessor on
  # obj's class when it does not already respond to it. The accessor is
  # defined on the class, so every instance of that class gains it.
  #
  # Returns +obj+.
  def self.add_method(obj, method_name)
    unless obj.respond_to?(method_name)
      obj.class.send(:attr_accessor, method_name)
    end

    obj
  end

  # Copies the text of every p-/n-/e-/i- property below +mf+ onto +obj+.
  # Runs of whitespace (including newlines) collapse to a single space and
  # the result is stripped. Returns +obj+.
  def self.add_properties(mf, obj)
    collect_values(mf, obj, TEXT_PROPERTY_CLASS) do |property|
      property.text.gsub(/\s+/, " ").strip
    end
  end

  # Copies the href of every u- property below +mf+ onto +obj+ (an empty
  # String when the element has no href). Returns +obj+.
  def self.add_urls(mf, obj)
    collect_values(mf, obj, URL_PROPERTY_CLASS) do |property|
      property.attribute("href").to_s
    end
  end

  # Stores every d- property below +mf+ on +obj+ as a DateTime, parsed from
  # the title attribute when present, otherwise from the element text.
  # Returns +obj+.
  def self.add_dates(mf, obj)
    collect_values(mf, obj, DATE_PROPERTY_CLASS) do |property|
      DateTime.parse((property.attribute("title") || property.text).to_s)
    end
  end

  # Stores every t- property below +mf+ on +obj+ as a Time, parsed from the
  # title attribute when present, otherwise from the element text.
  # Returns +obj+.
  def self.add_times(mf, obj)
    collect_values(mf, obj, TIME_PROPERTY_CLASS) do |property|
      Time.parse((property.attribute("title") || property.text).to_s)
    end
  end

  # Thank you Rails Developers for your unintentional contribution to this project
  # File activesupport/lib/active_support/inflector/inflections.rb, line 206
  #
  # Turns an underscored name into a class name: "h_card" => "HCard".
  def self.classify(str)
    # strip out any leading schema name
    camelize(singularize(str.to_s.sub(/.*\./, '')))
  end

  # File activesupport/lib/active_support/inflector/inflections.rb, line 148
  #
  # Stub: no inflection rules are applied; returns a copy of +word+ as a String.
  def self.singularize(word)
    word.to_s.dup
  end

  # File activesupport/lib/active_support/inflector/methods.rb, line 28
  #
  # Converts "lower_case/and_underscored" into "LowerCase::AndUnderscored".
  # With +first_letter_in_uppercase+ false the first character is downcased
  # instead ("h_card" => "hCard").
  def self.camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
    word = lower_case_and_underscored_word.to_s

    if first_letter_in_uppercase
      word.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
    else
      # [0, 1] yields a String on every Ruby version and "" for empty input.
      word[0, 1].downcase + camelize(word)[1..-1].to_s
    end
  end

  # Returns the class tokens of +node+ that name a microformat root.
  def self.root_class_names(node)
    node.attribute("class").to_s.split.select { |css_class| css_class =~ ROOT_CLASS }
  end

  # Returns the top-level class called +constant_name+, defining an empty one
  # when no such constant exists yet.
  def self.microformat_class(constant_name)
    if Object.const_defined?(constant_name)
      Object.const_get(constant_name)
    else
      Object.const_set(constant_name, Class.new)
    end
  end

  # Shared walker behind add_properties / add_urls / add_dates / add_times.
  #
  # Visits every element below +mf+ that has a class attribute. For each
  # class token matching +pattern+ it derives the attribute name (capture 1,
  # hyphens turned into underscores), yields the element to obtain the value
  # and stores that value on +obj+. Returns +obj+.
  def self.collect_values(mf, obj, pattern)
    mf.css("*[class]").each do |property|
      property.attribute("class").to_s.split.each do |css_class|
        match = pattern.match(css_class)
        next unless match

        method_name = match[1].gsub("-", "_")
        # The name comes from the markup: skip anything that is not a plain
        # identifier rather than letting attr_accessor raise.
        next unless method_name =~ ATTRIBUTE_NAME

        add_method(obj, method_name)
        # No writer means the name collides with a method the object already
        # had (class, freeze, hash, ...); never invoke those via send.
        next unless obj.respond_to?("#{method_name}=")

        append_value(obj, method_name, yield(property))
      end
    end

    obj
  end

  # Stores +value+ under +method_name+: the first value is stored as is, a
  # second value turns the attribute into an Array, later values are appended.
  def self.append_value(obj, method_name, value)
    current = obj.send(method_name)

    if current.kind_of?(Array)
      current << value
    elsif current
      obj.send("#{method_name}=", [current, value])
    else
      obj.send("#{method_name}=", value)
    end
  end

  private_class_method :root_class_names, :microformat_class,
                       :collect_values, :append_value
end
@@ -0,0 +1,163 @@
1
require "test/unit"
require "tempfile"
require "microformats2"
3
+
4
# Unit tests for Microformats2.parse: argument validation, detection of
# h-card roots, and extraction of text, URL, date and time properties.
class TestMicroformats2 < Test::Unit::TestCase
  def test_throw_exception_on_non_string_params
    assert_raise Microformats2::LoadError do
      Microformats2.parse(nil)
    end
  end

  def test_returns_hash_of_microformat_objects
    result = Microformats2.parse("A String")
    assert_equal Hash, result.class
  end

  def test_only_parse_microformats
    result = Microformats2.parse("<html><body><p>Something</p></body></html>")
    assert_equal 0, result.size
  end

  # Parses from a real File handle. The markup is written to a temporary
  # file so the test does not rely on a fixture that is not part of the gem.
  def test_extracts_hcard_from_an_html_file
    hcard = <<-END
      <html>
      <head>
      <title>Simple hCard</title>
      </head>

      <body>
      <h1 class="h-card">Chris</h1>
      </body>
      </html>
    END
    fixture = Tempfile.new("hcard")
    fixture.write(hcard)
    fixture.close

    # parse only accepts String or File, so reopen the path as a plain File.
    File.open(fixture.path) do |file|
      result = Microformats2.parse(file)
      assert_equal HCard, result[:hcard].first.class
    end
  ensure
    fixture.close! if fixture
  end

  def test_extracts_hcard_from_html
    hcard = <<-END
      <html>
      <head>
      <title>Simple hCard</title>
      </head>

      <body>
      <h1 class="h-card">Chris</h1>
      </body>
      </html>
    END
    result = Microformats2.parse(hcard)
    assert_equal HCard, result[:hcard].first.class
  end

  def test_constructs_properties_from_hcard
    hcard = <<-END
      <html>
      <head>
      <title>Simple hCard</title>
      </head>

      <body>
      <h1 class="h-card">
      <a class="p-fn u-url" href="http://factoryjoe.com/">
      <span class="p-given-name">Chris</span>
      <abbr class="p-additional-name">R.</abbr>
      <span class="p-family-name">Messina</span>
      </a>
      </h1>
      </body>
      </html>
    END
    result = Microformats2.parse(hcard)
    mycard = result[:hcard].first

    assert_equal "Chris", mycard.given_name
    assert_equal "R.", mycard.additional_name
    assert_equal "Messina", mycard.family_name
    assert_equal "Chris R. Messina", mycard.fn
  end

  def test_constructs_dates
    hcard = <<-END
      <html>
      <head>
      <title>Simple hCard</title>
      </head>

      <body>
      <h1 class="h-card">
      <span class="d-bday">1979-09-18</span>
      <span class="d-epoch" title="1970-01-01">EPOCH!</span>
      </h1>
      </body>
      </html>
    END
    result = Microformats2.parse(hcard)
    mycard = result[:hcard].first

    assert_equal DateTime.parse("1979-09-18"), mycard.bday
    assert_equal DateTime.parse("1970-01-01"), mycard.epoch
  end

  def test_constructs_times
    hcard = <<-END
      <html>
      <head>
      <title>Simple hCard</title>
      </head>

      <body>
      <h1 class="h-card">
      <span class="t-start">09:30</span>
      <span class="t-end" title="6:00">Leaving time</span>
      </h1>
      </body>
      </html>
    END
    result = Microformats2.parse(hcard)
    mycard = result[:hcard].first

    assert_equal Time.parse("09:30"), mycard.start
    assert_equal Time.parse("06:00"), mycard.end
  end

  # "p-n-x" is one p- property named n_x; the embedded "n-" must not be
  # treated as a second property prefix.
  def test_ignores_pattern_matches_not_at_the_beginning_of_class
    hcard = <<-END
      <html>
      <head>
      <title>Simple hCard</title>
      </head>

      <body>
      <h1 class="h-card">
      <span class="p-n-x">Chris</span>
      </h1>
      </body>
      </html>
    END
    result = Microformats2.parse(hcard)
    mycard = result[:hcard].first

    assert_equal "Chris", mycard.n_x
    assert mycard.n_x.is_a?(String)
  end

  def test_constructs_urls_from_hcard
    hcard = <<-END
      <html>
      <head>
      <title>Simple hCard</title>
      </head>

      <body>
      <h1 class="h-card">
      <a class="p-fn u-url" href="http://factoryjoe.com/">Chris</a>
      </h1>
      </body>
      </html>
    END
    result = Microformats2.parse(hcard)
    mycard = result[:hcard].first
    assert_equal "http://factoryjoe.com/", mycard.url
  end
end
metadata ADDED
@@ -0,0 +1,105 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: microformats2
3
+ version: !ruby/object:Gem::Version
4
+ hash: 23
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 0
10
+ version: 1.0.0
11
+ platform: ruby
12
+ authors:
13
+ - Shane Becker
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-06-25 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: nokogiri
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: hoe
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ hash: 17
43
+ segments:
44
+ - 2
45
+ - 9
46
+ version: "2.9"
47
+ type: :development
48
+ version_requirements: *id002
49
+ description: Generic Microformats 2 Extractor
50
+ email:
51
+ - veganstraightedge@gmail.com
52
+ executables:
53
+ - microformats2
54
+ extensions: []
55
+
56
+ extra_rdoc_files:
57
+ - History.txt
58
+ - Manifest.txt
59
+ - README.txt
60
+ files:
61
+ - .autotest
62
+ - History.txt
63
+ - Manifest.txt
64
+ - README.txt
65
+ - Rakefile
66
+ - bin/microformats2
67
+ - lib/microformats2.rb
68
+ - test/test_microformats2.rb
69
+ - .gemtest
70
+ homepage: http://github.com/veganstraightedge/microformats2
71
+ licenses: []
72
+
73
+ post_install_message:
74
+ rdoc_options:
75
+ - --main
76
+ - README.txt
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ requirements: []
98
+
99
+ rubyforge_project: microformats2
100
+ rubygems_version: 1.8.5
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: Generic Microformats 2 Extractor
104
+ test_files:
105
+ - test/test_microformats2.rb