microformats2 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +23 -0
- data/.gemtest +0 -0
- data/History.txt +6 -0
- data/Manifest.txt +8 -0
- data/README.txt +41 -0
- data/Rakefile +12 -0
- data/bin/microformats2 +3 -0
- data/lib/microformats2.rb +165 -0
- data/test/test_microformats2.rb +163 -0
- metadata +105 -0
data/.autotest
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'autotest/restart'
|
4
|
+
|
5
|
+
# Autotest.add_hook :initialize do |at|
|
6
|
+
# at.extra_files << "../some/external/dependency.rb"
|
7
|
+
#
|
8
|
+
# at.libs << ":../some/external"
|
9
|
+
#
|
10
|
+
# at.add_exception 'vendor'
|
11
|
+
#
|
12
|
+
# at.add_mapping(/dependency.rb/) do |f, _|
|
13
|
+
# at.files_matching(/test_.*rb$/)
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# %w(TestA TestB).each do |klass|
|
17
|
+
# at.extra_class_map[klass] = "test/test_misc.rb"
|
18
|
+
# end
|
19
|
+
# end
|
20
|
+
|
21
|
+
# Autotest.add_hook :run_command do |at|
|
22
|
+
# system "rake build"
|
23
|
+
# end
|
data/.gemtest
ADDED
File without changes
|
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Microformats
|
2
|
+
|
3
|
+
http://github.com/veganstraightedge/microformats2
|
4
|
+
|
5
|
+
## DESCRIPTION
|
6
|
+
|
7
|
+
Generic Microformats 2 Extractor
|
8
|
+
|
9
|
+
## FEATURES/PROBLEMS
|
10
|
+
|
11
|
+
* parses and extracts [Microformats 2](http://microformats.org/wiki/microformats-2) syntax
|
12
|
+
* needs more test cases
|
13
|
+
* needs better docs
|
14
|
+
|
15
|
+
## SYNOPSIS
|
16
|
+
|
17
|
+
Microformats2.parse(File.open("iamshane.html"))
|
18
|
+
|
19
|
+
## REQUIREMENTS
|
20
|
+
|
21
|
+
* Hoe
|
22
|
+
* Nokogiri
|
23
|
+
|
24
|
+
## INSTALL
|
25
|
+
|
26
|
+
sudo gem install microformats2
|
27
|
+
|
28
|
+
## DEVELOPERS
|
29
|
+
|
30
|
+
After checking out the source, run:
|
31
|
+
|
32
|
+
rake newb
|
33
|
+
|
34
|
+
This task will install any missing dependencies,
|
35
|
+
run the tests/specs, and generate the RDoc.
|
36
|
+
|
37
|
+
## LICENSE
|
38
|
+
|
39
|
+
PUBLIC DOMAIN.
|
40
|
+
Your heart is as free as the air you breathe.
|
41
|
+
The ground you stand on is liberated territory.
|
data/Rakefile
ADDED
data/bin/microformats2
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'time'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
# Generic Microformats 2 extractor.
#
# Microformats2.parse walks an HTML document, finds every element carrying an
# +h-*+ root class and returns one plain Ruby object per root, with an
# accessor for each +p-+/+n-+/+e-+/+i-+ (text), +u-+ (URL), +d-+ (DateTime)
# and +t-+ (Time) property found underneath it.
module Microformats2
  VERSION = "1.0.0"

  # Raised by Microformats2.parse when the argument is neither a String nor a File.
  class LoadError < StandardError; end

  # A single class token that names a microformat root, e.g. "h-card".
  # Restricting the character set guarantees the derived constant name is valid.
  ROOT_CLASS_NAME = /\Ah-[a-zA-Z0-9_]+(?:-[a-zA-Z0-9_]+)*\z/

  # Regexp source for the part of a property class that follows the prefix
  # ("given-name" in "p-given-name"). Once "-" is replaced by "_" it is always
  # a legal attr_accessor name.
  PROPERTY_NAME = "[a-zA-Z_][a-zA-Z0-9_]*(?:-[a-zA-Z0-9_]+)*"

  # Parses +html+ and returns a Hash mapping a downcased type symbol
  # (<tt>:hcard</tt> for +h-card+) to an Array of extracted objects.
  #
  # html:: a String of markup, or an open File (subclasses of either are accepted).
  #
  # Raises Microformats2::LoadError for any other argument.
  #
  # For every root type a top-level class (e.g. +HCard+) is reused if it is
  # already defined, otherwise created with Object.const_set. An element with
  # several +h-*+ tokens produces one object per token. Root tokens may appear
  # anywhere in the class attribute, not only first.
  def self.parse(html)
    unless html.is_a?(String) || html.is_a?(File)
      raise LoadError, "argument must be a String or File"
    end

    doc = Nokogiri::HTML(html)
    microformats = Hash.new { |hash, key| hash[key] = [] }

    # Substring match is only a coarse pre-filter ("rich-text" also contains
    # "h-"); the per-token check against ROOT_CLASS_NAME is what decides.
    doc.css("*[class*=h-]").each do |microformat|
      microformat.attribute("class").to_s.split.each do |css_class|
        next unless css_class =~ ROOT_CLASS_NAME

        constant_name = classify(css_class.gsub("-", "_"))
        klass = if Object.const_defined?(constant_name)
                  Object.const_get(constant_name)
                else
                  Object.const_set(constant_name, Class.new)
                end

        obj = klass.new

        # Add any properties to the object
        add_properties(microformat, obj)
        add_urls(microformat, obj)
        add_dates(microformat, obj)
        add_times(microformat, obj)

        microformats[constant_name.downcase.to_sym] << obj
      end
    end

    microformats
  end

  # Ensures +obj+ answers to +method_name+ by defining an attr_accessor on
  # obj's class when it does not already respond to the reader. The accessor is
  # defined on the class, so every instance of that class gains it.
  #
  # Returns +obj+.
  def self.add_method(obj, method_name)
    obj.class.send(:attr_accessor, method_name) unless obj.respond_to?(method_name)

    obj
  end

  # Copies the whitespace-normalised text of every +p-+, +n-+, +e-+ and +i-+
  # property found under +mf+ onto +obj+.
  def self.add_properties(mf, obj)
    each_property(mf, %w(p n e i)) do |property, method_name|
      store_value(obj, method_name, property.text.gsub(/\s+/, " ").strip)
    end
  end

  # Copies the +href+ attribute (an empty String when absent) of every +u-+
  # property found under +mf+ onto +obj+.
  def self.add_urls(mf, obj)
    each_property(mf, %w(u)) do |property, method_name|
      store_value(obj, method_name, property.attribute("href").to_s)
    end
  end

  # Parses every +d-+ property found under +mf+ as a DateTime, reading the
  # +title+ attribute when present and the element text otherwise, and stores
  # it on +obj+. Values that DateTime.parse rejects are skipped so that one
  # malformed element does not abort the whole document.
  def self.add_dates(mf, obj)
    each_property(mf, %w(d)) do |property, method_name|
      begin
        value = DateTime.parse(title_or_text(property))
      rescue ArgumentError
        next
      end

      store_value(obj, method_name, value)
    end
  end

  # Same as add_dates, but for +t-+ properties parsed with Time.parse.
  def self.add_times(mf, obj)
    each_property(mf, %w(t)) do |property, method_name|
      begin
        value = Time.parse(title_or_text(property))
      rescue ArgumentError
        next
      end

      store_value(obj, method_name, value)
    end
  end

  # Thank you Rails Developers for your unintentional contribution to this project
  # File activesupport/lib/active_support/inflector/inflections.rb, line 206
  #
  # Turns an underscored name into a class name: "h_card" => "HCard".
  def self.classify(str)
    # strip out any leading schema name
    camelize(singularize(str.to_s.sub(/.*\./, '')))
  end

  # File activesupport/lib/active_support/inflector/inflections.rb, line 148
  #
  # Placeholder kept for API compatibility: applies no inflection rules and
  # simply returns a copy of +word+ as a String.
  def self.singularize(word)
    word.to_s.dup
  end

  # File activesupport/lib/active_support/inflector/methods.rb, line 28
  #
  # Converts "foo_bar" to "FooBar", or to "fooBar" when
  # +first_letter_in_uppercase+ is false. "/" becomes "::". An empty input
  # yields an empty String in both modes.
  def self.camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
    word = lower_case_and_underscored_word.to_s
    camelized = word.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
    return camelized if first_letter_in_uppercase

    # [0, 1] and the trailing to_s keep this safe for the empty string.
    word[0, 1].downcase + camelized[1..-1].to_s
  end

  # Yields [element, method_name] for every class token under +mf+ that is
  # exactly "<prefix>-<name>" for one of +prefixes+. Tokens that merely start
  # with the same letter ("photo", "underline") are ignored, and the token may
  # sit anywhere in the class attribute.
  def self.each_property(mf, prefixes)
    pattern = /\A(?:#{prefixes.join("|")})-(#{PROPERTY_NAME})\z/

    mf.css("*[class]").each do |property|
      property.attribute("class").to_s.split.each do |css_class|
        match = pattern.match(css_class)
        yield property, match[1].gsub("-", "_") if match
      end
    end
  end
  private_class_method :each_property

  # Stores +value+ under +method_name+ on +obj+: the first value is kept as a
  # scalar, later values turn the attribute into an Array. Names that collide
  # with a method obj already has but cannot assign to (e.g. "freeze", "class")
  # are skipped.
  def self.store_value(obj, method_name, value)
    add_method(obj, method_name)

    writer = "#{method_name}="
    return unless obj.respond_to?(writer)

    current = obj.send(method_name)
    case current
    when nil   then obj.send(writer, value)
    when Array then current << value
    else            obj.send(writer, [current, value])
    end
  end
  private_class_method :store_value

  # Source text for a date/time property: the title attribute if there is
  # one, otherwise the element's text.
  def self.title_or_text(property)
    (property.attribute("title") || property.text).to_s
  end
  private_class_method :title_or_text
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require "microformats2"
|
3
|
+
|
4
|
+
require "tempfile" # stdlib; builds the on-disk fixture for the File-based test

# Unit tests for Microformats2.parse: argument validation, root detection and
# extraction of text, URL, date and time properties from an hCard.
class TestMicroformats2 < Test::Unit::TestCase
  # Smallest possible hCard root.
  SIMPLE_HCARD = '<h1 class="h-card">Chris</h1>'

  def test_throw_exception_on_non_string_params
    assert_raise Microformats2::LoadError do
      Microformats2.parse(nil)
    end
  end

  def test_returns_hash_of_microformat_objects
    result = Microformats2.parse("A String")
    assert_equal Hash, result.class
  end

  def test_only_parse_microformats
    result = Microformats2.parse("<html><body><p>Something</p></body></html>")
    assert_equal 0, result.size
  end

  # The markup is written to a temporary file so the test does not depend on
  # a fixture that is not shipped with the gem, and the handle is closed when
  # the block exits.
  def test_extracts_hcard_from_an_html_file
    Tempfile.open("hcard") do |tmp|
      tmp.write(html_page(SIMPLE_HCARD))
      tmp.flush

      File.open(tmp.path) do |file|
        result = Microformats2.parse(file)
        assert_equal HCard, result[:hcard].first.class
      end
    end
  end

  def test_extracts_hcard_from_html
    result = Microformats2.parse(html_page(SIMPLE_HCARD))
    assert_equal HCard, result[:hcard].first.class
  end

  def test_constructs_properties_from_hcard
    mycard = first_hcard(<<-END)
      <h1 class="h-card">
        <a class="p-fn u-url" href="http://factoryjoe.com/">
          <span class="p-given-name">Chris</span>
          <abbr class="p-additional-name">R.</abbr>
          <span class="p-family-name">Messina</span>
        </a>
      </h1>
    END

    assert_equal "Chris", mycard.given_name
    assert_equal "R.", mycard.additional_name
    assert_equal "Messina", mycard.family_name
    assert_equal "Chris R. Messina", mycard.fn
  end

  def test_constructs_dates
    mycard = first_hcard(<<-END)
      <h1 class="h-card">
        <span class="d-bday">1979-09-18</span>
        <span class="d-epoch" title="1970-01-01">EPOCH!</span>
      </h1>
    END

    assert_equal DateTime.parse("1979-09-18"), mycard.bday
    assert_equal DateTime.parse("1970-01-01"), mycard.epoch
  end

  def test_constructs_times
    mycard = first_hcard(<<-END)
      <h1 class="h-card">
        <span class="t-start">09:30</span>
        <span class="t-end" title="6:00">Leaving time</span>
      </h1>
    END

    assert_equal Time.parse("09:30"), mycard.start
    assert_equal Time.parse("06:00"), mycard.end
  end

  # "p-n-x" must yield a single property n_x; the inner "n-" is not a prefix.
  def test_ignores_pattern_matches_not_at_the_beginning_of_class
    mycard = first_hcard(<<-END)
      <h1 class="h-card">
        <span class="p-n-x">Chris</span>
      </h1>
    END

    assert_equal "Chris", mycard.n_x
    assert mycard.n_x.is_a?(String)
  end

  def test_constructs_urls_from_hcard
    mycard = first_hcard(<<-END)
      <h1 class="h-card">
        <a class="p-fn u-url" href="http://factoryjoe.com/">Chris</a>
      </h1>
    END

    assert_equal "http://factoryjoe.com/", mycard.url
  end

  private

  # Wraps +body+ in the HTML skeleton shared by every test document.
  def html_page(body)
    <<-END
      <html>
        <head>
          <title>Simple hCard</title>
        </head>

        <body>
          #{body}
        </body>
      </html>
    END
  end

  # Parses +body+ inside the shared skeleton and returns the first hCard found.
  def first_hcard(body)
    Microformats2.parse(html_page(body))[:hcard].first
  end
end
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: microformats2
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Shane Becker
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-06-25 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: nokogiri
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 3
|
29
|
+
segments:
|
30
|
+
- 0
|
31
|
+
version: "0"
|
32
|
+
type: :runtime
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
name: hoe
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
39
|
+
requirements:
|
40
|
+
- - ~>
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
hash: 17
|
43
|
+
segments:
|
44
|
+
- 2
|
45
|
+
- 9
|
46
|
+
version: "2.9"
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
49
|
+
description: Generic Microformats 2 Extractor
|
50
|
+
email:
|
51
|
+
- veganstraightedge@gmail.com
|
52
|
+
executables:
|
53
|
+
- microformats2
|
54
|
+
extensions: []
|
55
|
+
|
56
|
+
extra_rdoc_files:
|
57
|
+
- History.txt
|
58
|
+
- Manifest.txt
|
59
|
+
- README.txt
|
60
|
+
files:
|
61
|
+
- .autotest
|
62
|
+
- History.txt
|
63
|
+
- Manifest.txt
|
64
|
+
- README.txt
|
65
|
+
- Rakefile
|
66
|
+
- bin/microformats2
|
67
|
+
- lib/microformats2.rb
|
68
|
+
- test/test_microformats2.rb
|
69
|
+
- .gemtest
|
70
|
+
homepage: http://github.com/veganstraightedge/microformats2
|
71
|
+
licenses: []
|
72
|
+
|
73
|
+
post_install_message:
|
74
|
+
rdoc_options:
|
75
|
+
- --main
|
76
|
+
- README.txt
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
hash: 3
|
85
|
+
segments:
|
86
|
+
- 0
|
87
|
+
version: "0"
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
hash: 3
|
94
|
+
segments:
|
95
|
+
- 0
|
96
|
+
version: "0"
|
97
|
+
requirements: []
|
98
|
+
|
99
|
+
rubyforge_project: microformats2
|
100
|
+
rubygems_version: 1.8.5
|
101
|
+
signing_key:
|
102
|
+
specification_version: 3
|
103
|
+
summary: Generic Microformats 2 Extractor
|
104
|
+
test_files:
|
105
|
+
- test/test_microformats2.rb
|