hmachine 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Gemfile +9 -0
- data/LICENSE +20 -0
- data/README.md +21 -0
- data/Rakefile +44 -0
- data/lib/hmachine/microformat/base.rb +17 -0
- data/lib/hmachine/microformat/hcard.rb +24 -0
- data/lib/hmachine/microformat.rb +30 -0
- data/lib/hmachine.rb +21 -0
- data/test/fixtures/hcard/commercenet.html +21 -0
- data/test/fixtures/hcard/geo.html +28 -0
- data/test/hmachine_test.rb +17 -0
- data/test/microformat/hcard_test.rb +43 -0
- data/test/microformat_test.rb +42 -0
- data/test/test_helper.rb +14 -0
- metadata +83 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Mark Wunsch
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# hMachine
|
2
|
+
|
3
|
+
**Ruby microformat parser**
|
4
|
+
|
5
|
+
## Warning
|
6
|
+
|
7
|
+
This is so little right now. It's not even close to being done. Don't touch it until it has at least a minor version number (Right now it is v0.0.1). You've been warned.
|
8
|
+
|
9
|
+
It should be:
|
10
|
+
|
11
|
+
+ A fully featured microformat parser, with support for every microformat
|
12
|
+
+ A CLI for fetching microformats from a url or a string of html
|
13
|
+
+ Extensions so you can do something like: `String.is_a_valid? :hcard` in your tests
|
14
|
+
+ Export microformats to other standards. hCard => vCard.
|
15
|
+
+ HTML outliner (using HTML5 sectioning)
|
16
|
+
|
17
|
+
Maybe some more than just that. It should be your lowercase-semantic-web friend.
|
18
|
+
|
19
|
+
## License
|
20
|
+
|
21
|
+
hMachine is licensed under the [MIT License](http://creativecommons.org/licenses/MIT/) and is Copyright (c) 2010 Mark Wunsch.
|
data/Rakefile
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Rakefile for hMachine: test, gem packaging (Jeweler), RDoc and console tasks.

# Make lib/ loadable so HMachine::VERSION is available to the gemspec below.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), 'lib')
require 'hmachine'

require 'rake'

task :default => :test

# `rake test` runs every *_test.rb file under test/.
require 'rake/testtask'
Rake::TestTask.new do |t|
  t.libs << "test"
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

# Gem packaging tasks are optional: only defined when Jeweler is installed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "hmachine"
    gemspec.summary = "Ruby microformat parser"
    gemspec.description = "A Ruby microformat parser powered by Nokogiri"
    gemspec.version = HMachine::VERSION
    gemspec.homepage = "http://github.com/mwunsch/hmachine"
    gemspec.authors = ["Mark Wunsch"]
    gemspec.email = ["mark@markwunsch.com"]
    gemspec.add_dependency 'nokogiri'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: gem install jeweler"
end

# 'rake/rdoctask' no longer ships with current Rake releases; RDoc provides the
# replacement in 'rdoc/task'. Prefer the new location and fall back to the old
# one so the Rakefile keeps working on older toolchains.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = 'hMachine'
  rdoc.main = 'README.md'
  rdoc.rdoc_files.include('README.*', 'lib/**/*.rb', 'LICENSE')
  rdoc.options << '--inline-source'
end

desc "Open an irb session preloaded with this library"
task :console do
  # "-rubygems" depended on the ubygems.rb shim, which newer RubyGems dropped;
  # "-r rubygems" is the equivalent spelling that works on old and new Rubies.
  sh "irb -r rubygems -I lib -r hmachine"
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module HMachine
  module Microformat
    # Wrapper around a single hCard root node.
    #
    # NOTE(review): +validate+ is called on the class but is not defined here;
    # presumably it (and the +node+ / +wiki_url+ accessors exercised by the
    # tests) comes from Base -- confirm against base.rb.
    class HCard < Base

      # Class name that marks the root element of an hCard.
      ROOT_CLASS = "vcard".freeze
      # CSS selector derived from ROOT_CLASS, used to locate hCard roots.
      ROOT_SELECTOR = ".#{ROOT_CLASS}".freeze
      # Location of the hCard specification on the microformats wiki.
      WIKI_URL = "http://microformats.org/wiki/hcard".freeze

      # Stores +node+ as the backing node of this hCard.
      #
      # Raises RuntimeError ("hCard not found in node") when the class-level
      # +validate+ check rejects +node+.
      def initialize(node)
        raise "hCard not found in node" unless self.class.validate(node)
        @node = node
      end

      # TODO: convert to vcard. Not implemented yet; currently returns nil.
      def to_vcard
      end

      # TODO: not implemented yet; currently returns nil regardless of +fn+.
      def self.infer_n_from_fn(fn)
      end

    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'hmachine/microformat/base'
|
2
|
+
require 'hmachine/microformat/hcard'
|
3
|
+
|
4
|
+
module HMachine
  # Lookup helpers that locate microformats inside an HTML document or node.
  module Microformat

    # Finds every hCard in +html+.
    #
    # +html+ is handed to HMachine.get_document before searching.
    # Returns an Array of HCard instances (empty when none are found).
    def self.find_hcard(html)
      find_in_node(HCard, HMachine.get_document(html))
    end

    # Finds all supported microformats in +html+.
    # At present this simply delegates to find_hcard.
    def self.find_all(html)
      find_hcard(html)
    end

    # Collects an instance of +microformat+ for each valid root found in +node+.
    #
    # +microformat+ must expose a ROOT_SELECTOR constant, a class-level
    # +validate+ method and a one-argument constructor. +node+ must respond to
    # +css+ and return an enumerable of candidate nodes.
    #
    # Returns an Array; candidates rejected by +validate+ are omitted.
    def self.find_in_node(microformat, node)
      # The block variable gets its own name so it cannot shadow (or, on
      # Ruby 1.8, overwrite) the +node+ argument. create_for_node already
      # validates, so each candidate is checked exactly once.
      candidates = node.css(microformat::ROOT_SELECTOR)
      candidates.map { |candidate| create_for_node(microformat, candidate) }.compact
    end

    # Builds a +microformat+ instance for +node+.
    # Returns nil when +microformat.validate+ rejects the node.
    def self.create_for_node(microformat, node)
      return unless microformat.validate(node)
      microformat.new(node)
    end

  end
end
|
data/lib/hmachine.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
require 'hmachine/microformat'
|
5
|
+
|
6
|
+
# Top-level entry points of the hMachine microformat parser.
module HMachine
  # Library version (also used as the gem version by the Rakefile).
  VERSION = "0.0.1".freeze

  # Finds the microformats contained in +document+.
  #
  # +document+ is normalised with get_document and then passed to
  # Microformat.find_all, whose result is returned.
  def self.find(document)
    html = get_document(document)
    Microformat.find_all html
  end

  # TODO: open url and call find method on resulting document.
  # Not implemented yet; currently returns nil regardless of +url+.
  def self.find_with_url(url)
  end

  # Returns +html+ unchanged when it already is a Nokogiri::XML::Node;
  # anything else is parsed with Nokogiri::HTML.parse.
  def self.get_document(html)
    html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri::HTML.parse(html)
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<!-- http://microformats.org/wiki/hcard#Live_example -->
|
2
|
+
<div class="vcard">
|
3
|
+
<a class="fn org url" href="http://www.commerce.net/">CommerceNet</a>
|
4
|
+
<div class="adr">
|
5
|
+
<span class="type">Work</span>:
|
6
|
+
<div class="street-address">169 University Avenue</div>
|
7
|
+
<span class="locality">Palo Alto</span>,
|
8
|
+
<abbr class="region" title="California">CA</abbr>
|
9
|
+
<span class="postal-code">94301</span>
|
10
|
+
<div class="country-name">USA</div>
|
11
|
+
</div>
|
12
|
+
<div class="tel">
|
13
|
+
<span class="type">Work</span> +1-650-289-4040
|
14
|
+
</div>
|
15
|
+
<div class="tel">
|
16
|
+
<span class="type">Fax</span> +1-650-289-4041
|
17
|
+
</div>
|
18
|
+
<div>Email:
|
19
|
+
<span class="email">info@commerce.net</span>
|
20
|
+
</div>
|
21
|
+
</div>
|
@@ -0,0 +1,28 @@
|
|
1
|
+
<!-- http://microformats.org/wiki/hcard-examples#GEO_parsing -->
|
2
|
+
<div class="vcard">
|
3
|
+
<span class="fn n">
|
4
|
+
<a class="url" href="http://t37.net">
|
5
|
+
<span class="given-name">Fréderic</span>
|
6
|
+
<span class="family-name">de Villamil</span>
|
7
|
+
</a>
|
8
|
+
</span>
|
9
|
+
<span class="nickname">neuro</span>
|
10
|
+
<a class="email" href="mailto:neuroNOSPAM@t37.net">
|
11
|
+
<span class="type">pref</span><span>erred email</span>
|
12
|
+
</a>
|
13
|
+
<span class="org">Omatis</span>
|
14
|
+
<span class="adr">
|
15
|
+
<abbr class="type" title="dom">France</abbr>
|
16
|
+
<span class="type">home</span> address
|
17
|
+
<abbr class="type" title="postal">mail</abbr> and
|
18
|
+
<abbr class="type" title="parcel">shipments</abbr>:
|
19
|
+
<span class="street-address">12 rue Danton</span>
|
20
|
+
<span class="locality">Le Kremlin-Bicetre</span>
|
21
|
+
<span class="postal-code">94270</span>
|
22
|
+
<span class="country-name">France</span>
|
23
|
+
</span>
|
24
|
+
<span class="geo">
|
25
|
+
<abbr class="latitude" title="48.816667">N 48° 81.6667</abbr>
|
26
|
+
<abbr class="longitude" title="2.366667">E 2° 36.6667</abbr>
|
27
|
+
</span>
|
28
|
+
</div>
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'test_helper')
|
2
|
+
|
3
|
+
# Exercises the top-level HMachine entry points against the CommerceNet fixture.
class HMachineTest < Test::Unit::TestCase
  setup do
    @html = get_fixture('hcard/commercenet.html')
  end

  test 'gets a Nokogiri doc for a string of HTML' do
    doc = HMachine.get_document(@html)
    # assert_kind_of reports the expected and actual classes on failure.
    assert_kind_of Nokogiri::HTML::Document, doc, "Document is a #{doc.class}"
  end

  test 'finds the microformats in a document' do
    microformats = HMachine.find(@html)
    assert_respond_to microformats, :length
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'test_helper')
|
2
|
+
|
3
|
+
# Exercises HMachine::Microformat::HCard against the CommerceNet fixture.
class HCardTest < Test::Unit::TestCase
  setup do
    @html = get_fixture('hcard/commercenet.html')
    # First hCard root element in the fixture document.
    @node = Nokogiri::HTML.parse(@html).css(HMachine::Microformat::HCard::ROOT_SELECTOR)[0]
    @hcard = HMachine::Microformat::HCard.new(@node)
  end

  describe 'Constants' do
    test "hCard's root class" do
      assert_equal 'vcard', @hcard.class::ROOT_CLASS, "Root class is #{@hcard.class::ROOT_CLASS}"
    end

    test "hCard's root selector" do
      assert_equal '.vcard', @hcard.class::ROOT_SELECTOR, "Root selector is #{@hcard.class::ROOT_SELECTOR}"
    end
  end

  describe "Inheritance" do
    test "hCard's wiki url" do
      assert_equal @hcard.class::WIKI_URL, @hcard.class.wiki_url, "Wiki url is #{@hcard.class.wiki_url}"
    end

    test "hCard's validation" do
      assert @hcard.class.validate(@node)
    end
  end

  test "rejects invalid nodes" do
    # A whole document is not an hCard root, so the constructor must raise.
    assert_raise RuntimeError do
      HMachine::Microformat::HCard.new(Nokogiri::HTML.parse(@html))
    end
  end

  test 'retains original node' do
    assert_equal @node, @hcard.node
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'test_helper')
|
2
|
+
|
3
|
+
# Exercises the HMachine::Microformat lookup helpers against the CommerceNet fixture.
class MicroformatTest < Test::Unit::TestCase
  setup do
    @html = get_fixture('hcard/commercenet.html')
    @document = Nokogiri::HTML.parse(@html)
    @hcard_class = HMachine::Microformat::HCard
  end

  test 'creates a microformat for a given node' do
    hcard = HMachine::Microformat.create_for_node(@hcard_class, @document.css(@hcard_class::ROOT_SELECTOR)[0])
    assert_kind_of @hcard_class, hcard, "Created a #{hcard.class}"
  end

  test "rejects invalid nodes" do
    # The document itself is not an hCard root, so nothing should be created.
    hcard = HMachine::Microformat.create_for_node(@hcard_class, @document)
    assert_nil hcard
  end

  test 'finds a given microformat in a document' do
    first_hcard = HMachine::Microformat.find_in_node(@hcard_class, @document)[0]
    assert_kind_of @hcard_class, first_hcard, "Object is a #{first_hcard.class}"
  end

  test 'knows that there are multiple microformats in a document' do
    hcards = HMachine::Microformat.find_in_node(@hcard_class, @document)
    assert_respond_to hcards, :length
  end

  test 'finds all the microformats in a document' do
    microformats = HMachine::Microformat.find_all(@document)
    assert_equal 1, microformats.length, "Number of Microformats in document: #{microformats.length}"
  end

  describe 'Find hCard' do
    test 'document contains an hCard' do
      first_hcard = HMachine::Microformat.find_hcard(@document)[0]
      assert_kind_of @hcard_class, first_hcard, "Object is a #{first_hcard.class}"
    end
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require File.join(File.dirname(__FILE__), "../vendor/gems/environment")
|
3
|
+
|
4
|
+
lib_path = File.join(File.dirname(__FILE__), '..', 'lib')
|
5
|
+
$LOAD_PATH.unshift lib_path unless $LOAD_PATH.include?(lib_path)
|
6
|
+
|
7
|
+
require 'contest'
|
8
|
+
require 'redgreen'
|
9
|
+
|
10
|
+
require 'hmachine'
|
11
|
+
|
12
|
+
# Returns the contents of a fixture file as a String.
#
# +filename+ is a path relative to the "fixtures" directory that sits next to
# this file, e.g. 'hcard/commercenet.html'.
#
# File.read opens, reads and closes the file in one call; the previous
# open(...).read left the file handle open until garbage collection.
def get_fixture(filename)
  File.read(File.join(File.dirname(__FILE__), 'fixtures', filename.to_s))
end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hmachine
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Mark Wunsch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-01-09 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
description: A Ruby microformat parser powered by Nokogiri
|
26
|
+
email:
|
27
|
+
- mark@markwunsch.com
|
28
|
+
executables: []
|
29
|
+
|
30
|
+
extensions: []
|
31
|
+
|
32
|
+
extra_rdoc_files:
|
33
|
+
- LICENSE
|
34
|
+
- README.md
|
35
|
+
files:
|
36
|
+
- .gitignore
|
37
|
+
- Gemfile
|
38
|
+
- LICENSE
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- lib/hmachine.rb
|
42
|
+
- lib/hmachine/microformat.rb
|
43
|
+
- lib/hmachine/microformat/base.rb
|
44
|
+
- lib/hmachine/microformat/hcard.rb
|
45
|
+
- test/fixtures/hcard/commercenet.html
|
46
|
+
- test/fixtures/hcard/geo.html
|
47
|
+
- test/hmachine_test.rb
|
48
|
+
- test/microformat/hcard_test.rb
|
49
|
+
- test/microformat_test.rb
|
50
|
+
- test/test_helper.rb
|
51
|
+
has_rdoc: true
|
52
|
+
homepage: http://github.com/mwunsch/hmachine
|
53
|
+
licenses: []
|
54
|
+
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options:
|
57
|
+
- --charset=UTF-8
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "0"
|
65
|
+
version:
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
requirements: []
|
73
|
+
|
74
|
+
rubyforge_project:
|
75
|
+
rubygems_version: 1.3.5
|
76
|
+
signing_key:
|
77
|
+
specification_version: 3
|
78
|
+
summary: Ruby microformat parser
|
79
|
+
test_files:
|
80
|
+
- test/hmachine_test.rb
|
81
|
+
- test/microformat/hcard_test.rb
|
82
|
+
- test/microformat_test.rb
|
83
|
+
- test/test_helper.rb
|