hmachine 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +9 -0
- data/LICENSE +20 -0
- data/README.md +21 -0
- data/Rakefile +44 -0
- data/lib/hmachine/microformat/base.rb +17 -0
- data/lib/hmachine/microformat/hcard.rb +24 -0
- data/lib/hmachine/microformat.rb +30 -0
- data/lib/hmachine.rb +21 -0
- data/test/fixtures/hcard/commercenet.html +21 -0
- data/test/fixtures/hcard/geo.html +28 -0
- data/test/hmachine_test.rb +17 -0
- data/test/microformat/hcard_test.rb +43 -0
- data/test/microformat_test.rb +42 -0
- data/test/test_helper.rb +14 -0
- metadata +83 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Mark Wunsch
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# hMachine
|
2
|
+
|
3
|
+
**Ruby microformat parser**
|
4
|
+
|
5
|
+
## Warning
|
6
|
+
|
7
|
+
This is so little right now. It's not even close to being done. Don't touch it until it has at least a minor version number (Right now it is v0.0.1). You've been warned.
|
8
|
+
|
9
|
+
It should be:
|
10
|
+
|
11
|
+
+ A fully featured microformat parser, with support for every microformat
|
12
|
+
+ A CLI for fetching microformats from a url or a string of html
|
13
|
+
+ Extensions so you can do something like: `String.is_a_valid? :hcard` in your tests
|
14
|
+
+ Export microformats to other standards. hCard => vCard.
|
15
|
+
+ HTML outliner (using HTML5 sectioning)
|
16
|
+
|
17
|
+
Maybe even more than that. It should be your lowercase-semantic-web friend.
|
18
|
+
|
19
|
+
## License
|
20
|
+
|
21
|
+
hMachine is licensed under the [MIT License](http://creativecommons.org/licenses/MIT/) and is Copyright (c) 2010 Mark Wunsch.
|
data/Rakefile
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Rakefile for hMachine: test, gem packaging (Jeweler), RDoc and console tasks.

# Put lib/ on the load path so HMachine::VERSION can be read for the gemspec.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), 'lib')
require 'hmachine'

require 'rake'

task :default => :test

# `rake test` runs every test/**/*_test.rb file with test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new do |t|
  t.libs << "test"
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

# Gem packaging is optional: when Jeweler is missing only a hint is printed
# and the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "hmachine"
    gemspec.summary = "Ruby microformat parser"
    gemspec.description = "A Ruby microformat parser powered by Nokogiri"
    gemspec.version = HMachine::VERSION
    gemspec.homepage = "http://github.com/mwunsch/hmachine"
    gemspec.authors = ["Mark Wunsch"]
    gemspec.email = ["mark@markwunsch.com"]
    gemspec.add_dependency 'nokogiri'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: gem install jeweler"
end

# Documentation task. Newer Rake releases no longer ship rake/rdoctask, and a
# failing require here would abort the whole Rakefile (including `rake test`),
# so prefer the task bundled with RDoc and fall back to the legacy one.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = 'hMachine'
  rdoc.main = 'README.md'
  rdoc.rdoc_files.include('README.*', 'lib/**/*.rb', 'LICENSE')
  rdoc.options << '--inline-source'
end

desc "Open an irb session preloaded with this library"
task :console do
  sh "irb -rubygems -I lib -r hmachine"
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module HMachine
  module Microformat
    # hCard microformat wrapper around a single node.
    #
    # Relies on a class-level +validate+ that is not defined in this file
    # (presumably provided by Base -- confirm against base.rb).
    class HCard < Base

      ROOT_CLASS = "vcard"
      ROOT_SELECTOR = "." + ROOT_CLASS
      WIKI_URL = "http://microformats.org/wiki/hcard"

      # Stores +node+ when the class-level +validate+ accepts it.
      # Raises a RuntimeError ("hCard not found in node") otherwise.
      def initialize(node)
        if self.class.validate(node)
          @node = node
        else
          raise "hCard not found in node"
        end
      end

      # Placeholder for the hCard => vCard export; currently returns nil.
      def to_vcard
      end

      # Placeholder for deriving "n" from a formatted name; currently returns nil.
      def self.infer_n_from_fn(fn)
      end

    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'hmachine/microformat/base'
|
2
|
+
require 'hmachine/microformat/hcard'
|
3
|
+
|
4
|
+
module HMachine
  # Finder helpers that locate microformats inside an HTML document or node.
  module Microformat

    # Finds every hCard in +html+.
    #
    # +html+ is passed through HMachine.get_document before searching.
    # Returns an Array of HCard instances (empty when nothing matches).
    def self.find_hcard(html)
      find_in_node(HCard, HMachine.get_document(html))
    end

    # Finds all supported microformats in +html+. Only hCard is searched for,
    # so this currently delegates to find_hcard.
    def self.find_all(html)
      find_hcard(html)
    end

    # Builds one +microformat+ instance for each node below +node+ that
    # matches microformat::ROOT_SELECTOR and passes microformat.validate.
    #
    # microformat - class exposing ROOT_SELECTOR, .validate(node) and .new(node)
    # node        - object responding to #css(selector)
    #
    # Returns an Array in document order; invalid matches are dropped.
    def self.find_in_node(microformat, node)
      candidates = node.css(microformat::ROOT_SELECTOR)
      # create_for_node already validates and yields nil for rejects, so each
      # candidate is validated exactly once and the nils are compacted away.
      # The block parameter is deliberately not called +node+ to avoid
      # shadowing (or, on Ruby 1.8, overwriting) the method parameter.
      candidates.map { |candidate| create_for_node(microformat, candidate) }.compact
    end

    # Instantiates +microformat+ for +node+.
    # Returns nil when microformat.validate(node) is falsy.
    def self.create_for_node(microformat, node)
      return unless microformat.validate(node)
      microformat.new node
    end

  end
end
|
data/lib/hmachine.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
require 'hmachine/microformat'
|
5
|
+
|
6
|
+
# Top-level entry points of the hMachine microformat parser.
module HMachine
  VERSION = "0.0.1"

  class << self
    # Normalizes +document+ with get_document and returns whatever
    # Microformat.find_all finds in it.
    def find(document)
      Microformat.find_all(get_document(document))
    end

    # Not implemented yet: intended to open +url+ and run find on the
    # resulting document. Currently returns nil.
    def find_with_url(url)
    end

    # Returns +html+ untouched when it already is a Nokogiri::XML::Node,
    # otherwise the result of Nokogiri::HTML.parse(html).
    def get_document(html)
      return html if html.is_a?(Nokogiri::XML::Node)
      Nokogiri::HTML.parse(html)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
<!-- http://microformats.org/wiki/hcard#Live_example -->
|
2
|
+
<div class="vcard">
|
3
|
+
<a class="fn org url" href="http://www.commerce.net/">CommerceNet</a>
|
4
|
+
<div class="adr">
|
5
|
+
<span class="type">Work</span>:
|
6
|
+
<div class="street-address">169 University Avenue</div>
|
7
|
+
<span class="locality">Palo Alto</span>,
|
8
|
+
<abbr class="region" title="California">CA</abbr>
|
9
|
+
<span class="postal-code">94301</span>
|
10
|
+
<div class="country-name">USA</div>
|
11
|
+
</div>
|
12
|
+
<div class="tel">
|
13
|
+
<span class="type">Work</span> +1-650-289-4040
|
14
|
+
</div>
|
15
|
+
<div class="tel">
|
16
|
+
<span class="type">Fax</span> +1-650-289-4041
|
17
|
+
</div>
|
18
|
+
<div>Email:
|
19
|
+
<span class="email">info@commerce.net</span>
|
20
|
+
</div>
|
21
|
+
</div>
|
@@ -0,0 +1,28 @@
|
|
1
|
+
<!-- http://microformats.org/wiki/hcard-examples#GEO_parsing -->
|
2
|
+
<div class="vcard">
|
3
|
+
<span class="fn n">
|
4
|
+
<a class="url" href="http://t37.net">
|
5
|
+
<span class="given-name">Fréderic</span>
|
6
|
+
<span class="family-name">de Villamil</span>
|
7
|
+
</a>
|
8
|
+
</span>
|
9
|
+
<span class="nickname">neuro</span>
|
10
|
+
<a class="email" href="mailto:neuroNOSPAM@t37.net">
|
11
|
+
<span class="type">pref</span><span>erred email</span>
|
12
|
+
</a>
|
13
|
+
<span class="org">Omatis</span>
|
14
|
+
<span class="adr">
|
15
|
+
<abbr class="type" title="dom">France</abbr>
|
16
|
+
<span class="type">home</span> address
|
17
|
+
<abbr class="type" title="postal">mail</abbr> and
|
18
|
+
<abbr class="type" title="parcel">shipments</abbr>:
|
19
|
+
<span class="street-address">12 rue Danton</span>
|
20
|
+
<span class="locality">Le Kremlin-Bicetre</span>
|
21
|
+
<span class="postal-code">94270</span>
|
22
|
+
<span class="country-name">France</span>
|
23
|
+
</span>
|
24
|
+
<span class="geo">
|
25
|
+
<abbr class="latitude" title="48.816667">N 48° 81.6667</abbr>
|
26
|
+
<abbr class="longitude" title="2.366667">E 2° 36.6667</abbr>
|
27
|
+
</span>
|
28
|
+
</div>
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'test_helper')
|
2
|
+
|
3
|
+
# Smoke tests for HMachine.get_document and HMachine.find, run against the
# CommerceNet hCard fixture.
class HMachineTest < Test::Unit::TestCase
  setup do
    @html = get_fixture('hcard/commercenet.html')
  end

  test 'gets a Nokogiri doc for a string of HTML' do
    doc = HMachine.get_document(@html)
    # Dedicated assertion reports expected and actual class on failure.
    assert_kind_of Nokogiri::HTML::Document, doc, "Document is a #{doc.class}"
  end

  test 'finds the microformats in a document' do
    microformats = HMachine.find(@html)
    assert_respond_to microformats, :length
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'test_helper')
|
2
|
+
|
3
|
+
# Tests for HMachine::Microformat::HCard built from the first ".vcard" node
# of the CommerceNet fixture.
class HCardTest < Test::Unit::TestCase
  setup do
    @html = get_fixture('hcard/commercenet.html')
    @node = Nokogiri::HTML.parse(@html).css(HMachine::Microformat::HCard::ROOT_SELECTOR)[0]
    @hcard = HMachine::Microformat::HCard.new(@node)
  end

  describe 'Constants' do
    test "hCard's root class" do
      assert_equal 'vcard', @hcard.class::ROOT_CLASS, "Root class is #{@hcard.class::ROOT_CLASS}"
    end

    test "hCard's root selector" do
      assert_equal '.vcard', @hcard.class::ROOT_SELECTOR, "Root selector is #{@hcard.class::ROOT_SELECTOR}"
    end
  end

  describe "Inheritance" do
    test "hCard's wiki url" do
      assert_equal @hcard.class::WIKI_URL, @hcard.class.wiki_url, "Wiki url is #{@hcard.class.wiki_url}"
    end

    test "hCard's validation" do
      assert @hcard.class.validate(@node)
    end
  end

  test "rejects invalid nodes" do
    # A whole document (rather than a ".vcard" node) must be refused.
    assert_raise RuntimeError do
      HMachine::Microformat::HCard.new(Nokogiri::HTML.parse(@html))
    end
  end

  test 'retains original node' do
    assert_equal @node, @hcard.node
  end

end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'test_helper')
|
2
|
+
|
3
|
+
# Tests for the HMachine::Microformat finder helpers, run against the
# CommerceNet hCard fixture parsed into a Nokogiri document.
class MicroformatTest < Test::Unit::TestCase
  setup do
    @html = get_fixture('hcard/commercenet.html')
    @document = Nokogiri::HTML.parse(@html)
    @hcard_class = HMachine::Microformat::HCard
  end

  test 'creates a microformat for a given node' do
    hcard = HMachine::Microformat.create_for_node(@hcard_class, @document.css(@hcard_class::ROOT_SELECTOR)[0])
    assert_kind_of @hcard_class, hcard, "Created a #{hcard.class}"
  end

  test "rejects invalid nodes" do
    # The document itself is not an hCard root, so nothing should be built.
    hcard = HMachine::Microformat.create_for_node(@hcard_class, @document)
    assert_nil hcard
  end

  test 'finds a given microformat in a document' do
    first_hcard = HMachine::Microformat.find_in_node(@hcard_class, @document).first
    assert_kind_of @hcard_class, first_hcard, "Object is a #{first_hcard.class}"
  end

  test 'knows that there are multiple microformats in a document' do
    hcards = HMachine::Microformat.find_in_node(@hcard_class, @document)
    assert_respond_to hcards, :length
  end

  test 'finds all the microformats in a document' do
    microformats = HMachine::Microformat.find_all(@document)
    assert_equal 1, microformats.length, "Number of Microformats in document: #{microformats.length}"
  end

  describe 'Find hCard' do
    test 'document contains an hCard' do
      first_hcard = HMachine::Microformat.find_hcard(@document).first
      assert_kind_of @hcard_class, first_hcard, "Object is a #{first_hcard.class}"
    end
  end

end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require File.join(File.dirname(__FILE__), "../vendor/gems/environment")
|
3
|
+
|
4
|
+
lib_path = File.join(File.dirname(__FILE__), '..', 'lib')
|
5
|
+
$LOAD_PATH.unshift lib_path unless $LOAD_PATH.include?(lib_path)
|
6
|
+
|
7
|
+
require 'contest'
|
8
|
+
require 'redgreen'
|
9
|
+
|
10
|
+
require 'hmachine'
|
11
|
+
|
12
|
+
# Returns the contents of the fixture file +filename+, resolved relative to
# the "fixtures" directory next to this file, as a String.
#
# File.read closes the file once it has been read; the former
# `open(...).read` left the handle open and went through Kernel#open.
# Raises Errno::ENOENT when the fixture does not exist.
def get_fixture(filename)
  fixture_path = File.join(File.dirname(__FILE__), 'fixtures', filename.to_s)
  File.read(fixture_path)
end
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hmachine
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Mark Wunsch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-01-09 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
description: A Ruby microformat parser powered by Nokogiri
|
26
|
+
email:
|
27
|
+
- mark@markwunsch.com
|
28
|
+
executables: []
|
29
|
+
|
30
|
+
extensions: []
|
31
|
+
|
32
|
+
extra_rdoc_files:
|
33
|
+
- LICENSE
|
34
|
+
- README.md
|
35
|
+
files:
|
36
|
+
- .gitignore
|
37
|
+
- Gemfile
|
38
|
+
- LICENSE
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- lib/hmachine.rb
|
42
|
+
- lib/hmachine/microformat.rb
|
43
|
+
- lib/hmachine/microformat/base.rb
|
44
|
+
- lib/hmachine/microformat/hcard.rb
|
45
|
+
- test/fixtures/hcard/commercenet.html
|
46
|
+
- test/fixtures/hcard/geo.html
|
47
|
+
- test/hmachine_test.rb
|
48
|
+
- test/microformat/hcard_test.rb
|
49
|
+
- test/microformat_test.rb
|
50
|
+
- test/test_helper.rb
|
51
|
+
has_rdoc: true
|
52
|
+
homepage: http://github.com/mwunsch/hmachine
|
53
|
+
licenses: []
|
54
|
+
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options:
|
57
|
+
- --charset=UTF-8
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "0"
|
65
|
+
version:
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
requirements: []
|
73
|
+
|
74
|
+
rubyforge_project:
|
75
|
+
rubygems_version: 1.3.5
|
76
|
+
signing_key:
|
77
|
+
specification_version: 3
|
78
|
+
summary: Ruby microformat parser
|
79
|
+
test_files:
|
80
|
+
- test/hmachine_test.rb
|
81
|
+
- test/microformat/hcard_test.rb
|
82
|
+
- test/microformat_test.rb
|
83
|
+
- test/test_helper.rb
|