hmachine 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ vendor/
2
+ pkg/
3
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ gem 'nokogiri'
2
+
3
+ only :test do
4
+ gem 'rake'
5
+ gem 'contest'
6
+ gem 'redgreen'
7
+ end
8
+
9
+ bin_path 'vendor/bin'
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Mark Wunsch
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,21 @@
1
+ # hMachine
2
+
3
+ **Ruby microformat parser**
4
+
5
+ ## Warning
6
+
7
+ This project is at a very early stage and is not even close to being done. Don't touch it until it has at least a minor version number (right now it is v0.0.1). You've been warned.
8
+
9
+ It should be:
10
+
11
+ + A fully featured microformat parser, with support for every microformat
12
+ + A CLI for fetching microformats from a url or a string of html
13
+ + Extensions so you can do something like: `String.is_a_valid? :hcard` in your tests
14
+ + Export microformats to other standards. hCard => vCard.
15
+ + HTML outliner (using HTML5 sectioning)
16
+
17
+ Maybe some more than just that. It should be your lowercase-semantic-web friend.
18
+
19
+ ## License
20
+
21
+ hMachine is licensed under the [MIT License](http://creativecommons.org/licenses/MIT/) and is Copyright (c) 2010 Mark Wunsch.
data/Rakefile ADDED
@@ -0,0 +1,44 @@
1
# Build script for the hmachine gem: test, gem packaging, rdoc and console tasks.

# Make lib/ loadable so HMachine::VERSION can be read for the gemspec below.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), 'lib')
require 'hmachine'

require 'rake'

task :default => :test

# `rake test` runs every *_test.rb file under test/.
require 'rake/testtask'
Rake::TestTask.new do |t|
  t.libs << "test"
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

# Gem packaging tasks are optional: without Jeweler we only print a hint.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "hmachine"
    gemspec.summary = "Ruby microformat parser"
    gemspec.description = "A Ruby microformat parser powered by Nokogiri"
    gemspec.version = HMachine::VERSION
    gemspec.homepage = "http://github.com/mwunsch/hmachine"
    gemspec.authors = ["Mark Wunsch"]
    gemspec.email = ["mark@markwunsch.com"]
    gemspec.add_dependency 'nokogiri'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: gem install jeweler"
end

# Prefer the task class shipped with RDoc; fall back to the one bundled with
# older Rake releases, which newer Rake versions no longer provide.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = 'hMachine'
  rdoc.main = 'README.md'
  rdoc.rdoc_files.include('README.*', 'lib/**/*.rb', 'LICENSE')
  rdoc.options << '--inline-source'
end

desc "Open an irb session preloaded with this library"
task :console do
  # `-r rubygems` spells out the require instead of depending on the
  # `ubygems` shim that the `-rubygems` shorthand needs.
  sh "irb -r rubygems -I lib -r hmachine"
end
@@ -0,0 +1,17 @@
1
module HMachine
  module Microformat
    # Behaviour shared by every microformat class.
    #
    # Subclasses are expected to define the constants ROOT_CLASS and
    # WIKI_URL; both are looked up on the receiving class through `self::`,
    # so calling these methods on a class without them raises NameError.
    class Base

      # Reports whether +node+ is a root element for this microformat.
      #
      # An HTML class attribute is a whitespace-separated list of names, so
      # the node qualifies when ROOT_CLASS is any one of them (for example
      # class="vcard author"), not only when it is the sole value.
      #
      # node - object whose #[] returns the value of the 'class' attribute,
      #        or nil when the attribute is absent.
      #
      # Returns true or false.
      def self.validate(node)
        node['class'].to_s.split.include?(self::ROOT_CLASS)
      end

      # Returns the WIKI_URL constant of the receiving class.
      def self.wiki_url
        self::WIKI_URL
      end

      # The node this microformat instance was built from.
      attr_reader :node

    end
  end
end
@@ -0,0 +1,24 @@
1
+ module HMachine
2
+ module Microformat
3
+ class HCard < Base
4
+
5
+ ROOT_CLASS = "vcard"
6
+ ROOT_SELECTOR = ".#{ROOT_CLASS}"
7
+ WIKI_URL = "http://microformats.org/wiki/hcard"
8
+
9
+ def initialize(node)
10
+ raise "hCard not found in node" unless self.class.validate(node)
11
+ @node = node
12
+ end
13
+
14
+ def to_vcard
15
+ # convert to vcard
16
+ end
17
+
18
+ def self.infer_n_from_fn(fn)
19
+ # ...
20
+ end
21
+
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,30 @@
1
+ require 'hmachine/microformat/base'
2
+ require 'hmachine/microformat/hcard'
3
+
4
module HMachine
  # Lookup helpers that turn a parsed document into microformat objects.
  module Microformat

    # Finds every hCard in +html+.
    #
    # html - anything HMachine.get_document accepts.
    #
    # Returns an Array of HCard instances (empty when none are found).
    def self.find_hcard(html)
      find_in_node(HCard, HMachine.get_document(html))
    end

    # Finds every supported microformat in +html+. hCard is the only format
    # searched for here, so this delegates to find_hcard.
    def self.find_all(html)
      find_hcard html
    end

    # Collects all instances of +microformat+ found beneath +node+.
    #
    # microformat - class exposing ROOT_SELECTOR, .validate and .new(node).
    # node        - object responding to #css(selector) with an enumerable
    #               of candidate nodes.
    #
    # Returns an Array with one microformat object per valid candidate, in
    # the order #css yielded them.
    def self.find_in_node(microformat, node)
      # create_for_node already validates and yields nil for rejected
      # candidates, so one pass plus compact is enough.
      node.css(microformat::ROOT_SELECTOR).map { |candidate|
        create_for_node(microformat, candidate)
      }.compact
    end

    # Builds a +microformat+ for +node+.
    #
    # Returns the new instance, or nil when the node fails validation.
    def self.create_for_node(microformat, node)
      return unless microformat.validate(node)
      microformat.new node
    end

  end
end
data/lib/hmachine.rb ADDED
@@ -0,0 +1,21 @@
1
+ require 'uri'
2
+ require 'nokogiri'
3
+
4
+ require 'hmachine/microformat'
5
+
6
# Top-level namespace and entry points of the library.
module HMachine
  VERSION = "0.0.1"

  # Returns +html+ untouched when it is already a Nokogiri::XML::Node;
  # anything else is handed to Nokogiri::HTML.parse.
  def self.get_document(html)
    return html if html.is_a?(Nokogiri::XML::Node)
    Nokogiri::HTML.parse(html)
  end

  # Normalises +document+ through get_document and returns whatever
  # Microformat.find_all reports for it.
  def self.find(document)
    parsed = get_document(document)
    Microformat.find_all(parsed)
  end

  # Placeholder: has no implementation yet and returns nil.
  def self.find_with_url(url)
    # open url and call find method on resulting document
  end
end
@@ -0,0 +1,21 @@
1
+ <!-- http://microformats.org/wiki/hcard#Live_example -->
2
+ <div class="vcard">
3
+ <a class="fn org url" href="http://www.commerce.net/">CommerceNet</a>
4
+ <div class="adr">
5
+ <span class="type">Work</span>:
6
+ <div class="street-address">169 University Avenue</div>
7
+ <span class="locality">Palo Alto</span>,
8
+ <abbr class="region" title="California">CA</abbr>
9
+ <span class="postal-code">94301</span>
10
+ <div class="country-name">USA</div>
11
+ </div>
12
+ <div class="tel">
13
+ <span class="type">Work</span> +1-650-289-4040
14
+ </div>
15
+ <div class="tel">
16
+ <span class="type">Fax</span> +1-650-289-4041
17
+ </div>
18
+ <div>Email:
19
+ <span class="email">info@commerce.net</span>
20
+ </div>
21
+ </div>
@@ -0,0 +1,28 @@
1
+ <!-- http://microformats.org/wiki/hcard-examples#GEO_parsing -->
2
+ <div class="vcard">
3
+ <span class="fn n">
4
+ <a class="url" href="http://t37.net">
5
+ <span class="given-name">Fréderic</span>
6
+ <span class="family-name">de Villamil</span>
7
+ </a>
8
+ </span>
9
+ <span class="nickname">neuro</span>
10
+ <a class="email" href="mailto:neuroNOSPAM@t37.net">
11
+ <span class="type">pref</span><span>erred email</span>
12
+ </a>
13
+ <span class="org">Omatis</span>
14
+ <span class="adr">
15
+ <abbr class="type" title="dom">France</abbr>
16
+ <span class="type">home</span> address
17
+ <abbr class="type" title="postal">mail</abbr> and
18
+ <abbr class="type" title="parcel">shipments</abbr>:
19
+ <span class="street-address">12 rue Danton</span>
20
+ <span class="locality">Le Kremlin-Bicetre</span>
21
+ <span class="postal-code">94270</span>
22
+ <span class="country-name">France</span>
23
+ </span>
24
+ <span class="geo">
25
+ <abbr class="latitude" title="48.816667">N 48° 81.6667</abbr>
26
+ <abbr class="longitude" title="2.366667">E 2° 36.6667</abbr>
27
+ </span>
28
+ </div>
@@ -0,0 +1,17 @@
1
+ require File.join(File.dirname(__FILE__), 'test_helper')
2
+
3
# Exercises the top-level HMachine entry points against the CommerceNet
# hCard fixture.
class HMachineTest < Test::Unit::TestCase
  setup do
    @html = get_fixture('hcard/commercenet.html')
  end

  test 'gets a Nokogiri doc for a string of HTML' do
    parsed = HMachine.get_document(@html)
    is_document = parsed.is_a?(Nokogiri::HTML::Document)
    assert is_document, "Document is a #{parsed.class}"
  end

  test 'finds the microformats in a document' do
    # Only checks that the result is collection-like.
    found = HMachine.find(@html)
    assert found.respond_to?(:length)
  end
end
@@ -0,0 +1,43 @@
1
+ require File.join(File.dirname(__FILE__), '..', 'test_helper')
2
+
3
# Covers HCard's constants, the class methods it inherits from Base, and
# the constructor's validation.
class HCardTest < Test::Unit::TestCase
  setup do
    @html = get_fixture('hcard/commercenet.html')
    roots = Nokogiri::HTML.parse(@html).css(HMachine::Microformat::HCard::ROOT_SELECTOR)
    @node = roots[0]
    @hcard = HMachine::Microformat::HCard.new(@node)
  end

  describe 'Constants' do
    test "hCard's root class" do
      klass = @hcard.class
      assert klass::ROOT_CLASS == 'vcard', "Root class is #{klass::ROOT_CLASS}"
    end

    test "hCard's root selector" do
      klass = @hcard.class
      assert klass::ROOT_SELECTOR == '.vcard', "Root selector is #{klass::ROOT_SELECTOR}"
    end
  end

  describe "Inheritance" do
    test "hCard's wiki url" do
      klass = @hcard.class
      assert klass.wiki_url == klass::WIKI_URL, "Wiki url is #{klass.wiki_url}"
    end

    test "hCard's validation" do
      assert @hcard.class.validate(@node)
    end
  end

  test "rejects invalid nodes" do
    # A whole document is not an hCard root, so the constructor must raise.
    whole_document = Nokogiri::HTML.parse(@html)
    assert_raise(RuntimeError) { HMachine::Microformat::HCard.new(whole_document) }
  end

  test 'retains original node' do
    assert @hcard.node == @node
  end
end
@@ -0,0 +1,42 @@
1
+ require File.join(File.dirname(__FILE__), 'test_helper')
2
+
3
# Exercises the HMachine::Microformat lookup helpers against the CommerceNet
# hCard fixture.
class MicroformatTest < Test::Unit::TestCase
  setup do
    @html = get_fixture('hcard/commercenet.html')
    @document = Nokogiri::HTML.parse(@html)
    @hcard_class = HMachine::Microformat::HCard
  end

  test 'creates a microformat for a given node' do
    root = @document.css(@hcard_class::ROOT_SELECTOR)[0]
    built = HMachine::Microformat.create_for_node(@hcard_class, root)
    assert built.is_a?(@hcard_class), "Created a #{built.class}"
  end

  test "rejects invalid nodes" do
    # The document itself is not an hCard root element.
    built = HMachine::Microformat.create_for_node(@hcard_class, @document)
    assert built.nil?
  end

  test 'finds a given microformat in a document' do
    found = HMachine::Microformat.find_in_node(@hcard_class, @document)
    leading = found[0]
    assert leading.is_a?(@hcard_class), "Object is a #{leading.class}"
  end

  test 'knows that there are multiple microformats in a document' do
    found = HMachine::Microformat.find_in_node(@hcard_class, @document)
    assert found.respond_to?(:length)
  end

  test 'finds all the microformats in a document' do
    found = HMachine::Microformat.find_all(@document)
    assert found.length == 1, "Number of Microformats in document: #{found.length}"
  end

  describe 'Find hCard' do
    test 'document contains an hCard' do
      leading = HMachine::Microformat.find_hcard(@document)[0]
      assert leading.is_a?(@hcard_class), "Object is a #{leading.class}"
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require 'test/unit'
2
+ require File.join(File.dirname(__FILE__), "../vendor/gems/environment")
3
+
4
+ lib_path = File.join(File.dirname(__FILE__), '..', 'lib')
5
+ $LOAD_PATH.unshift lib_path unless $LOAD_PATH.include?(lib_path)
6
+
7
+ require 'contest'
8
+ require 'redgreen'
9
+
10
+ require 'hmachine'
11
+
12
+ def get_fixture(filename)
13
+ open(File.join(File.dirname(__FILE__), 'fixtures', "#{filename}")).read
14
+ end
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hmachine
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Mark Wunsch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-09 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: A Ruby microformat parser powered by Nokogiri
26
+ email:
27
+ - mark@markwunsch.com
28
+ executables: []
29
+
30
+ extensions: []
31
+
32
+ extra_rdoc_files:
33
+ - LICENSE
34
+ - README.md
35
+ files:
36
+ - .gitignore
37
+ - Gemfile
38
+ - LICENSE
39
+ - README.md
40
+ - Rakefile
41
+ - lib/hmachine.rb
42
+ - lib/hmachine/microformat.rb
43
+ - lib/hmachine/microformat/base.rb
44
+ - lib/hmachine/microformat/hcard.rb
45
+ - test/fixtures/hcard/commercenet.html
46
+ - test/fixtures/hcard/geo.html
47
+ - test/hmachine_test.rb
48
+ - test/microformat/hcard_test.rb
49
+ - test/microformat_test.rb
50
+ - test/test_helper.rb
51
+ has_rdoc: true
52
+ homepage: http://github.com/mwunsch/hmachine
53
+ licenses: []
54
+
55
+ post_install_message:
56
+ rdoc_options:
57
+ - --charset=UTF-8
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ requirements: []
73
+
74
+ rubyforge_project:
75
+ rubygems_version: 1.3.5
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: Ruby microformat parser
79
+ test_files:
80
+ - test/hmachine_test.rb
81
+ - test/microformat/hcard_test.rb
82
+ - test/microformat_test.rb
83
+ - test/test_helper.rb