prism 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of prism might be problematic. Click here for more details.
- data/.gitignore +3 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +51 -0
- data/LICENSE +20 -0
- data/README.md +137 -0
- data/Rakefile +53 -0
- data/bin/prism +99 -0
- data/lib/prism.rb +130 -0
- data/lib/prism/microformat.rb +49 -0
- data/lib/prism/microformat/adr.rb +22 -0
- data/lib/prism/microformat/geo.rb +48 -0
- data/lib/prism/microformat/hcard.rb +182 -0
- data/lib/prism/microformat/rellicense.rb +20 -0
- data/lib/prism/microformat/reltag.rb +38 -0
- data/lib/prism/microformat/votelinks.rb +42 -0
- data/lib/prism/microformat/xfn.rb +54 -0
- data/lib/prism/microformat/xmdp.rb +14 -0
- data/lib/prism/microformat/xoxo.rb +69 -0
- data/lib/prism/pattern.rb +26 -0
- data/lib/prism/pattern/abbr.rb +21 -0
- data/lib/prism/pattern/datetime.rb +75 -0
- data/lib/prism/pattern/typevalue.rb +32 -0
- data/lib/prism/pattern/url.rb +32 -0
- data/lib/prism/pattern/valueclass.rb +51 -0
- data/lib/prism/posh.rb +3 -0
- data/lib/prism/posh/anchor.rb +40 -0
- data/lib/prism/posh/base.rb +204 -0
- data/lib/prism/posh/definition_list.rb +41 -0
- data/prism.gemspec +132 -0
- data/test/fixtures/hcard/commercenet.html +21 -0
- data/test/fixtures/hcard/geo.html +28 -0
- data/test/fixtures/huffduffer.html +466 -0
- data/test/fixtures/likeorhate.html +48 -0
- data/test/fixtures/rel_license.html +4 -0
- data/test/fixtures/test-fixture/hcard/hcard1.html +147 -0
- data/test/fixtures/test-fixture/hcard/hcard11.html +123 -0
- data/test/fixtures/test-fixture/hcard/hcard12.html +178 -0
- data/test/fixtures/test-fixture/hcard/hcard17.html +165 -0
- data/test/fixtures/test-fixture/hcard/hcard2.html +264 -0
- data/test/fixtures/test-fixture/hcard/hcard3.html +144 -0
- data/test/fixtures/test-fixture/hcard/hcard4.html +117 -0
- data/test/fixtures/test-fixture/hcard/hcard5.html +119 -0
- data/test/fixtures/test-fixture/hcard/hcard6.html +188 -0
- data/test/fixtures/test-fixture/hcard/hcard7.html +188 -0
- data/test/fixtures/test-fixture/hcard/hcard8.html +130 -0
- data/test/fixtures/test-fixture/hcard/hcard9.html +111 -0
- data/test/fixtures/test-fixture/hcard/hcard99.html +215 -0
- data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-YYYY-MM-DD--HH-MM.html +9 -0
- data/test/fixtures/test-fixture/value-class-date-time/value-dt-test-abbr-YYYY-MM-DD--HH-MM.html +4 -0
- data/test/fixtures/xfn.html +198 -0
- data/test/fixtures/xmdp.html +32 -0
- data/test/fixtures/xoxo.html +51 -0
- data/test/microformat/adr_test.rb +47 -0
- data/test/microformat/geo_test.rb +66 -0
- data/test/microformat/hcard_test.rb +510 -0
- data/test/microformat/rellicense_test.rb +36 -0
- data/test/microformat/reltag_test.rb +61 -0
- data/test/microformat/votelinks_test.rb +44 -0
- data/test/microformat/xfn_test.rb +28 -0
- data/test/microformat/xmdp_test.rb +16 -0
- data/test/microformat/xoxo_test.rb +51 -0
- data/test/microformat_test.rb +20 -0
- data/test/pattern/date_time_test.rb +55 -0
- data/test/pattern/value_class_test.rb +33 -0
- data/test/pattern_test.rb +132 -0
- data/test/posh/anchor_test.rb +41 -0
- data/test/posh/base_test.rb +150 -0
- data/test/posh/definition_list_test.rb +38 -0
- data/test/prism_test.rb +133 -0
- data/test/test_helper.rb +32 -0
- metadata +161 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
---
|
2
|
+
dependencies:
|
3
|
+
fakeweb:
|
4
|
+
group:
|
5
|
+
- :test
|
6
|
+
version: ">= 0"
|
7
|
+
rake:
|
8
|
+
group:
|
9
|
+
- :test
|
10
|
+
version: ">= 0"
|
11
|
+
contest:
|
12
|
+
group:
|
13
|
+
- :test
|
14
|
+
version: ">= 0"
|
15
|
+
jeweler:
|
16
|
+
group:
|
17
|
+
- :test
|
18
|
+
version: ">= 0"
|
19
|
+
redgreen:
|
20
|
+
group:
|
21
|
+
- :test
|
22
|
+
version: ">= 0"
|
23
|
+
nokogiri:
|
24
|
+
group:
|
25
|
+
- :default
|
26
|
+
version: ">= 0"
|
27
|
+
specs:
|
28
|
+
- nokogiri:
|
29
|
+
version: 1.4.1
|
30
|
+
- json_pure:
|
31
|
+
version: 1.2.0
|
32
|
+
- git:
|
33
|
+
version: 1.2.5
|
34
|
+
- redgreen:
|
35
|
+
version: 1.2.2
|
36
|
+
- rubyforge:
|
37
|
+
version: 2.0.3
|
38
|
+
- rake:
|
39
|
+
version: 0.8.7
|
40
|
+
- gemcutter:
|
41
|
+
version: 0.3.0
|
42
|
+
- jeweler:
|
43
|
+
version: 1.4.0
|
44
|
+
- contest:
|
45
|
+
version: 0.1.2
|
46
|
+
- fakeweb:
|
47
|
+
version: 1.2.8
|
48
|
+
hash: a411a98d29121a4b1d05b9fbe457c6e068325a09
|
49
|
+
sources:
|
50
|
+
- Rubygems:
|
51
|
+
uri: http://gemcutter.org
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Mark Wunsch
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
# Prism
|
2
|
+
|
3
|
+
**Ruby microformat parser and HTML toolkit**
|
4
|
+
|
5
|
+
_Formerly known as hMachine_
|
6
|
+
|
7
|
+
[RDoc](http://rdoc.info/projects/mwunsch/prism) | [Gem](http://rubygems.org/gems/prism) | [Metrics](http://getcaliper.com/caliper/project?repo=git%3A%2F%2Fgithub.com%2Fmwunsch%2Fprism.git)
|
8
|
+
|
9
|
+
## What Prism is:
|
10
|
+
|
11
|
+
+ A robust microformat parser
|
12
|
+
+ A command-line tool for parsing microformats from a url or a string of markup
|
13
|
+
+ A DSL for defining semantic markup patterns
|
14
|
+
+ Export microformats to other standards:
|
15
|
+
+ hCard => vCard
|
16
|
+
|
17
|
+
It is your [lowercase semantic web](http://tantek.com/presentations/2004etech/realworldsemanticspres.html) friend.
|
18
|
+
|
19
|
+
>Designed for humans first and machines second, microformats are a set of simple, open data formats built upon existing and widely adopted standards. Instead of throwing away what works today, microformats intend to solve simpler problems first by adapting to current behaviors and usage patterns (e.g. XHTML, blogging).
|
20
|
+
|
21
|
+
Learn more about Microformats at http://microformats.org.
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
The command line tool takes a SOURCE from the Standard Input or as an argument:
|
26
|
+
|
27
|
+
$: curl http://markwunsch.com | prism --hcard > ~/Desktop/me.vcf
|
28
|
+
|
29
|
+
OR
|
30
|
+
|
31
|
+
$: prism --hcard http://markwunsch.com > ~/Desktop/me.vcf
|
32
|
+
|
33
|
+
## Installation
|
34
|
+
|
35
|
+
With Ruby and Rubygems:
|
36
|
+
|
37
|
+
gem install prism
|
38
|
+
|
39
|
+
Or clone the repository and run `bundle install` to get the development dependencies.
|
40
|
+
|
41
|
+
#### Requirements:
|
42
|
+
|
43
|
+
+ [Nokogiri](http://github.com/tenderlove/nokogiri)
|
44
|
+
|
45
|
+
## Microformats supported (right now, as of this very moment)
|
46
|
+
|
47
|
+
+ [rel-tag](http://microformats.org/wiki/rel-tag)
|
48
|
+
+ [rel-license](http://microformats.org/wiki/rel-license)
|
49
|
+
+ [VoteLinks](http://microformats.org/wiki/vote-links)
|
50
|
+
+ [XFN](http://microformats.org/wiki/XFN)
|
51
|
+
+ [XOXO](http://microformats.org/wiki/xoxo)
|
52
|
+
+ [XMDP](http://microformats.org/wiki/XMDP)
|
53
|
+
+ [geo](http://microformats.org/wiki/geo)
|
54
|
+
+ [adr](http://microformats.org/wiki/adr)
|
55
|
+
+ [hCard](http://microformats.org/wiki/hcard)
|
56
|
+
|
57
|
+
More on the way.
|
58
|
+
|
59
|
+
## Finding Microformats:
|
60
|
+
|
61
|
+
# All microformats
|
62
|
+
Prism.find 'http://foobar.com'
|
63
|
+
|
64
|
+
# A specific microformat
|
65
|
+
Prism.find 'http://foobar.com', :hcard
|
66
|
+
|
67
|
+
# Search HTML too
|
68
|
+
Prism.find big_string_of_html
|
69
|
+
|
70
|
+
### Parsing Microformats:
|
71
|
+
|
72
|
+
twitter_contacts = Prism.find 'http://twitter.com/markwunsch', :hcard
|
73
|
+
me = twitter_contacts.first
|
74
|
+
me.fn
|
75
|
+
#=> "Mark Wunsch"
|
76
|
+
me.n.family_name
|
77
|
+
#=> "Wunsch"
|
78
|
+
me.url
|
79
|
+
#=> "http://markwunsch.com/"
|
80
|
+
File.open('mark.vcf','w') {|f| f.write me.to_vcard }
|
81
|
+
## Add me to your address book!
|
82
|
+
|
83
|
+
## POSH DSL
|
84
|
+
|
85
|
+
The `Prism` module defines a group of methods to search, validate, and extract nodes out of a Nokogiri document.
|
86
|
+
|
87
|
+
All microformats inherit from `Prism::POSH::Base`, because all microformats begin as [POSH formats](http://microformats.org/wiki/posh). If you wanted to create your own POSH format, you'd do something like this:
|
88
|
+
|
89
|
+
class Navigation < Prism::POSH::Base
|
90
|
+
search {|document| document.css('ul#navigation') }
|
91
|
+
# Search a Nokogiri document for nodes of a certain type
|
92
|
+
|
93
|
+
validate {|node| node.matches?('ul#navigation') }
|
94
|
+
# Validate that a node is the right element we want
|
95
|
+
|
96
|
+
has_many :items do
|
97
|
+
search {|doc| doc.css('li') }
|
98
|
+
end
|
99
|
+
# has_many and has_one define Properties of the POSH format (Prism::Property)
|
100
|
+
# Each Property object includes the Prism module, so they can search, validate, and extract
|
101
|
+
end
|
102
|
+
|
103
|
+
Now you can do:
|
104
|
+
|
105
|
+
nav = Navigation.parse_first(document)
|
106
|
+
# document is a Nokogiri document.
|
107
|
+
# parse_first extracts just the first example of the format out of the document
|
108
|
+
|
109
|
+
nav.items
|
110
|
+
# Returns an array of contents
|
111
|
+
# This method comes from the has_many call up above that defines the Property
|
112
|
+
|
113
|
+
## Other Microformat parsers
|
114
|
+
|
115
|
+
+ [Mofo](http://mofo.rubyforge.org/) is a Ruby microformat parser backed by Hpricot.
|
116
|
+
+ [Sumo](http://www.danwebb.net/2007/2/9/sumo-a-generic-microformats-parser-for-javascript) is a JavaScript microformat parser.
|
117
|
+
+ [Operator](https://addons.mozilla.org/en-US/firefox/addon/4106) is a Firefox extension.
|
118
|
+
+ [hKit](http://code.google.com/p/hkit/) is a microformat parser for PHP.
|
119
|
+
+ [Oomph](http://visitmix.com/labs/oomph/) is a microformat toolkit add-in for Internet Explorer.
|
120
|
+
|
121
|
+
## Feature wishlist:
|
122
|
+
|
123
|
+
+ HTML outliner (using HTML5 sectioning)
|
124
|
+
+ Extensions so you can do something like: `String.is_a_valid? :hcard` in your tests
|
125
|
+
+ Extensions to turn Ruby objects into semantic HTML. Hash.to_definition_list, Array.to_ordered_list, etc.
|
126
|
+
|
127
|
+
## TODO:
|
128
|
+
|
129
|
+
+ Handle nested microformats better (I like Prism::Pattern::ValueClass's search implementation the best)
|
130
|
+
+ Code is ugly. Especially XOXO.
|
131
|
+
+ Better recursive parsing of trees. See above.
|
132
|
+
+ Tests are all kinds of disorganized.
|
133
|
+
+ Broader support for some of the weirder Patterns, like object[data]
|
134
|
+
|
135
|
+
## License
|
136
|
+
|
137
|
+
Prism is licensed under the [MIT License](http://creativecommons.org/licenses/MIT/) and is Copyright (c) 2010 Mark Wunsch.
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# Rake tasks for the prism gem: test (default), gem packaging via Jeweler,
# RDoc generation, and an irb console with the library preloaded.

begin
  # Try to require the preresolved locked set of gems.
  require File.expand_path('../.bundle/environment', __FILE__)
rescue LoadError
  # Fall back on doing an unlocked resolve at runtime.
  require "rubygems"
  require "bundler"
  Bundler.setup
end

$LOAD_PATH.unshift File.join(File.dirname(__FILE__), 'lib')
require 'prism'
require 'rake'

task :default => :test

require 'rake/testtask'
Rake::TestTask.new do |t|
  t.libs << "test"
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "prism"
    gemspec.summary = "Ruby microformat parser and HTML toolkit"
    gemspec.description = "A Ruby microformat parser and HTML toolkit powered by Nokogiri"
    gemspec.version = Prism::VERSION
    gemspec.homepage = "http://github.com/mwunsch/prism"
    gemspec.authors = ["Mark Wunsch"]
    gemspec.email = ["mark@markwunsch.com"]
    gemspec.add_dependency 'nokogiri'
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: gem install jeweler"
end

# 'rake/rdoctask' no longer ships with newer Rake releases; prefer the task
# class provided by RDoc itself and only fall back to the legacy one.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  rdoc.rdoc_dir = 'doc'
  rdoc.title = 'Prism'
  rdoc.main = 'README.md'
  rdoc.rdoc_files.include('README.*', 'lib/**/*.rb', 'LICENSE')
  rdoc.options << '--inline-source'
end

desc "Open an irb session preloaded with this library"
task :console do
  # "-r rubygems" works on every Ruby; the old "-rubygems" shorthand needs
  # ubygems.rb, which newer Rubies no longer provide.
  sh "irb -r rubygems -I lib -r prism"
end
|
data/bin/prism
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
## Prism: Microformat parser and HTML toolkit.
|
3
|
+
##
|
4
|
+
## Usage: prism [ OPTION ] [ SOURCE ]
|
5
|
+
##
|
6
|
+
## Find the Microformats in the given SOURCE. SOURCE can be a URL
|
7
|
+
## or a string of HTML. If no Microformat is specified in OPTION,
|
8
|
+
## prism will just print a list of what has been found.
|
9
|
+
##
|
10
|
+
## If a Microformat is specified, Prism will convert it into a
|
11
|
+
## more suitable format. eg. hCard becomes a vCard.
|
12
|
+
##
|
13
|
+
## If no SOURCE is given, prism will read from the Standard Input.
|
14
|
+
##
|
15
|
+
## Microformats:
|
16
|
+
## --vcard, --hcard hCard => vCard converter
|
17
|
+
## --xfn Get XFN URLs
|
18
|
+
##
|
19
|
+
## Other Options:
|
20
|
+
## -h, --help show this help message
|
21
|
+
## -v, --version version of Prism
|
22
|
+
##
|
23
|
+
## Learn more about Microformats at http://microformats.org
|
24
|
+
##
|
25
|
+
##
|
26
|
+
|
27
|
+
require 'optparse'
|
28
|
+
|
29
|
+
# Builds the help text from this script's own header: every line that starts
# with a double hash is kept, its three-character prefix is dropped, and the
# remainder is joined with newlines. A bare double-hash line becomes an empty
# line in the output.
def usage
  help_lines = File.readlines(__FILE__).select { |line| line =~ /^##.*/ }
  help_lines.map { |line| line.chomp[3..-1] }.join("\n")
end
|
35
|
+
|
36
|
+
# Prefer whatever prism is already on the load path; otherwise fall back to
# the lib/ directory that sits next to this script.
begin
  require 'prism'
rescue LoadError
  $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', 'lib')
  require 'prism'
end

# Flags selected on the command line, e.g. { :vcard => true }.
options = {}
begin
  ARGV.options do |option|
    option.banner = "Hi."
    option.on('--hcard','--vcard') { options[:vcard] = true }
    option.on('--xfn') { options[:xfn] = true }
    option.on_tail('-h','--help') { puts usage ; exit }
    option.on_tail('-v','--version') { puts Prism::VERSION ; exit }
    option.parse!
  end
rescue OptionParser::ParseError => e
  # An unknown or malformed flag is a user error: explain it and show the
  # help text instead of dying with a backtrace.
  warn e.message
  warn usage
  exit 1
end

# Nothing to read: no SOURCE argument and nothing piped in.
if ARGV.empty? && STDIN.tty?
  puts usage
  exit
end
|
57
|
+
|
58
|
+
# Summarise which microformats were found.
#
# group - the result of Prism.find. parse_microformats in this script treats
#         that result as possibly nil or a single object, so the same shapes
#         are accepted here in addition to a collection.
#
# Returns an Array with one "Found N <name>(s) ..." String per registered
# microformat that occurs in group. When nothing was found, prints a notice
# and exits the process.
def uformat_counts(group)
  formats = if group.nil?
    []
  elsif group.respond_to?(:select)
    group
  else
    [group]
  end

  summaries = Prism::Microformat.microformats.values.collect do |uformat|
    name = "#{uformat::FRIENDLY_NAME}"
    found = formats.select { |format| format.is_a?(uformat) }.length
    next unless found > 0
    name += "s" if found > 1
    "Found #{found} #{name} in the document. Read more at: #{uformat::WIKI_URL}"
  end.compact

  if summaries.empty?
    puts "No microformats found in this document."
    exit
  end
  summaries
end
|
73
|
+
|
74
|
+
# Find microformats of the given type in doc and yield each one to the block.
#
# doc  - a URL or a string of HTML, handed to Prism.find unchanged.
# type - the microformat key (e.g. :hcard); also used in the notice below.
#
# Prism.find may hand back nothing, a single object, or a collection.
# "Nothing" includes an empty collection, which is reported the same way as
# a nil result instead of silently producing no output.
def parse_microformats(doc, type)
  uformats = Prism.find(doc, type)
  collection = uformats.respond_to?(:length)

  if !uformats || (collection && uformats.length.zero?)
    puts "No #{type}s found in this document."
  elsif block_given?
    if collection
      uformats.each { |uf| yield uf }
    else
      yield uformats
    end
  end
end
|
86
|
+
|
87
|
+
# The SOURCE to parse: the first remaining command-line argument when there
# is one, otherwise everything read from standard input.
def input
  ARGV.first || STDIN.read
end
|
90
|
+
|
91
|
+
# With no format flag, list what was found; otherwise convert the requested
# format. The hCard/vCard flag is checked before the XFN flag.
if options.empty?
  uformat_counts(Prism.find(input)).each { |summary| puts summary }
elsif options[:vcard]
  parse_microformats(input, :hcard) { |hcard| puts hcard.to_vcard }
elsif options[:xfn]
  parse_microformats(input, :xfn) { |xfn| puts xfn.url }
end
|
data/lib/prism.rb
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
# Top-level namespace of the library and, at the same time, a mixin: the
# instance methods below give an object a small search / validate / extract
# pipeline, each step being a replaceable block.
module Prism
  VERSION = "0.1.0"
  PRODID = "-//markwunsch.com//Prism #{VERSION}//EN"

  # Convenience method for Prism::Microformat.find method
  def self.find(document, format=nil)
    Prism::Microformat.find(document, format)
  end

  # Get a string of html or a url and convert it to a Nokogiri Document.
  # A Nokogiri node is returned untouched. A string that parses as an HTTP
  # URI is fetched; anything else is parsed as markup.
  def self.get(html)
    return html if html.is_a?(Nokogiri::XML::Node)
    begin
      url = URI.parse(html)
      url.is_a?(URI::HTTP) ? get_url(url.normalize.to_s) : get_document(html)
    rescue URI::InvalidURIError
      # Not a URI at all, so treat the input as markup.
      get_document(html)
    end
  end

  # Open a URL and convert the contents to a Nokogiri Document
  def self.get_url(url)
    # Read the body in one go rather than appending line by line.
    body = URI.parse(url).open { |web| web.read }
    get_document(body, url)
  end

  # Convert HTML to a Nokogiri Document
  def self.get_document(html, url=nil)
    html.is_a?(Nokogiri::XML::Node) ? html : Nokogiri::HTML.parse(html, url)
  end

  # Turn a name into a stripped, lowercased Symbol.
  def self.normalize(name)
    name.to_s.strip.downcase.intern
  end

  # Map a key to an element or design pattern.
  # Raises a RuntimeError for keys that are not recognized.
  def self.map(key)
    case normalize(key)
    when :value_class, :valueclass, :abbr, :uri, :url, :typevalue
      Prism::Pattern.map(key)
    when :hcard, :geo, :rellicense, :reltag, :votelinks, :xfn, :xmdp, :xoxo, :adr
      Prism::Microformat.map(key)
    when :base
      Prism::POSH::Base
    else
      raise "#{key} is not a recognized parser."
    end
  end

  # Get/Set a function that defines how to find an element in a node.
  # The Search function should return a Nokogiri::XML::NodeSet.
  # eg. <tt>search {|node| node.css(element) }</tt>
  # Without a stored block the default returns the node itself.
  def search(&block)
    @search = block if block_given?
    @search || lambda {|node| node }
  end

  # Search for the element in a document
  def find_in(document)
    search.call(document)
  end

  # Is the element found in node?
  # True when the search returns the node itself or a non-empty result.
  # A nil/false result means "not found"; a result that cannot report
  # emptiness is taken to be a single match.
  def found_in?(node)
    found = find_in(node)
    return true if found.eql?(node)
    return false unless found
    found.respond_to?(:empty?) ? !found.empty? : true
  end

  # Get/Set a function that tests to make sure a given node is
  # the element we want. Should return truthy.
  # Default just tests to see if the node passed is a child of its parent node.
  def validate(&block)
    @validate = block if block_given?
    @validate || lambda { |node| find_in(node.parent).children.include?(node) }
  end

  # Is this a valid node?
  def valid?(node)
    validate.call(node)
  end

  # Define the pattern used to extract contents from node.
  # Can be a symbol that maps to a parser (see Prism.map), or a block.
  # Without either, the default returns the node's stripped content.
  def extract(pattern = nil, &block)
    if block_given?
      @extract = block
    else
      @extract = Prism.map(pattern).extract if pattern
    end
    @extract || lambda{|node| node.content.strip }
  end

  # Extract the content from the node
  def extract_from(node)
    extract.call(node)
  end

  # Parse the document, finding every instance of the desired element, and
  # extract their contents. Returns nil when nothing is found, the lone item
  # when exactly one was extracted, otherwise the whole list.
  def parse(document)
    return unless found_in?(document)
    found = find_in(document)
    contents = if found.respond_to?(:collect)
      found.collect { |element| extract_from(element) }
    else
      extract_from(document)
    end
    # Only unwrap things that actually have a #first; a one-character String
    # has length 1 but is not a list.
    lone = contents.respond_to?(:length) && contents.respond_to?(:first) && contents.length == 1
    lone ? contents.first : contents
  end

  # Parse the document, extracting the content for the first instance of the element
  def parse_first(document)
    return unless found_in?(document)
    elements = find_in(document)
    extract_from(elements.respond_to?(:first) ? elements.first : elements)
  end

end
|
127
|
+
|
128
|
+
require 'prism/pattern'
|
129
|
+
require 'prism/posh'
|
130
|
+
require 'prism/microformat'
|