opds 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.md +32 -0
- data/Rakefile +46 -0
- data/VERSION +1 -0
- data/lib/opds.rb +10 -0
- data/lib/opds/acquisition_feed.rb +4 -0
- data/lib/opds/entry.rb +138 -0
- data/lib/opds/feed.rb +149 -0
- data/lib/opds/navigation_feed.rb +4 -0
- data/lib/opds/opds.rb +5 -0
- data/lib/opds/parser.rb +45 -0
- data/lib/opds/support/browser.rb +85 -0
- data/lib/opds/support/linkset.rb +164 -0
- data/lib/opds/support/logging.rb +11 -0
- data/opds.gemspec +76 -0
- data/samples/acquisition.txt +538 -0
- data/samples/entry.txt +32 -0
- data/samples/navigation.txt +46 -0
- data/spec/browser_spec.rb +21 -0
- data/spec/entry_spec.rb +51 -0
- data/spec/linkset_spec.rb +34 -0
- data/spec/opdsparser_spec.rb +73 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- metadata +126 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Benoit Larroque
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
OPDS Parsing library
|
2
|
+
====================
|
3
|
+
|
4
|
+
This gem provides a parsing library for [OPDS Catalogs](http://opds-spec.org).
|
5
|
+
|
6
|
+
It also has the ability to discover catalogs in html feeds.
|
7
|
+
|
8
|
+
Installation
|
9
|
+
------------
|
10
|
+
|
11
|
+
gem install opds
|
12
|
+
|
13
|
+
|
14
|
+
Usage
|
15
|
+
-----
|
16
|
+
|
17
|
+
Parsing a feed is simple:
|
18
|
+
|
19
|
+
require "opds"
|
20
|
+
OPDS::Feed.parse_url("http://catalog.com/catalog.atom")
|
21
|
+
|
22
|
+
This method will return an instance of the Feed or Entry classes. Each Atom element is accessible directly via a dedicated method (e.g. `feed.title`). Entry also provides a method to directly access any embedded Dublin Core metadata (`dcmetas`). The `raw_doc` attribute gives access to the Nokogiri parsed source.
|
23
|
+
|
24
|
+
### Complete atom entries ###
|
25
|
+
|
26
|
+
Complete atom entries are available if detected as another instance of the Entry class. Just call `entry.complete` on the partial entry to access it.
|
27
|
+
|
28
|
+
### Links ###
|
29
|
+
|
30
|
+
All links in feeds and entries are parsed automatically and made available in a collection called `links`. Relative links should be transformed into their absolute equivalents. Each link has a `navigate` method, which proxies a call to `OPDS::Feed.parse_url`.
|
31
|
+
|
32
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging/release tasks are provided by Jeweler. When Jeweler cannot be
# loaded, a hint is printed and the remaining tasks are still defined.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |gem|
    gem.name        = "opds"
    gem.summary     = "ruby lib to read OPDS feeds"
    gem.description = "ruby lib to access OPDS feeds"
    gem.email       = "benoit dot larroque at feedbooks dot com"
    gem.homepage    = "http://github.com/zetaben/opds"
    gem.authors     = ["Benoit Larroque"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    gem.add_runtime_dependency "nokogiri"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'spec/rake/spectask'

# `rake spec`: run every *_spec.rb file found under spec/.
Spec::Rake::SpecTask.new(:spec) do |t|
  t.libs.push('lib', 'spec')
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# `rake rcov`: same specs, with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |t|
  t.libs.push('lib', 'spec')
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

# NOTE(review): :check_dependencies is not defined in this file; presumably it
# is registered by the Jeweler tasks above - confirm before relying on
# `rake spec` without Jeweler installed.
task :spec => :check_dependencies

task :default => :spec

require 'rake/rdoctask'

# `rake rdoc`: API documentation, titled with the content of the VERSION file
# when that file exists.
Rake::RDocTask.new do |rdoc|
  version = ""
  version = File.read('VERSION') if File.exist?('VERSION')

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "opds #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.0
|
data/lib/opds.rb
ADDED
data/lib/opds/entry.rb
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'date' # DateTime is used for the <updated>/<published> timestamps

module OPDS
  # A single Atom entry, either embedded in a feed or parsed from a standalone
  # entry document.
  #
  # Instances are normally built through Entry.from_nokogiri, which stores the
  # parsed XML and immediately extracts every field with #serialize!.
  class Entry
    include Logging

    # Media types that mark an "alternate" link as pointing to an Atom entry.
    ATOM_ENTRY_TYPES = ['application/atom+xml', 'application/atom+xml;type=entry'].freeze
    # Namespace URIs whose child elements are collected into #dcmetas.
    DC_NAMESPACE_URIS = %w[http://purl.org/dc/terms/ http://purl.org/dc/elements/1.1/].freeze
    # URI bound to the "opds" prefix when the namespace hash has no such key.
    OPDS_NAMESPACE_URI = 'http://opds-spec.org/2010/catalog'
    # Every acquisition link relation starts with this string.
    ACQUISITION_REL_PREFIX = 'http://opds-spec.org/acquisition'

    attr_reader :raw_doc
    attr_reader :title
    attr_reader :id
    attr_reader :updated
    attr_reader :published
    attr_reader :summary
    attr_reader :authors
    attr_reader :links
    attr_reader :dcmetas
    attr_reader :categories
    attr_reader :content
    attr_reader :rights
    attr_reader :subtitle
    # Contributors were parsed but never exposed; same shape as #authors.
    attr_reader :contributors

    # browser:: object handed to the entry's LinkSet; a new
    #           OPDS::Support::Browser is created when none is given.
    def initialize(browser=nil)
      @browser = browser || OPDS::Support::Browser.new
    end

    # Builds an Entry from already parsed XML.
    #
    # content::    Nokogiri document or <entry> node.
    # namespaces:: prefix => URI hash used for the XPath queries; when nil it
    #              is read from content.root, so content must then be a document.
    # browser::    forwarded to #initialize.
    #
    # Returns the populated Entry.
    def self.from_nokogiri(content, namespaces=nil, browser=nil)
      entry = new(browser)
      entry.instance_variable_set('@raw_doc', content)
      entry.instance_variable_set('@namespaces', namespaces)
      entry.serialize!
      entry
    end

    # Reads the XML held in #raw_doc into the entry's attributes.
    #
    # Raises whatever DateTime.parse raises when <updated> or <published>
    # holds an unparsable value, and NoMethodError when a <link> has no href.
    def serialize!
      @namespaces = raw_doc.root.namespaces if @namespaces.nil?
      @authors = []
      # A standalone entry document wraps the fields in an <entry> child.
      nested = raw_doc.at('./xmlns:entry', @namespaces)
      @raw_doc = nested if nested

      @title = child_text('title')
      @id = child_text('id')
      @summary = child_text('summary')
      stamp = child_text('updated')
      @updated = DateTime.parse(stamp) unless stamp.nil?
      stamp = child_text('published')
      @published = DateTime.parse(stamp) unless stamp.nil?

      @authors = people('author')
      @links = extract_links
      @dcmetas = extract_dcmetas

      # Each category is a [label, term] pair.
      @categories = raw_doc.xpath('./xmlns:category', @namespaces).collect do |node|
        [text(node.attributes['label']), text(node.attributes['term'])]
      end

      # Serialized <content> element; empty string when the element is absent.
      @content = raw_doc.at('./xmlns:content', @namespaces).to_s

      @contributors = people('contributor')

      @rights = child_text('rights')
      # Previously read from <rights> by mistake.
      @subtitle = child_text('subtitle')
    end

    # First author hash (:name, :uri, :email), or nil when there is none.
    def author
      authors.first
    end

    # True when the entry carries an "alternate" link to an Atom entry
    # document, i.e. a more complete version of this entry.
    def partial?
      !complete_url.nil?
    end

    # First "alternate" link whose type is an Atom entry media type, or nil.
    # Despite the name, the link object itself is returned, as before.
    def complete_url
      # NOTE(review): LinkSet#by is not visible here; the `|| []` guards
      # against it returning nil for an unknown relation.
      alternates = links.by(:rel)['alternate'] || []
      # l[3] is presumably the media type (4th argument given to LinkSet#push).
      alternates.find { |l| ATOM_ENTRY_TYPES.include?(l[3]) }
    end

    # Link groups whose relation starts with the OPDS acquisition prefix,
    # one array element per relation.
    #
    # Links without any rel used to slip through the filter; they are now
    # excluded because they carry no acquisition relation.
    def acquisition_links
      prefix = ACQUISITION_REL_PREFIX
      links.by(:rel).reject do |rel, _|
        rel.nil? || rel[0, prefix.size] != prefix
      end.values
    end

    def inspect
      # instance_variables yields Symbols on Ruby >= 1.9 and Strings on 1.8;
      # to_s makes the @raw_doc exclusion work on both.
      shown = instance_variables.reject { |name| name.to_s == '@raw_doc' }
      dump = shown.collect { |name| "#{name}=#{instance_variable_get(name).inspect}" }.join(' ')
      "#<#{self.class}:0x#{self.object_id.abs.to_s(16)} #{dump} >"
    end

    protected

    # Text of a node or attribute, nil when given nil.
    def text(t)
      t.nil? ? nil : t.text
    end

    private

    # Text of the direct child element +name+ (default namespace), or nil.
    def child_text(name)
      text(raw_doc.at("./xmlns:#{name}", @namespaces))
    end

    # Value of attribute +name+ on +node+, or nil when the attribute is absent.
    def attribute_value(node, name)
      attribute = node.attributes[name]
      attribute.nil? ? nil : attribute.value
    end

    # One {:name, :uri, :email} hash per direct child +element_name+
    # ("author" or "contributor"), each read from its own node.
    def people(element_name)
      raw_doc.xpath("./xmlns:#{element_name}", @namespaces).collect do |person|
        {
          :name => text(person.at('./xmlns:name', @namespaces)),
          :uri => text(person.at('./xmlns:uri', @namespaces)),
          :email => text(person.at('./xmlns:email', @namespaces))
        }
      end
    end

    # Builds the LinkSet from the <link> children. A link is pushed once per
    # whitespace-separated rel value, or once with a nil rel when it has none.
    def extract_links
      set = OPDS::Support::LinkSet.new @browser
      raw_doc.xpath('./xmlns:link', @namespaces).each do |node|
        title = attribute_value(node, 'title')
        href = node.attributes['href'].value
        type = attribute_value(node, 'type')
        price, currency = price_of(node)
        rels = attribute_value(node, 'rel')
        if rels.nil?
          set.push(nil, href, title, type, price, currency)
        else
          rels.split.each do |rel|
            set.push(rel, href, title, type, price, currency)
          end
        end
      end
      set
    end

    # [price, currency] read from the <opds:price> child of a link,
    # [nil, nil] when the link has no price.
    def price_of(link_node)
      # Make sure the "opds" prefix can be used in the XPath query below.
      @namespaces['opds'] ||= OPDS_NAMESPACE_URI
      price_node = link_node.at('./opds:price', @namespaces)
      return [nil, nil] unless price_node
      [text(price_node), attribute_value(price_node, 'currencycode')]
    end

    # Hash of element name => array of [text, node] pairs for every direct
    # child that belongs to one of the Dublin Core namespaces.
    def extract_dcmetas
      metas = {}
      dc_keys = @namespaces.reject { |_, uri| !DC_NAMESPACE_URIS.include?(uri) }.keys
      # Keys look like "xmlns:dc"; only the part after the colon is the prefix.
      dc_keys.map { |key| key.split(':').last }.each do |prefix|
        raw_doc.xpath("./#{prefix}:*", @namespaces).each do |node|
          (metas[node.name] ||= []).push [node.text, node]
        end
      end
      metas
    end
  end
end
|
data/lib/opds/feed.rb
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
module OPDS
  # An OPDS catalog feed backed by a parsed Atom document.
  #
  # Feed.parse_url fetches and parses a URL, Feed.parse_raw parses a string,
  # and Feed.from_nokogiri wraps an already parsed document.
  class Feed
    include Logging

    attr_reader :raw_doc
    attr_reader :entries

    # browser:: object handed to the feed's LinkSet and reused for pagination;
    #           a new OPDS::Support::Browser is created when none is given.
    def initialize(browser=nil)
      @browser = browser || OPDS::Support::Browser.new
    end

    # access root catalog
    def root
      return @root unless root?
      self
    end

    # root catalog predicate
    def root?
      # NOTE(review): empty stub in the original, so it always returns nil and
      # #root always returns @root, which nothing in this class assigns.
      # Behaviour kept as is; the intended rule is not visible here.
    end

    # Fetches +url+ and parses the response.
    #
    # url::         location of the catalog, or of a page advertising one.
    # browser::     browser used for the request; created when nil.
    # parser_opts:: options forwarded to OPDSParser.new.
    #
    # Returns the parsed object, false when the request fails or when the body
    # is not a recognised catalog and discovery finds nothing. When discovery
    # finds links, returns the result of navigating to the first one.
    def self.parse_url(url, browser=nil, parser_opts={})
      # A local variable replaces the former class-level @browser, so no state
      # is shared between calls.
      browser ||= OPDS::Support::Browser.new
      browser.go_to(url)
      return false unless browser.ok?

      # Hand the parser the browser that performed the request (previously the
      # caller's possibly nil argument), so the parsed feed does not start
      # from an unrelated, freshly created browser.
      parsed = parse_raw(browser.body, parser_opts, browser)
      return parsed unless parsed.nil?

      # Not a catalog: look for catalog links advertised by the fetched page.
      disco = browser.discover(browser.current_location)
      return false unless disco.size > 0

      # Prefer links without a rel, then "related" ones, then the set itself.
      candidates = disco[nil] || disco['related'] || disco
      Logging.log("Discovered : #{candidates.first.url}")
      candidates.first.navigate
    end

    # Parses +txt+ with an OPDSParser built from +opts+.
    # Returns the parser's result, or nil when the parser sniffed no type.
    def self.parse_raw(txt, opts={}, browser=nil)
      parser = OPDSParser.new(opts)
      pfeed = parser.parse(txt, browser)
      parser.sniffed_type.nil? ? nil : pfeed
    end

    # Wraps an already parsed Nokogiri document and loads its entries.
    def self.from_nokogiri(content, browser=nil)
      feed = new(browser)
      feed.instance_variable_set('@raw_doc', content)
      feed.serialize!
      feed
    end

    # read xml entries into entry struct
    def serialize!
      @entries = raw_doc.xpath('/xmlns:feed/xmlns:entry', raw_doc.root.namespaces).map do |el|
        OPDS::Entry.from_nokogiri(el, raw_doc.root.namespaces, @browser)
      end
    end

    # Text of the feed's <title>, or nil.
    def title
      feed_text('title')
    end

    # Text of the feed's <icon>, or nil.
    def icon
      feed_text('icon')
    end

    # LinkSet built from the feed-level <link> elements. A link is pushed once
    # per whitespace-separated rel value, or once with a nil rel when it has
    # none. The set is rebuilt on each call for as long as it is empty.
    def links
      if !@links || @links.size == 0
        @links = OPDS::Support::LinkSet.new @browser
        raw_doc.xpath('/xmlns:feed/xmlns:link', raw_doc.root.namespaces).each do |node|
          label = attribute_value(node, 'title')
          href = node.attributes['href'].value
          # The media type is now forwarded as well, as OPDS::Entry does.
          type = attribute_value(node, 'type')
          rels = attribute_value(node, 'rel')
          if rels.nil?
            @links.push(nil, href, label, type)
          else
            rels.split.each do |rel|
              @links.push(rel, href, label, type)
            end
          end
        end
      end
      @links
    end

    # Text of the feed's <id>, or nil.
    def id
      feed_text('id')
    end

    # Hash with the :name, :uri and :email of the feed's author; each value is
    # nil when the corresponding element is missing.
    def author
      {
        :name => feed_text('author/xmlns:name'),
        :uri => feed_text('author/xmlns:uri'),
        :email => feed_text('author/xmlns:email')
      }
    end

    # Result of looking up the rel="next" link in #links.
    def next_page_url
      links.link_url(:rel => 'next')
    end

    # Result of looking up the rel="prev" link in #links.
    def prev_page_url
      links.link_url(:rel => 'prev')
    end

    # True when the feed has a next or a previous page.
    def paginated?
      !(next_page_url.nil? && prev_page_url.nil?)
    end

    # nil when the feed is not paginated, otherwise whether it has no previous page.
    def first_page?
      !prev_page_url if paginated?
    end

    # nil when the feed is not paginated, otherwise whether it has no next page.
    def last_page?
      !next_page_url if paginated?
    end

    # Fetches and parses the next page with this feed's browser.
    def next_page
      Feed.parse_url(next_page_url, @browser)
    end

    # Fetches and parses the previous page with this feed's browser.
    def prev_page
      Feed.parse_url(prev_page_url, @browser)
    end

    def inspect
      # instance_variables yields Symbols on Ruby >= 1.9 and Strings on 1.8;
      # to_s makes the exclusions work on both.
      hidden = ['@raw_doc', '@entries']
      shown = instance_variables.reject { |name| hidden.include?(name.to_s) }
      dump = shown.collect { |name| "#{name}=#{instance_variable_get(name).inspect}" }.join(' ')
      # @entries is nil until #serialize! has run; inspect must not raise then.
      count = (@entries || []).size
      "#<#{self.class}:0x#{self.object_id.abs.to_s(16)} entries(count):#{count} #{dump} >"
    end

    protected

    # Text of a node, nil when given nil.
    def text(t)
      t.nil? ? nil : t.text
    end

    private

    # Text of the element at +path+ below the root <feed>, or nil.
    def feed_text(path)
      text(raw_doc.at("/xmlns:feed/xmlns:#{path}", raw_doc.root.namespaces))
    end

    # Value of attribute +name+ on +node+, or nil when the attribute is absent.
    def attribute_value(node, name)
      attribute = node.attributes[name]
      attribute.nil? ? nil : attribute.value
    end
  end
end
|