opds 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Benoit Larroque
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,32 @@
1
+ OPDS Parsing library
2
+ ====================
3
+
4
+ This gem provides a parsing library for [OPDS Catalogs](http://opds-spec.org).
5
+
6
+ It also has the ability to discover catalogs in html feeds.
7
+
8
+ Installation
9
+ ------------
10
+
11
+ gem install opds
12
+
13
+
14
+ Usage
15
+ -----
16
+
17
+ Parsing a feed is simple:
18
+
19
+ require "opds"
20
+ OPDS::Feed.parse_url("http://catalog.com/catalog.atom")
21
+
22
+ This method will return an instance of the Feed or Entry class. Each Atom element is accessible directly via a dedicated method (e.g. `feed.title`). Entry also provides a method to directly access any embedded Dublin Core metadata (`dcmetas`). The `raw_doc` attribute gives access to the Nokogiri-parsed source.
23
+
24
+ ### Complete atom entries ###
25
+
26
+ Complete atom entries are available if detected as another instance of the Entry class. Just call `entry.complete` on the partial entry to access it.
27
+
28
+ ### Links ###
29
+
30
+ All links are automatically parsed in feeds and entries. They are made available in a collection called `links`. Relative links should be transformed into their absolute equivalents. Each link has a `navigate` method which proxies a call to `OPDS::Feed.parse_url`.
31
+
32
+
data/Rakefile ADDED
@@ -0,0 +1,46 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ # Gem packaging tasks come from Jeweler; when it cannot be loaded only an install hint is printed.
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "opds"
8
+ gem.summary = %Q{ruby lib to read OPDS feeds}
9
+ gem.description = %Q{ruby lib to access OPDS feeds}
10
+ gem.email = "benoit dot larroque at feedbooks dot com"
11
+ gem.homepage = "http://github.com/zetaben/opds"
12
+ gem.authors = ["Benoit Larroque"]
13
+ gem.add_development_dependency "rspec", ">= 1.2.9"
14
+ gem.add_runtime_dependency "nokogiri"
15
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
+ end
17
+ Jeweler::GemcutterTasks.new
18
+ rescue LoadError
19
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
20
+ end
21
+ # :spec runs every spec/**/*_spec.rb with 'lib' and 'spec' appended to the task's libs.
22
+ require 'spec/rake/spectask'
23
+ Spec::Rake::SpecTask.new(:spec) do |spec|
24
+ spec.libs << 'lib' << 'spec'
25
+ spec.spec_files = FileList['spec/**/*_spec.rb']
26
+ end
27
+ # :rcov uses the same spec pattern with rcov switched on.
28
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
29
+ spec.libs << 'lib' << 'spec'
30
+ spec.pattern = 'spec/**/*_spec.rb'
31
+ spec.rcov = true
32
+ end
33
+ # NOTE(review): :check_dependencies is not defined in this file - presumably provided by the Jeweler tasks above; confirm.
34
+ task :spec => :check_dependencies
35
+ # Running plain `rake` runs the specs.
36
+ task :default => :spec
37
+ # RDoc is generated into rdoc/ from README* and lib/**/*.rb.
38
+ require 'rake/rdoctask'
39
+ Rake::RDocTask.new do |rdoc|
40
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
41
+ # version is the content of the VERSION file, or an empty string when the file is missing.
42
+ rdoc.rdoc_dir = 'rdoc'
43
+ rdoc.title = "opds #{version}"
44
+ rdoc.rdoc_files.include('README*')
45
+ rdoc.rdoc_files.include('lib/**/*.rb')
46
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.0
data/lib/opds.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'opds/opds'
2
+ require 'opds/support/logging'
3
+ require 'opds/support/browser'
4
+ require 'opds/support/linkset'
5
+ require 'opds/parser'
6
+
7
+ require 'opds/feed'
8
+ require 'opds/entry'
9
+ require 'opds/acquisition_feed'
10
+ require 'opds/navigation_feed'
@@ -0,0 +1,4 @@
1
+ module OPDS
2
+ class AcquisitionFeed < Feed # Feed subclass; it adds no behaviour of its own here.
3
+ end
4
+ end
data/lib/opds/entry.rb ADDED
@@ -0,0 +1,138 @@
1
+ module OPDS
2
+ class Entry
3
+ include Logging
4
+ attr_reader :raw_doc
5
+ attr_reader :title
6
+ attr_reader :id
7
+ attr_reader :updated
8
+ attr_reader :published
9
+ attr_reader :summary
10
+ attr_reader :authors
11
+ attr_reader :links
12
+ attr_reader :dcmetas
13
+ attr_reader :categories
14
+ attr_reader :content
15
+ attr_reader :rights
16
+ attr_reader :subtitle
17
+ # Keeps the given browser, or creates an OPDS::Support::Browser when none (nil/false) is passed.
18
+ def initialize(browser=nil)
19
+ @browser=browser
20
+ @browser||=OPDS::Support::Browser.new
21
+ end
22
+ # Builds an entry from already parsed content: sets @raw_doc and @namespaces directly,
23
+ # runs serialize! to fill the reader attributes, and returns the new instance.
24
+ def self.from_nokogiri(content,namespaces=nil, browser=nil)
25
+ z=self.new browser
26
+ z.instance_variable_set('@raw_doc',content)
27
+ z.instance_variable_set('@namespaces',namespaces)
28
+ z.serialize!
29
+ z
30
+ end
31
+
32
+
33
+ def serialize!
34
+ @namespaces=raw_doc.root.namespaces if @namespaces.nil?
35
+ @authors=[]
36
+ @raw_doc=raw_doc.at('./xmlns:entry',@namespaces) if raw_doc.at('./xmlns:entry',@namespaces)
37
+ @title=text(raw_doc.at('./xmlns:title',@namespaces))
38
+ @id=text(raw_doc.at('./xmlns:id',@namespaces))
39
+ @summary=text(raw_doc.at('./xmlns:summary',@namespaces))
40
+ d=text(raw_doc.at('./xmlns:updated',@namespaces))
41
+ @updated=DateTime.parse(d) unless d.nil?
42
+ d=text(raw_doc.at('./xmlns:published',@namespaces))
43
+ @published=DateTime.parse(d) unless d.nil?
44
+
45
+ @authors=raw_doc.xpath('./xmlns:author',@namespaces).collect do |auth|
46
+ {
47
+ :name => text(raw_doc.at('./xmlns:author/xmlns:name',@namespaces)),
48
+ :uri => text(raw_doc.at('./xmlns:author/xmlns:uri',@namespaces)),
49
+ :email => text(raw_doc.at('./xmlns:author/xmlns:email',@namespaces))
50
+ }
51
+ end
52
+
53
+ @links=OPDS::Support::LinkSet.new @browser
54
+ raw_doc.xpath('./xmlns:link',@namespaces).each do |n|
55
+ text=nil
56
+ text=n.attributes['title'].value unless n.attributes['title'].nil?
57
+ link=n.attributes['href'].value
58
+ type=n.attributes['type'].value unless n.attributes['type'].nil?
59
+ price=nil
60
+ currency=nil
61
+ @namespaces['opds']||='http://opds-spec.org/2010/catalog'
62
+ oprice=n.at('./opds:price',@namespaces)
63
+ if oprice
64
+ price=text(oprice)
65
+ currency=oprice.attributes['currencycode'].value unless oprice.attributes['currencycode'].nil?
66
+ end
67
+
68
+ unless n.attributes['rel'].nil?
69
+ n.attributes['rel'].value.split.each do |rel|
70
+ @links.push(rel,link,text,type,price,currency)
71
+ end
72
+ else
73
+ @links.push(nil,link,text,type,price,currency)
74
+ end
75
+ end
76
+ @dcmetas=Hash.new
77
+ prefs=@namespaces.reject{|_,v| !%W[http://purl.org/dc/terms/ http://purl.org/dc/elements/1.1/].include?v}
78
+ prefs.keys.map{|p| p.split(':').last}.each do |pref|
79
+ raw_doc.xpath('./'+pref+':*',@namespaces).each do |n|
80
+ @dcmetas[n.name]=[] unless @dcmetas[n.name]
81
+ @dcmetas[n.name].push [n.text, n]
82
+ end
83
+ end
84
+
85
+ @categories=raw_doc.xpath('./xmlns:category',@namespaces).collect do |n|
86
+ [text(n.attributes['label']),text(n.attributes['term'])]
87
+ end
88
+
89
+ @content=raw_doc.at('./xmlns:content',@namespaces).to_s
90
+
91
+ @contributors=raw_doc.xpath('./xmlns:contributor',@namespaces).collect do |auth|
92
+ {
93
+ :name => text(raw_doc.at('./xmlns:contributor/xmlns:name',@namespaces)),
94
+ :uri => text(raw_doc.at('./xmlns:contributor/xmlns:uri',@namespaces)),
95
+ :email => text(raw_doc.at('./xmlns:contributor/xmlns:email',@namespaces))
96
+ }
97
+ end
98
+
99
+ @rights=text(raw_doc.at('./xmlns:rights',@namespaces))
100
+ @subtitle=text(raw_doc.at('./xmlns:rights',@namespaces))
101
+
102
+ end
103
+
104
+ # Convenience accessor: the first element of authors.
105
+ def author
106
+ authors.first
107
+ end
108
+
109
+ def partial?
110
+ links.by(:rel)['alternate'].any? do |l|
111
+ l[3]=='application/atom+xml'||l[3]=='application/atom+xml;type=entry'
112
+ end
113
+ end
114
+
115
+ def complete_url
116
+ links.by(:rel)['alternate'].find do |l|
117
+ l[3]=='application/atom+xml;type=entry'||l[3]=='application/atom+xml'
118
+ end unless !partial?
119
+ end
120
+
121
+ def acquisition_links
122
+ rel_start='http://opds-spec.org/acquisition'
123
+ [*links.by(:rel).reject do |k,_|
124
+ k[0,rel_start.size]!=rel_start unless k.nil?
125
+ end.values]
126
+ end
127
+
128
+ def inspect
129
+ "#<#{self.class}:0x#{self.object_id.abs.to_s(16)} #{instance_variables.reject{|e| e=='@raw_doc' }.collect{|e| "#{e}=#{instance_variable_get(e).inspect}"}.join(' ')} >"
130
+ end
131
+
132
+ protected
133
+ def text(t) # Returns t.text, or nil when t is nil.
134
+ return t.text unless t.nil?
135
+ t
136
+ end
137
+ end
138
+ end
data/lib/opds/feed.rb ADDED
@@ -0,0 +1,149 @@
1
+ module OPDS
2
+ class Feed
3
+ include Logging
4
+ attr_reader :raw_doc
5
+ attr_reader :entries
6
+
7
+ # Keeps the given browser, or creates an OPDS::Support::Browser when none (nil/false) is passed.
8
+ def initialize(browser=nil)
9
+ @browser=browser
10
+ @browser||=OPDS::Support::Browser.new
11
+ end
12
+ # Access the root catalog: returns self when root? is truthy, otherwise @root.
13
+ # NOTE(review): nothing visible here assigns @root and root? has an empty body, so this looks like it returns nil - confirm.
14
+ def root
15
+ return @root unless root?
16
+ self
17
+ end
18
+ # Root catalog predicate.
19
+ # NOTE(review): the body is empty, so this returns nil (falsy) - looks unimplemented; confirm.
20
+ def root?
21
+ end
22
+
23
+ def self.parse_url(url,browser=nil,parser_opts={})
24
+ @browser=browser
25
+ @browser||=OPDS::Support::Browser.new
26
+ @browser.go_to(url)
27
+ if @browser.ok?
28
+ parsed = self.parse_raw(@browser.body,parser_opts,browser)
29
+ if parsed.nil?
30
+ disco=@browser.discover(@browser.current_location)
31
+ if disco.size > 0
32
+ d=disco[nil]
33
+ d||=disco['related']
34
+ d||=disco
35
+ Logging.log("Discovered : #{d.first.url}")
36
+ return d.first.navigate
37
+ end
38
+ return false
39
+ else
40
+ return parsed
41
+ end
42
+ else
43
+ return false
44
+ end
45
+ end
46
+ # Parses txt with an OPDSParser built from opts; returns what the parser produced, or nil when its sniffed_type is nil.
47
+ def self.parse_raw(txt,opts={},browser=nil)
48
+ parser=OPDSParser.new(opts)
49
+ pfeed=parser.parse(txt,browser)
50
+ type=parser.sniffed_type
51
+ return pfeed unless type.nil?
52
+ nil
53
+ end
54
+ # Builds a feed from already parsed content: sets @raw_doc directly, runs serialize! and returns the new instance.
55
+ def self.from_nokogiri(content,browser=nil)
56
+ z=self.new browser
57
+ z.instance_variable_set('@raw_doc',content)
58
+ z.serialize!
59
+ z
60
+ end
61
+ # Fills @entries with one OPDS::Entry per /feed/entry element, passing on the root namespaces and this feed's browser.
62
+ # Read XML entries into Entry objects.
63
+ def serialize!
64
+ @entries=raw_doc.xpath('/xmlns:feed/xmlns:entry',raw_doc.root.namespaces).map do |el|
65
+ OPDS::Entry.from_nokogiri(el,raw_doc.root.namespaces,@browser)
66
+ end
67
+ end
68
+ # Text of the /feed/title element, or nil when it is absent.
69
+ def title
70
+ text(raw_doc.at('/xmlns:feed/xmlns:title',raw_doc.root.namespaces))
71
+ end
72
+ # Text of the /feed/icon element, or nil when it is absent.
73
+ def icon
74
+ text(raw_doc.at('/xmlns:feed/xmlns:icon',raw_doc.root.namespaces))
75
+ end
76
+ # Builds the feed-level LinkSet from /feed/link elements on demand and returns it; it is rebuilt on each call while missing or empty.
77
+ def links
78
+ if !@links || @links.size ==0
79
+ @links=OPDS::Support::LinkSet.new @browser
80
+ raw_doc.xpath('/xmlns:feed/xmlns:link',raw_doc.root.namespaces).each do |n|
81
+ text=nil
82
+ text=n.attributes['title'].value unless n.attributes['title'].nil?
83
+ link=n.attributes['href'].value
84
+ unless n.attributes['rel'].nil?
85
+ n.attributes['rel'].value.split.each do |rel|
86
+ @links.push(rel,link,text)
87
+ end
88
+ else
89
+ @links.push(nil,link,text)
90
+ end
91
+ end
92
+ # NOTE(review): only rel, href and title are pushed here; the link's type attribute is not read - confirm that is intended.
93
+ end
94
+ @links
95
+ end
96
+ # Text of the /feed/id element, or nil when it is absent.
97
+ def id
98
+ text(raw_doc.at('/xmlns:feed/xmlns:id',raw_doc.root.namespaces))
99
+ end
100
+ # Hash with the :name, :uri and :email text found under /feed/author; a value is nil when its element is absent.
101
+ def author
102
+ {
103
+ :name => text(raw_doc.at('/xmlns:feed/xmlns:author/xmlns:name',raw_doc.root.namespaces)),
104
+ :uri => text(raw_doc.at('/xmlns:feed/xmlns:author/xmlns:uri',raw_doc.root.namespaces)),
105
+ :email => text(raw_doc.at('/xmlns:feed/xmlns:author/xmlns:email',raw_doc.root.namespaces))
106
+ }
107
+ end
108
+
109
+ # Whatever links.link_url returns for rel 'next' (paginated? treats nil as "no such link").
110
+ def next_page_url
111
+ links.link_url(:rel => 'next')
112
+ end
113
+ # Whatever links.link_url returns for rel 'prev' (paginated? treats nil as "no such link").
114
+ def prev_page_url
115
+ links.link_url(:rel => 'prev')
116
+ end
117
+ # True when next_page_url or prev_page_url is not nil.
118
+ def paginated?
119
+ !next_page_url.nil?||!prev_page_url.nil?
120
+ end
121
+ # On a paginated feed: true when there is no 'prev' URL. Returns nil when the feed is not paginated.
122
+ def first_page?
123
+ !prev_page_url if paginated?
124
+ end
125
+ # On a paginated feed: true when there is no 'next' URL. Returns nil when the feed is not paginated.
126
+ def last_page?
127
+ !next_page_url if paginated?
128
+ end
129
+ # Parses next_page_url through Feed.parse_url with this feed's browser. NOTE(review): no guard for a nil URL - confirm parse_url copes.
130
+ def next_page
131
+ Feed.parse_url(next_page_url,@browser)
132
+ end
133
+ # Parses prev_page_url through Feed.parse_url with this feed's browser. NOTE(review): no guard for a nil URL - confirm parse_url copes.
134
+ def prev_page
135
+ Feed.parse_url(prev_page_url,@browser)
136
+ end
137
+
138
+ def inspect
139
+ "#<#{self.class}:0x#{self.object_id.abs.to_s(16)} entries(count):#{@entries.size} #{instance_variables.reject{|e| e=='@raw_doc'||e=='@entries' }.collect{|e| "#{e}=#{instance_variable_get(e).inspect}"}.join(' ')} >"
140
+ end
141
+
142
+ protected
143
+ def text(t) # Returns t.text, or nil when t is nil.
144
+ return t.text unless t.nil?
145
+ t
146
+ end
147
+
148
+ end
149
+ end
@@ -0,0 +1,4 @@
1
+ module OPDS
2
+ class NavigationFeed < Feed # Feed subclass; it adds no behaviour of its own here.
3
+ end
4
+ end