opds 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +3 -1
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/lib/opds/acquisition_feed.rb +29 -0
- data/lib/opds/entry.rb +41 -9
- data/lib/opds/feed.rb +73 -8
- data/lib/opds/navigation_feed.rb +17 -0
- data/lib/opds/opds.rb +4 -0
- data/lib/opds/parser.rb +14 -0
- data/lib/opds/support/browser.rb +15 -1
- data/lib/opds/support/linkset.rb +104 -6
- data/lib/opds/support/logging.rb +3 -0
- data/opds.gemspec +30 -37
- data/samples/acquisition_opds1_1.txt +639 -0
- data/spec/linkset_spec.rb +12 -3
- data/spec/opdsparser_opds_1_1_spec.rb +75 -0
- metadata +10 -13
- data/.gitignore +0 -21
data/README.md
CHANGED
@@ -17,10 +17,12 @@ Usage
|
|
17
17
|
Parsing a feed is simply done.
|
18
18
|
|
19
19
|
require "opds"
|
20
|
-
OPDS
|
20
|
+
OPDS.access("http://catalog.com/catalog.atom")
|
21
21
|
|
22
22
|
This method will return an instance of the Feed or Entry classes. Each Atom element is accessible directly via a dedicated method (ex: `feed.title`). Entry also provides a method to directly access any embedded Dublin Core metadata (`dcmeta`). The `raw_doc` attribute gives access to the Nokogiri parsed source.
|
23
23
|
|
24
|
+
API documentation can be found on [rdoc.info](http://rdoc.info/github/zetaben/opds/master/frames)
|
25
|
+
|
24
26
|
### Complete atom entries ###
|
25
27
|
|
26
28
|
Complete atom entries are available if detected as another instance of the Entry class. Just call `entry.complete` on the partial entry to access it.
|
data/Rakefile
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.0
|
1
|
+
0.3.0
|
@@ -1,4 +1,33 @@
|
|
1
1
|
module OPDS
|
2
|
+
# Represents an acquisition feed
|
3
|
+
# @see http://opds-spec.org/specs/opds-catalog-1-0-20100830/#Acquisition_Feeds
|
2
4
|
class AcquisitionFeed < Feed
|
5
|
+
|
6
|
+
# Get a collection of facets grouped by opds:facetGroup
|
7
|
+
# @return [Hash] facets
|
8
|
+
def facets
|
9
|
+
return @facets if @facets
|
10
|
+
@facets={}
|
11
|
+
links['http://opds-spec.org/facet'].each do |facet|
|
12
|
+
@facets[facet.facet_group]||=[]
|
13
|
+
@facets[facet.facet_group].push facet
|
14
|
+
end
|
15
|
+
@facets
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
# Get a collection of active_facets by opds:facetGroup
|
20
|
+
# @return [Hash] active facets
|
21
|
+
def active_facets
|
22
|
+
return @selected if @selected
|
23
|
+
@selected={}
|
24
|
+
facets.each do |k,v|
|
25
|
+
@selected[k]=nil
|
26
|
+
v.each do |f|
|
27
|
+
@selected[k]=f if f.active_facet?
|
28
|
+
end
|
29
|
+
end
|
30
|
+
@selected
|
31
|
+
end
|
3
32
|
end
|
4
33
|
end
|
data/lib/opds/entry.rb
CHANGED
@@ -1,27 +1,51 @@
|
|
1
1
|
module OPDS
|
2
|
+
# Represents a catalog entry
|
2
3
|
class Entry
|
3
4
|
include Logging
|
5
|
+
|
6
|
+
# "Raw" Nokogiri document used while parsing.
|
7
|
+
# It might be useful to access atom foreign markup
|
8
|
+
# @return [Nokogiri::XML::Document] Parsed document
|
4
9
|
attr_reader :raw_doc
|
10
|
+
# @return [String] entry title
|
5
11
|
attr_reader :title
|
12
|
+
# @return [String] entry id
|
6
13
|
attr_reader :id
|
14
|
+
# @return [Date] entry updated date
|
7
15
|
attr_reader :updated
|
16
|
+
# @return [Date] entry published date
|
8
17
|
attr_reader :published
|
18
|
+
# @return [String] entry summary
|
9
19
|
attr_reader :summary
|
20
|
+
# @return [Array] entry parsed authors
|
10
21
|
attr_reader :authors
|
22
|
+
# @return [Array] entry parsed contributors
|
23
|
+
attr_reader :contributors
|
24
|
+
# @return [OPDS::Support::LinkSet] Set of links found in the entry
|
11
25
|
attr_reader :links
|
26
|
+
# @return [Hash] Hash of Dublin Core metadata found in the entry
|
27
|
+
# @see http://dublincore.org/documents/dcmi-terms/
|
12
28
|
attr_reader :dcmetas
|
29
|
+
# @return [Array] Categories found
|
13
30
|
attr_reader :categories
|
31
|
+
# @return [String] content found
|
14
32
|
attr_reader :content
|
33
|
+
# @return [String] entry rights
|
15
34
|
attr_reader :rights
|
35
|
+
# @return [String] entry subtitle
|
16
36
|
attr_reader :subtitle
|
17
37
|
|
18
|
-
|
38
|
+
# @param browser (see Feed.parse_url)
|
39
|
+
def initialize(browser=OPDS::Support::Browser.new)
|
19
40
|
@browser=browser
|
20
|
-
@browser||=OPDS::Support::Browser.new
|
21
41
|
end
|
22
42
|
|
23
|
-
|
24
|
-
|
43
|
+
# Create an entry from a nokogiri fragment
|
44
|
+
# @param content [Nokogiri::XML::Element] Nokogiri fragment (should be <entry>)
|
45
|
+
# @param namespaces Associated document namespaces
|
46
|
+
# @param browser (see Feed.parse_url)
|
47
|
+
# @return [Entry]
|
48
|
+
def self.from_nokogiri(content,namespaces=nil, browser=OPDS::Support::Browser.new)
|
25
49
|
z=self.new browser
|
26
50
|
z.instance_variable_set('@raw_doc',content)
|
27
51
|
z.instance_variable_set('@namespaces',namespaces)
|
@@ -29,7 +53,9 @@ module OPDS
|
|
29
53
|
z
|
30
54
|
end
|
31
55
|
|
32
|
-
|
56
|
+
# Read the provided document into the entry struct
|
57
|
+
# @private
|
58
|
+
# @todo really make private
|
33
59
|
def serialize!
|
34
60
|
@namespaces=raw_doc.root.namespaces if @namespaces.nil?
|
35
61
|
@authors=[]
|
@@ -101,23 +127,29 @@ module OPDS
|
|
101
127
|
|
102
128
|
end
|
103
129
|
|
104
|
-
|
130
|
+
# First author
|
131
|
+
# @return [Hash]
|
105
132
|
def author
|
106
133
|
authors.first
|
107
134
|
end
|
108
|
-
|
135
|
+
|
136
|
+
# Is it a partial atom entry ?
|
137
|
+
# @return [boolean]
|
109
138
|
def partial?
|
110
139
|
links.by(:rel)['alternate'].any? do |l|
|
111
140
|
l[3]=='application/atom+xml'||l[3]=='application/atom+xml;type=entry'
|
112
141
|
end
|
113
142
|
end
|
114
|
-
|
143
|
+
|
144
|
+
# @return [String] URL to the complete entry
|
145
|
+
# @todo accessor to the complete entry
|
115
146
|
def complete_url
|
116
147
|
links.by(:rel)['alternate'].find do |l|
|
117
148
|
l[3]=='application/atom+xml;type=entry'||l[3]=='application/atom+xml'
|
118
149
|
end unless !partial?
|
119
150
|
end
|
120
|
-
|
151
|
+
|
152
|
+
# @return [Array] acquisition link subset
|
121
153
|
def acquisition_links
|
122
154
|
rel_start='http://opds-spec.org/acquisition'
|
123
155
|
[*links.by(:rel).reject do |k,_|
|
data/lib/opds/feed.rb
CHANGED
@@ -1,7 +1,16 @@
|
|
1
1
|
module OPDS
|
2
|
+
# Feed class is used as an ancestor to NavigationFeed and AcquisitionFeed; it handles
|
3
|
+
# all the parsing
|
4
|
+
# @abstract Not really abstract as it's full fledged, but it should not be used directly
|
2
5
|
class Feed
|
3
6
|
include Logging
|
7
|
+
# "Raw" Nokogiri document used while parsing.
|
8
|
+
# It might be useful to access atom foreign markup
|
9
|
+
# @return [Nokogiri::XML::Document] Parsed document
|
4
10
|
attr_reader :raw_doc
|
11
|
+
# Entry list
|
12
|
+
# @see Entry
|
13
|
+
# @return [Array<Entry>] list of parsed entries
|
5
14
|
attr_reader :entries
|
6
15
|
|
7
16
|
|
@@ -9,7 +18,7 @@ module OPDS
|
|
9
18
|
@browser=browser
|
10
19
|
@browser||=OPDS::Support::Browser.new
|
11
20
|
end
|
12
|
-
|
21
|
+
=begin
|
13
22
|
# access root catalog
|
14
23
|
def root
|
15
24
|
return @root unless root?
|
@@ -19,10 +28,23 @@ module OPDS
|
|
19
28
|
# root catalog predicate
|
20
29
|
def root?
|
21
30
|
end
|
22
|
-
|
23
|
-
|
31
|
+
=end
|
32
|
+
|
33
|
+
|
34
|
+
# Parse the given url.
|
35
|
+
#
|
36
|
+
# If the resource at the given url is not an OPDS Catalog, this method will
|
37
|
+
# try to find a linked catalog.
|
38
|
+
# If many are available it will take the first one with a priority given to
|
39
|
+
# nil rel or rel="related" catalogs.
|
40
|
+
#
|
41
|
+
# @param url [String] url to parse
|
42
|
+
# @param browser (see Feed.parse_raw)
|
43
|
+
# @param parser_opts parser options (unused at the moment)
|
44
|
+
# @see OPDS::Support::Browser
|
45
|
+
# @return [AcquisitionFeed,NavigationFeed, Entry, nil] an instance of a parsed feed, entry or nil
|
46
|
+
def self.parse_url(url,browser=OPDS::Support::Browser.new,parser_opts={})
|
24
47
|
@browser=browser
|
25
|
-
@browser||=OPDS::Support::Browser.new
|
26
48
|
@browser.go_to(url)
|
27
49
|
if @browser.ok?
|
28
50
|
parsed = self.parse_raw(@browser.body,parser_opts,browser)
|
@@ -44,6 +66,12 @@ module OPDS
|
|
44
66
|
end
|
45
67
|
end
|
46
68
|
|
69
|
+
# Will parse a text stream as an OPDS Catalog, internally used by #parse_url
|
70
|
+
#
|
71
|
+
# @param txt [String] text to parse
|
72
|
+
# @param opts [Hash] options to pass to the parser
|
73
|
+
# @param browser [OPDS::Support::Browser] an optional compatible browser to use
|
74
|
+
# @return [AcquisitionFeed,NavigationFeed] an instance of a parsed feed or nil
|
47
75
|
def self.parse_raw(txt,opts={},browser=nil)
|
48
76
|
parser=OPDSParser.new(opts)
|
49
77
|
pfeed=parser.parse(txt,browser)
|
@@ -52,6 +80,11 @@ module OPDS
|
|
52
80
|
nil
|
53
81
|
end
|
54
82
|
|
83
|
+
|
84
|
+
# Create a feed from a nokogiri document
|
85
|
+
# @param content [Nokogiri::XML::Document] nokogiri document
|
86
|
+
# @param browser (see Feed.parse_url)
|
87
|
+
# @return [Feed] new feed
|
55
88
|
def self.from_nokogiri(content,browser=nil)
|
56
89
|
z=self.new browser
|
57
90
|
z.instance_variable_set('@raw_doc',content)
|
@@ -59,34 +92,52 @@ module OPDS
|
|
59
92
|
z
|
60
93
|
end
|
61
94
|
|
62
|
-
#
|
95
|
+
# @private
|
96
|
+
# read xml entries into the entry list struct
|
97
|
+
# @todo really make private
|
63
98
|
def serialize!
|
64
99
|
@entries=raw_doc.xpath('/xmlns:feed/xmlns:entry',raw_doc.root.namespaces).map do |el|
|
65
100
|
OPDS::Entry.from_nokogiri(el,raw_doc.root.namespaces,@browser)
|
66
101
|
end
|
67
102
|
end
|
68
103
|
|
104
|
+
|
105
|
+
# @return [String] Feed title
|
69
106
|
def title
|
70
107
|
text(raw_doc.at('/xmlns:feed/xmlns:title',raw_doc.root.namespaces))
|
71
108
|
end
|
72
109
|
|
110
|
+
# @return [String] Feed icon definition
|
73
111
|
def icon
|
74
112
|
text(raw_doc.at('/xmlns:feed/xmlns:icon',raw_doc.root.namespaces))
|
75
113
|
end
|
76
114
|
|
115
|
+
# @return [OPDS::Support::LinkSet] Set with atom feed level links
|
77
116
|
def links
|
78
117
|
if !@links || @links.size ==0
|
79
118
|
@links=OPDS::Support::LinkSet.new @browser
|
80
119
|
raw_doc.xpath('/xmlns:feed/xmlns:link',raw_doc.root.namespaces).each do |n|
|
81
120
|
text=nil
|
82
121
|
text=n.attributes['title'].value unless n.attributes['title'].nil?
|
122
|
+
type=n.attributes['type'].value unless n.attributes['type'].nil?
|
83
123
|
link=n.attributes['href'].value
|
84
124
|
unless n.attributes['rel'].nil?
|
85
125
|
n.attributes['rel'].value.split.each do |rel|
|
86
|
-
|
126
|
+
if rel=='http://opds-spec.org/facet'
|
127
|
+
group=n.attribute_with_ns('facetGroup','http://opds-spec.org/2010/catalog')
|
128
|
+
group=group.value unless group.nil?
|
129
|
+
active=n.attribute_with_ns('activeFacet','http://opds-spec.org/2010/catalog')
|
130
|
+
active=active.value unless active.nil?
|
131
|
+
count=n.attribute_with_ns('count','http://purl.org/syndication/thread/1.0')
|
132
|
+
count=count.value unless count.nil?
|
133
|
+
|
134
|
+
@links.push_facet(link,text,type,group,active,count)
|
135
|
+
else
|
136
|
+
@links.push(rel,link,text,type)
|
137
|
+
end
|
87
138
|
end
|
88
139
|
else
|
89
|
-
@links.push(nil,link,text)
|
140
|
+
@links.push(nil,link,text,type)
|
90
141
|
end
|
91
142
|
end
|
92
143
|
|
@@ -94,10 +145,12 @@ module OPDS
|
|
94
145
|
@links
|
95
146
|
end
|
96
147
|
|
148
|
+
# @return [String] Feed id
|
97
149
|
def id
|
98
150
|
text(raw_doc.at('/xmlns:feed/xmlns:id',raw_doc.root.namespaces))
|
99
151
|
end
|
100
152
|
|
153
|
+
# @return [Hash] Feed author (keys : name,uri,email)
|
101
154
|
def author
|
102
155
|
{
|
103
156
|
:name => text(raw_doc.at('/xmlns:feed/xmlns:author/xmlns:name',raw_doc.root.namespaces)),
|
@@ -106,31 +159,42 @@ module OPDS
|
|
106
159
|
}
|
107
160
|
end
|
108
161
|
|
109
|
-
|
162
|
+
# @return [String] Next page url
|
110
163
|
def next_page_url
|
111
164
|
links.link_url(:rel => 'next')
|
112
165
|
end
|
113
166
|
|
167
|
+
# @return [String] Previous page url
|
114
168
|
def prev_page_url
|
115
169
|
links.link_url(:rel => 'prev')
|
116
170
|
end
|
117
171
|
|
172
|
+
# Is the feed paginated ?
|
173
|
+
# @return Boolean
|
118
174
|
def paginated?
|
119
175
|
!next_page_url.nil?||!prev_page_url.nil?
|
120
176
|
end
|
121
177
|
|
178
|
+
# Is it the first page ?
|
179
|
+
# @return Boolean
|
122
180
|
def first_page?
|
123
181
|
!prev_page_url if paginated?
|
124
182
|
end
|
125
183
|
|
184
|
+
# Is it the last page ?
|
185
|
+
# @return Boolean
|
126
186
|
def last_page?
|
127
187
|
!next_page_url if paginated?
|
128
188
|
end
|
129
189
|
|
190
|
+
# Get next page feed
|
191
|
+
# @return (see Feed.parse_url)
|
130
192
|
def next_page
|
131
193
|
Feed.parse_url(next_page_url,@browser)
|
132
194
|
end
|
133
195
|
|
196
|
+
# Get previous page feed
|
197
|
+
# @return (see Feed.parse_url)
|
134
198
|
def prev_page
|
135
199
|
Feed.parse_url(prev_page_url,@browser)
|
136
200
|
end
|
@@ -140,6 +204,7 @@ module OPDS
|
|
140
204
|
end
|
141
205
|
|
142
206
|
protected
|
207
|
+
# Convert a nokogiri node to String value if not nil
|
143
208
|
def text(t)
|
144
209
|
return t.text unless t.nil?
|
145
210
|
t
|
data/lib/opds/navigation_feed.rb
CHANGED
@@ -1,4 +1,21 @@
|
|
1
1
|
module OPDS
|
2
|
+
# Represents a navigation feed
|
3
|
+
# @see http://opds-spec.org/specs/opds-catalog-1-0-20100830/#Navigation_Feeds
|
2
4
|
class NavigationFeed < Feed
|
5
|
+
# Collection of all Navigation feeds found in this feed
|
6
|
+
# @return [OPDS::Support::LinkSet] found links
|
7
|
+
def navigation_links
|
8
|
+
nav_links=Support::LinkSet.new @browser
|
9
|
+
self.links.each do |l|
|
10
|
+
nav_links.push_link l if l.type=='application/atom+xml'
|
11
|
+
end
|
12
|
+
|
13
|
+
self.entries.each do |entry|
|
14
|
+
entry.links.each do |l|
|
15
|
+
nav_links.push_link l if l.type=='application/atom+xml'
|
16
|
+
end
|
17
|
+
end
|
18
|
+
nav_links
|
19
|
+
end
|
3
20
|
end
|
4
21
|
end
|
data/lib/opds/opds.rb
CHANGED
data/lib/opds/parser.rb
CHANGED
@@ -1,14 +1,25 @@
|
|
1
1
|
require "nokogiri"
|
2
2
|
module OPDS
|
3
|
+
# Class in charge of discovering the type of the given text stream.
|
4
|
+
# It will dispatch the pre-parsed atom content to the desired class
|
5
|
+
# @see OPDS::AcquisitionFeed
|
6
|
+
# @see OPDS::NavigationFeed
|
7
|
+
# @see OPDS::Entry
|
3
8
|
class OPDSParser
|
4
9
|
include Logging
|
10
|
+
# @return [Hash] parsing options
|
5
11
|
attr_accessor :options
|
12
|
+
# @return [Symbol] last parsed stream sniffed type (:acquisition,:navigation,:entry)
|
6
13
|
attr_reader :sniffed_type
|
7
14
|
def initialize(opts={})
|
8
15
|
@sniffed_type=nil
|
9
16
|
self.options=opts.merge({})
|
10
17
|
end
|
11
18
|
|
19
|
+
# Parse a text stream
|
20
|
+
# @param content [String] text stream
|
21
|
+
# @param browser (see Feed.parse_url)
|
22
|
+
# @return [NavigationFeed, AcquisitionFeed, Entry] the parsed structure
|
12
23
|
def parse(content,browser=nil)
|
13
24
|
@ret=Nokogiri::XML(content)
|
14
25
|
@sniffed_type=sniff(@ret)
|
@@ -20,6 +31,9 @@ module OPDS
|
|
20
31
|
end
|
21
32
|
|
22
33
|
protected
|
34
|
+
# Sniff a provided nokogiri document to detect its type
|
35
|
+
# @param doc [Nokogiri::XML::Document] Document to sniff
|
36
|
+
# @return [:acquisition, :navigation, :entry, nil] sniffed type
|
23
37
|
def sniff(doc)
|
24
38
|
return :entry if doc.root.name=='entry'
|
25
39
|
entries = doc.xpath('/xmlns:feed/xmlns:entry',doc.root.namespaces)
|
data/lib/opds/support/browser.rb
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
require "open-uri"
|
2
2
|
module OPDS
|
3
|
+
# Supporting classes
|
3
4
|
module Support
|
5
|
+
# Browser class, it will be used to access the Internet.
|
6
|
+
# Currently based on open-uri only
|
4
7
|
class Browser
|
5
8
|
include Logging
|
9
|
+
# Navigate to the provided uri
|
10
|
+
# @param uri [String] uri to go to
|
6
11
|
def go_to(uri)
|
7
12
|
log("Accessing #{uri}")
|
8
13
|
url=URI.parse(uri)
|
@@ -21,27 +26,35 @@ module OPDS
|
|
21
26
|
end
|
22
27
|
end
|
23
28
|
|
29
|
+
# Last page load was ok ?
|
30
|
+
# @return [boolean]
|
24
31
|
def ok?
|
25
32
|
status==200
|
26
33
|
end
|
27
34
|
|
28
|
-
|
35
|
+
# @return [integer] Last page load return code
|
29
36
|
def status
|
30
37
|
@last_response.code.to_i if @last_response
|
31
38
|
end
|
32
39
|
|
40
|
+
# @return [Hash] Last page HTTP headers
|
33
41
|
def headers
|
34
42
|
@last_response.to_hash if @last_response
|
35
43
|
end
|
36
44
|
|
45
|
+
# @return [String] Last page body
|
37
46
|
def body
|
38
47
|
@last_response.body if @last_response
|
39
48
|
end
|
40
49
|
|
50
|
+
# @return [String] current uri
|
41
51
|
def current_location
|
42
52
|
@current_location
|
43
53
|
end
|
44
54
|
|
55
|
+
# Try to discover catalog links at the given url
|
56
|
+
# @param [String] url to search
|
57
|
+
# @return [OPDS::Support::LinkSet, false] discovered links
|
45
58
|
def discover(url)
|
46
59
|
go_to(url)
|
47
60
|
if ok?
|
@@ -59,6 +72,7 @@ module OPDS
|
|
59
72
|
end
|
60
73
|
|
61
74
|
private
|
75
|
+
# extracts linkset from doc + xpath expression
|
62
76
|
def extract_links(tab,doc, expr)
|
63
77
|
doc.xpath(expr).each do |n|
|
64
78
|
text=nil
|