repub 0.3.3 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +11 -0
- data/README.rdoc +14 -8
- data/TODO +0 -2
- data/lib/repub.rb +1 -1
- data/lib/repub/app.rb +3 -0
- data/lib/repub/app/builder.rb +151 -154
- data/lib/repub/app/fetcher.rb +10 -23
- data/lib/repub/app/filter.rb +30 -0
- data/lib/repub/app/options.rb +0 -6
- data/lib/repub/app/parser.rb +63 -73
- data/lib/repub/app/post_filters.rb +135 -0
- data/lib/repub/app/pre_filters.rb +50 -0
- data/lib/repub/app/profile.rb +1 -1
- data/lib/repub/epub.rb +4 -3
- data/lib/repub/epub/container_item.rb +49 -0
- data/lib/repub/epub/{toc.rb → ncx.rb} +137 -139
- data/lib/repub/epub/ocf.rb +62 -0
- data/lib/repub/epub/opf.rb +136 -0
- data/repub.gemspec +4 -4
- data/test/epub/{test_toc.rb → test_ncx.rb} +14 -12
- data/test/epub/test_ocf.rb +28 -0
- data/test/epub/{test_content.rb → test_opf.rb} +25 -19
- data/test/test_filter.rb +28 -0
- data/test/test_parser.rb +3 -4
- metadata +17 -11
- data/lib/repub/epub/container.rb +0 -28
- data/lib/repub/epub/content.rb +0 -178
- data/test/epub/test_container.rb +0 -15
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'repub/app/filter'
|
2
|
+
|
3
|
+
module Repub
|
4
|
+
class App
|
5
|
+
class PreFilters
  include Filter

  # Detect and convert source encoding
  # Standard requires it to be UTF-8
  #
  # The encoding is taken from options[:encoding] when present; otherwise
  # it is read from the 'encoding' entry of UniversalDetector.chardet(s).
  # Returns the (possibly converted) source string.
  #
  filter :fix_encoding do |s|
    encoding = options[:encoding]
    unless encoding
      log.info "Detecting encoding"
      encoding = UniversalDetector.chardet(s)['encoding']
    end
    if encoding.nil?
      # NOTE(review): the detector presumably reports no encoding when it
      # cannot decide -- confirm. Calling #downcase on nil would crash the
      # whole filter chain, so the source is passed through unconverted.
      log.info "Unable to detect source encoding, leaving content unconverted"
    elsif encoding.downcase != 'utf-8'
      log.info "Source encoding appears to be #{encoding}, converting to UTF-8"
      s = Iconv.conv('utf-8', encoding, s)
    end
    s
  end

  # Convert line endings to LF
  #
  # Handles both CRLF pairs and lone CR line endings.
  #
  filter :fix_line_endings do |s|
    s.gsub(/\r\n?/, "\n")
  end

  # Fix all elements with broken id attribute
  # In XHTML id must match [A-Za-z][A-Za-z0-9:_.-]*
  # TODO: currently only testing for non-alpha first char...
  #
  # Every id/name attribute whose value starts with a digit gets an 'x'
  # prefix, and fragment references (#value followed by a quote) are
  # updated to match. The string is modified in place and returned.
  #
  filter :fix_ids do |s|
    broken = s.scan(/\s+((?:id|name)\s*?=\s*?['"])(\d+[^'"]*)['"]/im)
    unless broken.empty?
      log.debug "-- Fixing broken element IDs"
      broken.each do |attr_prefix, value|
        # fix id so it starts with alpha char
        # (block form: the replacement is taken literally, so backslashes
        # in the value are not read as back-references)
        s.gsub!(attr_prefix + value) { "#{attr_prefix}x#{value}" }
        # update fragment references
        # (the value is escaped so regex metacharacters in it match literally)
        s.gsub!(/##{Regexp.escape(value)}(['"])/) { "#x#{value}#{Regexp.last_match(1)}" }
      end
    end
    s
  end

end
|
49
|
+
end
|
50
|
+
end
|
data/lib/repub/app/profile.rb
CHANGED
@@ -5,7 +5,7 @@ module Repub
|
|
5
5
|
class App
|
6
6
|
module Profile
|
7
7
|
|
8
|
-
PROFILE_KEYS = %w[css encoding
|
8
|
+
PROFILE_KEYS = %w[css encoding helper metadata remove rx selectors].map {|k| k.to_sym}
|
9
9
|
|
10
10
|
def load_profile(name = nil)
|
11
11
|
name ||= 'default'
|
data/lib/repub/epub.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
-
require 'repub/epub/
|
2
|
-
require 'repub/epub/
|
3
|
-
require 'repub/epub/
|
1
|
+
require 'repub/epub/container_item'
|
2
|
+
require 'repub/epub/ocf'
|
3
|
+
require 'repub/epub/opf'
|
4
|
+
require 'repub/epub/ncx'
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
module Repub
|
4
|
+
module Epub

  # Mixin for stuff that can be added to the ePub package
  #
  # Provides the file_path and media_type accessors plus a predicate
  # telling whether the item is a content document.
  #
  module ContainerItem
    attr_accessor :file_path
    attr_accessor :media_type

    # True when media_type is one of the document media types
    # (XHTML or DTBook).
    def document?
      ['application/xhtml+xml', 'application/x-dtbook+xml'].include? self.media_type
    end
  end

  # Wrapper class for ePub items that do not have specialized classes
  # e.g. HTML files, CSSs etc.
  #
  class Item
    include ContainerItem

    # file_path::  path of the item; surrounding whitespace is stripped
    # media_type:: explicit media type; when nil it is inferred from the
    #              (case-insensitive) file extension
    #
    # Raises RuntimeError ('Unknown media type') when no media type was
    # given and the extension is not recognized.
    #
    def initialize(file_path, media_type = nil)
      @file_path = file_path.strip
      @media_type = media_type || case @file_path.downcase
        # .xhtml / .xhtm are accepted alongside .html / .htm
        when /\.x?html?\z/
          'application/xhtml+xml'
        when /\.css\z/
          'text/css'
        when /\.jpe?g\z/
          'image/jpeg'
        when /\.png\z/
          'image/png'
        when /\.gif\z/
          'image/gif'
        when /\.svg\z/
          'image/svg+xml'
        when /\.ncx\z/
          'application/x-dtbncx+xml'
        when /\.opf\z/
          'application/oebps-package+xml'
        else
          raise 'Unknown media type'
      end
    end
  end

end
|
49
|
+
end
|
@@ -1,139 +1,137 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'builder'
|
3
|
-
|
4
|
-
module Repub
|
5
|
-
module Epub
|
6
|
-
|
7
|
-
class
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
@
|
12
|
-
@
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
builder
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
@
|
35
|
-
@nav_map.
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
builder.meta :name => "dtb:
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
def initialize
|
110
|
-
|
111
|
-
@depth = 1
|
112
|
-
end
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
end
|
137
|
-
|
138
|
-
end
|
139
|
-
end
|
1
|
+
require 'rubygems'
|
2
|
+
require 'builder'
|
3
|
+
|
4
|
+
module Repub
|
5
|
+
module Epub
|
6
|
+
|
7
|
+
class NCX
  include ContainerItem

  attr_reader :nav_map

  # Sets up an NCX document for the given uid, titled 'Untitled',
  # with an empty navigation map. file_path is where #save writes.
  def initialize(uid, file_path = 'toc.ncx')
    @file_path = file_path
    @media_type = 'application/x-dtbncx+xml'
    @head = Head.new(uid)
    @doc_title = DocTitle.new('Untitled')
    @nav_map = NavMap.new
  end

  # Current document title text.
  def title
    @doc_title.text
  end

  # Replaces the document title.
  def title=(text)
    @doc_title = DocTitle.new(text)
  end

  # Renders the whole NCX document and returns it as a string.
  # Depth and play order are recalculated on every call, before
  # the head section is written.
  def to_xml
    xml = ''
    markup = Builder::XmlMarkup.new(:target => xml)
    markup.instruct!
    markup.declare! :DOCTYPE, :ncx, :PUBLIC, "-//NISO//DTD ncx 2005-1//EN", "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"
    markup.ncx :xmlns => "http://www.daisy.org/z3986/2005/ncx/", :version => "2005-1" do
      @nav_map.calc_depth_and_play_order
      @head.depth = @nav_map.depth
      [@head, @doc_title, @nav_map].each { |section| section.to_xml(markup) }
    end
    xml
  end

  # Writes the rendered XML to file_path.
  def save
    File.open(@file_path, 'w') { |io| io << to_xml }
  end

  # <head> section: uid plus the depth supplied by NCX#to_xml.
  class Head < Struct.new(:uid)

    attr_accessor :depth

    # Emits the four dtb:* meta elements inside <head>.
    def to_xml(builder)
      builder.head do
        [
          ["dtb:uid", self.uid],
          ["dtb:depth", @depth],
          ["dtb:totalPageCount", 0],
          ["dtb:maxPageNumber", 0]
        ].each do |meta_name, meta_content|
          builder.meta :name => meta_name, :content => meta_content
        end
      end
    end
  end

  # <docTitle> section wrapping a single text element.
  class DocTitle < Struct.new(:text)

    def to_xml(builder)
      builder.docTitle { builder.text self.text }
    end
  end

  # One navigation entry: a title, a content src and child points.
  class NavPoint < Struct.new(:title, :src)

    attr_accessor :play_order
    attr_accessor :points

    # points defaults to an empty list; play_order starts at 0 and is
    # assigned later by NavMap#calc_depth_and_play_order.
    def initialize(title, src, points = nil)
      super(title, src)
      @play_order = 0
      @points = points || []
    end

    # Emits this navPoint, then its children nested inside it.
    def to_xml(builder)
      builder.navPoint :id => point_id(@play_order), :playOrder => @play_order do
        builder.navLabel { builder.text self.title }
        builder.content :src => self.src
        @points.each { |child| child.to_xml(builder) }
      end
    end

    private

    # Element id derived from the play order.
    def point_id(play_order)
      'navPoint-' + play_order.to_s
    end
  end

  # Root of the navigation tree; tracks the maximum nesting depth.
  class NavMap < NavPoint

    attr_reader :depth

    def initialize
      super(nil, nil)
      @depth = 1
    end

    # Walks the tree depth-first, numbering points sequentially from 1
    # and recording the deepest level reached in @depth.
    def calc_depth_and_play_order
      @depth = 1
      counter = 0
      walk = lambda do |nodes, level|
        @depth = level if level > @depth
        nodes.each do |node|
          counter += 1
          node.play_order = counter
          walk.call(node.points, level + 1) unless node.points.empty?
        end
      end
      walk.call(@points, 1)
    end

    # Emits <navMap> containing all top-level points.
    def to_xml(builder)
      builder.navMap do
        @points.each { |child| child.to_xml(builder) }
      end
    end
  end
end
|
135
|
+
|
136
|
+
end
|
137
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'rubygems'
require 'fileutils'
require 'shellwords'
require 'builder'
|
4
|
+
|
5
|
+
module Repub
|
6
|
+
module Epub
|
7
|
+
|
8
|
+
# OEBPS Container Format (OCF) 1.0 wrapper
|
9
|
+
# (see http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm)
|
10
|
+
#
|
11
|
+
class OCF

  attr_reader :items

  def initialize
    @items = []
  end

  # Adds a root file to the container.
  #
  # item:: either an object that includes ContainerItem, or a String path,
  #        which is wrapped in an Item
  #
  # Raises RuntimeError for any other class.
  #
  def <<(item)
    if item.kind_of? ContainerItem
      @items << item
    elsif item.is_a? String
      @items << Item.new(item)
    else
      raise "Unsupported item class: #{item.class}"
    end
  end

  # Renders container.xml, listing every item as a rootfile, and returns
  # it as a string.
  def to_xml
    out = ''
    builder = Builder::XmlMarkup.new(:target => out)
    builder.instruct!
    builder.container :xmlns => "urn:oasis:names:tc:opendocument:xmlns:container", :version => "1.0" do
      builder.rootfiles do
        @items.each do |item|
          builder.rootfile 'full-path' => item.file_path, 'media-type' => item.media_type
        end
      end
    end
    out
  end

  # Writes META-INF/container.xml relative to the current directory,
  # creating the META-INF directory if needed.
  def save
    meta_inf = 'META-INF'
    FileUtils.mkdir_p(meta_inf)
    File.open(File.join(meta_inf, 'container.xml'), 'w') do |f|
      f << to_xml
    end
  end

  # Packs the current directory into an archive at output_path using the
  # external `zip` command. Returns the output of the second zip run.
  #
  # output_path is shell-escaped rather than wrapped in double quotes, so
  # characters such as $, backticks or quotes in the path are passed to
  # zip literally instead of being interpreted by the shell.
  #
  def zip(output_path)
    File.open('mimetype', 'w') do |f|
      f << 'application/epub+zip'
    end
    archive = Shellwords.escape(output_path)
    # mimetype has to be first in the archive
    %x(zip -X9 #{archive} mimetype)
    %x(zip -Xr9D #{archive} * -xi mimetype)
  end
end
|
60
|
+
|
61
|
+
end
|
62
|
+
end
|