repub 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +11 -0
- data/README.rdoc +14 -8
- data/TODO +0 -2
- data/lib/repub.rb +1 -1
- data/lib/repub/app.rb +3 -0
- data/lib/repub/app/builder.rb +151 -154
- data/lib/repub/app/fetcher.rb +10 -23
- data/lib/repub/app/filter.rb +30 -0
- data/lib/repub/app/options.rb +0 -6
- data/lib/repub/app/parser.rb +63 -73
- data/lib/repub/app/post_filters.rb +135 -0
- data/lib/repub/app/pre_filters.rb +50 -0
- data/lib/repub/app/profile.rb +1 -1
- data/lib/repub/epub.rb +4 -3
- data/lib/repub/epub/container_item.rb +49 -0
- data/lib/repub/epub/{toc.rb → ncx.rb} +137 -139
- data/lib/repub/epub/ocf.rb +62 -0
- data/lib/repub/epub/opf.rb +136 -0
- data/repub.gemspec +4 -4
- data/test/epub/{test_toc.rb → test_ncx.rb} +14 -12
- data/test/epub/test_ocf.rb +28 -0
- data/test/epub/{test_content.rb → test_opf.rb} +25 -19
- data/test/test_filter.rb +28 -0
- data/test/test_parser.rb +3 -4
- metadata +17 -11
- data/lib/repub/epub/container.rb +0 -28
- data/lib/repub/epub/content.rb +0 -178
- data/test/epub/test_container.rb +0 -15
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'repub/app/filter'
|
2
|
+
|
3
|
+
module Repub
  class App

    # Collection of text filters registered through the +filter+ macro that
    # comes from the Filter mixin (defined elsewhere in the project).
    # Each filter block receives the source text +s+ and returns the
    # (possibly rewritten) text.
    #
    class PreFilters
      include Filter

      # Detect and convert source encoding
      # Standard requires it to be UTF-8
      #
      filter :fix_encoding do |s|
        # An explicitly configured encoding wins over auto-detection.
        encoding = options[:encoding]
        unless encoding
          log.info "Detecting encoding"
          encoding = UniversalDetector.chardet(s)['encoding']
        end
        # NOTE(review): if detection yields no encoding (nil) the downcase
        # call below raises NoMethodError -- confirm UniversalDetector
        # always reports one.
        if encoding.downcase != 'utf-8'
          log.info "Source encoding appears to be #{encoding}, converting to UTF-8"
          s = Iconv.conv('utf-8', encoding, s)
        end
        s
      end

      # Convert line endings to LF
      # (only CRLF pairs are rewritten; a lone CR is left untouched)
      #
      filter :fix_line_endings do |s|
        s.gsub(/\r\n/, "\n")
      end

      # Fix all elements with broken id attribute
      # In XHTML id must match [A-Za-z][A-Za-z0-9:_.-]*
      # TODO: currently only testing for non-alpha first char...
      #
      # The text is modified in place (gsub!) and then returned.
      #
      filter :fix_ids do |s|
        # Each match is [attribute prefix up to the opening quote, id value]
        match = s.scan(/\s+((?:id|name)\s*?=\s*?['"])(\d+[^'"]*)['"]/im)
        unless match.empty?
          log.debug "-- Fixing broken element IDs"
          match.each do |m|
            # fix id so it starts with alpha char
            # (block form: the replacement is inserted verbatim, so a
            # backslash inside the id is not read as a back-reference)
            s.gsub!(m.join('')) { m.join('x') }
            # update fragment references
            # (the id is escaped so characters such as '.', '+' or '(' are
            # matched literally instead of being treated as regex syntax)
            s.gsub!(/##{Regexp.escape(m[1])}(['"])/) { "#x#{m[1]}#{$1}" }
          end
        end
        s
      end

    end
  end
end
|
data/lib/repub/app/profile.rb
CHANGED
@@ -5,7 +5,7 @@ module Repub
|
|
5
5
|
class App
|
6
6
|
module Profile
|
7
7
|
|
8
|
-
PROFILE_KEYS = %w[css encoding
|
8
|
+
PROFILE_KEYS = %w[css encoding helper metadata remove rx selectors].map {|k| k.to_sym}
|
9
9
|
|
10
10
|
def load_profile(name = nil)
|
11
11
|
name ||= 'default'
|
data/lib/repub/epub.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
-
require 'repub/epub/
|
2
|
-
require 'repub/epub/
|
3
|
-
require 'repub/epub/
|
1
|
+
require 'repub/epub/container_item'
|
2
|
+
require 'repub/epub/ocf'
|
3
|
+
require 'repub/epub/opf'
|
4
|
+
require 'repub/epub/ncx'
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
module Repub
  module Epub

    # Mixin for stuff that can be added to the ePub package.
    # Provides the +file_path+ and +media_type+ accessors.
    #
    module ContainerItem
      attr_accessor :file_path
      attr_accessor :media_type

      # True when the media type is one of the two document types
      # (XHTML or DTBook).
      def document?
        ['application/xhtml+xml', 'application/x-dtbook+xml'].include? self.media_type
      end
    end

    # Wrapper class for ePub items that do not have specialized classes
    # e.g. HTML files, CSSs etc.
    #
    class Item
      include ContainerItem

      # file_path::  path of the item; surrounding whitespace is stripped
      # media_type:: explicit media type; when nil it is derived from the
      #              (case-insensitive) file extension
      #
      # Raises RuntimeError ('Unknown media type') when no media type is
      # given and the extension is not recognized.
      #
      def initialize(file_path, media_type = nil)
        @file_path = file_path.strip
        # Patterns are anchored with \z (end of string) rather than $ (end
        # of any line) so that only the real extension of the whole path
        # decides the type.
        @media_type = media_type || case @file_path.downcase
          when /\.html?\z/
            'application/xhtml+xml'
          when /\.css\z/
            'text/css'
          when /\.(jpeg|jpg)\z/
            'image/jpeg'
          when /\.png\z/
            'image/png'
          when /\.gif\z/
            'image/gif'
          when /\.svg\z/
            'image/svg+xml'
          when /\.ncx\z/
            'application/x-dtbncx+xml'
          when /\.opf\z/
            'application/oebps-package+xml'
          else
            raise 'Unknown media type'
        end
      end
    end

  end
end
|
@@ -1,139 +1,137 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'builder'
|
3
|
-
|
4
|
-
module Repub
|
5
|
-
module Epub
|
6
|
-
|
7
|
-
class
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
@
|
12
|
-
@
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
builder
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
@
|
35
|
-
@nav_map.
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
builder.meta :name => "dtb:
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
def initialize
|
110
|
-
|
111
|
-
@depth = 1
|
112
|
-
end
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
end
|
137
|
-
|
138
|
-
end
|
139
|
-
end
|
1
|
+
require 'rubygems'
|
2
|
+
require 'builder'
|
3
|
+
|
4
|
+
module Repub
  module Epub

    # Container item that serializes an NCX document (head, docTitle and
    # navMap) through Builder::XmlMarkup.
    #
    class NCX
      include ContainerItem

      attr_reader :nav_map

      # uid::       value written to the dtb:uid meta element
      # file_path:: where #save writes the document
      def initialize(uid, file_path = 'toc.ncx')
        @file_path = file_path
        @media_type = 'application/x-dtbncx+xml'
        @head = Head.new(uid)
        @doc_title = DocTitle.new('Untitled')
        @nav_map = NavMap.new
      end

      # Current document title text.
      def title
        @doc_title.text
      end

      # Replaces the document title with a new DocTitle.
      def title=(value)
        @doc_title = DocTitle.new(value)
      end

      # Renders the whole document and returns it as a string.
      def to_xml
        xml = ''
        markup = Builder::XmlMarkup.new(:target => xml)
        markup.instruct!
        markup.declare! :DOCTYPE, :ncx, :PUBLIC, "-//NISO//DTD ncx 2005-1//EN", "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"
        markup.ncx :xmlns => "http://www.daisy.org/z3986/2005/ncx/", :version => "2005-1" do
          # Play order and depth are refreshed first because the head
          # element reports the nav map depth.
          @nav_map.calc_depth_and_play_order
          @head.depth = @nav_map.depth
          [@head, @doc_title, @nav_map].each { |section| section.to_xml(markup) }
        end
        xml
      end

      # Writes the rendered document to file_path.
      def save
        File.open(@file_path, 'w') { |io| io << to_xml }
      end

      # <head> element: uid, depth and two page counters fixed at 0.
      class Head < Struct.new(:uid)

        attr_accessor :depth

        def to_xml(builder)
          entries = [
            ["dtb:uid", uid],
            ["dtb:depth", @depth],
            ["dtb:totalPageCount", 0],
            ["dtb:maxPageNumber", 0]
          ]
          builder.head do
            entries.each do |name, content|
              builder.meta :name => name, :content => content
            end
          end
        end
      end

      # <docTitle> element wrapping a single <text>.
      class DocTitle < Struct.new(:text)

        def to_xml(builder)
          builder.docTitle { builder.text(text) }
        end
      end

      # One navigation entry; may own nested entries in +points+.
      class NavPoint < Struct.new(:title, :src)

        attr_accessor :play_order
        attr_accessor :points

        def initialize(title, src, points = nil)
          super(title, src)
          @play_order = 0
          @points = points || []
        end

        # Emits this entry followed by its nested entries.
        def to_xml(builder)
          builder.navPoint :id => point_id(@play_order), :playOrder => @play_order do
            builder.navLabel { builder.text(title) }
            builder.content :src => src
            @points.each do |child|
              child.to_xml(builder)
            end
          end
        end

        private

        def point_id(play_order)
          "navPoint-#{play_order}"
        end
      end

      # Root of the navigation tree; tracks the deepest nesting level.
      class NavMap < NavPoint

        attr_reader :depth

        def initialize
          super(nil, nil)
          @depth = 1
        end

        # Walks the tree depth-first, numbering every point sequentially
        # from 1 and recording the deepest level reached in @depth.
        def calc_depth_and_play_order
          @depth = 1
          counter = 0
          walk = lambda do |level_points, level|
            @depth = level if level > @depth
            level_points.each do |point|
              counter += 1
              point.play_order = counter
              walk.call(point.points, level + 1) unless point.points.empty?
            end
          end
          walk.call(@points, 1)
        end

        # Emits <navMap> containing the top-level points.
        def to_xml(builder)
          builder.navMap do
            @points.each do |child|
              child.to_xml(builder)
            end
          end
        end
      end
    end

  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'builder'
|
4
|
+
|
5
|
+
module Repub
  module Epub

    # OEBPS Container Format (OCF) 1.0 wrapper
    # (see http://www.idpf.org/ocf/ocf1.0/download/ocf10.htm)
    #
    class OCF

      attr_reader :items

      def initialize
        @items = []
      end

      # Adds a root file. A ContainerItem is stored as is, a String is
      # wrapped in an Item; anything else raises RuntimeError.
      def <<(item)
        entry = case item
                when ContainerItem then item
                when String then Item.new(item)
                else raise "Unsupported item class: #{item.class}"
                end
        @items << entry
      end

      # Renders container.xml (one <rootfile> per item) as a string.
      def to_xml
        xml = ''
        markup = Builder::XmlMarkup.new(:target => xml)
        markup.instruct!
        markup.container :xmlns => "urn:oasis:names:tc:opendocument:xmlns:container", :version => "1.0" do
          markup.rootfiles do
            @items.each do |entry|
              markup.rootfile('full-path' => entry.file_path, 'media-type' => entry.media_type)
            end
          end
        end
        xml
      end

      # Writes META-INF/container.xml relative to the current directory.
      def save
        dir = 'META-INF'
        FileUtils.mkdir_p(dir)
        File.open(File.join(dir, 'container.xml'), 'w') { |io| io << to_xml }
      end

      # Writes the mimetype file and packs the current directory into
      # output_path by shelling out to the zip command.
      #
      # NOTE(review): output_path is interpolated into the command line
      # inside double quotes; a path containing a double quote, '$' or a
      # backtick would be interpreted by the shell -- confirm callers only
      # pass trusted paths.
      def zip(output_path)
        File.open('mimetype', 'w') { |io| io << 'application/epub+zip' }
        # mimetype has to be first in the archive
        %x(zip -X9 \"#{output_path}\" mimetype)
        %x(zip -Xr9D \"#{output_path}\" * -xi mimetype)
      end
    end

  end
end
|