ebps 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. data/History.txt +3 -0
  2. data/InstalledFiles +36 -0
  3. data/LICENSE.txt +339 -0
  4. data/Manifest.txt +56 -0
  5. data/README.txt +28 -0
  6. data/Rakefile +28 -0
  7. data/SetupConfig +30 -0
  8. data/bin/ebps +86 -0
  9. data/example/config.yml +12 -0
  10. data/example/converter_for_firefox.rb +59 -0
  11. data/example/data.yml +60 -0
  12. data/example/example.sh +3 -0
  13. data/example/sample.epub +0 -0
  14. data/lib/ebps.rb +5 -0
  15. data/lib/ebps/config.rb +61 -0
  16. data/lib/ebps/conversion/de_fachinfo_yaml.rb +81 -0
  17. data/lib/ebps/conversion/epub.rb +38 -0
  18. data/lib/ebps/conversion/fachinfo_xml.rb +170 -0
  19. data/lib/ebps/conversion/fachinfo_yaml.rb +113 -0
  20. data/lib/ebps/conversion/import_module_sample.rb +86 -0
  21. data/lib/ebps/conversion/mobi_pocket.rb +30 -0
  22. data/lib/ebps/conversion/oebps.rb +537 -0
  23. data/lib/ebps/conversion/patinfo_yaml.rb +107 -0
  24. data/lib/ebps/data/default_cover.jpg +0 -0
  25. data/lib/ebps/data/stylesheet.css +16 -0
  26. data/lib/ebps/postprocess/bookworm.rb +16 -0
  27. data/lib/ebps/postprocess/copy.rb +28 -0
  28. data/lib/ebps/postprocess/system_call.rb +18 -0
  29. data/lib/ebps/preprocess/copy.rb +28 -0
  30. data/lib/ebps/preprocess/system_call.rb +18 -0
  31. data/lib/ebps/text/chapter.rb +36 -0
  32. data/lib/ebps/text/document.rb +36 -0
  33. data/lib/ebps/text/format.rb +34 -0
  34. data/lib/ebps/text/paragraph.rb +63 -0
  35. data/lib/ebps/text/picture.rb +41 -0
  36. data/lib/ebps/text/table.rb +65 -0
  37. data/lib/ebps/util/mail.rb +47 -0
  38. data/lib/ebps/util/smtp_tls.rb +62 -0
  39. data/spec/conversion/data/DF_15164_1_3.gif +0 -0
  40. data/spec/conversion/data/DF_15164_2_3.gif +0 -0
  41. data/spec/conversion/data/appendix.png +0 -0
  42. data/spec/conversion/data/fachinfo.xml +1151 -0
  43. data/spec/conversion/data/fachinfo.yaml +1214 -0
  44. data/spec/conversion/data/fachinfo_with_image.xml +334 -0
  45. data/spec/conversion/data/fachinfo_with_table.xml +1101 -0
  46. data/spec/conversion/data/fachinfos.de.oddb.yaml +5789 -0
  47. data/spec/conversion/data/images/5c/5c54d52c8132230e8c40c37a428fe761.png +0 -0
  48. data/spec/conversion/de_fachinfo_yaml_spec.rb +86 -0
  49. data/spec/conversion/epub_spec.rb +59 -0
  50. data/spec/conversion/fachinfo_xml_spec.rb +245 -0
  51. data/spec/conversion/fachinfo_yaml_spec.rb +52 -0
  52. data/spec/conversion/mobi_pocket_spec.rb +55 -0
  53. data/spec/conversion/oebps_spec.rb +555 -0
  54. data/spec/text/chapter_spec.rb +65 -0
  55. data/spec/text/document_spec.rb +78 -0
  56. data/spec/text/paragraph_spec.rb +77 -0
  57. metadata +145 -0
@@ -0,0 +1,170 @@
1
+ require 'ebps/config'
2
+ require 'ebps/text/document'
3
+ require 'open-uri'
4
+ require 'rexml/document'
5
+ require 'rexml/streamlistener'
6
+
7
+ module EBPS
8
+ module Conversion
9
+ module FachinfoXml
10
# Stream listener that assembles Text::Document objects from Fachinfo XML.
#
# REXML reports every tag through #tag_start / #tag_end and every text node
# through #text. Tags are dispatched by name to the start_<tag> / end_<tag>
# handlers below; a tag without a handler raises a RuntimeError.
class StreamListener
  include REXML::StreamListener
  attr_reader :current_chapter, :current_document, :current_target,
              :documents
  def initialize
    @documents = []
    # currently open inline format tags (only 'i' is pushed), innermost last
    @stack = []
  end
  def end_fi
    @current_document = nil
  end
  # Closes the innermost <i>. Only one stack entry is removed, so that with
  # nested <i> tags the outer one keeps its formatting until it is closed
  # itself. The remaining formats are applied de-duplicated, as in #start_i.
  def end_i
    if idx = @stack.rindex('i')
      @stack.delete_at idx
    end
    if @current_target.respond_to?(:set_format)
      @current_target.set_format(*@stack.uniq)
    end
  end
  def end_image
    # nothing to be done
  end
  def end_monographies
    # outermost tag, we can safely ignore this.
  end
  # Adds the finished target (if any) to the current chapter.
  def end_p
    @current_chapter.add_paragraph(@current_target) if @current_target
    @current_target = nil
  end
  def end_paragraph
    @current_chapter = nil
  end
  alias end_paragraphSubtitle end_p
  def end_paragraphTitle
    @current_target = nil
  end
  def end_owner
    end_paragraphTitle
    end_paragraph
  end
  def end_table
    end_p
  end
  def end_tb
    # end of table body. Ignored for now.
  end
  def end_td
    @in_td = false
  end
  def end_th
    # end of table head. Ignored for now.
  end
  def end_title
    @current_target = nil
  end
  # Terminates a table row with a newline. A row seen while no target is
  # active is ignored, the same way #text ignores stray text.
  def end_tr
    @current_target << "\n" if @current_target
  end
  # Starts a new document and registers it in #documents.
  def start_fi attrs
    @documents.push(@current_document = Text::Document.new)
    @current_document
  end
  def start_i attrs
    @stack.push 'i'
    if @current_target.respond_to?(:set_format)
      @current_target.set_format(*@stack.uniq)
    end
  end
  # Loads the image named by attrs["src"] (plus the configured suffix) from
  # below the configured image_prefix and makes it the current target.
  # Raises a RuntimeError if image_prefix or image_suffix is not configured.
  # Any error while opening or reading the image leaves no current target.
  def start_image attrs
    config = EBPS.config
    raise <<-EOS unless config.image_prefix && config.image_suffix
This monography contains images.
Please configure both image_prefix and image_suffix
    EOS
    file = attrs["src"] + config.image_suffix
    src = File.join config.image_prefix, file
    handle = nil
    begin
      handle = open src
      picture = Text::Picture.new
      picture << handle.read
      @current_target = picture
    rescue
      @current_target = nil
    end
  ensure
    handle.close if handle
  end
  def start_monographies attrs
    # outermost tag, we can safely ignore this.
  end
  def start_owner attrs
    start_paragraph attrs
    start_paragraphTitle attrs
  end
  def start_p attrs
    @current_target = Text::Paragraph.new
  end
  # Opens a new chapter in the current document.
  def start_paragraph attrs
    @current_chapter = Text::Chapter.new
    @current_document.add_chapter @current_chapter
    @current_chapter
  end
  def start_paragraphSubtitle attrs
    @current_target = Text::Subheading.new
  end
  def start_paragraphTitle attrs
    @current_target = @current_chapter.heading
  end
  # Tables are collected as a single paragraph with 'pre' format.
  def start_table attrs
    paragraph = start_p attrs
    paragraph.set_format('pre')
    paragraph
  end
  def start_tb attrs
    # start of table body. Ignored for now.
  end
  def start_td attrs
    @in_td = true
  end
  def start_th attrs
    # start of table head. Ignored for now.
  end
  def start_title attrs
    @current_target = @current_document.title
  end
  def start_tr attrs
    # ignore. end_tr will add a newline character
  end
  # REXML callback: dispatches to end_<name>, raises for unknown tags.
  def tag_end name
    method = "end_#{name}"
    if respond_to?(method)
      send method
    else
      raise "unhandled end tag '#{name}'"
    end
  end
  # REXML callback: dispatches to start_<name>, raises for unknown tags.
  def tag_start name, attrs
    method = "start_#{name}"
    if respond_to?(method)
      send method, attrs
    else
      raise "unhandled start tag '#{name}'"
    end
  end
  # REXML callback: appends text to the current target. Text is stripped
  # unless it occurs inside a table cell.
  def text data
    if @current_target && data
      data = data.strip unless @in_td
      @current_target << data
    end
  end
end
160
# Parses +string_or_io+ with a fresh StreamListener and returns the
# documents it collected. An ArgumentError raised during parsing is reported
# with Kernel#warn (mentioning +path+) and an empty Array is returned.
def self.import string_or_io, path=''
  begin
    handler = StreamListener.new
    REXML::Document.parse_stream(string_or_io, handler)
    handler.documents
  rescue ArgumentError => error
    warn "#{error.message} while importing #{path}, ignoring input file"
    []
  end
end
168
+ end
169
+ end
170
+ end
@@ -0,0 +1,113 @@
1
+ require 'ebps/text/document'
2
+ require 'yaml'
3
+
4
+ module EBPS
5
# Top-level Fachinfo entry: picks the description for the configured
# language and merges the remaining keys into its metadata. Returns nil if
# the entry has no 'descriptions'.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Fachinfo' do |type, val|
  if descr = val.delete('descriptions')
    doc = descr[EBPS.config.language]
    begin
      doc.metadata.update val
    rescue NoMethodError => err
      comment = "Probably '#{EBPS.config.language}' data is missing in the following data:\n"
      # Re-raise with the context built into a new message: appending to
      # err.message is not reliable, since it may be a frozen or throw-away
      # String.
      raise NoMethodError, "#{err.message}\n\n#{comment}#{val.to_a}\n", err.backtrace
    end
    doc
  end
end
YAML.add_domain_type 'oddb.org,2003', 'ODDB::FachinfoDocument' do |type, val|
  chapters = %w{galenic_form composition effects kinetic indications usage
                restrictions unwanted_effects interactions overdose
                other_advice delivery distribution fabrication iksnrs
                packages date}
  Conversion::FachinfoYaml.assemble_document chapters, val
end
YAML.add_domain_type 'oddb.org,2003', 'ODDB::FachinfoDocument2001' do |type, val|
  chapters = %w{amzv composition galenic_form indications usage
                contra_indications restrictions interactions pregnancy
                driving_ability unwanted_effects overdose effects kinetic
                preclinic other_advice iksnrs packages registration_owner
                date}
  Conversion::FachinfoYaml.assemble_document chapters, val
end
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Text::Chapter' do |type, val|
  chp = Text::Chapter.new
  chp.heading << val['heading']
  Conversion::FachinfoYaml.encode chp.heading
  # sections arrive as arrays of paragraphs (see the Section handler)
  chp.paragraphs.concat val['sections'].flatten.compact
  chp
end
# A section becomes a flat Array: an optional Subheading followed by the
# section's paragraphs.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Text::Section' do |type, val|
  paragraphs = []
  if (txt = val['subheading']) && !txt.empty?
    sh = Text::Subheading.new
    sh << txt
    Conversion::FachinfoYaml.encode sh.text
    paragraphs << sh
  end
  paragraphs.concat val['paragraphs']
  paragraphs
end
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Text::Paragraph' do |type, val|
  par = Text::Paragraph.new
  par << val['text']
  Conversion::FachinfoYaml.encode par.text
  par.formats.replace val['formats']
  if val['preformatted']
    par.formats.each do |fmt|
      fmt.values << 'pre'
    end
  end
  par
end
# Loads the linked image from below the configured image_prefix. Returns a
# Text::Picture, or nil if the image cannot be opened or read. The handle is
# always closed again.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Text::ImageLink' do |type, val|
  src = File.join EBPS.config.image_prefix, val['src']
  handle = nil
  begin
    handle = open src
    picture = Text::Picture.new
    picture << handle.read
    picture
  rescue
    nil
  ensure
    handle.close if handle
  end
end
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Text::Format' do |type, val|
  fmt = Text::Format.new
  fmt.values = val['values']
  fmt.start = val['start']
  fmt.end = val['end']
  fmt
end
YAML.add_domain_type 'oddb.org,2003', 'ODDB::SimpleLanguage::Descriptions' do |type, val|
  val
end
85
module Conversion
  # Import of Fachinfo documents from a YAML stream.
  module FachinfoYaml
    # Reads every YAML document from +string_or_io+ and returns the non-nil
    # ones as an Array. Documents are keyed by the MD5 digest of their
    # #to_s, so documents with identical string form are returned once.
    # +path+ is accepted but not used.
    def self.import string_or_io, path=nil
      by_digest = {}
      YAML.each_document(string_or_io) do |document|
        next unless document
        by_digest.store(Digest::MD5.hexdigest(document.to_s), document)
      end
      ## apparently we have some nil-values here (?)
      by_digest.values.compact
    end
    # Builds a Text::Document titled yaml_value['name'] and adds, in the
    # order given by +chapters+, every chapter present in +yaml_value+.
    def self.assemble_document chapters, yaml_value
      document = Text::Document.new
      document.title = encode(yaml_value['name'])
      chapters.each do |key|
        chapter = yaml_value[key]
        document.add_chapter(chapter) if chapter
      end
      document
    end
    # Tags +txt+ in place as UTF-8 and returns it.
    def self.encode txt
      txt.tap { |str| str.force_encoding('UTF-8') }
    end
  end
end
113
+ end
@@ -0,0 +1,86 @@
1
+ require 'ebps/text/document'
2
+ require 'yaml'
3
+
4
+ module EBPS
5
+
6
# Top-level entry: returns the description for the configured language with
# the remaining keys merged into its metadata, or nil if the entry has no
# 'descriptions'.
YAML.add_domain_type 'ywesee,2010', 'CompanyInfo' do |type, val|
  descriptions = val.delete('descriptions')
  if descriptions
    document = descriptions[EBPS.config.language]
    document.metadata.update(val)
    document
  end
end
YAML.add_domain_type 'ywesee,2010', 'CompanyInfo2010' do |type, val|
  chapter_names = %w{short_description general_information address_form}
  Conversion::ImportModuleSample.assemble_document(chapter_names, val)
end
YAML.add_domain_type 'ywesee,2010', 'Text::Chapter' do |type, val|
  chapter = Text::Chapter.new
  chapter.heading << val['heading']
  Conversion::ImportModuleSample.encode(chapter.heading)
  chapter.paragraphs.concat(val['sections'].flatten.compact)
  chapter
end
# A section becomes a flat Array: an optional Subheading followed by the
# section's paragraphs.
YAML.add_domain_type 'ywesee,2010', 'Text::Section' do |type, val|
  collected = []
  heading_text = val['subheading']
  if heading_text && !heading_text.empty?
    subheading = Text::Subheading.new
    subheading << heading_text
    Conversion::ImportModuleSample.encode(subheading.text)
    collected << subheading
  end
  collected.concat(val['paragraphs'])
  collected
end
YAML.add_domain_type 'ywesee,2010', 'Text::Paragraph' do |type, val|
  paragraph = Text::Paragraph.new
  paragraph << val['text']
  Conversion::ImportModuleSample.encode(paragraph.text)
  paragraph.formats.replace(val['formats'])
  if val['preformatted']
    paragraph.formats.each { |format| format.values << 'pre' }
  end
  paragraph
end
YAML.add_domain_type 'ywesee,2010', 'Text::Format' do |type, val|
  format = Text::Format.new
  format.values = val['values']
  format.start = val['start']
  format.end = val['end']
  format
end
# Descriptions are passed through unchanged.
YAML.add_domain_type 'ywesee,2010', 'SimpleLanguage::Descriptions' do |type, val|
  val
end
57
+
58
module Conversion
  # Sample import module: reads documents from a YAML stream.
  module ImportModuleSample
    # Reads every YAML document from +string_or_io+ and returns the non-nil
    # ones as an Array. Documents are keyed by the MD5 digest of their
    # #to_s, so documents with identical string form are returned once.
    # +path+ is accepted but not used.
    def self.import string_or_io, path=nil
      by_digest = {}
      YAML.each_document(string_or_io) do |document|
        next unless document
        by_digest.store(Digest::MD5.hexdigest(document.to_s), document)
      end
      ## apparently we have some nil-values here (?)
      by_digest.values.compact
    end
    # Builds a Text::Document titled yaml_value['name'] and adds, in the
    # order given by +chapters+, every chapter present in +yaml_value+.
    def self.assemble_document chapters, yaml_value
      document = Text::Document.new
      document.title = encode(yaml_value['name'])
      chapters.each do |key|
        chapter = yaml_value[key]
        document.add_chapter(chapter) if chapter
      end
      document
    end
    # Tags +txt+ in place as UTF-8 and returns it.
    def self.encode txt
      txt.tap { |str| str.force_encoding('UTF-8') }
    end
  end
end
86
+ end
@@ -0,0 +1,30 @@
1
+ # - encoding: utf-8
2
+ require 'ebps/config'
3
+ require 'ebps/conversion/oebps'
4
+
5
+ module EBPS
6
+ module Conversion
7
# Export to the MobiPocket/Kindle format by running kindlegen on the OEBPS
# files produced by Oebps.export.
module MobiPocket
  # Runs kindlegen inside +tmpdir+ on the package file +opf+ and copies the
  # resulting 'mobipocket.azw' to +target+.
  #
  # +tmpdir+ and +target+ are resolved against the caller's working
  # directory before changing into +tmpdir+; otherwise relative paths would
  # be looked up below +tmpdir+ itself. The working directory is restored
  # afterwards, also when an error is raised.
  def self.compile_azw target, tmpdir, opf
    tmpdir = File.expand_path tmpdir
    target = File.expand_path target
    opfpath = File.join tmpdir, opf
    config = EBPS.config
    command = "#{config.kindlegen_path} #{opfpath} #{config.kindlegen_args} -o mobipocket.azw"
    # kindlegen returns status -1 if a warning has been issued, so system's
    # return value is no indication of success.
    Dir.chdir(tmpdir) do
      system command
    end
    FileUtils.cp File.join(tmpdir, 'mobipocket.azw'), target
  end
  # Exports +docs+ to +target+. Enables the kindle_quirks option, lets
  # Oebps.export write the intermediate files and compiles them.
  def self.export docs, target, override_tmpdir=nil
    ## kindlegen depends on the somewhat obscure kindle_quirks options.
    EBPS.config.kindle_quirks = true
    Oebps.export docs, target, override_tmpdir do |tmpdir, name|
      compile_azw target, tmpdir, name
    end
  end
end
29
+ end
30
+ end
@@ -0,0 +1,537 @@
1
+ # - encoding: utf-8
2
+ require 'builder'
3
+ require 'ebps/config'
4
+ require 'ebps/text/document'
5
+ require 'tmpdir'
6
+
7
# Extensions to String used for sorting titles and for XML escaping.
class String
  # German umlauts and their sortable replacements.
  UMLAUT_SUB = {
    'Ä' => 'ae',
    'ä' => 'ae',
    'Ö' => 'oe',
    'ö' => 'oe',
    'Ü' => 'ue',
    'ü' => 'ue',
  }
  # Returns a lower-cased copy with umlauts transliterated (see #sortable!).
  def sortable
    dupl = dup
    dupl.sortable!
    dupl
  end
  # Lower-cases the receiver in place and replaces umlauts according to
  # UMLAUT_SUB. Returns the receiver if anything changed, nil otherwise.
  def sortable!
    res = downcase!
    UMLAUT_SUB.each do |ptrn, repl|
      # Plain string patterns: the upper- and lower-case umlauts are both
      # keys of UMLAUT_SUB, and a String as Regexp option ('U') is rejected
      # by newer Ruby versions.
      tmp = gsub! ptrn, repl
      res ||= tmp
    end
    res
  end
  ## overwrite Builder's to_xs
  # Escapes only '&', '<' and '>'.
  def to_xs
    gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
  end
end
34
+ module EBPS
35
+ module Conversion
36
+ module Oebps
37
# Common base of the markup factories: keeps the subject, the shared id
# list and the target directory, and prepares a Builder::XmlMarkup that
# already contains the XML declaration.
class Factory
  attr_reader :uid
  def initialize subject, ids, tmpdir
    config = EBPS.config
    @ids = ids
    @indent = config.xml_indent
    @builder = Builder::XmlMarkup.new(:indent => @indent)
    @builder.instruct!(:xml, :version=>"1.0", :encoding=>"UTF-8")
    @subject = subject
    @tmpdir = tmpdir
    @link_drug_brand_name = config.link_drug_brand_name
  end
end
49
# Renders one document (the subject) as an XHTML page. While rendering,
# chapter and section anchors are recorded in the id list that was passed
# to the constructor.
class HtmlFactory < Factory
  # Format names that are written as a styled <span> instead of a tag.
  DEPRECATED_TAGS = {
    'u' => 'text-decoration: underline;'
  }
  def initialize *args
    super
    @builder.declare!(:DOCTYPE, :html, :PUBLIC,
                      "-//W3C//DTD XHTML 1.0 Strict//EN",
                      "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")
    @uid = "id#{Digest::MD5.hexdigest(@subject.to_s)}"
    @current_chapter_id = 0
  end
  # Writes the title (optionally linked to a search for the title), the
  # table of contents if configured, and all chapters.
  def body
    title = @subject.title
    if @link_drug_brand_name
      search_url = "http://just-medical.oddb.org/de/just-medical/search/zone/drugs/search_query/" +
                   title.delete('®').gsub(/ [0-9,]+%.*/,'') +
                   "/search_type/st_sequence#best_result"
      @builder.h1('id' => @uid) { |xml| xml.a(title, 'href' => search_url) }
    else
      @builder.h1(title, 'id' => @uid)
    end
    wants_toc = EBPS.config.html_index &&
                @subject.chapters.any? { |chp| !chp.partial? }
    table_of_contents if wants_toc
    chapters
  end
  # Writes every chapter: a heading (unless it is empty) followed by the
  # chapter's paragraphs. Headings of non-partial chapters are recorded in
  # the id list.
  def chapters
    @subject.chapters.each_with_index do |chapter, idx|
      heading = chapter.heading
      if !heading.empty?
        @current_chapter_id = idx + 1
        anchor = "chapter-#{@current_chapter_id}-#{@uid}"
        @ids.push([anchor, heading, []]) unless chapter.partial?
        @builder.h3(heading, 'id' => anchor)
      end
      paragraphs(chapter)
    end
  end
  # Writes the text of +paragraph+, one formatted run per format.
  def format paragraph
    return unless paragraph
    paragraph.formats.each do |fmt|
      txt = paragraph.text[fmt.range]
      reduce_formats(fmt.values, txt) if txt
    end
  end
  def format_table table
    @builder.table('class' => 'ebps') do |xml|
      xml.tbody do
        table.each_normalized do |row|
          xml.tr do
            row.each { |cell| xml.td { format(cell) } }
          end
        end
      end
    end
  end
  # Writes the paragraphs of +chapter+ according to their class. Pictures
  # are additionally written as files into the target directory.
  def paragraphs chapter
    sections = 0
    chapter.paragraphs.each do |item|
      case item
      when Text::Picture
        name = item.filename
        item.image.write(File.join(@tmpdir, name))
        @builder.p('class' => 'ebps') do |xml|
          xml.img('src' => name, 'alt' => '')
        end
      when Text::Subheading
        attrs = {}
        # third element of the most recent id entry: its list of local ids
        _, _, section_ids = @ids.last
        if section_ids
          sections += 1
          anchor = "section-#{@current_chapter_id}.#{sections}-#{@uid}"
          section_ids.push([anchor, item.to_s])
          attrs.store('id', anchor)
        end
        @builder.h4(attrs) { format(item) }
      when Text::Table
        format_table(item)
      when Text::LinkedParagraph
        @builder.p('class' => 'ebps') do |xml|
          xml.a('href' => item.url) { format(item) }
        end
      else
        @builder.p('class' => 'ebps') { format(item) }
      end
    end
  end
  # Writes +text+ wrapped in one nested element per entry of +formats+.
  def reduce_formats formats, text
    if formats.empty?
      @builder.send(:_indent)
      @builder.text!(text)
      @builder << "\n" if @indent
    elsif style = DEPRECATED_TAGS[formats.first]
      @builder.span('style' => style) do
        reduce_formats(formats[1..-1], text)
      end
    else
      @builder.tag!(formats.first) do
        reduce_formats(formats[1..-1], text)
      end
    end
  end
  # Writes an optional heading and one link per non-partial chapter.
  def table_of_contents
    if title = EBPS.config.content_title
      anchor = "toc-#{@uid}"
      @ids.push([anchor, title])
      @builder.h2(title, :id => anchor)
    end
    @builder.p('class' => 'ebps') do |xml|
      @subject.chapters.each_with_index do |chapter, idx|
        next if chapter.partial?
        xml.a(chapter.heading, 'href' => "#chapter-#{idx.next}-#{@uid}")
        xml.br
      end
    end
  end
  # Returns the complete XHTML page.
  def to_html
    @builder.html("xmlns" => "http://www.w3.org/1999/xhtml") do |xml|
      xml.head do
        xml.meta('http-equiv' => 'Content-Type',
                 'content' => 'text/html;charset=utf-8')
        xml.title(@subject.title)
        xml.link('type' => 'text/css', 'rel' => 'stylesheet',
                 'href' => File.basename(EBPS.config.stylesheet))
      end
      xml.body { body }
    end
  end
end
187
# Renders an index page: for each entry of the subject either a link (when
# the entry has an id) or a bare named anchor.
class IndexFactory < Factory
  def initialize uid, subject, ids, tmpdir
    super(subject, ids, tmpdir)
    @builder.declare!(:DOCTYPE, :html, :PUBLIC,
                      "-//W3C//DTD XHTML 1.0 Strict//EN",
                      "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")
    @uid = uid
  end
  # Returns the complete XHTML page.
  def to_html
    @builder.html("xmlns" => "http://www.w3.org/1999/xhtml") do |xml|
      xml.head do
        xml.meta('http-equiv' => 'Content-Type',
                 'content' => 'text/html;charset=utf-8')
        xml.title(@uid)
        xml.link('type' => 'text/css', 'rel' => 'stylesheet',
                 'href' => File.basename(EBPS.config.stylesheet))
      end
      xml.body('id' => @uid) do
        xml.p('class' => 'ebps') do |par|
          @subject.each do |name, id, content, _|
            if id
              par.a(content, 'href' => name)
              par.br
            else
              par.a('id' => name, 'name' => name)
            end
          end
        end
        xml.mbp(:pagebreak) if EBPS.config.kindle_quirks
      end
    end
  end
end
222
# Base of the NCX and OPF factories. The uid is derived from the MD5 digest
# of the subject's elements joined by newlines.
class XmlFactory < Factory
  def initialize *args
    super
    joined = @subject.join("\n")
    @uid = "id#{Digest::MD5.hexdigest(joined)}"
  end
end
228
# Renders the NCX navigation document from the id list.
class NcxFactory < XmlFactory
  def initialize *args
    super
    @builder.declare!(:DOCTYPE, :ncx, :PUBLIC,
                      "-//NISO//DTD ncx 2005-1//EN",
                      "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd")
    @depth = EBPS.config.max_depth
    @page_count = @ids.size
    @sections = 0
  end
  # Uses the configured title, or else the title of the first document.
  def doc_title
    @builder.docTitle do |xml|
      xml.text(EBPS.config.title || @subject.first.title)
    end
  end
  def head
    @builder.head do |xml|
      xml.meta('name' => 'dtb:uid', 'content' => @uid)
      xml.meta('name' => 'dtb:depth', 'content' => @depth)
      xml.meta('name' => 'dtb:generator', 'content' => self.class.name)
      xml.meta('name' => 'dtb:totalPageCount', 'content' => @page_count)
      xml.meta('name' => 'dtb:maxPageNumber', 'content' => @page_count)
    end
  end
  # Writes one top-level navPoint per Array entry of the id list.
  def nav_map
    play_order = 0
    @builder.navMap do |xml|
      @ids.each do |entry|
        next unless entry.is_a?(Array)
        # a top-level nav point links to the file itself, without fragment
        @append_id = false
        play_order = nav_point(play_order, 1, *entry)
      end
    end
  end
  # Writes a navPoint and, up to the configured depth, its nested local
  # ids. Returns the last play order that was used.
  def nav_point play_order, depth, file, id, txt, local_ids=nil, joker=nil
    ## discard duplicate filenames
    if joker
      # five values were splatted in: drop the first and shift the rest
      file, id, txt, local_ids = id, txt, local_ids, joker
    end
    play_order += 1
    @builder.navPoint('id' => id, 'playOrder' => play_order) do |xml|
      xml.navLabel { xml.text(txt) }
      src = @append_id ? "%s#%s" % [file, id] : file
      xml.content('src' => src)
      @append_id = true
      if local_ids && depth < @depth
        local_ids.each do |local_id|
          play_order = nav_point(play_order, depth + 1, file, *local_id)
        end
      end
    end
    play_order
  end
  # Returns the complete NCX document.
  def to_ncx
    @builder.ncx('xmlns' => "http://www.daisy.org/z3986/2005/ncx/",
                 'xml:lang' => EBPS.config.language,
                 'version' => "2005-1") do |xml|
      head
      doc_title
      # xml.docAuthor could follow here
      nav_map
      # possibly we need a page_list
      # and maybe even a nav_list
    end
  end
end
297
# Renders the OPF package document (metadata, manifest, spine, guide).
class OpfFactory < XmlFactory
  def guide
    @builder.guide do |xml|
      xml.reference('type' => 'toc', 'title' => EBPS.config.guide_index,
                    'href' => 'toc.html')
    end
  end
  # Lists every file with an extension found in the target directory.
  # Html files must be known from the id list, otherwise an error is raised.
  def manifest
    images = 0
    # file name => id, taken from the first two elements of each id entry
    id_by_file = Hash[@ids.map { |entry| entry[0, 2] }]
    cover = File.basename(EBPS.config.cover)
    css = File.basename(EBPS.config.stylesheet)
    @builder.manifest do |xml|
      Dir.foreach(@tmpdir) do |file|
        match = /\.([^\.]+)$/.match(file)
        next unless match
        type = match[1]
        case type
        when /html?/
          id = id_by_file[file]
          raise "Unidentified Html-File '#{file}'" unless id
          xml.item('id' => id, 'href' => file,
                   'media-type' => 'application/xhtml+xml')
        when 'ncx'
          xml.item('id' => 'toc', 'href' => file,
                   'media-type' => 'application/x-dtbncx+xml')
        when 'css'
          xml.item('id' => 'css', 'href' => file,
                   'media-type' => 'text/css')
        else
          # everything else is declared as an image
          id = case file
               when cover
                 'cover'
               when css
                 'css'
               else
                 images += 1
                 "image#{images}"
               end
          xml.item('id' => id, 'href' => file,
                   'media-type' => "image/#{type.sub('jpg', 'jpeg')}")
        end
      end
    end
  end
  def metadata
    @builder.metadata('xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
                      'xmlns:opf'=> 'http://www.idpf.org/2007/opf') do |xml|
      xml.dc(:title, EBPS.config.title || @subject.first.title)
      xml.dc(:language, EBPS.config.language)
      xml.dc(:identifier, @uid, 'id' => 'uid')
      xml.dc(:date, Date.today.strftime('%Y-%m-%d'))
      author = EBPS.config.author
      xml.dc(:creator, author) if author
      xml.meta('name' => 'cover', 'content' => 'cover')
    end
  end
  # References every id entry that has an id, in list order.
  def spine
    @builder.spine('toc' => 'toc') do |xml|
      @ids.each do |file, id|
        xml.itemref('idref' => id) if id
      end
    end
  end
  # Returns the complete OPF document.
  def to_opf
    @builder.package('version' => '2.0',
                     'xmlns' => 'http://www.idpf.org/2007/opf',
                     'unique-identifier' => 'uid') do |xml|
      metadata
      manifest
      spine
      guide
    end
  end
end
373
# Wraps the picture file +appendix+ in a one-chapter document, writes it as
# 'appendix.html' into +tmpdir+ and appends its entry to +ids+.
def self.add_appendix appendix, ids, tmpdir
  picture = Text::Picture.new
  picture << File.read(appendix)
  chapter = Text::Chapter.new
  chapter.add_paragraph(picture)
  document = Text::Document.new
  document.add_chapter(chapter)
  local_ids = []
  html, id = document_to_html(document, local_ids, tmpdir)
  name = 'appendix.html'
  write(tmpdir, name, html)
  ids.push([name, id, '', local_ids])
end
386
# Runs kindlegen inside +tmpdir+ on the package file +opf+ and returns the
# result of Kernel#system.
#
# +tmpdir+ is resolved against the caller's working directory before
# changing into it; a relative +tmpdir+ would otherwise yield a package path
# that does not exist below the new working directory. The working
# directory is restored afterwards, also when an error is raised.
def self.compile_azw tmpdir, opf
  tmpdir = File.expand_path tmpdir
  opfpath = File.join tmpdir, opf
  config = EBPS.config
  command = "#{config.kindlegen_path} #{opfpath} #{config.kindlegen_args} -o mobipocket.azw"
  # kindlegen returns status -1 if a warning has been issued, so system's
  # return value is no indication of success.
  Dir.chdir(tmpdir) do
    system command
  end
end
398
# Zips the files in +tmpdir+ into an epub archive.
#
# The archive gets a 'mimetype' entry, a 'META-INF/container.xml' pointing
# at the package file "OPS/#{name}", and every regular file of +tmpdir+
# below 'OPS/'.
#
# +target+ is the path of the archive; it defaults to the previously
# hard-coded '/tmp/test.epub'. Files are read in binary mode so that images
# are stored unmodified on every platform.
def self.compile_epub tmpdir, name, target='/tmp/test.epub'
  Zip::ZipOutputStream.open(target) do |zh|
    zh.put_next_entry('mimetype')
    zh << 'application/epub+zip'
    zh.put_next_entry('META-INF/container.xml')
    zh << <<-EOS
<?xml version="1.0" encoding="UTF-8" ?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OPS/#{name}" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
    EOS
    Dir.foreach(tmpdir) do |entry|
      path = File.join tmpdir, entry
      if File.ftype(path) == 'file'
        zh.put_next_entry("OPS/#{entry}")
        zh << File.open(path, 'rb') { |fh| fh.read }
      end
    end
  end
end
420
# Returns the file name for the +count+-th document: the count followed by
# the first 32 characters of the title ('part' if the title is empty),
# passed through filename.
def self.document_filename doc, count
  label = doc.title.empty? ? 'part' : doc.title.dup
  filename("%i %s.html" % [count, label[0, 32]])
end
425
# Renders +doc+ with an HtmlFactory. Returns the pair [html, uid].
def self.document_to_html doc, ids, tmpdir
  renderer = HtmlFactory.new(doc, ids, tmpdir)
  html = renderer.to_html
  [html, renderer.uid]
end
429
# Writes all OEBPS files for +docs+ (html pages, indices, toc.ncx, cover,
# stylesheet, optional appendix and the .opf package) into a temporary
# directory, or into +override_tmpdir+ if given, and then yields that
# directory and the name of the package file.
def self.export docs, target, override_tmpdir=nil
  ids = []
  with_tmpdir do |generated|
    tmpdir = override_tmpdir || generated
    config = EBPS.config
    # HTML
    docs = docs.sort_by { |doc| doc.title.sortable } if config.sort
    docs.each_with_index do |doc, idx|
      local_ids = []
      html, id = document_to_html(doc, local_ids, tmpdir)
      name = document_filename(doc, idx.next)
      write(tmpdir, name, html)
      ids.push([name, id, doc.title, local_ids])
    end
    # Indices
    ncx_ids = write_index(ids, tmpdir)
    # NCX
    write(tmpdir, 'toc.ncx', to_ncx(docs, ncx_ids, tmpdir))
    # Copy the Cover
    cover = config.cover
    FileUtils.cp(cover, File.join(tmpdir, File.basename(cover)))
    # Copy the stylesheet
    style = config.stylesheet
    FileUtils.cp(style, File.join(tmpdir, File.basename(style)))
    # Appendix
    appendix = config.appendix
    add_appendix(appendix, ids, tmpdir) if appendix
    # OPF
    title = config.title || docs.first.title
    name = filename("%s.opf" % title)
    write(tmpdir, name, to_opf(docs, ids, tmpdir))
    yield tmpdir, name
  end
end
470
# Turns +name+ into a file name: runs of whitespace become a single
# underscore, then every character other than digits, ASCII letters,
# underscore and dot is removed.
def self.filename name
  underscored = name.gsub(/\s+/u, '_')
  underscored.gsub(/[^0-9a-z_.]/iu, '')
end
473
# Renders the index page identified by +key+ for the entries +keys+.
def self.to_index key, keys, ids, tmpdir
  page = IndexFactory.new(key, keys, ids, tmpdir)
  page.to_html
end
476
# Renders the NCX navigation document for +docs+ and +ids+.
def self.to_ncx docs, ids, tmpdir
  navigation = NcxFactory.new(docs, ids, tmpdir)
  navigation.to_ncx
end
479
# Renders the OPF package document for +docs+ and +ids+.
def self.to_opf docs, ids, tmpdir
  package = OpfFactory.new(docs, ids, tmpdir)
  package.to_opf
end
482
# Creates a temporary directory, calls +block+ with its path and returns the
# block's value. The directory is removed afterwards unless keep_tmpfiles is
# configured.
#
# Nothing is removed if the directory could not be created in the first
# place, so that the original error is not masked by a failing cleanup.
def self.with_tmpdir &block
  tmpdir = Dir.mktmpdir
  block.call tmpdir
ensure
  FileUtils.rm_r tmpdir if tmpdir && !EBPS.config.keep_tmpfiles
end
488
# Writes +data+ with IO#puts to the file +name+ inside +tmpdir+ (created if
# necessary) and returns the path of the file.
def self.write tmpdir, name, data
  FileUtils.mkdir_p(tmpdir)
  File.join(tmpdir, name).tap do |path|
    File.open(path, 'wb') { |fh| fh.puts(data) }
  end
end
494
# Groups +ids+ by the first character of their sortable title and returns
# the list of nav points for the NCX, one per group: [name, key, label].
# Groups whose key is a letter a-z come first.
#
# With html_index an index page per group and 'toc.html' are written, and
# +ids+ is modified in place to start with the toc and the index pages.
# With kindle_quirks the grouped entries are appended flat after the group
# nav points instead of being nested inside them.
def self.write_index ids, tmpdir
  config = EBPS.config
  groups = {}
  ids.each do |entry|
    _, _, title = entry
    initial = title.sortable[0,1]
    (groups[initial] ||= []).push(entry)
  end
  ncx_ids = []
  waypoints = []
  # '{' sorts after 'z', which moves non-letter keys behind the letters
  ordered = groups.sort_by do |key, _|
    /[a-z]/.match(key) ? key : "{#{key}"
  end
  ordered.each do |key, key_ids|
    name = nil
    if config.html_index
      name = "#{key}.html"
      write(tmpdir, name, to_index(key, key_ids, ids, tmpdir))
    else
      name, = key_ids.first
    end
    nav_point = [ name, key, "#{key.upcase} (#{key_ids.size})" ]
    ncx_ids.push(nav_point)
    ## Kindle does not like nested nav_points for content that is not
    #  contained in the same file.
    if config.kindle_quirks
      waypoints.concat(key_ids)
    else
      nav_point.push(key_ids)
    end
  end
  if config.html_index
    write(tmpdir, 'toc.html', to_index('toc-html', ncx_ids, ids, tmpdir))
    ids.replace(ncx_ids + ids)
    ids.unshift([ 'toc.html', 'toc-html', config.index_name ])
  end
  ncx_ids.concat(waypoints) if config.kindle_quirks
  ncx_ids
end
535
+ end
536
+ end
537
+ end