ebps 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/History.txt +3 -0
  2. data/InstalledFiles +36 -0
  3. data/LICENSE.txt +339 -0
  4. data/Manifest.txt +56 -0
  5. data/README.txt +28 -0
  6. data/Rakefile +28 -0
  7. data/SetupConfig +30 -0
  8. data/bin/ebps +86 -0
  9. data/example/config.yml +12 -0
  10. data/example/converter_for_firefox.rb +59 -0
  11. data/example/data.yml +60 -0
  12. data/example/example.sh +3 -0
  13. data/example/sample.epub +0 -0
  14. data/lib/ebps.rb +5 -0
  15. data/lib/ebps/config.rb +61 -0
  16. data/lib/ebps/conversion/de_fachinfo_yaml.rb +81 -0
  17. data/lib/ebps/conversion/epub.rb +38 -0
  18. data/lib/ebps/conversion/fachinfo_xml.rb +170 -0
  19. data/lib/ebps/conversion/fachinfo_yaml.rb +113 -0
  20. data/lib/ebps/conversion/import_module_sample.rb +86 -0
  21. data/lib/ebps/conversion/mobi_pocket.rb +30 -0
  22. data/lib/ebps/conversion/oebps.rb +537 -0
  23. data/lib/ebps/conversion/patinfo_yaml.rb +107 -0
  24. data/lib/ebps/data/default_cover.jpg +0 -0
  25. data/lib/ebps/data/stylesheet.css +16 -0
  26. data/lib/ebps/postprocess/bookworm.rb +16 -0
  27. data/lib/ebps/postprocess/copy.rb +28 -0
  28. data/lib/ebps/postprocess/system_call.rb +18 -0
  29. data/lib/ebps/preprocess/copy.rb +28 -0
  30. data/lib/ebps/preprocess/system_call.rb +18 -0
  31. data/lib/ebps/text/chapter.rb +36 -0
  32. data/lib/ebps/text/document.rb +36 -0
  33. data/lib/ebps/text/format.rb +34 -0
  34. data/lib/ebps/text/paragraph.rb +63 -0
  35. data/lib/ebps/text/picture.rb +41 -0
  36. data/lib/ebps/text/table.rb +65 -0
  37. data/lib/ebps/util/mail.rb +47 -0
  38. data/lib/ebps/util/smtp_tls.rb +62 -0
  39. data/spec/conversion/data/DF_15164_1_3.gif +0 -0
  40. data/spec/conversion/data/DF_15164_2_3.gif +0 -0
  41. data/spec/conversion/data/appendix.png +0 -0
  42. data/spec/conversion/data/fachinfo.xml +1151 -0
  43. data/spec/conversion/data/fachinfo.yaml +1214 -0
  44. data/spec/conversion/data/fachinfo_with_image.xml +334 -0
  45. data/spec/conversion/data/fachinfo_with_table.xml +1101 -0
  46. data/spec/conversion/data/fachinfos.de.oddb.yaml +5789 -0
  47. data/spec/conversion/data/images/5c/5c54d52c8132230e8c40c37a428fe761.png +0 -0
  48. data/spec/conversion/de_fachinfo_yaml_spec.rb +86 -0
  49. data/spec/conversion/epub_spec.rb +59 -0
  50. data/spec/conversion/fachinfo_xml_spec.rb +245 -0
  51. data/spec/conversion/fachinfo_yaml_spec.rb +52 -0
  52. data/spec/conversion/mobi_pocket_spec.rb +55 -0
  53. data/spec/conversion/oebps_spec.rb +555 -0
  54. data/spec/text/chapter_spec.rb +65 -0
  55. data/spec/text/document_spec.rb +78 -0
  56. data/spec/text/paragraph_spec.rb +77 -0
  57. metadata +145 -0
@@ -0,0 +1,170 @@
1
+ require 'ebps/config'
2
+ require 'ebps/text/document'
3
+ require 'open-uri'
4
+ require 'rexml/document'
5
+ require 'rexml/streamlistener'
6
+
7
+ module EBPS
8
+ module Conversion
9
+ module FachinfoXml
10
# Stream listener that builds Text::Document objects from Fachinfo XML.
#
# REXML reports tags and text nodes through #tag_start, #tag_end and #text.
# The first two dispatch to the start_<tag> / end_<tag> handlers below and
# raise for any tag that has no handler.
class StreamListener
  include REXML::StreamListener
  attr_reader :current_chapter, :current_document, :current_target,
              :documents

  def initialize
    @documents = []
    # one entry per inline format tag that is currently open
    @stack = []
  end

  def end_fi
    @current_document = nil
  end

  # Closes the innermost <i> only. Removing every 'i' from the stack (as
  # Array#delete would) switches italics off while an enclosing <i> is still
  # open.
  def end_i
    innermost = @stack.rindex('i')
    @stack.delete_at(innermost) if innermost
    if @current_target.respond_to?(:set_format)
      @current_target.set_format(*@stack.uniq)
    end
  end

  def end_image
    # nothing to be done
  end

  def end_monographies
    # outermost tag, we can safely ignore this.
  end

  # Hands the finished target over to the current chapter.
  def end_p
    @current_chapter.add_paragraph(@current_target) if @current_target
    @current_target = nil
  end

  def end_paragraph
    @current_chapter = nil
  end

  alias end_paragraphSubtitle end_p

  def end_paragraphTitle
    @current_target = nil
  end

  def end_owner
    end_paragraphTitle
    end_paragraph
  end

  def end_table
    end_p
  end

  def end_tb
    # end of table body. Ignored for now.
  end

  def end_td
    @in_td = false
  end

  def end_th
    # end of table head. Ignored for now.
  end

  def end_title
    @current_target = nil
  end

  # Terminates a table row with a newline. Guarded like #text and #end_p so
  # that a row without an active target does not raise.
  def end_tr
    @current_target << "\n" if @current_target
  end

  # Starts a new document and returns it.
  def start_fi(attrs)
    @documents.push(@current_document = Text::Document.new)
    @current_document
  end

  def start_i(attrs)
    @stack.push 'i'
    if @current_target.respond_to?(:set_format)
      @current_target.set_format(*@stack.uniq)
    end
  end

  # Loads the referenced image into a Text::Picture and makes it the current
  # target. Raises a RuntimeError unless both image_prefix and image_suffix
  # are configured. An image that cannot be loaded is skipped (target is
  # set to nil).
  def start_image(attrs)
    config = EBPS.config
    unless config.image_prefix && config.image_suffix
      raise "This monography contains images.\n" \
            "Please configure both image_prefix and image_suffix\n"
    end
    file = attrs["src"] + config.image_suffix
    src = File.join(config.image_prefix, file)
    begin
      picture = Text::Picture.new
      picture << read_image(src)
      @current_target = picture
    rescue StandardError
      # best effort: a missing or unreadable image is left out
      @current_target = nil
    end
  end

  def start_monographies(attrs)
    # outermost tag, we can safely ignore this.
  end

  def start_owner(attrs)
    start_paragraph attrs
    start_paragraphTitle attrs
  end

  def start_p(attrs)
    @current_target = Text::Paragraph.new
  end

  # Opens a new chapter in the current document and returns it.
  def start_paragraph(attrs)
    @current_chapter = Text::Chapter.new
    @current_document.add_chapter @current_chapter
    @current_chapter
  end

  def start_paragraphSubtitle(attrs)
    @current_target = Text::Subheading.new
  end

  def start_paragraphTitle(attrs)
    @current_target = @current_chapter.heading
  end

  # Tables are rendered as a single preformatted paragraph.
  def start_table(attrs)
    paragraph = start_p attrs
    paragraph.set_format('pre')
    paragraph
  end

  def start_tb(attrs)
    # start of table body. Ignored for now.
  end

  def start_td(attrs)
    @in_td = true
  end

  def start_th(attrs)
    # start of table head. Ignored for now.
  end

  def start_title(attrs)
    @current_target = @current_document.title
  end

  def start_tr(attrs)
    # ignore. end_tr will add a newline character
  end

  # REXML callback: dispatches to end_<name>, raises for unknown tags.
  def tag_end(name)
    method = "end_#{name}"
    if respond_to?(method)
      send method
    else
      raise "unhandled end tag '#{name}'"
    end
  end

  # REXML callback: dispatches to start_<name>, raises for unknown tags.
  def tag_start(name, attrs)
    method = "start_#{name}"
    if respond_to?(method)
      send method, attrs
    else
      raise "unhandled start tag '#{name}'"
    end
  end

  # REXML callback: appends text to the current target. Text is stripped
  # unless it belongs to a table cell.
  def text(data)
    if @current_target && data
      data = data.strip unless @in_td
      @current_target << data
    end
  end

  private

  # Returns the content of +src+ and always closes the handle.
  # Since Ruby 3.0 Kernel#open no longer follows URLs even with open-uri
  # loaded, so URI.open is used where open-uri provides it.
  def read_image(src)
    if URI.respond_to?(:open)
      URI.open(src) { |handle| handle.read }
    else
      open(src) { |handle| handle.read }
    end
  end
end
160
# Parses +string_or_io+ with a fresh StreamListener and returns the documents
# it collected. An ArgumentError raised during parsing is reported via warn
# (mentioning +path+) and an empty list is returned instead.
def self.import(string_or_io, path='')
  begin
    handler = StreamListener.new
    REXML::Document.parse_stream(string_or_io, handler)
    handler.documents
  rescue ArgumentError => error
    warn "#{error.message} while importing #{path}, ignoring input file"
    []
  end
end
168
+ end
169
+ end
170
+ end
@@ -0,0 +1,113 @@
1
+ require 'ebps/text/document'
2
+ require 'yaml'
3
+
4
+ module EBPS
5
# Fachinfo: picks the description for the configured language and merges the
# remaining values into its metadata. Returns nil without 'descriptions'.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Fachinfo' do |type, val|
  if descr = val.delete('descriptions')
    doc = descr[EBPS.config.language]
    begin
      doc.metadata.update val
    rescue NoMethodError => err
      comment = "Probably '#{EBPS.config.language}' data is missing in the following data:\n"
      # Appending to err.message is not reliable: the message String may be
      # rebuilt on every call. Re-raise the same class with the full text.
      raise err.class, "#{err.message}\n\n#{comment}#{val.to_a}\n", err.backtrace
    end
    doc
  end
end
# FachinfoDocument: assembles the chapters of the older document layout.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::FachinfoDocument' do |type, val|
  chapters = %w{galenic_form composition effects kinetic indications usage
                restrictions unwanted_effects interactions overdose
                other_advice delivery distribution fabrication iksnrs
                packages date}
  Conversion::FachinfoYaml.assemble_document chapters, val
end
# FachinfoDocument2001: same as above with the 2001 chapter list.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::FachinfoDocument2001' do |type, val|
  chapters = %w{amzv composition galenic_form indications usage
                contra_indications restrictions interactions pregnancy
                driving_ability unwanted_effects overdose effects kinetic
                preclinic other_advice iksnrs packages registration_owner
                date}
  Conversion::FachinfoYaml.assemble_document chapters, val
end
# Chapter: heading plus the flattened, nil-free paragraphs of all sections.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Text::Chapter' do |type, val|
  chp = Text::Chapter.new
  chp.heading << val['heading']
  Conversion::FachinfoYaml.encode chp.heading
  chp.paragraphs.concat val['sections'].flatten.compact
  chp
end
# Section: becomes a plain list, an optional Subheading followed by the
# section's paragraphs.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Text::Section' do |type, val|
  paragraphs = []
  if (txt = val['subheading']) && !txt.empty?
    sh = Text::Subheading.new
    sh << txt
    Conversion::FachinfoYaml.encode sh.text
    paragraphs << sh
  end
  paragraphs.concat val['paragraphs']
  paragraphs
end
# Paragraph: text and formats; preformatted paragraphs get 'pre' added to
# every format.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Text::Paragraph' do |type, val|
  par = Text::Paragraph.new
  par << val['text']
  Conversion::FachinfoYaml.encode par.text
  par.formats.replace val['formats']
  if val['preformatted']
    par.formats.each do |fmt|
      fmt.values << 'pre'
    end
  end
  par
end
# ImageLink: loads the image below image_prefix into a Text::Picture.
# Returns nil when the image cannot be loaded.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Text::ImageLink' do |type, val|
  src = File.join EBPS.config.image_prefix, val['src']
  begin
    picture = Text::Picture.new
    # block form closes the handle; it used to be left open
    open(src) { |handle| picture << handle.read }
    picture
  rescue StandardError
    nil
  end
end
# Format: plain value object with values, start and end.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::Text::Format' do |type, val|
  fmt = Text::Format.new
  fmt.values = val['values']
  fmt.start = val['start']
  fmt.end = val['end']
  fmt
end
# Descriptions: passed through as loaded.
YAML.add_domain_type 'oddb.org,2003', 'ODDB::SimpleLanguage::Descriptions' do |type, val|
  val
end
85
module Conversion
  # Imports documents from ODDB Fachinfo YAML.
  module FachinfoYaml
    # Loads every YAML document in +string_or_io+ and returns the non-nil
    # ones, de-duplicated by the MD5 digest of their #to_s. +path+ is unused.
    #
    # NOTE(review): loading instantiates the registered domain types, so the
    # input should come from a trusted source.
    def self.import(string_or_io, path=nil)
      collection = {}
      load_documents(string_or_io) do |doc|
        if doc
          collection.store Digest::MD5.hexdigest(doc.to_s), doc
        end
      end
      ## apparently we have some nil-values here (?)
      collection.values.compact
    end

    # Builds a Text::Document titled yaml_value['name'] and adds, in the
    # order given by +chapters+, every chapter present in +yaml_value+.
    def self.assemble_document(chapters, yaml_value)
      doc = Text::Document.new
      doc.title = Conversion::FachinfoYaml.encode(yaml_value['name'])
      chapters.each do |name|
        if chapter = yaml_value[name]
          doc.add_chapter chapter
        end
      end
      doc
    end

    # Tags +txt+ as UTF-8 in place and returns it.
    def self.encode(txt)
      txt.force_encoding 'UTF-8'
      txt
    end

    # Yields each document of a YAML stream. YAML.each_document is Syck API
    # that Psych does not provide; YAML.load_stream is the fallback.
    def self.load_documents(string_or_io, &block)
      if YAML.respond_to?(:each_document)
        YAML.each_document(string_or_io, &block)
      else
        YAML.load_stream(string_or_io, &block)
      end
    end
    private_class_method :load_documents
  end
end
113
+ end
@@ -0,0 +1,86 @@
1
+ require 'ebps/text/document'
2
+ require 'yaml'
3
+
4
+ module EBPS
5
+
6
# CompanyInfo: returns the description for the configured language with the
# remaining values merged into its metadata; nil without 'descriptions'.
YAML.add_domain_type 'ywesee,2010', 'CompanyInfo' do |_tag, attributes|
  descriptions = attributes.delete('descriptions')
  next unless descriptions
  document = descriptions[EBPS.config.language]
  document.metadata.update attributes
  document
end
# CompanyInfo2010: assembles a document from its three chapters.
YAML.add_domain_type 'ywesee,2010', 'CompanyInfo2010' do |_tag, attributes|
  Conversion::ImportModuleSample.assemble_document(
    %w{short_description general_information address_form}, attributes)
end
# Chapter: heading plus the flattened, nil-free paragraphs of all sections.
YAML.add_domain_type 'ywesee,2010', 'Text::Chapter' do |_tag, attributes|
  chapter = Text::Chapter.new
  chapter.heading << attributes['heading']
  Conversion::ImportModuleSample.encode(chapter.heading)
  chapter.paragraphs.concat(attributes['sections'].flatten.compact)
  chapter
end
# Section: a list with an optional Subheading followed by the paragraphs.
YAML.add_domain_type 'ywesee,2010', 'Text::Section' do |_tag, attributes|
  collected = []
  caption = attributes['subheading']
  if caption && !caption.empty?
    subheading = Text::Subheading.new
    subheading << caption
    Conversion::ImportModuleSample.encode(subheading.text)
    collected << subheading
  end
  collected.concat(attributes['paragraphs'])
  collected
end
# Paragraph: text and formats; 'pre' is added to each format of a
# preformatted paragraph.
YAML.add_domain_type 'ywesee,2010', 'Text::Paragraph' do |_tag, attributes|
  paragraph = Text::Paragraph.new
  paragraph << attributes['text']
  Conversion::ImportModuleSample.encode(paragraph.text)
  paragraph.formats.replace(attributes['formats'])
  paragraph.formats.each { |fmt| fmt.values << 'pre' } if attributes['preformatted']
  paragraph
end
# Format: copies values, start and end.
YAML.add_domain_type 'ywesee,2010', 'Text::Format' do |_tag, attributes|
  format = Text::Format.new
  format.values = attributes['values']
  format.start = attributes['start']
  format.end = attributes['end']
  format
end
# Descriptions: passed through as loaded.
YAML.add_domain_type 'ywesee,2010', 'SimpleLanguage::Descriptions' do |_tag, attributes|
  attributes
end
57
+
58
module Conversion
  # Sample import module for the 'ywesee,2010' YAML types.
  module ImportModuleSample
    # Loads every YAML document in +string_or_io+ and returns the non-nil
    # ones, de-duplicated by the MD5 digest of their #to_s. +path+ is unused.
    #
    # NOTE(review): loading instantiates the registered domain types, so the
    # input should come from a trusted source.
    def self.import(string_or_io, path=nil)
      collection = {}
      load_documents(string_or_io) do |doc|
        if doc
          collection.store Digest::MD5.hexdigest(doc.to_s), doc
        end
      end
      ## apparently we have some nil-values here (?)
      collection.values.compact
    end

    # Builds a Text::Document titled yaml_value['name'] and adds, in the
    # order given by +chapters+, every chapter present in +yaml_value+.
    def self.assemble_document(chapters, yaml_value)
      doc = Text::Document.new
      doc.title = Conversion::ImportModuleSample.encode(yaml_value['name'])
      chapters.each do |name|
        if chapter = yaml_value[name]
          doc.add_chapter chapter
        end
      end
      doc
    end

    # Tags +txt+ as UTF-8 in place and returns it.
    def self.encode(txt)
      txt.force_encoding 'UTF-8'
      txt
    end

    # Yields each document of a YAML stream. YAML.each_document is Syck API
    # that Psych does not provide; YAML.load_stream is the fallback.
    def self.load_documents(string_or_io, &block)
      if YAML.respond_to?(:each_document)
        YAML.each_document(string_or_io, &block)
      else
        YAML.load_stream(string_or_io, &block)
      end
    end
    private_class_method :load_documents
  end
end
86
+ end
@@ -0,0 +1,30 @@
1
+ # - encoding: utf-8
2
+ require 'ebps/config'
3
+ require 'ebps/conversion/oebps'
4
+
5
+ module EBPS
6
+ module Conversion
7
+ module MobiPocket
8
# Runs kindlegen on +opf+ inside +tmpdir+ and copies the resulting
# mobipocket.azw to +target+. The working directory is restored afterwards.
def self.compile_azw(target, tmpdir, opf)
  Dir.chdir(tmpdir) do
    settings = EBPS.config
    opf_file = File.join(tmpdir, opf)
    # kindlegen returns status -1 if a warning has been issued, so system's
    # return value is no indication of success.
    system "#{settings.kindlegen_path} #{opf_file} #{settings.kindlegen_args} -o mobipocket.azw"
    FileUtils.cp(File.join(tmpdir, 'mobipocket.azw'), target)
  end
end
21
# Exports +docs+ through Oebps.export with kindle_quirks switched on in the
# configuration, then compiles the generated OPF into +target+.
def self.export(docs, target, override_tmpdir=nil)
  ## kindlegen depends on the somewhat obscure kindle_quirks options.
  EBPS.config.kindle_quirks = true
  Oebps.export(docs, target, override_tmpdir) do |workdir, opf_name|
    compile_azw(target, workdir, opf_name)
  end
end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,537 @@
1
+ # - encoding: utf-8
2
+ require 'builder'
3
+ require 'ebps/config'
4
+ require 'ebps/text/document'
5
+ require 'tmpdir'
6
+
7
class String
  ## Replacements applied by #sortable!. Both cases are listed, so the result
  # does not depend on whether downcase handles non-ASCII characters.
  UMLAUT_SUB = {
    'Ä' => 'ae',
    'ä' => 'ae',
    'Ö' => 'oe',
    'ö' => 'oe',
    'Ü' => 'ue',
    'ü' => 'ue',
  }.freeze
  ## Returns a lower-cased copy with umlauts transliterated, for sorting.
  def sortable
    dupl = dup
    dupl.sortable!
    dupl
  end
  ## In-place variant of #sortable. Returns self if anything changed and nil
  # otherwise, like downcase! and gsub!.
  def sortable!
    res = downcase!
    UMLAUT_SUB.each do |ptrn, repl|
      # A literal String pattern is enough here. Regexp.new(ptrn, 'U') raises
      # ArgumentError on Ruby 3.2+, where String options are parsed as flags.
      tmp = gsub!(ptrn, repl)
      res ||= tmp
    end
    res
  end
  ## overwrite Builder's to_xs
  # Escapes only &, < and >; all other characters pass through unchanged.
  def to_xs
    gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
  end
end
34
+ module EBPS
35
+ module Conversion
36
+ module Oebps
37
# Common base of the markup factories: keeps the subject, the shared id list
# and the working directory, and prepares an XML builder that has already
# emitted the XML declaration.
class Factory
  attr_reader :uid
  def initialize(subject, ids, tmpdir)
    settings = EBPS.config
    @subject = subject
    @ids = ids
    @tmpdir = tmpdir
    @indent = settings.xml_indent
    @link_drug_brand_name = settings.link_drug_brand_name
    @builder = Builder::XmlMarkup.new(:indent => @indent)
    @builder.instruct!(:xml, :version=>"1.0", :encoding=>"UTF-8")
  end
end
49
# Renders one document as an XHTML page and records the anchors it creates
# in the id list that was passed to the constructor.
class HtmlFactory < Factory
  ## formats that are written as a styled <span> instead of a tag
  DEPRECATED_TAGS = {
    'u' => 'text-decoration: underline;'
  }
  def initialize(*args)
    super
    @builder.declare! :DOCTYPE, :html, :PUBLIC,
                      "-//W3C//DTD XHTML 1.0 Strict//EN",
                      "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
    @uid = "id#{Digest::MD5.hexdigest(@subject.to_s)}"
    @current_chapter_id = 0
  end
  ## Title (optionally linked to a drug search), optional table of contents,
  # then all chapters.
  def body
    title = @subject.title
    if @link_drug_brand_name
      query = title.delete('®').gsub(/ [0-9,]+%.*/,'')
      search_url = "http://just-medical.oddb.org/de/just-medical/search/zone/drugs/search_query/" \
                   "#{query}" \
                   "/search_type/st_sequence#best_result"
      @builder.h1('id' => @uid) { |xml| xml.a title, 'href' => search_url }
    else
      @builder.h1 title, 'id' => @uid
    end
    wants_toc = EBPS.config.html_index &&
                @subject.chapters.any? { |chp| !chp.partial? }
    table_of_contents if wants_toc
    chapters
  end
  ## Writes every chapter; chapters with a heading get an <h3> anchor, and
  # non-partial ones are also registered in @ids.
  def chapters
    @subject.chapters.each_with_index do |chapter, idx|
      heading = chapter.heading
      unless heading.empty?
        @current_chapter_id = idx.next
        id = "chapter-#{@current_chapter_id}-#@uid"
        @ids.push [id, heading, []] unless chapter.partial?
        @builder.h3 heading, 'id' => id
      end
      paragraphs chapter
    end
  end
  ## Writes the text of +paragraph+, one run per format.
  def format(paragraph)
    return unless paragraph
    paragraph.formats.each do |fmt|
      segment = paragraph.text[fmt.range]
      reduce_formats(fmt.values, segment) if segment
    end
  end
  def format_table(table)
    @builder.table('class' => 'ebps') do |xml|
      xml.tbody do
        table.each_normalized do |row|
          xml.tr do
            row.each { |cell| xml.td { format cell } }
          end
        end
      end
    end
  end
  ## Writes the paragraphs of +chapter+ according to their type. Subheadings
  # are registered as sections of the most recently registered entry in
  # @ids, if that entry carries a section list.
  def paragraphs(chapter)
    sections = 0
    chapter.paragraphs.each do |paragraph|
      case paragraph
      when Text::Picture
        name = paragraph.filename
        paragraph.image.write File.join(@tmpdir, name)
        @builder.p('class' => 'ebps') { |xml| xml.img 'src' => name, 'alt' => '' }
      when Text::Subheading
        attrs = {}
        _id, _heading, anchors = @ids.last
        if anchors
          sections += 1
          id = "section-#{@current_chapter_id}.#{sections}-#@uid"
          anchors.push [id, paragraph.to_s]
          attrs.store 'id', id
        end
        @builder.h4(attrs) { format(paragraph) }
      when Text::Table
        format_table paragraph
      when Text::LinkedParagraph
        @builder.p('class' => 'ebps') do |xml|
          xml.a('href' => paragraph.url) { format paragraph }
        end
      else
        @builder.p('class' => 'ebps') { format(paragraph) }
      end
    end
  end
  ## Wraps +text+ in one element per entry of +formats+, outermost first.
  def reduce_formats(formats, text)
    if formats.empty?
      @builder.send(:_indent)
      @builder.text! text
      @builder << "\n" if @indent
    else
      outer = formats.first
      inner = formats[1..-1]
      if (style = DEPRECATED_TAGS[outer])
        @builder.span('style' => style) { reduce_formats inner, text }
      else
        @builder.tag!(outer) { reduce_formats inner, text }
      end
    end
  end
  ## Optional heading followed by one link per non-partial chapter.
  def table_of_contents
    title = EBPS.config.content_title
    if title
      id = "toc-#@uid"
      @ids.push [id, title]
      @builder.h2(title, :id => id)
    end
    @builder.p('class' => 'ebps') do |xml|
      @subject.chapters.each_with_index do |chapter, idx|
        next if chapter.partial?
        xml.a chapter.heading, 'href' => "#chapter-#{idx.next}-#@uid"
        xml.br
      end
    end
  end
  def to_html
    @builder.html("xmlns" => "http://www.w3.org/1999/xhtml") do |xml|
      xml.head do
        xml.meta 'http-equiv' => 'Content-Type',
                 'content'    => 'text/html;charset=utf-8'
        xml.title @subject.title
        xml.link 'type' => 'text/css', 'rel' => 'stylesheet',
                 'href' => File.basename(EBPS.config.stylesheet)
      end
      xml.body { body }
    end
  end
end
187
# Renders an index page: a list of links built from the entries of the
# subject, or a named anchor for entries without an id.
class IndexFactory < Factory
  def initialize(uid, subject, ids, tmpdir)
    super(subject, ids, tmpdir)
    @builder.declare! :DOCTYPE, :html, :PUBLIC,
                      "-//W3C//DTD XHTML 1.0 Strict//EN",
                      "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
    @uid = uid
  end
  def to_html
    @builder.html("xmlns" => "http://www.w3.org/1999/xhtml") do |xml|
      xml.head do
        xml.meta 'http-equiv' => 'Content-Type',
                 'content'    => 'text/html;charset=utf-8'
        xml.title @uid
        xml.link 'type' => 'text/css', 'rel' => 'stylesheet',
                 'href' => File.basename(EBPS.config.stylesheet)
      end
      xml.body('id' => @uid) do
        xml.p('class' => 'ebps') do |links|
          @subject.each do |name, id, content, _|
            if id
              links.a content, 'href' => name
              links.br
            else
              links.a 'id' => name, 'name' => name
            end
          end
        end
        xml.mbp :pagebreak if EBPS.config.kindle_quirks
      end
    end
  end
end
222
# Base of the NCX and OPF factories. The uid is "id" followed by the MD5
# digest of the subject's elements joined by newlines.
class XmlFactory < Factory
  def initialize(*args)
    super
    digest = Digest::MD5.hexdigest(@subject.join("\n"))
    @uid = "id#{digest}"
  end
end
228
# Builds the NCX navigation document from the id list.
class NcxFactory < XmlFactory
  def initialize(*args)
    super
    @builder.declare! :DOCTYPE, :ncx, :PUBLIC,
                      "-//NISO//DTD ncx 2005-1//EN",
                      "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"
    @depth = EBPS.config.max_depth
    @page_count = @ids.size
    @sections = 0
  end
  def doc_title
    @builder.docTitle do |xml|
      xml.text(EBPS.config.title || @subject.first.title)
    end
  end
  def head
    @builder.head do |xml|
      { 'dtb:uid'            => @uid,
        'dtb:depth'          => @depth,
        'dtb:generator'      => self.class.name,
        'dtb:totalPageCount' => @page_count,
        'dtb:maxPageNumber'  => @page_count,
      }.each do |name, content|
        xml.meta 'name' => name, 'content' => content
      end
    end
  end
  ## One top-level navPoint per Array entry in @ids. The first navPoint of
  # each entry links to the bare file, nested ones to file#id.
  def nav_map
    play_order = 0
    @builder.navMap do |xml|
      @ids.each do |entry|
        next unless entry.is_a?(Array)
        @append_id = false
        play_order = nav_point(play_order, 1, *entry)
      end
    end
  end
  ## Writes a navPoint and, up to the configured depth, its children.
  # Returns the last play order that was used.
  def nav_point(play_order, depth, file, id, txt, local_ids=nil, joker=nil)
    ## discard duplicate filenames
    # (a nested call passes the parent's file followed by a complete entry)
    file, id, txt, local_ids = id, txt, local_ids, joker if joker
    play_order += 1
    @builder.navPoint('id' => id, 'playOrder' => play_order) do |xml|
      xml.navLabel { xml.text txt }
      src = @append_id ? sprintf("%s#%s", file, id) : file
      xml.content 'src' => src
      @append_id = true
      if local_ids && depth < @depth
        local_ids.each do |local_id|
          play_order = nav_point(play_order, depth + 1, file, *local_id)
        end
      end
    end
    play_order
  end
  def to_ncx
    @builder.ncx('xmlns'    => "http://www.daisy.org/z3986/2005/ncx/",
                 'xml:lang' => EBPS.config.language,
                 'version'  => "2005-1") do |xml|
      head
      doc_title
      # xml.docAuthor could follow here
      nav_map
      # possibly we need a page_list
      # and maybe even a nav_list
    end
  end
end
297
# Builds the OPF package document: metadata, manifest, spine and guide.
class OpfFactory < XmlFactory
  def guide
    @builder.guide do |xml|
      xml.reference 'type' => 'toc', 'title' => EBPS.config.guide_index,
                    'href' => 'toc.html'
    end
  end
  ## Lists every file in the working directory that has an extension.
  # HTML files must be known from @ids; anything that is not html, ncx or
  # css is declared as an image.
  def manifest
    imgs = 0
    table = Hash[@ids.collect { |entry| entry[0,2] }]
    cover = File.basename(EBPS.config.cover)
    css = File.basename(EBPS.config.stylesheet)
    @builder.manifest do |xml|
      Dir.foreach(@tmpdir) do |file|
        match = /\.([^\.]+)$/.match(file)
        next unless match
        type = match[1]
        case type
        when /html?/
          id = table[file] or raise "Unidentified Html-File '#{file}'"
          xml.item 'id' => id, 'href' => file,
                   'media-type' => 'application/xhtml+xml'
        when 'ncx'
          xml.item 'id' => 'toc', 'href' => file,
                   'media-type' => 'application/x-dtbncx+xml'
        when 'css'
          xml.item 'id' => 'css', 'href' => file,
                   'media-type' => 'text/css'
        else
          id = case file
               when cover then 'cover'
               when css then 'css'
               else
                 imgs += 1
                 "image#{imgs}"
               end
          xml.item 'id' => id, 'href' => file,
                   'media-type' => "image/#{type.sub('jpg', 'jpeg')}"
        end
      end
    end
  end
  def metadata
    @builder.metadata('xmlns:dc' => 'http://purl.org/dc/elements/1.1/',
                      'xmlns:opf'=> 'http://www.idpf.org/2007/opf') do |xml|
      xml.dc :title, EBPS.config.title || @subject.first.title
      xml.dc :language, EBPS.config.language
      xml.dc :identifier, @uid, 'id' => 'uid'
      xml.dc :date, Date.today.strftime('%Y-%m-%d')
      author = EBPS.config.author
      xml.dc :creator, author if author
      xml.meta 'name' => 'cover', 'content' => 'cover'
    end
  end
  ## One itemref per entry in @ids that has an id.
  def spine
    @builder.spine('toc' => 'toc') do |xml|
      @ids.each do |_file, id|
        xml.itemref 'idref' => id if id
      end
    end
  end
  def to_opf
    @builder.package('version' => '2.0',
                     'xmlns' => 'http://www.idpf.org/2007/opf',
                     'unique-identifier' => 'uid') do |xml|
      metadata
      manifest
      spine
      guide
    end
  end
end
373
# Wraps the image file +appendix+ in a one-picture document, writes it as
# appendix.html into +tmpdir+ and appends its entry to +ids+.
def self.add_appendix(appendix, ids, tmpdir)
  pic = Text::Picture.new
  # Image data is binary: read it in binary mode so that no newline
  # conversion or text encoding is applied.
  pic << File.open(appendix, 'rb') { |fh| fh.read }
  chp = Text::Chapter.new
  chp.add_paragraph pic
  doc = Text::Document.new
  doc.add_chapter chp
  local_ids = []
  html, id = document_to_html doc, local_ids, tmpdir
  name = 'appendix.html'
  write tmpdir, name, html
  ids.push [name, id, '', local_ids]
end
386
# Runs kindlegen on +opf+ inside +tmpdir+, writing mobipocket.azw there, and
# restores the working directory afterwards. Returns what Kernel#system
# returned.
def self.compile_azw(tmpdir, opf)
  Dir.chdir(tmpdir) do
    settings = EBPS.config
    opf_file = File.join(tmpdir, opf)
    # kindlegen returns status -1 if a warning has been issued, so system's
    # return value is no indication of success.
    system "#{settings.kindlegen_path} #{opf_file} #{settings.kindlegen_args} -o mobipocket.azw"
  end
end
398
# Packs the regular files in +tmpdir+ into an EPUB container.
#
# tmpdir:: directory whose files are stored below OPS/
# name::   file name of the OPF package, referenced from container.xml
# target:: path of the archive to write; defaults to the formerly
#          hard-coded '/tmp/test.epub'
def self.compile_epub(tmpdir, name, target='/tmp/test.epub')
  # Assembled line by line so that the XML declaration is the very first
  # content of the entry, independent of source indentation.
  container = [
    '<?xml version="1.0" encoding="UTF-8" ?>',
    '<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">',
    '  <rootfiles>',
    %(    <rootfile full-path="OPS/#{name}" media-type="application/oebps-package+xml"/>),
    '  </rootfiles>',
    '</container>',
    '',
  ].join("\n")
  Zip::ZipOutputStream.open(target) do |zh|
    # NOTE(review): EPUB readers may expect 'mimetype' to be stored
    # uncompressed - confirm against the zip library in use.
    zh.put_next_entry('mimetype')
    zh << 'application/epub+zip'
    zh.put_next_entry('META-INF/container.xml')
    zh << container
    # sorted, so the archive layout does not depend on directory order
    Dir.entries(tmpdir).sort.each do |entry|
      path = File.join tmpdir, entry
      if File.ftype(path) == 'file'
        zh.put_next_entry("OPS/#{entry}")
        # binary read: the directory also holds images
        zh << File.open(path, 'rb') { |fh| fh.read }
      end
    end
  end
end
420
# Returns the HTML file name for +doc+: the running number +count+ followed
# by the first 32 characters of the title ('part' when the title is empty),
# passed through filename.
def self.document_filename(doc, count)
  label = doc.title.empty? ? 'part' : doc.title.dup
  filename("%i %s.html" % [count, label[0,32]])
end
425
# Renders +doc+ with an HtmlFactory and returns the markup together with the
# uid the factory generated.
def self.document_to_html(doc, ids, tmpdir)
  renderer = HtmlFactory.new(doc, ids, tmpdir)
  markup = renderer.to_html
  [markup, renderer.uid]
end
429
# Writes the complete OEBPS file set for +docs+ into a working directory
# (+override_tmpdir+ if given, a temporary directory otherwise), then yields
# that directory and the name of the OPF file. Files are written in this
# order: document pages, indices, NCX, cover, stylesheet, optional appendix,
# OPF. +target+ is not used here; the block decides what to do with the
# result. Returns the value of the block.
def self.export(docs, target, override_tmpdir=nil)
  ids = []
  with_tmpdir do |scratch|
    workdir = override_tmpdir || scratch
    settings = EBPS.config
    # HTML
    docs = docs.sort_by { |doc| doc.title.sortable } if settings.sort
    docs.each_with_index do |doc, idx|
      local_ids = []
      html, id = document_to_html(doc, local_ids, workdir)
      name = document_filename(doc, idx + 1)
      write(workdir, name, html)
      ids.push [name, id, doc.title, local_ids]
    end
    # Indices
    ncx_ids = write_index(ids, workdir)
    # NCX
    write(workdir, 'toc.ncx', to_ncx(docs, ncx_ids, workdir))
    # Copy the cover and the stylesheet
    [settings.cover, settings.stylesheet].each do |asset|
      FileUtils.cp asset, File.join(workdir, File.basename(asset))
    end
    # Appendix
    appendix = settings.appendix
    add_appendix(appendix, ids, workdir) if appendix
    # OPF
    title = settings.title || docs.first.title
    opf_name = filename("%s.opf" % title)
    write(workdir, opf_name, to_opf(docs, ids, workdir))
    yield workdir, opf_name
  end
end
470
# Turns +name+ into a file name: each run of whitespace becomes one
# underscore, then every character other than digits, a-z (case-insensitive),
# '_' and '.' is dropped.
def self.filename(name)
  underscored = name.gsub(/\s+/u, '_')
  underscored.gsub(/[^0-9a-z_.]/iu, '')
end
473
# Renders the index page identified by +key+ for the entries in +keys+.
def self.to_index(key, keys, ids, tmpdir)
  index = IndexFactory.new(key, keys, ids, tmpdir)
  index.to_html
end
476
# Renders the NCX document for +docs+ using the navigation entries in +ids+.
def self.to_ncx(docs, ids, tmpdir)
  navigation = NcxFactory.new(docs, ids, tmpdir)
  navigation.to_ncx
end
479
# Renders the OPF package document for +docs+ and the files listed in +ids+.
def self.to_opf(docs, ids, tmpdir)
  package = OpfFactory.new(docs, ids, tmpdir)
  package.to_opf
end
482
# Creates a temporary directory, passes it to the block and returns the
# block's value. The directory is removed afterwards unless
# EBPS.config.keep_tmpfiles is set.
def self.with_tmpdir(&block)
  tmpdir = Dir.mktmpdir
  block.call tmpdir
ensure
  # tmpdir is nil when Dir.mktmpdir itself failed; calling rm_r with nil
  # would raise and hide that failure.
  FileUtils.rm_r tmpdir if tmpdir && !EBPS.config.keep_tmpfiles
end
488
# Writes +data+ (via puts, in binary mode) to the file +name+ in +tmpdir+,
# creating the directory if necessary. Returns the path of the file.
def self.write(tmpdir, name, data)
  FileUtils.mkdir_p(tmpdir)
  destination = File.join(tmpdir, name)
  File.open(destination, 'wb') { |fh| fh.puts data }
  destination
end
494
# Groups the entries of +ids+ by the first character of their sortable title
# and returns the navigation entries for the NCX, letters a-z first.
#
# With html_index enabled an index page per group and toc.html are written,
# and +ids+ is modified in place: toc.html and the index pages are put in
# front of the document entries. With kindle_quirks the document entries are
# appended flat after the group entries instead of being nested in them.
def self.write_index(ids, tmpdir)
  settings = EBPS.config
  groups = ids.group_by { |entry| entry[2].sortable[0,1] }
  ## letters sort by themselves, everything else after 'z'
  ordered = groups.sort_by { |key, _| /[a-z]/.match(key) ? key : "{#{key}" }
  ncx_ids = []
  waypoints = []
  ordered.each do |key, key_ids|
    if settings.html_index
      name = "#{key}.html"
      write tmpdir, name, to_index(key, key_ids, ids, tmpdir)
    else
      name = key_ids.first[0]
    end
    nav_point = [ name, key, "#{key.upcase} (#{key_ids.size})" ]
    ncx_ids.push nav_point
    ## Kindle does not like nested nav_points for content that is not
    #  contained in the same file.
    if settings.kindle_quirks
      waypoints.concat key_ids
    else
      nav_point.push key_ids
    end
  end
  if settings.html_index
    write tmpdir, 'toc.html', to_index('toc-html', ncx_ids, ids, tmpdir)
    ids.replace(ncx_ids + ids)
    ids.unshift [ 'toc.html', 'toc-html', settings.index_name ]
  end
  ncx_ids.concat waypoints if settings.kindle_quirks
  ncx_ids
end
535
+ end
536
+ end
537
+ end