bookmaker 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,137 @@
1
module Bookmaker
  module Parser
    # Builds an EPUB file out of the HTML document found at +html_path+.
    #
    # Every <tt>div.section</tt> of that document is written to its own
    # file below +tmp_dir+, a table of contents is written next to them,
    # and everything is handed to EeePub together with the assets.
    class Epub < Base
      # Returns one OpenStruct per <tt>div.section</tt> element, carrying
      # its index, target file name, absolute target path and the parsed
      # inner HTML. The list is memoized.
      def sections
        @sections ||= html.css("div.section").each_with_index.map do |chapter, index|
          filename = "section_#{index}.html"

          OpenStruct.new(
            :index => index,
            :filename => filename,
            :filepath => tmp_dir.join(filename).to_s,
            :html => Nokogiri::HTML(chapter.inner_html)
          )
        end
      end

      # Memoized EeePub builder.
      def epub
        @epub ||= EeePub.make
      end

      # Memoized Nokogiri document read from +html_path+.
      def html
        @html ||= Nokogiri::HTML(html_path.read)
      end

      # Fills in the metadata from +config+, writes the section and TOC
      # files and saves the EPUB to +epub_path+.
      #
      # Returns +true+ on success. Any StandardError is printed and turned
      # into +false+; signals and exit requests are no longer swallowed.
      def parse
        epub.title config["title"]
        epub.language config["language"]
        epub.creator config["authors"].to_sentence
        epub.publisher config["publisher"]
        epub.date config["published_at"]
        epub.uid config["uid"]
        epub.identifier config["identifier"]["id"], :scheme => config["identifier"]["type"]

        # Look the cover up once instead of globbing for every check.
        cover = cover_image
        epub.cover_page cover if cover

        write_sections!
        write_toc!

        epub.files sections.map(&:filepath) + assets
        epub.nav navigation

        epub.save(epub_path)
        true
      rescue StandardError => error
        p error, error.backtrace
        false
      end

      # Writes the table of contents built from +navigation+ to +toc_path+.
      def write_toc!
        toc = TOC::Epub.new(navigation)

        # Do not rely on write_sections! having created the directory;
        # with zero sections it would never have been created.
        FileUtils.mkdir_p(tmp_dir)

        File.open(toc_path, "w") do |file|
          file << toc.to_html
        end
      end

      # Rewrites in-document links and +src+ attributes of every section
      # and saves each rendered section to its +filepath+.
      def write_sections!
        # First we need to get all ids, which are used as
        # the anchor target.
        links = anchor_targets

        FileUtils.mkdir_p(tmp_dir)

        sections.each do |section|
          relink_anchors(section.html, links)
          normalize_sources(section.html)

          # Save file to disk.
          File.open(section.filepath, "w") do |file|
            body = section.html.css("body").to_xhtml.gsub(%r[<body>(.*?)</body>]m, "\\1")
            file << render_chapter(body)
          end
        end
      end

      # Renders +content+ through the page template, exposing every
      # configuration entry plus <tt>:content</tt> as locals.
      def render_chapter(content)
        locals = config.merge(:content => content)
        render_template(template_path, locals)
      end

      # Memoized list of stylesheet and image paths to bundle.
      def assets
        @assets ||= Dir[root_dir.join("templates/epub/*.css").to_s] +
                    Dir[root_dir.join("images/**/*.{jpg,png,gif}").to_s]
      end

      # Returns the path of the first cover image matched by the glob,
      # or +nil+ when there is none.
      def cover_image
        Dir[root_dir.join("templates/epub/cover.{jpg,png,gif}").to_s].first
      end

      # Returns one <tt>{:label, :content}</tt> hash per section; the label
      # is the text of the section's first +h2+.
      def navigation
        sections.map do |section|
          {
            :label => section.html.css("h2:first-of-type").text,
            :content => section.filename
          }
        end
      end

      def template_path
        root_dir.join("templates/epub/page.erb")
      end

      def html_path
        root_dir.join("output/#{name}.html")
      end

      def epub_path
        root_dir.join("output/#{name}.epub")
      end

      def tmp_dir
        root_dir.join("output/tmp")
      end

      def toc_path
        tmp_dir.join("toc.html")
      end

      private

      # Maps every "#id" found in any section to "<section file>#id".
      def anchor_targets
        sections.each_with_object({}) do |section, targets|
          section.html.css("[id]").each do |element|
            anchor = "##{element["id"]}"
            targets[anchor] = "#{section.filename}#{anchor}"
          end
        end
      end

      # Points every fragment-only link at the file that holds its target;
      # links without a known target are left unchanged.
      def relink_anchors(document, links)
        document.css("a[href^='#']").each do |link|
          href = link["href"]
          link.set_attribute("href", links.fetch(href, href))
        end
      end

      # Turns every element carrying a +src+ into an +img+ that refers to
      # the bare file name, swapping a trailing .svg for .png.
      def normalize_sources(document)
        document.css("[src]").each do |element|
          src = File.basename(element["src"]).gsub(/\.svg$/, ".png")
          element.set_attribute("src", src)
          element.set_attribute("alt", "")
          element.node_name = "img"
        end
      end
    end
  end
end
@@ -0,0 +1,199 @@
1
+ require 'kramdown'
2
+ module Bookmaker
3
+ module Parser
4
# Renders the book's Markdown sources into a single HTML file at
# <tt>output/<name>.html</tt>.
class HTML < Base
  # Returns an array of raw Markdown chunks: for every chapter key in
  # +entries+ an <tt>h2</tt> heading (wrapped in a kramdown nomarkdown
  # block) followed by each of its sections, each section terminated by
  # a <tt>* * *</tt> rule.
  def content
    raw = []
    entries.keys.each do |chapter|
      # The heading is the part after the first underscore of the key,
      # with dashes shown as spaces.
      title = chapter.split(/_/)[1].gsub('-',' ')
      raw << "{::nomarkdown}<h2>#{title}</h2>{:/}"
      entries[chapter].each do |section|
        raw << read_content(section)[0] + "\n\n* * *"
      end
    end
    raw
  end

  # Converts the content, generates the table of contents, renders the
  # layout template and writes the result to disk.
  #
  # Returns +true+ on success. Any StandardError is printed and turned
  # into +false+; signals and exit requests are no longer swallowed.
  def parse
    html = parse_layout(content)
    toc = TOC::HTML.generate(html)
    locals = config.merge({
      :contents => toc.content,
      :toc => toc.to_html,
    })
    output = render_template(root_dir.join("templates/html/layout.erb"), locals)
    # Block form closes the file, so the output is flushed before any
    # later step reads it.
    File.open(root_dir.join("output/#{name}.html"), 'w') do |file|
      file.write(output)
    end
    true
  rescue StandardError => error
    p error, error.backtrace
    false
  end

  # Converts every chunk of +text+ with Kramdown and wraps each result
  # in a <tt>div.section</tt>; returns the concatenated HTML string.
  def parse_layout(text)
    text.map do |chunk|
      "<div class='section'>\n#{Kramdown::Document.new(chunk).to_html}\n</div>\n"
    end.join
  end
end
37
+ # # List of directories that should be skipped.
38
+ # #
39
+ # IGNORE_DIR = %w[. .. .svn]
40
+ #
41
+ # # Files that should be skipped.
42
+ # #
43
+ # IGNORE_FILES = /^(CHANGELOG|TOC)\..*?$/
44
+ #
45
+ # # List of recognized extensions.
46
+ # #
47
+ # EXTENSIONS = %w[md mkdn markdown]
48
+ #
49
+ # class << self
50
+ # # The footnote index control. We have to manipulate footnotes
51
+ # # because each chapter starts from 1, so we have duplicated references.
52
+ # #
53
+ # attr_accessor :footnote_index
54
+ # end
55
+ #
56
+ # # Parse all files and save the parsed content
57
+ # # to <tt>output/book_name.html</tt>.
58
+ # #
59
+ # def parse
60
+ # reset_footnote_index!
61
+ #
62
+ # # File.open(root_dir.join("output/#{name}.html"), "w") do |file|
63
+ # # file << parse_layout(content)
64
+ # # end
65
+ # true
66
+ # rescue Exception
67
+ # false
68
+ # end
69
+ #
70
+ # def reset_footnote_index!
71
+ # self.class.footnote_index = 1
72
+ # end
73
+ #
74
+ # private
75
+ # def chapter_files(entry)
76
+ # # Chapters can be files outside a directory.
77
+ # if File.file?(entry)
78
+ # [entry]
79
+ # else
80
+ # Dir.glob("#{entry}/**/*.{#{EXTENSIONS.join(",")}}").sort
81
+ # end
82
+ # end
83
+ #
84
+ # # Check if path is a valid entry.
85
+ # # Files/directories that start with a dot or underscore will be skipped.
86
+ # #
87
+ # def valid_entry?(entry)
88
+ # entry !~ /^(\.|_)/ && (valid_directory?(entry) || valid_file?(entry))
89
+ # end
90
+ #
91
+ # # Check if path is a valid directory.
92
+ # #
93
+ # def valid_directory?(entry)
94
+ # File.directory?(source.join(entry)) && !IGNORE_DIR.include?(File.basename(entry))
95
+ # end
96
+ #
97
+ # # Check if path is a valid file.
98
+ # #
99
+ # def valid_file?(entry)
100
+ # ext = File.extname(entry).gsub(/\./, "").downcase
101
+ # File.file?(source.join(entry)) && EXTENSIONS.include?(ext) && entry !~ IGNORE_FILES
102
+ # end
103
+ #
104
+ # # Render +file+ considering its extension.
105
+ # #
106
+ # def render_file(file, plain_syntax = false)
107
+ # file_format = format(file)
108
+ #
109
+ # content = Bookmaker::Syntax.render(root_dir, file_format, File.read(file), plain_syntax)
110
+ #
111
+ # content = case file_format
112
+ # when :markdown
113
+ # Markdown.to_html(content)
114
+ # when :textile
115
+ # RedCloth.convert(content)
116
+ # else
117
+ # content
118
+ # end
119
+ #
120
+ # render_footnotes(content, plain_syntax)
121
+ # end
122
+ #
123
+ # def render_footnotes(content, plain_syntax = false)
124
+ # html = Nokogiri::HTML(content)
125
+ # footnotes = html.css("p[id^='fn']")
126
+ #
127
+ # return content if footnotes.empty?
128
+ #
129
+ # reset_footnote_index! unless self.class.footnote_index
130
+ #
131
+ # footnotes.each do |fn|
132
+ # index = self.class.footnote_index
133
+ # actual_index = fn["id"].gsub(/[^\d]/, "")
134
+ #
135
+ # fn.set_attribute("id", "_fn#{index}")
136
+ #
137
+ # html.css("a[href='#fn#{actual_index}']").each do |link|
138
+ # link.set_attribute("href", "#_fn#{index}")
139
+ # end
140
+ #
141
+ # html.css("a[href='#fnr#{actual_index}']").each do |link|
142
+ # link.set_attribute("href", "#_fnr#{index}")
143
+ # end
144
+ #
145
+ # html.css("[id=fnr#{actual_index}]").each do |tag|
146
+ # tag.set_attribute("id", "_fnr#{index}")
147
+ # end
148
+ #
149
+ # self.class.footnote_index += 1
150
+ # end
151
+ #
152
+ # html.css("body").inner_html
153
+ # end
154
+ #
155
+ # def format(file)
156
+ # case File.extname(file).downcase
157
+ # when ".markdown", ".mkdn", ".md"
158
+ # :markdown
159
+ # when ".textile"
160
+ # :textile
161
+ # else
162
+ # :html
163
+ # end
164
+ # end
165
+ #
166
+ # # Parse layout file, making available all configuration entries.
167
+ # #
168
+ # def parse_layout(html)
169
+ # puts "parse layout."
170
+ # toc = TOC::HTML.generate(html)
171
+ # locals = config.merge({
172
+ # :content => toc.content,
173
+ # :toc => toc.to_html,
174
+ # :changelog => render_changelog
175
+ # })
176
+ # render_template(root_dir.join("templates/html/layout.erb"), locals)
177
+ # end
178
+ #
179
+ # # Render changelog file.
180
+ # # This file can be used to inform any book change.
181
+ # #
182
+ # def render_changelog
183
+ # changelog = Dir[root_dir.join("text/CHANGELOG.*")].first
184
+ # return render_file(changelog) if changelog
185
+ # nil
186
+ # end
187
+ #
188
+ # # Render all +files+ from a given chapter.
189
+ # #
190
+ # def render_chapter(files, plain_syntax = false)
191
+ # String.new.tap do |chapter|
192
+ # files.each do |file|
193
+ # chapter << render_file(file, plain_syntax) << "\n\n"
194
+ # end
195
+ # end
196
+ # end
197
+ # end
198
+ end
199
+ end
@@ -0,0 +1,14 @@
1
module Bookmaker
  module Parser
    # Produces the Kindle edition by handing the generated EPUB to the
    # +kindlegen+ command.
    class Mobi < Base
      # Passes the kindlegen command line to +spawn_command+ and returns
      # +true+ regardless of what that call returns.
      def parse
        command = ["kindlegen", epub_file.to_s]
        spawn_command command
        true
      end

      # Path of the EPUB file used as kindlegen input.
      def epub_file
        root_dir.join("output/#{name}.epub")
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'kramdown'
2
+
3
module Bookmaker
  module Parser
    # Renders the book's Markdown sources to LaTeX and runs +xelatex+ on
    # the result to produce <tt>output/<name>.pdf</tt>.
    class PDF < Base
      # Returns an array of raw Markdown chunks: for every chapter key in
      # +entries+ a <tt>\Chapter{...}</tt> command (wrapped in a kramdown
      # nomarkdown block) followed by each of its sections, each section
      # terminated by a <tt>* * *</tt> rule.
      def content
        raw = []
        entries.keys.each do |chapter|
          # The title is the part after the first underscore of the key,
          # with dashes shown as spaces.
          title = chapter.split(/_/)[1].gsub('-',' ')
          raw << "{::nomarkdown}\\Chapter{#{title}}{:/}"
          entries[chapter].each do |section|
            raw << read_content(section)[0] + "\n\n* * *"
          end
        end
        raw
      end

      # Writes the rendered LaTeX to +tex_file+, runs xelatex twice, then
      # removes the auxiliary files and moves the PDF into +output+.
      #
      # Returns +true+ on success. Any StandardError is printed and turned
      # into +false+; signals and exit requests are no longer swallowed.
      def parse
        locals = config.merge({ :contents => parse_layout(content) })

        # merge copies the hash but not its values: build a new string so
        # the copyright text held by +config+ is not rewritten in place.
        copyright = locals['copyright']
        locals['copyright'] = copyright.gsub("(C)", "\\copyright{}") if copyright

        output = render_template(root_dir.join("templates/pdf/layout.erb"), locals)

        # Block form closes (and therefore flushes) the file before
        # xelatex reads it. tex_file is already rooted at root_dir.
        File.open(tex_file, 'w') { |file| file.write(output) }

        spawn_command ["xelatex", tex_file.to_s,]
        spawn_command ["xelatex", tex_file.to_s,]
        spawn_command ["rm *.glo *.idx *.log *.out *.toc *aux"]
        spawn_command ["mv #{name}.pdf output/#{name}.pdf"]
        true
      rescue StandardError => error
        p error, error.backtrace
        false
      end

      # Joins the chunks of +text+, turns every <tt>* * *</tt> rule into a
      # <tt>\pbreak</tt> command and converts the result to LaTeX.
      def parse_layout(text)
        source = text.join("\n\n")
        source.gsub!('* * *', "\n\n{::nomarkdown}\\pbreak{:/}\n\n")
        Kramdown::Document.new(source).to_latex
      end

      # Path of the intermediate LaTeX file.
      def tex_file
        root_dir.join("output/#{name}.tex")
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module Bookmaker
  # Lazily computed figures about a book: word count plus the number of
  # elements matching a handful of CSS selectors. Every value is memoized.
  class Stats
    attr_reader :root_dir

    def initialize(root_dir)
      @root_dir = root_dir
    end

    # Plain text of the parsed document.
    def text
      @text ||= html.text
    end

    # Nokogiri document built from +content+.
    def html
      @html ||= Nokogiri::HTML(content)
    end

    # Number of whitespace-separated tokens in +text+.
    def words
      @words ||= text.split(" ").length
    end

    def chapters
      @chapters ||= count_of(".chapter")
    end

    def images
      @images ||= count_of("img")
    end

    def footnotes
      @footnotes ||= count_of("p.footnote")
    end

    # Counts elements whose +href+ starts with "http".
    def links
      @links ||= count_of("[href^='http']")
    end

    def code_blocks
      @code_blocks ||= count_of("pre")
    end

    # Content produced by the HTML parser for +root_dir+.
    # NOTE(review): the HTML parser in this package appears to return an
    # Array of Markdown chunks here, not an HTML string -- confirm.
    def content
      @content ||= Parser::HTML.new(root_dir).content
    end

    private

    # Number of nodes in +html+ matching +selector+.
    def count_of(selector)
      html.css(selector).size
    end
  end
end
@@ -0,0 +1,27 @@
1
module Bookmaker
  # Walks a parsed HTML document depth-first and notifies a listener
  # about every heading element (h1 to h6) it meets.
  class Stream
    # Matches heading tag names only. Anchored so that names which merely
    # contain "h1".."h6" are not reported as headings.
    HEADING_NAME = /\Ah[1-6]\z/

    attr_accessor :listener, :content
    attr_reader :html

    # content  - HTML source, parsed with Nokogiri.
    # listener - object that receives +tag+ with each heading node.
    def initialize(content, listener)
      @content = content
      @listener = listener
      @html = Nokogiri::HTML.parse(content)
    end

    # Traverses the whole document starting at its root.
    def parse
      traverse(html)
    end

    # Visits the children of +node+ in document order, emitting each
    # child before descending into it.
    def traverse(node)
      node.children.each do |child|
        emit(child)
        traverse(child)
      end
    end

    # Sends +tag+ to the listener when +node+ is a heading element.
    # +send+ is kept so that a non-public +tag+ handler still works.
    def emit(node)
      listener.send(:tag, node) if node.name =~ HEADING_NAME
    end
  end
end