bookmaker 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,137 @@
1
+ module Bookmaker
2
+ module Parser
3
# Builds <tt>output/book_name.epub</tt> out of the HTML file stored at
# <tt>output/book_name.html</tt>.
#
class Epub < Base
  # Return one entry per <tt>div.section</tt> element of the HTML output.
  # Every entry responds to +index+, +filename+, +filepath+ and +html+
  # (the section's inner HTML parsed as a document of its own).
  # The list is built on first use and memoized.
  #
  def sections
    @sections ||= html.css("div.section").each_with_index.map do |chapter, index|
      filename = "section_#{index}.html"

      OpenStruct.new(
        :index    => index,
        :filename => filename,
        :filepath => tmp_dir.join(filename).to_s,
        :html     => Nokogiri::HTML(chapter.inner_html)
      )
    end
  end

  # The EeePub builder, created on first use.
  #
  def epub
    @epub ||= EeePub.make
  end

  # The HTML output parsed by Nokogiri, read from +html_path+ on first use.
  #
  def html
    @html ||= Nokogiri::HTML(html_path.read)
  end

  # Set the e-book metadata from +config+, write all sections and the
  # table of contents to +tmp_dir+ and save the result to +epub_path+.
  #
  # Returns +true+ on success. When a StandardError is raised, the error
  # and its backtrace are printed and +false+ is returned. Signals and
  # exit requests are not rescued, so the process can still be stopped.
  #
  def parse
    epub.title      config["title"]
    epub.language   config["language"]
    epub.creator    config["authors"].to_sentence
    epub.publisher  config["publisher"]
    epub.date       config["published_at"]
    epub.uid        config["uid"]
    epub.identifier config["identifier"]["id"], :scheme => config["identifier"]["type"]

    # cover_image only returns a path that exists, so look it up once.
    cover = cover_image
    epub.cover_page cover if cover

    write_sections!
    write_toc!

    epub.files sections.map(&:filepath) + assets
    epub.nav navigation

    epub.save(epub_path)
    true
  rescue StandardError => error
    p error, error.backtrace
    false
  end

  # Render the table of contents and save it to +toc_path+.
  #
  def write_toc!
    # The directory may not exist yet when the book has no sections.
    FileUtils.mkdir_p(tmp_dir)

    toc = TOC::Epub.new(navigation)
    File.open(toc_path, "w") do |file|
      file << toc.to_html
    end
  end

  # Rewrite links and sources of every section and save each
  # section to its own file inside +tmp_dir+.
  #
  def write_sections!
    # First we need to get all ids, which are used as
    # the anchor target. The hash maps "#id" to "section_N.html#id".
    links = sections.each_with_object({}) do |section, buffer|
      section.html.css("[id]").each do |element|
        anchor = "##{element["id"]}"
        buffer[anchor] = "#{section.filename}#{anchor}"
      end
    end

    # All sections are saved to the same directory; create it once.
    FileUtils.mkdir_p(tmp_dir)

    # Then we can normalize all links and
    # manipulate other paths.
    #
    sections.each do |section|
      # Links to unknown anchors are kept as they are.
      section.html.css("a[href^='#']").each do |link|
        href = link["href"]
        link.set_attribute("href", links.fetch(href, href))
      end

      # Replace all srcs: keep only the file name, point SVG files
      # to a PNG with the same name and turn the element into an image.
      #
      section.html.css("[src]").each do |element|
        src = File.basename(element["src"]).sub(/\.svg\z/, ".png")
        element.set_attribute("src", src)
        element.set_attribute("alt", "")
        element.node_name = "img"
      end

      # Save file to disk, without the wrapping body tag.
      #
      File.open(section.filepath, "w") do |file|
        body = section.html.css("body").to_xhtml.gsub(%r[<body>(.*?)</body>]m, "\\1")
        file << render_chapter(body)
      end
    end
  end

  # Render +content+ with the page template, making all
  # configuration entries available to it.
  #
  def render_chapter(content)
    locals = config.merge(:content => content)
    render_template(template_path, locals)
  end

  # Stylesheets from <tt>templates/epub</tt> plus every jpg, png and gif
  # found under <tt>images</tt>. The list is memoized.
  #
  def assets
    @assets ||= Dir[root_dir.join("templates/epub/*.css").to_s] +
                Dir[root_dir.join("images/**/*.{jpg,png,gif}").to_s]
  end

  # Path of the first <tt>templates/epub/cover.{jpg,png,gif}</tt> file
  # that exists, or +nil+ when there is none.
  #
  def cover_image
    path = Dir[root_dir.join("templates/epub/cover.{jpg,png,gif}").to_s].first
    return path if path && File.exist?(path)
  end

  # Navigation entries, one per section: the text of its
  # <tt>h2:first-of-type</tt> match and the section file name.
  #
  def navigation
    sections.map do |section|
      {
        :label   => section.html.css("h2:first-of-type").text,
        :content => section.filename
      }
    end
  end

  def template_path
    root_dir.join("templates/epub/page.erb")
  end

  def html_path
    root_dir.join("output/#{name}.html")
  end

  def epub_path
    root_dir.join("output/#{name}.epub")
  end

  def tmp_dir
    root_dir.join("output/tmp")
  end

  def toc_path
    tmp_dir.join("toc.html")
  end
end
136
+ end
137
+ end
@@ -0,0 +1,199 @@
1
+ require 'kramdown'
2
+ module Bookmaker
3
+ module Parser
4
# Renders the book entries with Kramdown and saves the result
# to <tt>output/book_name.html</tt>.
#
class HTML < Base
  # Return an Array of Markdown strings. For every chapter key in
  # +entries+ there is one raw <tt>h2</tt> heading, followed by one
  # string per section of that chapter, each ending with a
  # <tt>* * *</tt> rule.
  #
  # The heading text is the second <tt>_</tt>-separated part of the
  # chapter key, with dashes replaced by spaces.
  #
  def content
    entries.keys.flat_map do |chapter|
      heading = "{::nomarkdown}<h2>#{chapter.split(/_/)[1].gsub('-',' ')}</h2>{:/}"
      bodies  = entries[chapter].map do |section|
        read_content(section)[0] + "\n\n* * *"
      end

      [heading] + bodies
    end
  end

  # Render the whole book through the HTML layout and save it
  # to <tt>output/book_name.html</tt>.
  #
  # Returns +true+ on success. When a StandardError is raised, the error
  # and its backtrace are printed and +false+ is returned. Signals and
  # exit requests are not rescued.
  #
  def parse
    toc = TOC::HTML.generate(parse_layout(content))
    locals = config.merge({
      :contents => toc.content,
      :toc      => toc.to_html
    })
    output = render_template(root_dir.join("templates/html/layout.erb"), locals)

    # The block form closes (and therefore flushes) the file before
    # returning, so the complete output is on disk when this method ends.
    File.open(root_dir.join("output/#{name}.html"), 'w') do |file|
      file.write(output)
    end
    true
  rescue StandardError => error
    p error, error.backtrace
    false
  end

  # Convert every Markdown string of +text+ to HTML, wrap each one in
  # a <tt>div.section</tt> element and return everything as one String.
  #
  def parse_layout(text)
    text.map do |markdown|
      "<div class='section'>\n#{Kramdown::Document.new(markdown).to_html}\n</div>\n"
    end.join
  end
end
37
+ # # List of directories that should be skipped.
38
+ # #
39
+ # IGNORE_DIR = %w[. .. .svn]
40
+ #
41
+ # # Files that should be skipped.
42
+ # #
43
+ # IGNORE_FILES = /^(CHANGELOG|TOC)\..*?$/
44
+ #
45
+ # # List of recognized extensions.
46
+ # #
47
+ # EXTENSIONS = %w[md mkdn markdown]
48
+ #
49
+ # class << self
50
+ # # The footnote index control. We have to manipulate footnotes
51
+ # # because each chapter starts from 1, so we have duplicated references.
52
+ # #
53
+ # attr_accessor :footnote_index
54
+ # end
55
+ #
56
+ # # Parse all files and save the parsed content
57
+ # # to <tt>output/book_name.html</tt>.
58
+ # #
59
+ # def parse
60
+ # reset_footnote_index!
61
+ #
62
+ # # File.open(root_dir.join("output/#{name}.html"), "w") do |file|
63
+ # # file << parse_layout(content)
64
+ # # end
65
+ # true
66
+ # rescue Exception
67
+ # false
68
+ # end
69
+ #
70
+ # def reset_footnote_index!
71
+ # self.class.footnote_index = 1
72
+ # end
73
+ #
74
+ # private
75
+ # def chapter_files(entry)
76
+ # # Chapters can be files outside a directory.
77
+ # if File.file?(entry)
78
+ # [entry]
79
+ # else
80
+ # Dir.glob("#{entry}/**/*.{#{EXTENSIONS.join(",")}}").sort
81
+ # end
82
+ # end
83
+ #
84
+ # # Check if path is a valid entry.
85
+ # # Files/directories that start with a dot or underscore will be skipped.
86
+ # #
87
+ # def valid_entry?(entry)
88
+ # entry !~ /^(\.|_)/ && (valid_directory?(entry) || valid_file?(entry))
89
+ # end
90
+ #
91
+ # # Check if path is a valid directory.
92
+ # #
93
+ # def valid_directory?(entry)
94
+ # File.directory?(source.join(entry)) && !IGNORE_DIR.include?(File.basename(entry))
95
+ # end
96
+ #
97
+ # # Check if path is a valid file.
98
+ # #
99
+ # def valid_file?(entry)
100
+ # ext = File.extname(entry).gsub(/\./, "").downcase
101
+ # File.file?(source.join(entry)) && EXTENSIONS.include?(ext) && entry !~ IGNORE_FILES
102
+ # end
103
+ #
104
+ # # Render +file+ considering its extension.
105
+ # #
106
+ # def render_file(file, plain_syntax = false)
107
+ # file_format = format(file)
108
+ #
109
+ # content = Bookmaker::Syntax.render(root_dir, file_format, File.read(file), plain_syntax)
110
+ #
111
+ # content = case file_format
112
+ # when :markdown
113
+ # Markdown.to_html(content)
114
+ # when :textile
115
+ # RedCloth.convert(content)
116
+ # else
117
+ # content
118
+ # end
119
+ #
120
+ # render_footnotes(content, plain_syntax)
121
+ # end
122
+ #
123
+ # def render_footnotes(content, plain_syntax = false)
124
+ # html = Nokogiri::HTML(content)
125
+ # footnotes = html.css("p[id^='fn']")
126
+ #
127
+ # return content if footnotes.empty?
128
+ #
129
+ # reset_footnote_index! unless self.class.footnote_index
130
+ #
131
+ # footnotes.each do |fn|
132
+ # index = self.class.footnote_index
133
+ # actual_index = fn["id"].gsub(/[^\d]/, "")
134
+ #
135
+ # fn.set_attribute("id", "_fn#{index}")
136
+ #
137
+ # html.css("a[href='#fn#{actual_index}']").each do |link|
138
+ # link.set_attribute("href", "#_fn#{index}")
139
+ # end
140
+ #
141
+ # html.css("a[href='#fnr#{actual_index}']").each do |link|
142
+ # link.set_attribute("href", "#_fnr#{index}")
143
+ # end
144
+ #
145
+ # html.css("[id=fnr#{actual_index}]").each do |tag|
146
+ # tag.set_attribute("id", "_fnr#{index}")
147
+ # end
148
+ #
149
+ # self.class.footnote_index += 1
150
+ # end
151
+ #
152
+ # html.css("body").inner_html
153
+ # end
154
+ #
155
+ # def format(file)
156
+ # case File.extname(file).downcase
157
+ # when ".markdown", ".mkdn", ".md"
158
+ # :markdown
159
+ # when ".textile"
160
+ # :textile
161
+ # else
162
+ # :html
163
+ # end
164
+ # end
165
+ #
166
+ # # Parse layout file, making available all configuration entries.
167
+ # #
168
+ # def parse_layout(html)
169
+ # puts "parse layout."
170
+ # toc = TOC::HTML.generate(html)
171
+ # locals = config.merge({
172
+ # :content => toc.content,
173
+ # :toc => toc.to_html,
174
+ # :changelog => render_changelog
175
+ # })
176
+ # render_template(root_dir.join("templates/html/layout.erb"), locals)
177
+ # end
178
+ #
179
+ # # Render changelog file.
180
+ # # This file can be used to inform any book change.
181
+ # #
182
+ # def render_changelog
183
+ # changelog = Dir[root_dir.join("text/CHANGELOG.*")].first
184
+ # return render_file(changelog) if changelog
185
+ # nil
186
+ # end
187
+ #
188
+ # # Render all +files+ from a given chapter.
189
+ # #
190
+ # def render_chapter(files, plain_syntax = false)
191
+ # String.new.tap do |chapter|
192
+ # files.each do |file|
193
+ # chapter << render_file(file, plain_syntax) << "\n\n"
194
+ # end
195
+ # end
196
+ # end
197
+ # end
198
+ end
199
+ end
@@ -0,0 +1,14 @@
1
+ module Bookmaker
2
+ module Parser
3
# Runs the <tt>kindlegen</tt> command on the book's EPUB file.
#
class Mobi < Base
  # Path of the EPUB file handed to <tt>kindlegen</tt>:
  # <tt>output/book_name.epub</tt> below the root directory.
  #
  def epub_file
    root_dir.join("output/#{name}.epub")
  end

  # Spawn <tt>kindlegen</tt> with +epub_file+ as its only argument.
  # Always returns +true+; the result of the command is not inspected.
  #
  def parse
    command = ["kindlegen", epub_file.to_s]
    spawn_command command
    true
  end
end
13
+ end
14
+ end
@@ -0,0 +1,40 @@
1
+ require 'kramdown'
2
+
3
+ module Bookmaker
4
+ module Parser
5
# Converts the book entries to LaTeX with Kramdown and runs
# <tt>xelatex</tt> on the result.
#
class PDF < Base
  # Return an Array of Markdown strings. For every chapter key in
  # +entries+ there is one raw <tt>\Chapter{...}</tt> command, followed
  # by one string per section of that chapter, each ending with a
  # <tt>* * *</tt> rule.
  #
  # The chapter title is the second <tt>_</tt>-separated part of the
  # chapter key, with dashes replaced by spaces.
  #
  def content
    entries.keys.flat_map do |chapter|
      heading = "{::nomarkdown}\\Chapter{#{chapter.split(/_/)[1].gsub('-',' ')}}{:/}"
      bodies  = entries[chapter].map do |section|
        read_content(section)[0] + "\n\n* * *"
      end

      [heading] + bodies
    end
  end

  # Render the LaTeX layout to +tex_file+, run <tt>xelatex</tt> on it
  # and move the resulting PDF to <tt>output/book_name.pdf</tt>.
  #
  # Returns +true+ on success. When a StandardError is raised, the error
  # and its backtrace are printed and +false+ is returned. Signals and
  # exit requests are not rescued.
  #
  def parse
    locals = config.merge({ :contents => parse_layout(content) })

    # Replace the entry instead of changing the string in place: the
    # string object is shared with +config+, and the key may be missing.
    copyright = locals['copyright']
    locals['copyright'] = copyright.gsub("(C)", "\\copyright{}") if copyright

    output = render_template(root_dir.join("templates/pdf/layout.erb"), locals)

    # +tex_file+ already includes the root directory. The block form
    # closes the file, so it is completely written before xelatex runs.
    File.open(tex_file, 'w') do |file|
      file.write(output)
    end

    # NOTE(review): xelatex runs twice, presumably so that the table of
    # contents and references are resolved on the second pass - confirm.
    2.times { spawn_command ["xelatex", tex_file.to_s] }

    # NOTE(review): both commands use paths relative to the current
    # directory; how they are executed depends on spawn_command.
    spawn_command ["rm *.glo *.idx *.log *.out *.toc *aux"]
    spawn_command ["mv #{name}.pdf output/#{name}.pdf"]
    true
  rescue StandardError => error
    p error, error.backtrace
    false
  end

  # Join all Markdown strings of +text+, turn every <tt>* * *</tt> rule
  # into a raw <tt>\pbreak</tt> command and return the LaTeX source
  # produced by Kramdown.
  #
  def parse_layout(text)
    markdown = text.join("\n\n").gsub('* * *', "\n\n{::nomarkdown}\\pbreak{:/}\n\n")
    Kramdown::Document.new(markdown).to_latex
  end

  # Path of the generated LaTeX file: <tt>output/book_name.tex</tt>
  # below the root directory.
  #
  def tex_file
    root_dir.join("output/#{name}.tex")
  end
end
39
+ end
40
+ end
@@ -0,0 +1,45 @@
1
+ module Bookmaker
2
# Counts words, chapters, images, footnotes, links and code blocks
# of the book found at +root_dir+. Every value is computed on first
# use and memoized.
#
class Stats
  attr_reader :root_dir

  def initialize(root_dir)
    @root_dir = root_dir
  end

  # Text of the rendered book, without markup.
  #
  def text
    @text ||= html.text
  end

  # The rendered book parsed by Nokogiri.
  #
  def html
    @html ||= Nokogiri::HTML(content)
  end

  # Number of whitespace-separated words in +text+.
  #
  def words
    @words ||= text.split.size
  end

  # Number of elements with the <tt>chapter</tt> class.
  #
  # NOTE(review): Parser::HTML#parse_layout wraps its output in
  # <tt>div.section</tt> elements; confirm that something still emits
  # the <tt>chapter</tt> class.
  #
  def chapters
    @chapters ||= html.css(".chapter").size
  end

  # Number of <tt>img</tt> elements.
  #
  def images
    @images ||= html.css("img").size
  end

  # Number of <tt>p.footnote</tt> elements.
  #
  def footnotes
    @footnotes ||= html.css("p.footnote").size
  end

  # Number of elements whose +href+ starts with <tt>http</tt>.
  #
  def links
    @links ||= html.css("[href^='http']").size
  end

  # Number of <tt>pre</tt> elements.
  #
  def code_blocks
    @code_blocks ||= html.css("pre").size
  end

  # The book rendered to an HTML String.
  #
  # Parser::HTML#content returns an Array of Markdown strings, which
  # Nokogiri cannot parse, so it is rendered with
  # Parser::HTML#parse_layout first.
  #
  def content
    @content ||= begin
      parser = Parser::HTML.new(root_dir)
      parser.parse_layout(parser.content)
    end
  end
end
45
+ end
@@ -0,0 +1,27 @@
1
+ module Bookmaker
2
# Walks a parsed HTML document and reports every heading element
# (<tt>h1</tt> to <tt>h6</tt>) to a listener.
#
class Stream
  # Matches the whole node name, so that names which merely contain
  # "h1".."h6" (e.g. a custom <tt>graph1</tt> tag) are not reported.
  HEADING = /\Ah[1-6]\z/

  attr_accessor :listener, :content
  attr_reader :html

  # +content+ is the HTML source; +listener+ is the object whose
  # +tag+ method receives every heading node.
  #
  def initialize(content, listener)
    @content = content
    @listener = listener
    @html = Nokogiri::HTML.parse(content)
  end

  # Traverse the whole document.
  #
  def parse
    traverse(html)
  end

  # Visit all descendants of +node+ depth-first: every child is
  # emitted before its own children are traversed.
  #
  def traverse(node)
    node.children.each do |child|
      emit(child)
      traverse(child)
    end
  end

  # Call the listener's +tag+ method with +node+ when it is a heading.
  # +send+ is kept so that a non-public +tag+ method still works.
  #
  def emit(node)
    listener.send(:tag, node) if node.name =~ HEADING
  end
end
27
+ end