docgen 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +66 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/create_database.sh +1 -0
- data/create_database.sql +55 -0
- data/devinit +71 -0
- data/devinit.log +25 -0
- data/docgen +0 -0
- data/docgen.gemspec +43 -0
- data/document.html +22 -0
- data/document.latex +50 -0
- data/images/docgen-text-replacement.png +0 -0
- data/images/pptx-customization.png +0 -0
- data/lib/db.rb +25 -0
- data/lib/docgen.rb +58 -0
- data/lib/docgen/version.rb +3 -0
- data/lib/gen_html.rb +14 -0
- data/lib/gen_latex.rb +15 -0
- data/lib/gen_pdf.rb +18 -0
- data/lib/gen_text.rb +7 -0
- data/lib/process_pptx.rb +230 -0
- data/lib/settings.rb +13 -0
- data/lib/slide_set.rb +22 -0
- data/lib/zip_utils.rb +55 -0
- data/settings.yml +3 -0
- data/temp.pdf +105 -0
- data/template.html.erb +8 -0
- metadata +219 -0
data/document.latex
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
<%
# Maps the Ruby encoding name of the rendered body to the option name that is
# handed to LaTeX's inputenc package below.
# NOTE(review): inputenc documents latin5/latin9/latin10 as ISO-8859-9/-15/-16;
# the three ISO-8859-5/-9/-10 entries are kept as found - confirm before relying on them.
# An encoding that is missing from this table yields an empty inputenc option.
encmap = {
  'UTF-8' => 'utf8x',
  'US-ASCII' => 'ascii',
  'ISO-8859-1' => 'latin1',
  'ISO-8859-2' => 'latin2',
  'ISO-8859-3' => 'latin3',
  'ISO-8859-4' => 'latin4',
  'ISO-8859-5' => 'latin5',
  'ISO-8859-9' => 'latin9',
  'ISO-8859-10' => 'latin10',
  'CP850' => 'cp850',
  'CP852' => 'cp852',
  'CP858' => 'cp858',
  'CP437' => 'cp437',
  'CP865' => 'cp865',
  'CP1250' => 'cp1250',
  'CP1252' => 'cp1252',
  'CP1257' => 'cp1257'
}
%>
\documentclass{scrartcl}
<% if RUBY_VERSION >= '1.9' %>
\usepackage[<%= encmap[@body.encoding.name] %>]{inputenc}
<% else %>
\usepackage[mathletters]{ucs}
\usepackage[utf8x]{inputenc}
<% end %>
\usepackage[T1]{fontenc}
\usepackage{listings}
<% @converter.data[:packages].each {|pkg| %>\usepackage{<%= pkg %>}
<% } %>
\usepackage{hyperref}

<% if @converter.data[:packages].include?('fancyvrb') %>
\VerbatimFootnotes
<% end %>

<% if @converter.data[:packages].include?('acronym') %>
<% @converter.root.options[:abbrev_defs].each_pair do |k,v| %>\acrodef{<%= @converter.normalize_abbreviation_key(k) %>}[<%= k %>]{<%= @converter.escape(v) %>}
<% end %>
<% end %>

\setcounter{footnote}{<%= @converter.options[:footnote_nr] - 1 %>}

\hypersetup{colorlinks=true,urlcolor=blue}

\begin{document}
<%= @body %>
\end{document}
|
Binary file
|
Binary file
|
data/lib/db.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'sqlite3'
|
2
|
+
|
3
|
+
# Mixin that gives the including object lazy access to the "substitutions"
# table of the docgen SQLite database through Sequel.
#
# The connection is opened on first use and cached in @conn; the table's
# dataset is cached in @substitutions.
# NOTE(review): this file only requires 'sqlite3'; Sequel itself is presumably
# loaded elsewhere - confirm.
module Db

  # Opens the database connection and caches the substitutions dataset.
  #
  # Connection problems are reported on stdout instead of being raised
  # (best effort); both cached handles are reset so a later call can retry.
  #
  # @return [Object, nil] the substitutions dataset, or nil when connecting failed
  def connect
    @conn = Sequel.connect('sqlite://docgen')
    @substitutions = @conn.from(:substitutions)
  rescue StandardError => e
    @conn = nil
    @substitutions = nil
    puts "Database connection error: #{e.message}"
  end

  # Disconnects from the database. Safe to call when no connection is open.
  #
  # @return [nil]
  def close
    @conn&.disconnect
    @substitutions = nil
    @conn = nil
  end

  # Looks up the replacement text stored for a placeholder key.
  #
  # @param set_id [Object] value matched against the set_id column
  # @param key [String] value matched against the key column
  # @return [Object, nil] the stored value; nil when there is no matching row
  #   or when the database could not be opened
  def substitution_text_for(set_id, key)
    connect unless @conn
    return nil unless @substitutions

    res = @substitutions.where(:set_id => set_id, :key => key).select(:value)
    res.get(:value)
  end

end
|
data/lib/docgen.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require "docgen/version"
|
2
|
+
require_relative "./db"
|
3
|
+
require_relative "./gen_text"
|
4
|
+
require_relative "./gen_html"
|
5
|
+
require_relative "./gen_latex"
|
6
|
+
require_relative "./gen_pdf"
|
7
|
+
require_relative "./settings"
|
8
|
+
require_relative "./zip_utils"
|
9
|
+
require_relative "./process_pptx"
|
10
|
+
|
11
|
+
# Entry points for producing customized documents for a "document set".
#
# Placeholders of the form ::key:: in boilerplate text are replaced with the
# values stored for the document set (see Db#substitution_text_for), and the
# result is handed to a formatter or processor class chosen by name.
module Docgen
  include Db, Settings, ZipUtils

  # Apply customizations to a complex file type (such as pptx, xlsx, docx, odp, ods, odt).
  # "other_args" will be interpreted differently by the processor for each file type.
  #
  # The processor class is derived from file_type: "pptx" -> ProcessPptx,
  # "foo_bar" -> ProcessFooBar.
  #
  # @param document_set [Object] identifies the set of substitution values
  # @param file_type [String] underscore-separated type name
  # @param file_path [String] path of the file to process
  # @param other_args [Array] processor-specific arguments
  # @raise [RuntimeError] when no processor class exists for file_type
  def process(document_set, file_type, file_path, *other_args)
    processor_class_name = "Process#{file_type.split('_').map(&:capitalize).join}"
    processor_class =
      begin
        # Only the constant lookup is guarded, so a NameError raised inside a
        # processor's constructor is not misreported as a missing class.
        Object.const_get(processor_class_name)
      rescue NameError
        raise "Undefined processor class: #{processor_class_name}"
      end
    # other_args is deliberately passed as ONE array (not splatted): the
    # processors in this package expect that nesting.
    processor_class.new.process(document_set, file_path, other_args)
  end

  # Substitute custom values for text placeholders, then format the result.
  #
  # @param document_set [Object] identifies the set of substitution values
  # @param format_name [String] underscore-separated format name ("html" -> GenHtml)
  # @param boilerplate [String] text containing ::key:: placeholders
  # @param template [Array] optional template, forwarded to the formatter as one array
  # @return [Object] whatever the formatter's #format returns
  # @raise [RuntimeError] when no formatter class exists for format_name
  def gen(document_set, format_name, boilerplate, *template)
    content = apply_substitutions_to(document_set, boilerplate)
    get_formatter(format_name).format(content, template)
  end

  private

  # Instantiates the formatter class named "Gen" + CamelCased format_name.
  def get_formatter(format_name)
    formatter_class_name = "Gen#{format_name.split('_').map(&:capitalize).join}"
    formatter_class =
      begin
        Object.const_get(formatter_class_name)
      rescue NameError
        raise "Unsupported output format: #{format_name}"
      end
    formatter_class.new
  end

  # Replaces every ::key:: placeholder in boilerplate with its stored value.
  #
  # The block form of gsub inserts the value literally, so backslashes in a
  # value (common in LaTeX text) are not treated as back-references. A
  # placeholder with no stored value is left untouched instead of aborting
  # the whole document.
  def apply_substitutions_to(document_set, boilerplate)
    boilerplate.gsub(/::.*?::/m) do |placeholder|
      lookup(document_set, placeholder) || placeholder
    end
  end

  # Strips the :: delimiters from a placeholder and fetches its value.
  def lookup(document_set, key)
    substitution_text_for(document_set, key.gsub(/::/, ''))
  end

end
|
data/lib/gen_html.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'kramdown'
|
2
|
+
|
3
|
+
# Formatter that renders kramdown (Markdown) content as HTML.
class GenHtml

  # Converts content to HTML.
  #
  # @param content [String] kramdown source text
  # @param template [Array] optional template. It may be passed directly
  #   (format(text, 'tpl.erb')) or wrapped in an array, which is how
  #   Docgen#gen forwards it (format(text, ['tpl.erb'])).
  # @return [String] the generated HTML
  # @raise [StandardError] conversion errors are reported on stdout and then
  #   re-raised, matching the other formatters, instead of being swallowed
  def format(content, *template)
    # Array() accepts both the nested and the direct form; indexing a bare
    # String with [0] would return only its first character.
    use_template = Array(template.first).first
    Kramdown::Document.new(content, :template => use_template).to_html
  rescue StandardError => e
    puts "#{e.class}: #{e.message}"
    raise
  end

end
|
data/lib/gen_latex.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'kramdown'
|
2
|
+
|
3
|
+
# Formatter that renders kramdown (Markdown) content as LaTeX.
class GenLatex

  # Converts content to LaTeX.
  #
  # @param content [String] kramdown source text
  # @param template [Array] optional template. It may be passed directly
  #   (format(text, 'tpl.latex')) or wrapped in an array, which is how
  #   Docgen#gen forwards it (format(text, ['tpl.latex'])).
  # @return [String] the generated LaTeX
  # @raise [RuntimeError] re-raised after being reported on stdout
  def format(content, *template)
    # Array() accepts both the nested and the direct form; indexing a bare
    # String with [0] would return only its first character.
    use_template = Array(template.first).first
    Kramdown::Document.new(content, :template => use_template).to_latex
  rescue RuntimeError => e
    puts "RuntimeError: #{e}"
    raise
  end

end
|
data/lib/gen_pdf.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'kramdown'
|
2
|
+
require 'prawn'
|
3
|
+
require "prawn/table"
|
4
|
+
|
5
|
+
# Formatter that renders kramdown (Markdown) content as PDF (via Prawn).
class GenPdf

  # Converts content to PDF.
  #
  # @param content [String] kramdown source text
  # @param template [Array] optional template. It may be passed directly
  #   (format(text, 'tpl')) or wrapped in an array, which is how Docgen#gen
  #   forwards it (format(text, ['tpl'])).
  # @return [Object] the result of Kramdown's to_pdf conversion
  # @raise [StandardError] conversion errors propagate unchanged
  def format(content, *template)
    # Array() accepts both the nested and the direct form; indexing a bare
    # String with [0] would return only its first character.
    use_template = Array(template.first).first
    # Suppress a warning message regarding UTF-8 font support
    Prawn::Font::AFM.hide_m17n_warning = true
    Kramdown::Document.new(content, :template => use_template).to_pdf
  end

end
|
data/lib/gen_text.rb
ADDED
data/lib/process_pptx.rb
ADDED
@@ -0,0 +1,230 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require_relative "./docgen"
|
3
|
+
require_relative "./db"
|
4
|
+
require_relative "./settings"
|
5
|
+
|
6
|
+
# 1. Insert one or more sets of slides into the pptx_file, if specified.
|
7
|
+
# 2. Replace text placeholders with values for the document set, if any.
|
8
|
+
# 3. Replace the presentation theme, if one is specified.
|
9
|
+
# Processor for PowerPoint (pptx) packages; selected by Docgen#process for
# the file type "pptx".
#
# Working copies of package entries are written below the directory named by
# the 'ziptemp' setting, which is wiped at the start of every run.
class ProcessPptx
  include Docgen, Db, Settings

  PATH_TO_PPT = 'ppt'
  PATH_TO_PPT_RELS = 'ppt/_rels'
  PATH_TO_SLIDE_RELS = 'ppt/slides/_rels'
  SLIDES_START_WITH = 'ppt/slides/slide'
  PRESENTATION_XML = 'presentation.xml'
  PRESENTATION_XML_RELS = 'presentation.xml.rels'

  # Runs the customization steps against pptx_file, modifying it in place.
  #
  # @param document_set [Object] identifies the set of substitution values
  # @param pptx_file [String] path of the presentation to modify
  # @param other_args [Array] optional template path and/or SlideSet objects,
  #   in any nesting (see #parse_arguments)
  # @return [Object] not meaningful; callers should not rely on it
  def process(document_set, pptx_file, *other_args)
    parse_arguments other_args
    initialize_work_files
    package = Zip::File.open(pptx_file)
    insert_slides_in(package, @slide_sets) if @insert_slides
    apply_text_substitutions_to_slides_in document_set, package
    replace_presentation_theme_in(package, @template) unless @template.nil?
  ensure
    # package is nil when opening the archive failed; closing it blindly
    # would hide the original error behind a NoMethodError.
    package&.close
  end

  private

  # other_args are arguments specific to pptx processing
  #
  # other_args may contain one or both of:
  # - the path to a template file (potx or pptx) containing a theme
  # - an array of SlideSet objects containing slides to be inserted
  #   into the pptx_file. Both those arguments are optional.
  #
  # Docgen#process forwards its extra arguments as one nested array while a
  # direct caller passes them flat, so the list is flattened first and then
  # sorted by type. Sets @template (String or nil), @slide_sets (Array) and
  # @insert_slides (Boolean).
  def parse_arguments(other_args)
    paths, others = other_args.flatten.compact.partition { |arg| arg.is_a?(String) }
    @template = paths.first
    @slide_sets = others.select { |arg| arg.is_a?(SlideSet) }
    @insert_slides = @slide_sets.any?
  end

  # Names of all slide entries (ppt/slides/slideN.xml) in a package.
  def slide_entry_names(package)
    package.entries.map(&:name).select { |name| name.start_with?(SLIDES_START_WITH) }
  end

  # Inserts the slides and then brings the package's bookkeeping entries
  # (slide numbering, rels files, presentation.xml) in line with them.
  def insert_slides_in(package, slide_sets)
    insert_slide_entries_in package, slide_sets
    renumber_slides_after_insertion_in package
    add_rels_entries_after_insertion_in package
    update_presentation_rels_in package
    update_presentation_xml_entry_in package
  end

  # Copies the slides of every slide set whose name (used as a regular
  # expression) matches an existing slide into the package, as
  # ppt/slides/slideN_M.xml next to the matching slide N.
  # Records @original_slide_count and @slide_count.
  # NOTE(review): two slide sets matching the same slide would produce the
  # same entry names - confirm whether that can happen.
  def insert_slide_entries_in(package, slide_sets)
    original_entry_names = slide_entry_names(package)
    @original_slide_count = original_entry_names.size
    original_entry_names.sort.each do |original_entry_name|
      doc = package.find_entry(original_entry_name)
      original_slide = Nokogiri::XML.parse(doc.get_input_stream)
      slide_sets.each do |slide_set|
        # Matching relies on the parsed document's implicit string
        # conversion - presumably its text content; verify against Nokogiri.
        next unless Regexp.new(slide_set.name).match?(original_slide)

        # ppt/slide entries from the source package
        slide_entry_names(slide_set.package).sort.each_with_index do |entry_name, slide_number|
          extracted_entry_name = "#{original_entry_name.chomp('.xml')}_#{slide_number}.xml"
          slide_set.package.extract(entry_name, "#{@tempdir}/#{extracted_entry_name}")
          package.add(extracted_entry_name, "#{@tempdir}/#{extracted_entry_name}")
        end
      end
    end
    @slide_count = slide_entry_names(package).size
  end

  # Renames the slide entries to slide1.xml..slideN.xml, walking the sorted
  # names backwards and skipping a rename whose target name is already taken.
  def renumber_slides_after_insertion_in(package)
    slide_number = @slide_count
    slide_entry_names(package).sort.reverse_each do |modified_entry_name|
      renumbered_name = "#{SLIDES_START_WITH}#{slide_number}.xml"
      package.rename(modified_entry_name, renumbered_name) unless package.find_entry(renumbered_name)
      slide_number -= 1
    end
  end

  # First attempt: Add ppt/slides/_rels/slideN.xml.rels with <Relationship Id="rId1" ... Target="../slideLayouts/slideLayout1.xml">
  # for all entries in ppt/slides. That is, copy the entry ppt/slides/_rels/slide1.xml.rels from the original deck for each new entry.
  # This will probably not be accurate for complicated slide decks. All rels point to rId1 and slideLayout1
  # in the simple test deck I made for purposes of this rspec example. Inserted slides don't automatically have a corresponding entry in
  # ppt/slides/_rels. (Note to self: Extract this info to the project wiki once the details have been worked out.)
  def add_rels_entries_after_insertion_in(package)
    extracted_file_name = "#{@tempdir}/base_slide_rel_name"
    base_slide_rel_name = "#{PATH_TO_SLIDE_RELS}/slide1.xml.rels"
    @slide_count.downto(1) do |slide_number|
      remove_file extracted_file_name
      package.extract base_slide_rel_name, extracted_file_name
      slide_rel_entry_name = "#{PATH_TO_SLIDE_RELS}/slide#{slide_number}.xml.rels"
      package.add(slide_rel_entry_name, extracted_file_name) unless package.find_entry(slide_rel_entry_name)
    end
  end

  # Add elements in ppt/_rels/presentation.xml.rels for the inserted slides
  # Increment Id value and slide number from the last Relationship node for a slide
  # NOTE(review): new Ids continue from the last *slide* relationship; if the
  # file holds non-slide relationships with higher Ids these could collide -
  # confirm against real decks.
  def update_presentation_rels_in(package)
    rels_entry_name = "#{PATH_TO_PPT_RELS}/#{PRESENTATION_XML_RELS}"
    temp_file_name = "#{@tempdir}/#{PRESENTATION_XML_RELS}"
    remove_file temp_file_name
    package.extract rels_entry_name, temp_file_name

    raw_text = get_text_from temp_file_name
    # Nokogiri can't handle the xmlns attribute on the Relationships node
    raw_text.gsub!(/xmlns/, 'snlmx')
    xml_doc = xml_doc_from raw_text
    last_relationship = xml_doc.xpath('/Relationships/Relationship[starts-with(@Target, "slides/slide")]').last
    # Id value looks like 'rId8'; keep only the numeric part
    last_id_value = last_relationship['Id'][3..-1].to_i
    rels_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide"

    # Add a Relationship element for each new slide inserted into the deck.
    rels_count = @slide_count - @original_slide_count
    rels_count.times do |offset|
      last_id_value += 1
      rels_node = Nokogiri::XML::Node.new('Relationship', xml_doc)
      rels_node['Id'] = "rId#{last_id_value}"
      rels_node['Type'] = rels_type
      rels_node['Target'] = "slides/slide#{@original_slide_count + 1 + offset}.xml"
      xml_doc.xpath('//Relationships/Relationship').last.add_next_sibling(rels_node)
    end
    updated_xml = xml_doc.to_s.gsub(/snlmx/, 'xmlns') # reverse the workaround for Nokogiri
    remove_file temp_file_name
    # Block form closes the file even when writing fails
    File.open(temp_file_name, "w") { |new_rels_xml| new_rels_xml.puts updated_xml }
    package.replace rels_entry_name, temp_file_name
  end

  # Add elements to ppt/presentation.xml for the inserted slides
  def update_presentation_xml_entry_in(package)
    presentation_entry_name = "#{PATH_TO_PPT}/#{PRESENTATION_XML}"
    temp_file_name = "#{@tempdir}/#{PRESENTATION_XML}"
    remove_file temp_file_name
    package.extract presentation_entry_name, temp_file_name
    raw_text = get_text_from temp_file_name
    # workaround - namespaces not declared in presentation.xml.
    # Every ':' becomes '__' here and every '__' becomes ':' again below.
    modified_text = raw_text.gsub(/:/, '__')
    xml_doc = xml_doc_from modified_text

    # Start with the last sldId element in the document
    slide_id_lists = xml_doc.xpath("//p__presentation/p__sldIdLst")
    last_sldId_element = slide_id_lists.last.last_element_child
    last_sldId_id_value = last_sldId_element['id'].to_i
    # r:id value looks like 'rId8'; keep only the numeric part
    last_sldId_rid_value = last_sldId_element['r__id'][3..-1].to_i

    # Add a p:sldId element for each new slide inserted into the deck.
    sldId_count = @slide_count - @original_slide_count
    sldId_count.times do
      # increment the id values and create a new child element under p:presentation/p:sldIdLst
      last_sldId_id_value += 1
      last_sldId_rid_value += 1
      sldId_node = Nokogiri::XML::Node.new('p__sldId', xml_doc)
      sldId_node['id'] = last_sldId_id_value.to_s
      sldId_node['r__id'] = "rId#{last_sldId_rid_value}"
      slide_id_lists[0] << sldId_node
    end

    updated_xml = xml_doc.to_s.gsub(/__/, ':') # reverse the workaround for namespace prefixes
    remove_file temp_file_name
    # Block form closes the file even when writing fails
    File.open(temp_file_name, "w") { |new_presentation_xml| new_presentation_xml.puts updated_xml }
    package.replace presentation_entry_name, temp_file_name
  end

  # Runs the ::key:: placeholder substitution (Docgen#gen with the 'text'
  # format) over every slide and writes the result back into the package.
  def apply_text_substitutions_to_slides_in(document_set, package)
    slide_entry_names(package).each do |entry|
      doc = package.find_entry(entry)
      original_slide = Nokogiri::XML.parse(doc.get_input_stream)
      modified_slide = gen document_set, 'text', original_slide.to_s
      package.get_output_stream(entry) { |f| f << modified_slide.to_s }
    end
  end

  # Overwrites ppt/theme/theme1.xml in package with the entry of the same
  # name taken from the template archive.
  #
  # template is the path of the template file; a one-element array holding
  # the path is accepted as well.
  def replace_presentation_theme_in(package, template)
    theme_entry_name = 'ppt/theme/theme1.xml'
    theme_source = Zip::File.open(Array(template).first)
    replacement_theme_entry = theme_source.find_entry(theme_entry_name)
    replacement_theme = Nokogiri::XML.parse(replacement_theme_entry.get_input_stream)
    original_theme = package.find_entry(theme_entry_name)
    package.get_output_stream(original_theme) { |f| f << replacement_theme.to_s }
  ensure
    # Close the template archive even when reading or writing the theme fails
    theme_source&.close
  end

  # Reads a file and returns its content with all newlines removed.
  def get_text_from(file_name)
    File.read(file_name).delete("\n")
  end

  # Parses text as XML with Nokogiri's strict configuration.
  def xml_doc_from(text)
    Nokogiri::XML(text) { |config| config.strict }
  end

  # Recreates the (empty) working directory named by the 'ziptemp' setting.
  def initialize_work_files
    @tempdir = settings 'ziptemp'
    FileUtils.rm_rf @tempdir.to_s
    FileUtils.mkdir_p "#{@tempdir}/#{PATH_TO_SLIDE_RELS}"
  end

  # Deletes file_name if it exists.
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  def remove_file(file_name)
    FileUtils.rm file_name if File.exist? file_name
  end

end
|