docgen 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ <%
2
# Maps Ruby encoding names (as reported by String#encoding.name) to the
# option names understood by the LaTeX inputenc package.
encmap = {
  'UTF-8' => 'utf8x',
  'US-ASCII' => 'ascii',
  'ISO-8859-1' => 'latin1',
  'ISO-8859-2' => 'latin2',
  'ISO-8859-3' => 'latin3',
  'ISO-8859-4' => 'latin4',
  # NOTE(review): ISO-8859-5 is Cyrillic and ISO-8859-10 is Latin-6; inputenc's
  # latin5/latin10 denote ISO-8859-9/ISO-8859-16. These two entries are kept
  # only for backward compatibility -- confirm whether they should be dropped.
  'ISO-8859-5' => 'latin5',
  'ISO-8859-9' => 'latin5',   # inputenc calls ISO-8859-9 "latin5"
  'ISO-8859-10' => 'latin10',
  'ISO-8859-15' => 'latin9',  # inputenc calls ISO-8859-15 "latin9"
  'ISO-8859-16' => 'latin10', # inputenc calls ISO-8859-16 "latin10"
  'CP850' => 'cp850',
  'CP852' => 'cp852',
  'CP858' => 'cp858',
  'CP437' => 'cp437',
  'CP865' => 'cp865',
  'CP1250' => 'cp1250',       # was misspelled 'cp120'
  'CP1252' => 'cp1252',
  'CP1257' => 'cp1257'
}
21
+ %>
22
+ \documentclass{scrartcl}
23
+ <% if RUBY_VERSION >= '1.9' %>
24
+ \usepackage[<%= encmap[@body.encoding.name] %>]{inputenc}
25
+ <% else %>
26
+ \usepackage[mathletters]{ucs}
27
+ \usepackage[utf8x]{inputenc}
28
+ <% end %>
29
+ \usepackage[T1]{fontenc}
30
+ \usepackage{listings}
31
+ <% @converter.data[:packages].each {|pkg| %>\usepackage{<%= pkg %>}
32
+ <% } %>
33
+ \usepackage{hyperref}
34
+
35
+ <% if @converter.data[:packages].include?('fancyvrb') %>
36
+ \VerbatimFootnotes
37
+ <% end %>
38
+
39
+ <% if @converter.data[:packages].include?('acronym') %>
40
+ <% @converter.root.options[:abbrev_defs].each_pair do |k,v| %>\acrodef{<%= @converter.normalize_abbreviation_key(k) %>}[<%= k %>]{<%= @converter.escape(v) %>}
41
+ <% end %>
42
+ <% end %>
43
+
44
+ \setcounter{footnote}{<%= @converter.options[:footnote_nr] - 1 %>}
45
+
46
+ \hypersetup{colorlinks=true,urlcolor=blue}
47
+
48
+ \begin{document}
49
+ <%= @body %>
50
+ \end{document}
@@ -0,0 +1,25 @@
1
+ require 'sqlite3'
2
+
3
# Mixin that reads substitution values from the "docgen" SQLite database
# through Sequel.
#
# NOTE(review): this code calls Sequel but the visible requires only load
# 'sqlite3'; presumably Sequel is loaded elsewhere -- confirm.
module Db

  # Opens the database connection and caches the :substitutions dataset.
  # Connection problems are reported on stdout rather than raised, so callers
  # must be prepared for the connection to be unavailable afterwards.
  def connect
    @conn = Sequel.connect('sqlite://docgen')
    @substitutions = @conn.from(:substitutions)
  rescue StandardError => e
    # The original rescued the misspelled constant "Exeption", which turned
    # every connection failure into a NameError.
    puts "Database connection error: #{e.message}"
  end

  # Closes the connection if one is open; safe to call when never connected.
  def close
    @conn.disconnect if @conn
    @conn = nil
    @substitutions = nil
  end

  # Returns the stored value for +key+ within the substitution set +set_id+,
  # or nil when there is no such row or the database is unavailable.
  def substitution_text_for(set_id, key)
    connect unless @conn
    return nil unless @substitutions
    @substitutions.where(:set_id => set_id, :key => key).select(:value).get(:value)
  end

end
@@ -0,0 +1,58 @@
1
+ require "docgen/version"
2
+ require_relative "./db"
3
+ require_relative "./gen_text"
4
+ require_relative "./gen_html"
5
+ require_relative "./gen_latex"
6
+ require_relative "./gen_pdf"
7
+ require_relative "./settings"
8
+ require_relative "./zip_utils"
9
+ require_relative "./process_pptx"
10
+
11
# Entry points for generating documents: dispatches to a Process* class for
# complex file types and to a Gen* class for text-based output formats.
module Docgen
  include Db, Settings, ZipUtils

  # Apply customizations to a complex file type (such as pptx, xlsx, docx, odp, ods, odt).
  # "other_args" will be interpreted differently by the processor for each file type.
  #
  # file_type is turned into a class name, e.g. 'pptx' => ProcessPptx.
  # The collected other_args array is handed to the processor as ONE argument,
  # so a processor declaring a splat receives it nested one level deep.
  #
  # Raises RuntimeError when no processor class exists for file_type.
  def process(document_set, file_type, file_path, *other_args)
    processor_class_name = "Process#{camelize_name(file_type)}"
    processor_class = resolve_class(processor_class_name)
    raise "Undefined processor class: #{processor_class_name}" unless processor_class
    processor_class.new.process document_set, file_path, other_args
  end

  # Substitute custom values for text placeholders (written as ::key::) in
  # boilerplate, then convert the result with the formatter for format_name
  # (e.g. 'html' => GenHtml). The collected template array is passed to the
  # formatter as a single argument.
  #
  # Raises RuntimeError when format_name has no formatter class.
  def gen(document_set, format_name, boilerplate, *template)
    content = apply_substitutions_to document_set, boilerplate
    get_formatter(format_name).format content, template
  end

  private

  # Returns a new formatter instance for format_name.
  def get_formatter(format_name)
    formatter_class = resolve_class("Gen#{camelize_name(format_name)}")
    raise "Unsupported output format: #{format_name}" unless formatter_class
    formatter_class.new
  end

  # Replaces every ::key:: placeholder in boilerplate with its stored value.
  # A placeholder with no stored value is left untouched instead of raising.
  def apply_substitutions_to(document_set, boilerplate)
    values = {}
    # Block form of gsub: the replacement is inserted literally, so backslashes
    # in a stored value are not treated as back-references.
    boilerplate.gsub(/::.*?::/m) do |placeholder|
      values[placeholder] = lookup(document_set, placeholder) unless values.key?(placeholder)
      value = values[placeholder]
      value.nil? ? placeholder : value.to_s
    end
  end

  # Looks up the value for a placeholder after stripping its :: delimiters.
  def lookup(document_set, key)
    substitution_text_for document_set, key.gsub(/::/, '')
  end

  # 'foo_bar' => 'FooBar'
  def camelize_name(name)
    name.split('_').map(&:capitalize).join
  end

  # Returns the top-level constant named class_name, or nil when it does not
  # exist. Only the constant lookup is guarded, so errors raised later by the
  # class's constructor are not mistaken for a missing class.
  def resolve_class(class_name)
    Object.const_get(class_name)
  rescue NameError
    nil
  end

end
@@ -0,0 +1,3 @@
1
module Docgen
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,14 @@
1
+ require 'kramdown'
2
+
3
# Formatter that converts kramdown (Markdown) content to HTML.
class GenHtml

  # Converts content to HTML.
  #
  # template is optional. It may be given directly as a String or wrapped in
  # an array (the form produced when a caller forwards its own splat as one
  # argument); the first value found is used, nil when none is given.
  #
  # Conversion errors propagate to the caller, as they do in the sibling
  # formatters; the earlier version printed a debug line and returned nil.
  def format(content, *template)
    use_template = template.flatten.first
    Kramdown::Document.new(content, :template => use_template).to_html
  end

end
@@ -0,0 +1,15 @@
1
+ require 'kramdown'
2
+
3
# Formatter that converts kramdown (Markdown) content to LaTeX.
class GenLatex

  # Converts content to LaTeX.
  #
  # template is optional. It may be given directly as a String or wrapped in
  # an array (the form produced when a caller forwards its own splat as one
  # argument); the first value found is used, nil when none is given.
  #
  # A RuntimeError from the conversion is reported on stdout and re-raised.
  def format(content, *template)
    use_template = template.flatten.first
    Kramdown::Document.new(content, :template => use_template).to_latex
  rescue RuntimeError => e
    puts "RuntimeError: #{e}"
    raise
  end

end
@@ -0,0 +1,18 @@
1
+ require 'kramdown'
2
+ require 'prawn'
3
+ require "prawn/table"
4
+
5
# Formatter that converts kramdown (Markdown) content to PDF.
class GenPdf

  # Converts content to PDF via kramdown's to_pdf.
  #
  # template is optional. It may be given directly as a String or wrapped in
  # an array (the form produced when a caller forwards its own splat as one
  # argument); the first value found is used, nil when none is given.
  #
  # Conversion errors propagate to the caller unchanged.
  def format(content, *template)
    use_template = template.flatten.first
    # Suppress a warning message regarding UTF-8 font support
    Prawn::Font::AFM.hide_m17n_warning = true
    Kramdown::Document.new(content, :template => use_template).to_pdf
  end

end
@@ -0,0 +1,7 @@
1
# Formatter for plain text output.
class GenText

  # Plain text needs no conversion: hands content back untouched.
  # Any template arguments are accepted for interface parity with the other
  # formatters and ignored.
  def format(content, *_template)
    content
  end

end
@@ -0,0 +1,230 @@
1
+ require 'nokogiri'
2
+ require_relative "./docgen"
3
+ require_relative "./db"
4
+ require_relative "./settings"
5
+
6
+ # 1. Insert one or more sets of slides into the pptx_file, if specified.
7
+ # 2. Replace text placeholders with values for the document set, if any.
8
+ # 3. Replace the presentation theme, if one is specified.
9
# Processor for PowerPoint (pptx) packages. Works directly on the entries of
# the zip package and rewrites the affected XML parts in a temp directory
# taken from the 'ziptemp' setting.
class ProcessPptx
  include Docgen, Db, Settings

  PATH_TO_PPT = 'ppt'
  PATH_TO_PPT_RELS = 'ppt/_rels'
  PATH_TO_SLIDE_RELS = 'ppt/slides/_rels'
  SLIDES_START_WITH = 'ppt/slides/slide'
  PRESENTATION_XML = 'presentation.xml'
  PRESENTATION_XML_RELS = 'presentation.xml.rels'

  # Inserts slides (if slide sets were given), applies text substitutions for
  # document_set to every slide, and replaces the theme (if a template was
  # given) in the package at pptx_file. See parse_arguments for other_args.
  # The package is always closed, which is what writes the changes back.
  def process(document_set, pptx_file, *other_args)
    parse_arguments other_args
    initialize_work_files
    package = nil
    begin
      package = Zip::File.open(pptx_file)
      insert_slides_in package, @slide_sets if @insert_slides
      apply_text_substitutions_to_slides_in document_set, package
      replace_presentation_theme_in(package, @template) if @template
    ensure
      # package is still nil when the open itself failed; closing it then
      # would replace the real error with a NoMethodError.
      package.close if package
    end
  end

  private

  # other_args are arguments specific to pptx processing
  #
  # other_args may contain one or both of:
  # - the path to a template file (potx or pptx) containing a theme
  # - an array of SlideSet objects containing slides to be inserted
  #   into the pptx_file. Both those arguments are optional.
  #
  # The arguments may arrive flat or nested inside arrays (Docgen#process
  # forwards its splat as a single array), so they are flattened first.
  # Sets @template (String path or nil), @slide_sets (Array) and
  # @insert_slides (true when at least one SlideSet was found).
  def parse_arguments(other_args)
    flat_args = other_args.flatten.compact
    @template = flat_args.find { |arg| arg.is_a?(String) }
    # SlideSet is only referenced when there is something other than a String
    # to classify, so template-only calls do not need the class to be loaded.
    candidates = flat_args.reject { |arg| arg.is_a?(String) }
    @slide_sets = candidates.select { |arg| arg.is_a?(SlideSet) }
    @insert_slides = !@slide_sets.empty?
  end

  # Runs every step needed to add the slides of slide_sets to the package.
  def insert_slides_in(package, slide_sets)
    insert_slide_entries_in package, slide_sets
    renumber_slides_after_insertion_in package
    add_rels_entries_after_insertion_in package
    update_presentation_rels_in package
    update_presentation_xml_entry_in package
  end

  # For each existing slide whose XML matches a slide set's name (used as a
  # regular expression), copies that set's slides into the package under
  # temporary names "<original slide>_<n>.xml". Records the slide counts
  # before (@original_slide_count) and after (@slide_count) insertion.
  def insert_slide_entries_in(package, slide_sets)
    original_entry_names = slide_entry_names(package)
    @original_slide_count = original_entry_names.size
    original_entry_names.sort.each do |original_entry_name|
      doc = package.find_entry(original_entry_name)
      # Regexp#match? needs a String, not a Nokogiri document.
      # NOTE(review): assumes the pattern is meant to match the slide's
      # serialized XML -- confirm.
      slide_xml = Nokogiri::XML.parse(doc.get_input_stream).to_s
      slide_sets.each do |slide_set|
        next unless Regexp.new(slide_set.name).match?(slide_xml)

        # ppt/slide entries from the source package
        slide_entry_names(slide_set.package).sort.each_with_index do |entry_name, slide_number|
          extracted_entry_name = "#{original_entry_name.chomp('.xml')}_#{slide_number}.xml"
          slide_set.package.extract(entry_name, "#{@tempdir}/#{extracted_entry_name}")
          package.add(extracted_entry_name, "#{@tempdir}/#{extracted_entry_name}")
        end
      end
    end
    @slide_count = slide_entry_names(package).size
  end

  # Renames slide entries, walking the sorted names backwards and assigning
  # numbers from @slide_count downwards; a rename is skipped when the target
  # name is already taken.
  # NOTE(review): names are sorted as strings, so "slide10" sorts before
  # "slide2" -- verify behaviour for decks with ten or more slides.
  def renumber_slides_after_insertion_in(package)
    slide_number = @slide_count
    slide_entry_names(package).sort.reverse_each do |modified_entry_name|
      target_name = "#{SLIDES_START_WITH}#{slide_number}.xml"
      package.rename(modified_entry_name, target_name) unless package.find_entry(target_name)
      slide_number -= 1
    end
  end

  # First attempt: Add ppt/slides/_rels/slideN.xml.rels with <Relationship Id="rId1" ... Target="../slideLayouts/slideLayout1.xml">
  # for all entries in ppt/slides. That is, copy the entry ppt/slides/_rels/slide1.xml.rels from the original deck for each new entry.
  # This will probably not be accurate for complicated slide decks. All rels point to rId1 and slideLayout1
  # in the simple test deck I made for purposes of this rspec example. Inserted slides don't automatically have a corresponding entry in
  # ppt/slides/_rels. (Note to self: Extract this info to the project wiki once the details have been worked out.)
  def add_rels_entries_after_insertion_in(package)
    return unless @slide_count > 0

    # The base rels entry is the same for every slide, so extract it once.
    extracted_file_name = "#{@tempdir}/base_slide_rel_name"
    remove_file extracted_file_name
    package.extract "#{PATH_TO_SLIDE_RELS}/slide1.xml.rels", extracted_file_name
    @slide_count.downto(1) do |slide_number|
      slide_rel_entry_name = "#{PATH_TO_SLIDE_RELS}/slide#{slide_number}.xml.rels"
      package.add slide_rel_entry_name, extracted_file_name unless package.find_entry slide_rel_entry_name
    end
  end

  # Add elements in ppt/_rels/presentation.xml.rels for the inserted slides
  # Increment Id value and slide number from the last Relationship node for a slide
  # NOTE(review): the new Ids continue from the last slide relationship's Id;
  # nothing here checks for a clash with Ids of non-slide relationships.
  def update_presentation_rels_in(package)
    rels_entry_name = "#{PATH_TO_PPT_RELS}/#{PRESENTATION_XML_RELS}"
    temp_file_name = "#{@tempdir}/#{PRESENTATION_XML_RELS}"
    remove_file temp_file_name
    package.extract rels_entry_name, temp_file_name

    # Nokogiri can't handle the xmlns attribute on the Relationships node
    raw_text = get_text_from(temp_file_name).gsub(/xmlns/, 'snlmx')
    xml_doc = xml_doc_from raw_text
    last_relationship = xml_doc.xpath('/Relationships/Relationship[starts-with(@Target, "slides/slide")]').last
    # Id value looks like 'rId8' (ugh!) -- keep only the number
    last_id_value = last_relationship['Id'][3..-1].to_i
    rels_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide"

    # Add a Relationship element for each new slide inserted into the deck.
    inserted_count = @slide_count - @original_slide_count
    inserted_count.times do |offset|
      rels_node = Nokogiri::XML::Node.new('Relationship', xml_doc)
      rels_node['Id'] = "rId#{last_id_value + offset + 1}"
      rels_node['Type'] = rels_type
      rels_node['Target'] = "slides/slide#{@original_slide_count + offset + 1}.xml"
      xml_doc.xpath('//Relationships/Relationship').last.add_next_sibling(rels_node)
    end

    updated_xml = xml_doc.to_s.gsub(/snlmx/, 'xmlns') # reverse the workaround for Nokogiri
    write_text_to temp_file_name, updated_xml
    package.replace rels_entry_name, temp_file_name
  end

  # Add elements to ppt/presentation.xml for the inserted slides
  def update_presentation_xml_entry_in(package)
    presentation_entry_name = "#{PATH_TO_PPT}/#{PRESENTATION_XML}"
    temp_file_name = "#{@tempdir}/#{PRESENTATION_XML}"
    remove_file temp_file_name
    package.extract presentation_entry_name, temp_file_name
    # workaround - namespaces not declared in presentation.xml
    # NOTE(review): this swaps EVERY ':' for '__' and back again afterwards,
    # so text that genuinely contains '__' comes back with ':' -- verify.
    modified_text = get_text_from(temp_file_name).gsub(/:/, '__')
    xml_doc = xml_doc_from modified_text

    # Start with the last sldId element in the document
    last_sldId_element = xml_doc.xpath("//p__presentation/p__sldIdLst").last.last_element_child
    last_sldId_id_value = last_sldId_element['id'].to_i
    # r:id value looks like 'rId8' (ugh!) -- keep only the number
    last_sldId_rid_value = last_sldId_element['r__id'][3..-1].to_i

    # Add a p:sldId element for each new slide inserted into the deck.
    sldId_count = @slide_count - @original_slide_count
    sldId_count.times do |offset|
      # increment the id values and create a new child element under p:presentation/p:sldIdLst
      sldId_node = Nokogiri::XML::Node.new('p__sldId', xml_doc)
      sldId_node['id'] = (last_sldId_id_value + offset + 1).to_s
      sldId_node['r__id'] = "rId#{last_sldId_rid_value + offset + 1}"
      xml_doc.xpath("//p__presentation/p__sldIdLst")[0] << sldId_node
    end

    updated_xml = xml_doc.to_s.gsub(/__/, ':') # reverse the workaround for namespace prefixes
    write_text_to temp_file_name, updated_xml
    package.replace presentation_entry_name, temp_file_name
  end

  # Runs the text of every slide through Docgen#gen with the 'text' format
  # and writes the result back into the same package entry.
  def apply_text_substitutions_to_slides_in(document_set, package)
    slide_entry_names(package).each do |entry|
      doc = package.find_entry(entry)
      original_slide = Nokogiri::XML.parse(doc.get_input_stream)
      modified_slide = gen document_set, 'text', original_slide.to_s
      package.get_output_stream(entry) { |f| f << modified_slide.to_s }
    end
  end

  # Overwrites ppt/theme/theme1.xml in package with the same entry taken from
  # the package at path +template+. The block form closes the template
  # package even when an error occurs.
  def replace_presentation_theme_in(package, template)
    theme_entry_name = 'ppt/theme/theme1.xml'
    Zip::File.open(template) do |theme_source|
      replacement_theme_entry = theme_source.find_entry(theme_entry_name)
      replacement_theme = Nokogiri::XML.parse(replacement_theme_entry.get_input_stream)
      original_theme = package.find_entry(theme_entry_name)
      package.get_output_stream(original_theme) { |f| f << replacement_theme.to_s }
    end
  end

  # Names of all package entries that start with SLIDES_START_WITH.
  def slide_entry_names(package)
    package.entries.map(&:name).select { |name| name.start_with?(SLIDES_START_WITH) }
  end

  # Returns the file's content with all newlines removed.
  def get_text_from(file_name)
    IO.read(file_name).gsub(/\n/, '')
  end

  # Parses text as XML with Nokogiri's strict option.
  def xml_doc_from(text)
    Nokogiri::XML(text) { |config| config.strict }
  end

  # Replaces file_name with text; the block form closes the file on errors too.
  def write_text_to(file_name, text)
    remove_file file_name
    File.open(file_name, "w") { |file| file.puts text }
  end

  # Recreates an empty temp directory tree for extracted entries.
  # Raises RuntimeError when the 'ziptemp' setting is missing: with an empty
  # value the paths below would resolve to the filesystem root.
  def initialize_work_files
    @tempdir = settings 'ziptemp'
    raise "Missing 'ziptemp' setting" if @tempdir.to_s.empty?
    FileUtils.rm_rf "#{@tempdir}"
    FileUtils.mkdir_p "#{@tempdir}/#{PATH_TO_SLIDE_RELS}"
  end

  # Deletes file_name when it exists (File.exists? was removed in Ruby 3.2).
  def remove_file(file_name)
    FileUtils.rm file_name if File.exist? file_name
  end

end
@@ -0,0 +1,13 @@
1
+ require 'yaml'
2
+
3
# Mixin giving access to the values stored in settings.yml (read relative to
# the current working directory).
module Settings

  # Loads the settings when the including object is constructed.
  def initialize
    @settings = read_settings_file
  end

  # Returns the value stored under +name+, or nil when there is none.
  # The file is loaded on demand when initialize above did not run, e.g.
  # because the including class defines its own initialize without super.
  def settings(name)
    @settings ||= read_settings_file
    @settings[name]
  end

  private

  # Parses settings.yml; an empty file yields an empty hash instead of a
  # false/nil value that would break lookups.
  def read_settings_file
    YAML.load_file('settings.yml') || {}
  end

end