odt2html 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,329 @@
1
+ module ODT2HTML
2
+ module AnalyzeContent
3
+
4
# Convert the parsed content document held in <tt>@doc</tt>.
#
# Steps, in order:
# 1. <tt>get_namespaces</tt> and <tt>create_dispatch_table</tt> are called
#    (the namespace prefix variables used below are read only afterwards).
# 2. Each <tt>style</tt> element under <tt>automatic-styles</tt> goes to
#    <tt>process_style_style</tt>.
# 3. Each <tt>list-style</tt> element under <tt>automatic-styles</tt> goes
#    to <tt>process_text_list_style</tt>.
# 4. The children of each <tt>body/text</tt> element are converted into
#    <tt>@body</tt> via <tt>process_children</tt>.
def analyze_content_xml
  get_namespaces
  create_dispatch_table

  automatic_styles = "#{@office_ns}:automatic-styles"

  # style:style elements
  @doc.root.elements.each("#{automatic_styles}/#{@style_ns}:style") do |style_el|
    process_style_style(style_el)
  end

  # text:list-style elements
  @doc.root.elements.each("#{automatic_styles}/#{@text_ns}:list-style") do |list_style_el|
    process_text_list_style(list_style_el)
  end

  # document body
  @doc.root.elements.each("#{@office_ns}:body/#{@office_ns}:text") do |text_el|
    process_children(text_el, @body)
  end
end
31
+
32
# Process an element's children.
#
# node::        the context node
# output_node:: the node to which the converted children are added
# xpath_expr::  which children to process (default is all child nodes)
#
# Algorithm:
# * A text node that is not made up solely of whitespace is copied to
#   +output_node+.
# * For an element, the namespace URI is looked up in
#   <tt>@namespace_urn</tt> and the name is munged into
#   <tt>process_prefix_elementname</tt> (":" and "-" become "_").
#   If this object responds to that method it is called with
#   <tt>(item, output_node)</tt>. Otherwise -- including when the
#   namespace has no entry in <tt>@namespace_urn</tt> -- the element's
#   children are processed recursively into the same +output_node+.
def process_children(node, output_node, xpath_expr = "node()")
  REXML::XPath.each(node, xpath_expr) do |item|
    if item.kind_of?(REXML::Element)
      prefix = @namespace_urn[item.namespace]
      # An unregistered namespace yields nil; skip the handler lookup
      # instead of failing on string concatenation with nil.
      handler = prefix && "process_#{prefix}_#{item.name.tr_s(":-", "__")}"
      if handler && respond_to?(handler)
        send(handler, item, output_node)
      else
        process_children(item, output_node)
      end
    elsif item.kind_of?(REXML::Text) && item.value !~ /\A\s*\z/
      # \A and \z anchor the whole string; ^ and $ would match any single
      # blank line and discard text such as "word\n  ".
      output_node.add_text(item.value)
    end
  end

  # If nothing was produced, add a null string to force a begin and end
  # tag to be generated.
  output_node.add_text("") if !output_node.has_elements? && !output_node.has_text?
end
65
+
66
#
# Paragraphs are processed as <tt>&lt;p&gt;</tt> elements that always
# carry a <tt>class</tt> attribute holding the registered style name.
#
# Border merging: when this paragraph has the same (non-nil) style as
# the previously emitted one, that style is known in <tt>@style_info</tt>
# and reports a top border, and the paragraph's
# <tt>style:join-border</tt> attribute is not the string "false", the
# new paragraph gets <tt>border-top: none</tt> and the previous
# paragraph gets <tt>border-bottom:none</tt>.
#
# A <tt>&lt;text:p&gt;</tt> with no children produces an empty
# <tt>&lt;p&gt;</tt>; no <tt>&lt;br /&gt;</tt> is generated.
#
# NOTE(review): the attribute is read with REXML's <tt>attribute</tt>,
# as everywhere else in this module, instead of the former
# <tt>attribute_value</tt> call -- confirm no project helper with
# different semantics was intended.
def process_text_p(element, output_node)
  style_name = register_style(element)

  # always include class attribute
  attr_hash = { "class" => style_name }

  if style_name && @previous_para_style == style_name
    block = @style_info[style_name]
    join_border = element.attribute("#{@style_ns}:join-border")
    # XML attribute values are strings, so compare with "false".
    joined = join_border.nil? || join_border.value != "false"
    if block && block.has_top_border? && joined
      attr_hash["style"] = "border-top: none"
      modify_style_attribute(@previous_para, "border-bottom", "none")
    end
  end

  para = emit_element(output_node, "p", attr_hash)
  @previous_para_style = style_name
  @previous_para = para
  process_children(element, para) if element.has_elements? || element.has_text?
end
98
+
99
#
# Headings are processed as <tt>&lt;h<i>n</i>&gt;</tt> elements.
# The heading level comes from the <tt>text:outline-level</tt>
# attribute and is clamped to the range 1..6, so a missing, zero or
# non-numeric level yields <tt>&lt;h1&gt;</tt> and anything above 6
# yields <tt>&lt;h6&gt;</tt>.
def process_text_h(element, output_node)
  style_name = register_style(element)
  level_attr = element.attribute("#{@text_ns}:outline-level")
  level = level_attr ? level_attr.value.to_i : 1
  # HTML only defines h1..h6.
  level = [[level, 1].max, 6].min
  heading = emit_element(output_node, "h#{level}", { "class" => style_name })
  process_children(element, heading)
end
112
+
113
# Text spans become <tt>&lt;span&gt;</tt> elements whose <tt>class</tt>
# is the registered style name; the span's children are converted
# into the new element.
def process_text_span(element, output_node)
  css_class = register_style(element)
  wrapper = emit_element(output_node, "span", { "class" => css_class })
  process_children(element, wrapper)
end
121
+
122
# Handler for <tt>text:tab</tt>: appends a single space to +output_node+.
# The +element+ itself is not inspected.
def process_text_tab(element, output_node)
  output_node.add_text(" ")
end
125
+
126
# Handler for <tt>text:s</tt>: appends spaces to +output_node+.
#
# The optional <tt>text:c</tt> attribute gives the number of spaces
# the element stands for; when it is absent or not a positive integer
# a single space is emitted.
def process_text_s(element, output_node)
  count_attr = element.attribute("#{@text_ns}:c")
  count = count_attr ? count_attr.value.to_i : 1
  count = 1 if count < 1
  output_node.add_text(" " * count)
end
129
+
130
# Handler for <tt>text:a</tt>: emits an <tt>&lt;a&gt;</tt> element whose
# <tt>class</tt> is the registered style name and whose <tt>href</tt>
# is the value of the element's <tt>href</tt> attribute in the xlink
# namespace, then converts the children into that anchor.
def process_text_a(element, output_node)
  css_class = register_style(element)
  target = element.attribute("#{@xlink_ns}:href").value
  attrs = { "class" => css_class, "href" => target }
  anchor = emit_element(output_node, "a", attrs)
  process_children(element, anchor)
end
137
+
138
# Handler for <tt>text:line-break</tt>: emits an attribute-less
# <tt>&lt;br&gt;</tt> element and returns whatever
# <tt>emit_element</tt> returns.
def process_text_line_break(element, output_node)
  emit_element(output_node, "br")
end
141
+
142
# Handler for <tt>text:bookmark</tt>: treated exactly like
# <tt>text:bookmark-start</tt> by delegating to
# <tt>process_text_bookmark_start</tt>.
def process_text_bookmark(element, output_node)
  process_text_bookmark_start(element, output_node)
end
145
+
146
# Handler for <tt>text:bookmark-start</tt>: emits a named anchor
# (<tt>&lt;a name="..."&gt;</tt>) using the element's <tt>text:name</tt>
# attribute, with the registered style name as <tt>class</tt>.
# An empty text node is added so the anchor has content.
def process_text_bookmark_start(element, output_node)
  css_class = register_style(element)
  bookmark_name = element.attribute("#{@text_ns}:name").value
  attrs = { "class" => css_class, "name" => bookmark_name }
  emit_element(output_node, "a", attrs).add_text("")
end
153
+
154
# Handler for <tt>text:list</tt>: emits a <tt>&lt;ul&gt;</tt> or
# <tt>&lt;ol&gt;</tt> and converts the list's children into it.
#
# The nesting level is one more than the number of <tt>text:list</tt>
# ancestors. At level 1 the style name comes from the list's own
# <tt>text:style-name</tt> attribute; nested lists take it from the
# <tt>ancestor::text:list[last()]</tt> match.
#
# The CSS class is <tt>stylename_level</tt> with periods changed to
# underscores. If <tt>@style_info</tt> has a block under that name it
# is marked as used, and a <tt>list-style-type</tt> declaration whose
# value is not disc/circle/square switches the tag to
# <tt>&lt;ol&gt;</tt>. A style name with no registered block leaves
# the list as <tt>&lt;ul&gt;</tt> instead of raising.
def process_text_list(element, output_node)
  tag = "ul"

  # determine the level
  level = REXML::XPath.match(element, "ancestor::#{@text_ns}:list").size + 1
  if level == 1
    style_name = element.attribute("#{@text_ns}:style-name")
  else
    style_name = REXML::XPath.match(element,
      "ancestor::#{@text_ns}:list[last()]/@#{@text_ns}:style-name")[0]
  end

  if style_name
    style_name = "#{style_name.value}_#{level}".tr_s('.', '_')
    block = @style_info[style_name]
    if block
      block.block_used = true

      # Determine if this is a numbered or bulleted list
      found = block.find { |obj| obj.property == "list-style-type" }
      tag = "ol" if found && !found.value.match(/disc|circle|square/)
    end
  end

  list_el = emit_element(output_node, tag, { "class" => style_name })
  process_children(element, list_el)
end
183
+
184
#
# List items: the children are converted into a
# <tt>&lt;li&gt;</tt> element whose <tt>class</tt> is the registered
# style name.
#
def process_text_list_item(element, output_node)
  css_class = register_style(element)
  list_item = emit_element(output_node, "li", { "class" => css_class })
  process_children(element, list_item)
end
193
+
194
# Handler for <tt>table:table</tt>: emits a <tt>&lt;table&gt;</tt> with
# zero cell padding and spacing, then converts, in order:
# * the <tt>table-column</tt> children directly into the table,
# * the rows inside <tt>table-header-rows</tt> into a
#   <tt>&lt;thead&gt;</tt> -- only when the table actually has a
#   <tt>table-header-rows</tt> child,
# * the direct <tt>table-row</tt> children into a <tt>&lt;tbody&gt;</tt>.
def process_table_table(element, output_node)
  style_name = register_style(element)
  table_el = emit_element(output_node, "table",
    { "class" => style_name, "cellpadding" => "0", "cellspacing" => "0" })
  process_children(element, table_el, "#{@table_ns}:table-column")

  header_rows = "#{@table_ns}:table-header-rows"
  # XPath.match returns an array, which is truthy even when empty,
  # so test for emptiness explicitly.
  unless REXML::XPath.match(element, header_rows).empty?
    thead = emit_element(table_el, "thead")
    process_children(element, thead, "#{header_rows}/#{@table_ns}:table-row")
  end

  tbody = emit_element(table_el, "tbody")
  process_children(element, tbody, "#{@table_ns}:table-row")
end
206
+
207
# Handler for <tt>table:table-column</tt>: emits a <tt>&lt;col&gt;</tt>
# with the registered style name as <tt>class</tt>. When the column
# has a <tt>number-columns-repeated</tt> attribute its value is passed
# as <tt>span</tt>; otherwise <tt>span</tt> is nil.
def process_table_table_column(element, output_node)
  css_class = register_style(element)
  repeated = element.attribute("#{@table_ns}:number-columns-repeated")
  span = repeated.nil? ? nil : repeated.value
  emit_element(output_node, "col", { "class" => css_class, "span" => span })
end
215
+
216
# Handler for <tt>table:table-row</tt>: emits a <tt>&lt;tr&gt;</tt> with
# the registered style name as <tt>class</tt> and converts only the
# row's <tt>table-cell</tt> children into it.
def process_table_table_row(element, output_node)
  css_class = register_style(element)
  row = emit_element(output_node, "tr", { "class" => css_class })
  process_children(element, row, "#{@table_ns}:table-cell")
end
221
+
222
# Handler for <tt>table:table-cell</tt>: emits one <tt>&lt;td&gt;</tt>
# per repetition and converts the cell's children into each of them.
#
# * <tt>class</tt> is set only when the cell has a style name.
# * <tt>number-columns-repeated</tt> (converted with +to_i+) gives the
#   number of <tt>&lt;td&gt;</tt> elements; the default is 1.
# * <tt>number-columns-spanned</tt> and <tt>number-rows-spanned</tt>
#   are copied to <tt>colspan</tt> and <tt>rowspan</tt>.
def process_table_table_cell(element, output_node)
  attr_hash = {}
  css_class = register_style(element)
  attr_hash["class"] = css_class unless css_class.nil?

  repeated = element.attribute("#{@table_ns}:number-columns-repeated")
  copies = repeated.nil? ? 1 : repeated.value.to_i

  cols_spanned = element.attribute("#{@table_ns}:number-columns-spanned")
  attr_hash["colspan"] = cols_spanned.value unless cols_spanned.nil?

  rows_spanned = element.attribute("#{@table_ns}:number-rows-spanned")
  attr_hash["rowspan"] = rows_spanned.value unless rows_spanned.nil?

  (1..copies).each do |_|
    cell = emit_element(output_node, "td", attr_hash)
    process_children(element, cell)
  end
end
246
+
247
#
# Return the style name for this element, with every period changed
# to an underscore to make it valid CSS, or nil when the element has
# no <tt>style-name</tt> attribute under its own namespace prefix.
#
# Each period is replaced individually (no squeezing of runs), which
# is the same normalisation <tt>process_style_style</tt> applies when
# it stores a style, so the returned name matches the stored key.
#
# Side effect: if <tt>@style_info</tt> has a block under that name,
# it is marked as "having been used" in the document.
#
def register_style(element)
  # the attribute shares the namespace prefix of the element itself
  style_attr = element.attribute("#{element.prefix}:style-name")
  return nil if style_attr.nil?

  style_name = style_attr.value.tr('.', '_')
  block = @style_info[style_name]
  block.block_used = true if block
  style_name
end
265
+
266
#
# Create styles for each level of a <tt>&lt;text:list-style&gt;</tt>
# element. Each child that carries a <tt>text:level</tt> attribute
# yields a selector <tt>name_level</tt>, with periods changed to
# underscores the same way <tt>process_text_list</tt> builds the name
# it looks up.
#
# For bulleted lists, the bullet type is chosen by indexing into the
# <tt>marker</tt> array with the level. For numbered lists, the
# <tt>numbering</tt> hash translates the <tt>style:num-format</tt>
# attribute to the corresponding CSS <tt>list-style-type</tt>; a
# missing or empty format becomes <tt>none</tt> and a format not in
# the hash falls back to <tt>decimal</tt>. Children without a level
# attribute are skipped.
#
def process_text_list_style(element)
  marker = ["circle", "disc", "square"]
  numbering = {
    "1" => "decimal",
    "a" => "lower-alpha", "A" => "upper-alpha",
    "i" => "lower-roman", "I" => "upper-roman"
  }

  main_name = element.attribute("#{@style_ns}:name").value
  element.elements.each do |child|
    level_attr = child.attribute("#{@text_ns}:level")
    next if level_attr.nil?

    level = level_attr.value
    selector = "#{main_name}_#{level}".tr_s('.', '_')

    if child.name == "list-level-style-bullet"
      process_normal_style_attr(selector, "list-style-type",
        marker[(level.to_i - 1) % 3])
    elsif child.name == "list-level-style-number"
      format_attr = child.attribute("#{@style_ns}:num-format")
      format = format_attr ? format_attr.value : ""
      css_type = format.empty? ? "none" : numbering.fetch(format, "decimal")
      process_normal_style_attr(selector, "list-style-type", css_type)
    end
  end
end
294
+
295
#
# Emit an element with the given <tt>element_name</tt> and
# <tt>attr_hash</tt> (as attributes) as a child of the
# <tt>output_node</tt>, returning the result of
# <tt>output_node.add_element</tt>.
#
# Attributes whose value is nil are left out; if none remain (or
# <tt>attr_hash</tt> is nil) the element is added without attributes.
# The caller's hash is not modified, so it can safely be reused for
# further calls.
def emit_element(output_node, element_name, attr_hash = nil)
  attributes = nil
  if attr_hash
    attributes = attr_hash.reject { |_key, value| value.nil? }
    attributes = nil if attributes.empty?
  end
  output_node.add_element(element_name, attributes)
end
312
+
313
#
# Modify the style attribute of <tt>output_element</tt> by setting
# the given <tt>property</tt> to <tt>value</tt>.
#
# Algorithm:
#   If there's no style attribute, create it as "property:value".
#   If it exists, split it on ";" and look for the property.
#     If the property doesn't exist, append ";property:value".
#     If it does exist, replace that declaration with the new
#     "property:value", leaving the other declarations untouched.
def modify_style_attribute(output_element, property, value)
  declaration = "#{property}:#{value}"
  current = output_element.attribute("style")

  if current.nil?
    new_value = declaration
  else
    replaced = false
    parts = current.value.split(";").map do |part|
      # the property name is whatever precedes the first colon
      if part.split(":", 2).first.to_s.strip == property
        replaced = true
        declaration
      else
        part
      end
    end
    new_value = replaced ? parts.join(";") : "#{current.value};#{declaration}"
  end

  output_element.attributes["style"] = new_value
end
328
+ end
329
+ end
@@ -0,0 +1,69 @@
1
+ module ODT2HTML
2
+ module AnalyzeGraphics
3
+
4
# Handler for <tt>draw:frame</tt>: emits a <tt>&lt;div&gt;</tt> with the
# registered style name as <tt>class</tt>. The frame's <tt>width</tt>
# and <tt>height</tt> attributes (svg namespace), when present, are
# added to the div's inline style in that order. The frame's children
# are then converted into the div.
def process_draw_frame(element, output_node)
  css_class = register_style(element)
  div = emit_element(output_node, "div", { "class" => css_class })

  %w[width height].each do |dimension|
    size = element.attribute("#{@svg_ns}:#{dimension}")
    modify_style_attribute(div, dimension, size.value) unless size.nil?
  end

  process_children(element, div)
end
17
+
18
#
# Copy an image into the user-specified directory
# (<tt>@image_dir</tt>) and emit a corresponding
# <tt>&lt;img&gt;</tt> element.
#
# The <tt>alt</tt> text is the full picture name from the element's
# <tt>href</tt>; the file is stored under the part of that name after
# the last "/". When the parent element has both a width and a height
# (svg namespace) they become the image's inline style.
#
# If the user has not specified an image directory,
# a <tt>&lt;div&gt;</tt> containing the picture name is emitted
# instead.
def process_draw_image(element, output_node)
  pic_name = element.attribute("#{@xlink_ns}:href").value

  if @image_dir.nil?
    placeholder = emit_element(output_node, "div")
    placeholder.add_text(pic_name)
  else
    img = emit_element(output_node, "img")
    img.attributes["alt"] = pic_name

    # Keep only what follows the last / in the picture name
    last_slash = pic_name.rindex('/')
    base_name = last_slash.nil? ? pic_name : pic_name[last_slash + 1..-1]

    copy_image_file(pic_name, @image_dir, base_name)
    img.attributes["src"] = "#{@image_dir}/#{base_name}"

    frame = element.parent
    width = frame.attribute("#{@svg_ns}:width")
    height = frame.attribute("#{@svg_ns}:height")
    if width && height
      img.attributes["style"] = "width:#{width.value}; " + "height:#{height.value}"
    end
  end
end
49
+
50
# Extract the entry +pic_name+ from the zip archive
# <tt>@input_filename</tt> and write its bytes, in binary mode, to
# +filename+ inside +directory+.
#
# This is best-effort: if the entry is missing nothing is written,
# and any StandardError (unreadable archive, unwritable target, ...)
# is swallowed so a missing picture does not abort the conversion.
# Signals and exit requests are no longer swallowed. The archive, the
# entry stream and the output file are closed even when an error
# occurs part-way through.
def copy_image_file(pic_name, directory, filename)
  zipfile = Zip::ZipFile.open(@input_filename)
  begin
    entry = zipfile.get_entry(pic_name)
    if entry
      in_stream = entry.get_input_stream
      begin
        File.open(File.join(directory, filename), "wb") do |out_stream|
          out_stream.write(in_stream.read)
        end
      ensure
        in_stream.close
      end
    end
  ensure
    zipfile.close
  end
rescue StandardError
  #
  # Add a $stderr.puts here if you want error output, e.g.
  # $stderr.puts "Could not find image #{pic_name}"
  nil
end
68
+ end
69
+ end
@@ -0,0 +1,162 @@
1
+ module ODT2HTML
2
+ module AnalyzeStyles
3
+
4
# Collect style information from the parsed styles document held in
# <tt>@doc</tt>.
#
# Steps, in order:
# 1. <tt>get_namespaces</tt> and <tt>create_dispatch_table</tt> are called
#    (the namespace prefix variables used below are read only afterwards).
# 2. For every <tt>default-style</tt> whose <tt>family</tt> is
#    "paragraph", its <tt>paragraph-properties</tt> and then its
#    <tt>text-properties</tt> are applied to the "body" selector.
# 3. Each <tt>style</tt> element under <tt>styles</tt> goes to
#    <tt>process_style_style</tt>.
# 4. Each <tt>list-style</tt> element under <tt>styles</tt> goes to
#    <tt>process_text_list_style</tt>.
def analyze_styles_xml
  get_namespaces
  create_dispatch_table

  styles_path = "#{@office_ns}:styles"

  # default styles are attached to the body
  @doc.root.elements.each("#{styles_path}/#{@style_ns}:default-style") do |default_style|
    next unless default_style.attribute("#{@style_ns}:family").value == "paragraph"

    %w[paragraph-properties text-properties].each do |properties|
      process_style("body", default_style.elements["#{@style_ns}:#{properties}"])
    end
  end

  @doc.root.elements.each("#{styles_path}/#{@style_ns}:style") do |style_el|
    process_style_style(style_el)
  end

  @doc.root.elements.each("#{styles_path}/#{@text_ns}:list-style") do |list_style_el|
    process_text_list_style(list_style_el)
  end
end
36
+
37
#
# Fill the <tt>@style_dispatch</tt> hash from the <tt>@valid_style</tt>
# array.
#
# Each entry has its leading prefix (the text before the first ":")
# replaced by the value <tt>@nshash</tt> holds for that prefix.
# An entry containing "*" has its last character removed and is
# mapped to the array element that follows it, which is consumed as
# the handler name; every other entry is mapped to
# "process_normal_style_attr".
#
def create_dispatch_table
  pending = @valid_style.dup
  until pending.empty?
    entry = pending.shift
    key = entry.sub(/^([^:]+)/) { |pfx| @nshash[pfx] }
    if entry.index("*").nil?
      @style_dispatch[key] = "process_normal_style_attr"
    else
      # the following array element names the handler for this entry
      @style_dispatch[key.sub(/.$/, "")] = pending.shift
    end
  end
end
57
+
58
#
# Handle a <style:foo-properties> element.
#
# Every attribute whose expanded name is a key of
# <tt>@style_dispatch</tt> is passed to the handler named there, as
# <tt>handler(class_name, attribute name, attribute value)</tt>.
# Other attributes are ignored, and a nil +style_element+ does nothing.
#
def process_style(class_name, style_element)
  return if style_element.nil?

  style_element.attributes.each_attribute do |attr|
    next unless @style_dispatch.has_key?(attr.expanded_name)

    handler = @style_dispatch[attr.expanded_name]
    self.send(handler, class_name, attr.name, attr.value)
  end
end
71
+
72
#
# Handle a <style:style> element.
#
# The style name (periods changed to underscores) becomes the key in
# <tt>@style_info</tt>:
# * if the style names a parent that is already in
#   <tt>@style_info</tt>, a new DeclarationBlock built from the
#   parent's block is stored;
# * otherwise an existing block is kept, or an empty DeclarationBlock
#   is created -- this now also covers a style whose parent is not
#   known, so every processed style ends up with a block.
#
# Each child element is then passed to <tt>process_style</tt>.
#
def process_style_style(element)
  style_name = element.attribute("#{@style_ns}:name").value.gsub(/\./, "_")
  parent_attr = element.attribute("#{@style_ns}:parent-style-name")

  if parent_attr
    parent_block = @style_info[parent_attr.value.gsub(/\./, "_")]
    @style_info[style_name] = DeclarationBlock.new(parent_block) if parent_block
  end
  @style_info[style_name] ||= DeclarationBlock.new

  element.elements.each do |child|
    process_style(style_name, child)
  end
end
92
+
93
# The font-name attribute is registered as the CSS
# <tt>font-family</tt> property; the incoming +property+ name is
# ignored.
def process_font_name(selector, property, value)
  process_normal_style_attr(selector, "font-family", value)
end
97
+
98
# Register a text alignment, translating the values CSS does not
# use here: <tt>end</tt> becomes <tt>right</tt> and <tt>start</tt>
# becomes <tt>left</tt>. Any other value is passed through unchanged.
def process_text_align(selector, property, value)
  css_value =
    case value
    when "end" then "right"
    when "start" then "left"
    else value
    end
  process_normal_style_attr(selector, property, css_value)
end
106
+
107
# <tt>style:column-width</tt> is registered as the CSS <tt>width</tt>
# property; the incoming +property+ name is ignored.
#
def process_column_width(selector, property, value)
  process_normal_style_attr(selector, "width", value)
end
112
+
113
# <tt>style:text-underline-style</tt> is registered as the CSS
# <tt>text-decoration</tt> property: "none" stays "none", every other
# value becomes "underline".
def process_underline_style(selector, property, value)
  decoration = "underline"
  decoration = "none" if value == "none"
  process_normal_style_attr(selector, "text-decoration", decoration)
end
118
+
119
#
# The <tt>style:text-position</tt> attribute gives a
# whitespace-separated distance above or below the baseline and,
# optionally, a scaling factor.
#
# If the distance is present and is not "0%", it is registered as
# <tt>vertical-align</tt>; the scaling factor is registered as
# <tt>font-size</tt> only when one was actually given. A value that is
# empty, or whose distance is "0%", registers nothing.
def process_style_text_position(selector, property, value)
  distance, scale = value.to_s.split(' ')
  return if distance.nil? || distance == "0%"

  process_normal_style_attr(selector, "vertical-align", distance)
  # Without this guard a single-token value would register font-size
  # with a nil value.
  process_normal_style_attr(selector, "font-size", scale) if scale
end
131
+
132
#
# Record <tt>property: value</tt> for +selector+ in
# <tt>@style_info</tt>.
#
# * No block registered for the selector yet: create a
#   DeclarationBlock, store it, and push the new Declaration.
# * Block registered and it already has a declaration for this
#   property: overwrite that declaration's value.
# * Block registered without this property: push a new Declaration.
#
def process_normal_style_attr(selector, property, value)
  block = @style_info[selector]
  if block.nil?
    block = DeclarationBlock.new
    @style_info[selector] = block
    block.push Declaration.new(property, value)
  else
    existing = block.find { |declaration| declaration.property == property }
    if existing.nil?
      block.push Declaration.new(property, value)
    else
      existing.value = value
    end
  end
end
156
+
157
# Return the CSS rule text for +selector+: a "." followed by the
# selector and the string form of its entry in <tt>@style_info</tt>.
def style_to_s(selector)
  declarations = @style_info[selector].to_s
  "." + selector + declarations
end
161
+ end
162
+ end