odt2html 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/CHANGELOG.md +24 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +504 -0
- data/README.md +21 -0
- data/Rakefile +11 -0
- data/bin/odt2html +9 -0
- data/lib/odt2html/analyze_content.rb +329 -0
- data/lib/odt2html/analyze_graphics.rb +69 -0
- data/lib/odt2html/analyze_styles.rb +162 -0
- data/lib/odt2html/base.rb +238 -0
- data/lib/odt2html/declaration.rb +15 -0
- data/lib/odt2html/declaration_block.rb +35 -0
- data/lib/odt2html.rb +26 -0
- data/odt2html.gemspec +22 -0
- data/spec/fixtures/example.html +117 -0
- data/spec/fixtures/example.odt +0 -0
- data/spec/integration/files_spec.rb +24 -0
- data/spec/spec_helper.rb +4 -0
- metadata +118 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
module ODT2HTML
  #
  # Drives the conversion of an OpenDocument text file into an XHTML
  # document: reads command-line options, pulls <tt>styles.xml</tt> and
  # <tt>content.xml</tt> out of the input archive, and writes the
  # resulting document (plus its CSS) to a file or to standard output.
  #
  # The methods <tt>analyze_styles_xml</tt>, <tt>analyze_content_xml</tt>
  # and <tt>style_to_s</tt> are called here but not defined in this class;
  # presumably they come from the included Analyze* modules.
  #
  class Base
    include AnalyzeContent
    include AnalyzeGraphics
    include AnalyzeStyles

    #
    # Set every piece of conversion state to its empty default.
    # File names stay +nil+ until +get_options+ (or the caller) fills
    # them in.
    #
    def initialize
      # Class variable, shared by all instances; it is not read
      # anywhere in this class.
      @@debug = 0

      @doc = nil
      @input_filename = nil

      @output_filename = nil
      @output_doc = nil

      @head = nil
      @body = nil

      @css_filename = nil

      @image_dir = nil

      # Maps a namespace URN to the "canonical" prefix used by this code.
      @namespace_urn = {
        "urn:oasis:names:tc:opendocument:xmlns:office:1.0"=>"office",
        "urn:oasis:names:tc:opendocument:xmlns:style:1.0"=>"style",
        "urn:oasis:names:tc:opendocument:xmlns:text:1.0"=>"text",
        "urn:oasis:names:tc:opendocument:xmlns:table:1.0"=>"table",
        "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"=>"draw",
        "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"=>"fo",
        "http://www.w3.org/1999/xlink"=>"xlink",
        "http://purl.org/dc/elements/1.1/"=>"dc",
        "urn:oasis:names:tc:opendocument:xmlns:meta:1.0"=>"meta",
        "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"=>"number",
        "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"=>"svg",
        "urn:oasis:names:tc:opendocument:xmlns:chart:1.0"=>"chart",
        "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"=>"dr3d",
        "http://www.w3.org/1998/Math/MathML"=>"math",
        "urn:oasis:names:tc:opendocument:xmlns:form:1.0"=>"form",
        "urn:oasis:names:tc:opendocument:xmlns:script:1.0"=>"script",
        "http://openoffice.org/2004/office"=>"ooo",
        "http://openoffice.org/2004/writer"=>"ooow",
        "http://openoffice.org/2004/calc"=>"oooc",
        "http://www.w3.org/2001/xml-events"=>"dom"
      }

      #
      # These are the "canonical forms" of the styles we want to process.
      # When we get the namespaces, we'll push them into the
      # <tt>@style_dispatch</tt> hash. If a style name ends with a *, the
      # next entry is the name of a method that handles that entry.
      # Otherwise, process_normal_style_attr gets put into
      # <tt>@style_dispatch</tt>.
      #
      @valid_style = %w(
        style:font-name* process_font_name
        fo:color
        fo:background-color
        fo:font-size
        fo:font-style
        fo:font-weight
        fo:margin-top
        fo:margin-right
        fo:margin-bottom
        fo:margin-left
        fo:margin
        fo:padding-top fo:padding-right fo:padding-bottom fo:padding-left
        fo:padding
        fo:border-top fo:border-right fo:border-bottom fo:border-left
        fo:border
        fo:text-align* process_text_align
        fo:text-indent
        style:column-width* process_column_width
        style:text-underline-style* process_underline_style
        style:text-position* process_style_text_position
      )

      # The style dispatch hash's key is a style name;
      # the value is the name of the function to call to
      # process that style.
      @style_dispatch = Hash.new

      # The keys for <tt>@nshash</tt> are canonical namespace names;
      # the values are the actual namespace prefixes used in the
      # document being processed.
      @nshash = Hash.new

      # The <tt>@style_info</tt> hash gives a style name as its key;
      # the value is a <tt>DeclarationBlock</tt>. When a style is
      # actually used in the document, we set the style's
      # <tt>@block_used</tt> property to <tt>true</tt>.
      @style_info = Hash.new

      #
      # Paragraphs merge borders by default; this means we
      # must remember the last paragraph style emitted
      # and a reference to the paragraph.
      #
      @previous_para_style = nil
      @previous_para = nil
    end

    #
    # Establish a mapping between "standard" namespaces (in @namespace_urn)
    # and namespace prefixes used in the document at hand.
    #
    # This code dynamically creates instance variables for the namespaces
    # with "_ns" added to the variable name to avoid collisions.
    # It is also added to the namespace hash <tt>@nshash</tt>.
    #
    # The technique comes from a post to comp.lang.ruby by Guy Decoux.
    #
    def get_namespaces
      @nshash.clear
      @doc.root.attributes.each_attribute do |attr|
        canonical = @namespace_urn[attr.value]
        next unless canonical

        @nshash[canonical] = attr.name
        # Define <canonical>_ns / <canonical>_ns= on the class, then
        # store the prefix actually used by this document.
        accessor = canonical + "_ns"
        self.class.send(:attr_accessor, accessor)
        send("#{accessor}=", attr.name)
      end
    end

    #
    # Read <tt>--in</tt>, <tt>--out</tt>, <tt>--css</tt> and
    # <tt>--images</tt> from the command line into the matching
    # instance variables.
    #
    def get_options
      opts = GetoptLong.new(
        ["--in", GetoptLong::REQUIRED_ARGUMENT],
        ["--out", GetoptLong::OPTIONAL_ARGUMENT],
        ["--css", GetoptLong::REQUIRED_ARGUMENT],
        ["--images", GetoptLong::REQUIRED_ARGUMENT]
      )
      opts.each do |opt, arg|
        case opt
        when "--in"
          @input_filename = arg
        when "--out"
          # An optional argument that is omitted arrives as an empty
          # string; treat that as "no output file" so the result goes to
          # standard output instead of trying to open a file named "".
          @output_filename = arg unless arg.to_s.empty?
        when "--css"
          @css_filename = arg
        when "--images"
          @image_dir = arg
        end
      end
    end

    #
    # Parse one member (e.g. "content.xml") of the input archive and
    # return it as a <tt>REXML::Document</tt>. The archive is closed even
    # when the member is missing or cannot be parsed.
    #
    def get_xml(member_name)
      zipfile = Zip::ZipFile.open(@input_filename)
      begin
        stream = zipfile.get_entry(member_name).get_input_stream
        REXML::Document.new(stream.read)
      ensure
        zipfile.close
      end
    end

    #
    # Add the content-type <tt>meta</tt> element and a <tt>title</tt>
    # (the input file name) to the output document's head.
    #
    def add_xhtml_head_info
      @head.add_element("meta",
        "http-equiv"=>"content-type", "content"=>"text/html; charset=utf-8")
      @head.add_element("title").add_text(@input_filename)
    end

    #
    # Return the CSS text for every non-empty style in
    # <tt>@style_info</tt> for which the given block returns true.
    # Styles are emitted in sorted name order, one per line.
    #
    def collect_styles
      str = ""
      @style_info.keys.sort.each do |style|
        block = @style_info[style]
        if block.length > 0 && yield(block)
          str << style_to_s(style) << "\n"
        end
      end
      str
    end

    #
    # Run the whole conversion. Any StandardError is reported on standard
    # output (message plus backtrace) rather than propagated; signals and
    # <tt>exit</tt> are deliberately not intercepted.
    #
    def convert
      get_options

      if @input_filename.nil?
        usage
        raise ArgumentError, "No input file name given"
      end

      # An output file name is optional; without one the document is
      # written to standard output.

      Dir.mkdir(@image_dir) if @image_dir && !File.exist?(@image_dir)

      str = <<HDR
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
</html>
HDR
      @output_doc = REXML::Document.new str
      @head = @output_doc.root.add_element("head")
      @body = @output_doc.root.add_element("body")
      add_xhtml_head_info

      @doc = get_xml("styles.xml")
      analyze_styles_xml

      @doc = get_xml("content.xml")
      analyze_content_xml

      all_styles = collect_styles { |item| item.block_used }

      if @css_filename
        # Block form guarantees the stylesheet is flushed and closed.
        File.open(@css_filename, "w") { |css_file| css_file.puts(all_styles) }
        @head.add_element("link",
          {"rel" => "stylesheet", "type" => "text/css",
           "href" => @css_filename})
      else
        style_el = @head.add_element("style", {"type" => "text/css"})
        style_el.add_text(all_styles)
      end

      if @output_filename
        File.open(@output_filename, "w") do |output_file|
          @output_doc.write(output_file, 2)
        end
      else
        # Never close $stdout: the caller may still want to print.
        @output_doc.write($stdout, 2)
      end

    rescue StandardError => e
      puts "Cannot convert file: #{e}"
      puts e.backtrace.join("\n")
    end

    #
    # Print a one-line summary of the command-line options.
    #
    def usage
      puts "Usage: #{$0} --in inputfile [--out outputfile] [--css cssfile] [--images imagedir]"
    end

  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
module ODT2HTML
  # A single CSS declaration, i.e. one property name together with
  # the value assigned to it.
  class Declaration
    attr_accessor :property, :value

    # Both parts are optional and default to +nil+.
    def initialize(property = nil, value = nil)
      @property, @value = property, value
    end

    # Render as "property: value" (no trailing semicolon).
    def to_s
      format("%s: %s", property, value)
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
module ODT2HTML
  # Represents a CSS declaration block; a sequence of zero
  # or more +Declaration+s.
  class DeclarationBlock < Array
    # Flag recording whether the style is used; starts out +false+.
    attr_accessor :block_used

    # With a +DeclarationBlock+ as first argument, build a copy whose
    # declarations are fresh +Declaration+ objects (so later edits to
    # the copy do not touch the source). Any other arguments are handed
    # to <tt>Array#initialize</tt> unchanged. The new block is always
    # marked unused, even when copied from a used one.
    def initialize(*arglist)
      if arglist[0].kind_of?(DeclarationBlock)
        super(0)
        arglist[0].each do |item|
          push Declaration.new(item.property, item.value)
        end
      else
        super
      end
      @block_used = false
    end

    # Returns +true+ when the block sets a border on its top edge, that
    # is, when it holds a +border+ or +border-top+ declaration; returns
    # +nil+ otherwise.
    #
    # The pattern is anchored on both ends so that +border-bottom+,
    # +border-left+ and +border-right+ are not mistaken for a top border.
    def has_top_border?
      any? { |item| item.property =~ /\Aborder(-top)?\z/ } ? true : nil
    end

    # Render the block as CSS: an opening brace, one tab-indented
    # "property: value;" line per declaration, and a closing brace.
    def to_s
      result = "{\n"
      each do |item|
        result << "\t#{item.property}: #{item.value};\n"
      end
      result << "}\n"
    end

  end
end
|
data/lib/odt2html.rb
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
require 'rexml/document'
|
|
2
|
+
require 'rexml/xpath'
|
|
3
|
+
require 'zip/zip'
|
|
4
|
+
require 'stringio'
|
|
5
|
+
require 'getoptlong'
|
|
6
|
+
|
|
7
|
+
module ODT2HTML

  VERSION = "0.1.0"
  # Absolute path of the directory holding this file.
  ROOT_PATH = File.expand_path("..", __FILE__)

  # Register each constant for lazy loading from
  # ROOT_PATH/odt2html/<file>; nothing is required until the
  # constant is first referenced.
  [
    [:Base,             "base"],
    [:AnalyzeContent,   "analyze_content"],
    [:AnalyzeGraphics,  "analyze_graphics"],
    [:AnalyzeStyles,    "analyze_styles"],
    [:Declaration,      "declaration"],
    [:DeclarationBlock, "declaration_block"]
  ].each do |const_name, file|
    autoload const_name, "#{ROOT_PATH}/odt2html/#{file}"
  end

end
|
|
20
|
+
|
|
21
|
+
class REXML::Element
  # Convenience lookup: the string value of the attribute +name+
  # (optionally qualified by +namespace+), or +nil+ when the element
  # has no such attribute.
  def attribute_value(name, namespace = nil)
    found = attribute(name, namespace)
    return nil if found.nil?
    found.value
  end
end
|
data/odt2html.gemspec
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
|
3
|
+
require "odt2html"
|
|
4
|
+
|
|
5
|
+
# Package description for the odt2html gem.
Gem::Specification.new do |s|
  s.name        = "odt2html"
  s.version     = ODT2HTML::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Bernard Potocki"]
  s.email       = ["bernard.potocki@imanel.org"]
  s.homepage    = "http://github.com/imanel/odt2html"
  s.summary     = %q{OpenDocument text to HTML converter}
  s.description = %q{OpenDocument text to HTML converter}

  # The library loads rubyzip with `require 'zip/zip'` and calls
  # Zip::ZipFile, which is the pre-1.0 API; rubyzip 1.0 renamed both,
  # so an unconstrained dependency would install a version the code
  # cannot load.
  s.add_dependency 'rubyzip', '< 1.0'
  s.add_development_dependency 'rspec', '~> 2.4.0'

  # File lists are taken from git, so the gem must be built from a checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
2
|
+
<html>
|
|
3
|
+
<head>
|
|
4
|
+
<meta content='text/html; charset=utf-8' http-equiv='content-type'/>
|
|
5
|
+
<title>
|
|
6
|
+
[TITLE_HERE]
|
|
7
|
+
</title>
|
|
8
|
+
<style type='text/css'>
|
|
9
|
+
.P1{ font-size: 18pt; color: #000000; font-family: Times New Roman;
|
|
10
|
+
text-align: center; font-weight: bold; } .P2{ font-size: 12pt; color:
|
|
11
|
+
#000000; font-family: Luxi Sans; margin-bottom: 0.0835in; margin-top: 0in;
|
|
12
|
+
text-align: center; } .P3{ font-size: 12pt; color: #000000; font-family:
|
|
13
|
+
Luxi Sans; margin-bottom: 0.0835in; margin-top: 0in; text-align: left; }
|
|
14
|
+
.Quotations{ font-size: 12pt; color: #000000; font-family: Luxi Sans;
|
|
15
|
+
margin-bottom: 0.1965in; text-indent: 0in; margin-top: 0in; margin-left:
|
|
16
|
+
0.3937in; margin-right: 0.3937in; } .T1{ font-weight: bold; }
|
|
17
|
+
.Text_20_body{ font-size: 12pt; color: #000000; font-family: Luxi Sans;
|
|
18
|
+
margin-bottom: 0.0835in; margin-top: 0in; }
|
|
19
|
+
</style>
|
|
20
|
+
</head>
|
|
21
|
+
<body>
|
|
22
|
+
<p class='P1'>
|
|
23
|
+
"ODF Alliance" formed to support OpenDocument format
|
|
24
|
+
</p>
|
|
25
|
+
<p class='P2'/>
|
|
26
|
+
<p class='P3'>
|
|
27
|
+
3/3/2006 8:25:36 PM, by
|
|
28
|
+
<a href='mailto:jeremy@arstechnica.com'>
|
|
29
|
+
Jeremy Reimer
|
|
30
|
+
</a>
|
|
31
|
+
</p>
|
|
32
|
+
<p class='Text_20_body'>
|
|
33
|
+
A consortium of companies and organizations have banded together to form
|
|
34
|
+
the
|
|
35
|
+
<a href='http://www.odfalliance.org/news.asp'>
|
|
36
|
+
"ODF Alliance,"
|
|
37
|
+
</a>
|
|
38
|
+
a group dedicated to promoting the office software file format first
|
|
39
|
+
implemented by OpenOffice.org. The alliance consists of more than 35
|
|
40
|
+
members from various countries around the world. It includes companies
|
|
41
|
+
such as Red Hat, IBM, Novell, Sun Microsystems, and Corel, and
|
|
42
|
+
governmental organizations such as the American Library Association and
|
|
43
|
+
the Information and Communications Technology council for the city of
|
|
44
|
+
Vienna.
|
|
45
|
+
</p>
|
|
46
|
+
<p class='Text_20_body'>
|
|
47
|
+
The
|
|
48
|
+
<span class='T1'>
|
|
49
|
+
OpenDocument
|
|
50
|
+
</span>
|
|
51
|
+
file format was formed by the industry consortium OASIS, a group headed
|
|
52
|
+
by Sun Microsystems, and was based on OpenOffice.org's native file
|
|
53
|
+
format. OpenOffice.org is itself an open-sourced version of Star Office,
|
|
54
|
+
the proprietary office suite that Sun purchased when it acquired the
|
|
55
|
+
German company Star Division in 1999. The idea behind OpenDocument was to
|
|
56
|
+
use a text-based XML format (compressed in a zip file to conserve disk
|
|
57
|
+
space) in order to make it easy for other products to interoperate with
|
|
58
|
+
it. The specification was finalized in 2005 and OpenOffice.org was the
|
|
59
|
+
first software suite to support it. Other projects, such as KOffice,
|
|
60
|
+
AbiWord, and IBM Workplace are adding support for the ODF format, either
|
|
61
|
+
natively or through plug-in format translators.
|
|
62
|
+
</p>
|
|
63
|
+
<p class='Text_20_body'>
|
|
64
|
+
In today's highly networked world, it turns out that operability is a
|
|
65
|
+
very useful thing to have, which is why Microsoft decided that they would
|
|
66
|
+
also jump on the XML bandwagon, introducing a new XML-based file format
|
|
67
|
+
(.docx) for Office 2003. Not only that, but the company is planning to
|
|
68
|
+
make the next version of the Office XML format the default for Office 2007
|
|
69
|
+
(formerly known as Office 12). This means that when users of Office 2007
|
|
70
|
+
go to save a file, they will automatically save in .docx, not .doc.
|
|
71
|
+
</p>
|
|
72
|
+
<p class='Text_20_body'>
|
|
73
|
+
While most people don't consider file formats to be terribly
|
|
74
|
+
exciting, the question of which format to adopt led to an increasingly
|
|
75
|
+
dramatic series of announcements from the government of Massachusetts. In
|
|
76
|
+
January 2005, the government
|
|
77
|
+
<a href='http://www.crn.com/sections/breakingnews/breakingnews.jhtml;jsessionid=GCCZBBT3QQVBGQSNDBOCKHSCJUMEKJVN?articleId=57701551'>
|
|
78
|
+
approved
|
|
79
|
+
</a>
|
|
80
|
+
Office XML 2003 as an appropriate file format, then in September of that
|
|
81
|
+
year
|
|
82
|
+
<a href='http://arstechnica.com/news.ars/post/20050906-5279.html'>
|
|
83
|
+
reversed
|
|
84
|
+
</a>
|
|
85
|
+
their decision, stating that Office XML was unacceptable and that only
|
|
86
|
+
OpenDocument and PDF files would be allowed. They went
|
|
87
|
+
<a href='http://arstechnica.com/news.ars/post/20051128-5637.html'>
|
|
88
|
+
back again
|
|
89
|
+
</a>
|
|
90
|
+
in November, stating that they were "very pleased" with
|
|
91
|
+
Microsoft's submission of Office XML to the ECMA standards body, and
|
|
92
|
+
that they were "optimistic that Office Open XML will meet our new
|
|
93
|
+
standards for acceptable open formats." The champion of ODF in
|
|
94
|
+
Massachusetts, CIO Peter Quinn, then suddenly announced his
|
|
95
|
+
<a href='http://arstechnica.com/news.ars/post/20060104-5895.html'>
|
|
96
|
+
resignation
|
|
97
|
+
</a>
|
|
98
|
+
in January. This announcement was followed with an assurance from
|
|
99
|
+
Quinn's former boss that their position on ODF "remained
|
|
100
|
+
unchanged" and that they were still committed to supporting that
|
|
101
|
+
format.
|
|
102
|
+
</p>
|
|
103
|
+
<p class='Text_20_body'>
|
|
104
|
+
Confused yet? The so-called "controversy" over the ODF switch
|
|
105
|
+
has generated an unbelievable amount of press, and various groups are now
|
|
106
|
+
busy pushing governments worldwide to switch over to the ODF format. This
|
|
107
|
+
new "alliance" joins the groups SpreadOpenDocument.org, the
|
|
108
|
+
OpenDocument Fellowship, and the "Friends of OpenDocument" in
|
|
109
|
+
their quest to promote the format. My own inbox is now slowly filling up
|
|
110
|
+
with pro-ODF announcements and press releases. The push seems overtly
|
|
111
|
+
political, as evidenced by the rhetoric used by the ODF Alliance:
|
|
112
|
+
</p>
|
|
113
|
+
<p class='Quotations'>
|
|
114
|
+
[...]
|
|
115
|
+
</p>
|
|
116
|
+
</body>
|
|
117
|
+
</html>
|
|
Binary file
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# End-to-end check: convert the fixture ODT and compare the result
# byte-for-byte with the expected HTML fixture.
describe "files" do
  it "should read ODT file and generate matching HTML file" do
    odt_path = File.join(File.dirname(__FILE__), *%w[.. fixtures example.odt])
    html_path = File.join(File.dirname(__FILE__), *%w[.. fixtures example.html])
    # The converter writes the input path into <title>, so the fixture
    # carries a placeholder that is filled in here. File.read closes
    # the fixture handle immediately instead of leaving it open.
    html_content = File.read(html_path).gsub("[TITLE_HERE]", odt_path)
    tempfile = Tempfile.new('html')

    begin
      parser = ODT2HTML::Base.new
      parser.instance_variable_set('@input_filename', odt_path)
      parser.instance_variable_set('@output_filename', tempfile.path)

      parser.convert

      tempfile.rewind
      tempfile.read.should eql(html_content)
    ensure
      # Close and unlink the temporary file even when the expectation fails.
      tempfile.close!
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: odt2html
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
hash: 27
|
|
5
|
+
prerelease:
|
|
6
|
+
segments:
|
|
7
|
+
- 0
|
|
8
|
+
- 1
|
|
9
|
+
- 0
|
|
10
|
+
version: 0.1.0
|
|
11
|
+
platform: ruby
|
|
12
|
+
authors:
|
|
13
|
+
- Bernard Potocki
|
|
14
|
+
autorequire:
|
|
15
|
+
bindir: bin
|
|
16
|
+
cert_chain: []
|
|
17
|
+
|
|
18
|
+
date: 2011-03-02 00:00:00 +01:00
|
|
19
|
+
default_executable:
|
|
20
|
+
dependencies:
|
|
21
|
+
- !ruby/object:Gem::Dependency
|
|
22
|
+
name: rubyzip
|
|
23
|
+
prerelease: false
|
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
25
|
+
none: false
|
|
26
|
+
requirements:
|
|
27
|
+
- - ">="
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
hash: 3
|
|
30
|
+
segments:
|
|
31
|
+
- 0
|
|
32
|
+
version: "0"
|
|
33
|
+
type: :runtime
|
|
34
|
+
version_requirements: *id001
|
|
35
|
+
- !ruby/object:Gem::Dependency
|
|
36
|
+
name: rspec
|
|
37
|
+
prerelease: false
|
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
|
39
|
+
none: false
|
|
40
|
+
requirements:
|
|
41
|
+
- - ~>
|
|
42
|
+
- !ruby/object:Gem::Version
|
|
43
|
+
hash: 31
|
|
44
|
+
segments:
|
|
45
|
+
- 2
|
|
46
|
+
- 4
|
|
47
|
+
- 0
|
|
48
|
+
version: 2.4.0
|
|
49
|
+
type: :development
|
|
50
|
+
version_requirements: *id002
|
|
51
|
+
description: OpenDocument text to HTML converter
|
|
52
|
+
email:
|
|
53
|
+
- bernard.potocki@imanel.org
|
|
54
|
+
executables:
|
|
55
|
+
- odt2html
|
|
56
|
+
extensions: []
|
|
57
|
+
|
|
58
|
+
extra_rdoc_files: []
|
|
59
|
+
|
|
60
|
+
files:
|
|
61
|
+
- .gitignore
|
|
62
|
+
- CHANGELOG.md
|
|
63
|
+
- Gemfile
|
|
64
|
+
- LICENSE.txt
|
|
65
|
+
- README.md
|
|
66
|
+
- Rakefile
|
|
67
|
+
- bin/odt2html
|
|
68
|
+
- lib/odt2html.rb
|
|
69
|
+
- lib/odt2html/analyze_content.rb
|
|
70
|
+
- lib/odt2html/analyze_graphics.rb
|
|
71
|
+
- lib/odt2html/analyze_styles.rb
|
|
72
|
+
- lib/odt2html/base.rb
|
|
73
|
+
- lib/odt2html/declaration.rb
|
|
74
|
+
- lib/odt2html/declaration_block.rb
|
|
75
|
+
- odt2html.gemspec
|
|
76
|
+
- spec/fixtures/example.html
|
|
77
|
+
- spec/fixtures/example.odt
|
|
78
|
+
- spec/integration/files_spec.rb
|
|
79
|
+
- spec/spec_helper.rb
|
|
80
|
+
has_rdoc: true
|
|
81
|
+
homepage: http://github.com/imanel/odt2html
|
|
82
|
+
licenses: []
|
|
83
|
+
|
|
84
|
+
post_install_message:
|
|
85
|
+
rdoc_options: []
|
|
86
|
+
|
|
87
|
+
require_paths:
|
|
88
|
+
- lib
|
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
90
|
+
none: false
|
|
91
|
+
requirements:
|
|
92
|
+
- - ">="
|
|
93
|
+
- !ruby/object:Gem::Version
|
|
94
|
+
hash: 3
|
|
95
|
+
segments:
|
|
96
|
+
- 0
|
|
97
|
+
version: "0"
|
|
98
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
|
+
none: false
|
|
100
|
+
requirements:
|
|
101
|
+
- - ">="
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
hash: 3
|
|
104
|
+
segments:
|
|
105
|
+
- 0
|
|
106
|
+
version: "0"
|
|
107
|
+
requirements: []
|
|
108
|
+
|
|
109
|
+
rubyforge_project:
|
|
110
|
+
rubygems_version: 1.4.2
|
|
111
|
+
signing_key:
|
|
112
|
+
specification_version: 3
|
|
113
|
+
summary: OpenDocument text to HTML converter
|
|
114
|
+
test_files:
|
|
115
|
+
- spec/fixtures/example.html
|
|
116
|
+
- spec/fixtures/example.odt
|
|
117
|
+
- spec/integration/files_spec.rb
|
|
118
|
+
- spec/spec_helper.rb
|