documentalist 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,12 @@
1
+ README.rdoc
2
+ Rakefile
3
+ documentalist.gemspec
4
+ init.rb
5
+ lib/DocumentConverter.py
6
+ lib/documentalist.rb
7
+ lib/open_office/server.rb
8
+ rails/init.rb
9
+ test/documentalist_test.rb
10
+ test/fixtures/fixture.odt
11
+ test/open_office_test.rb
12
+ Manifest
@@ -0,0 +1,3 @@
1
+ = Documentalist
2
+
3
+ Rails gem for talking to OpenOffice.
@@ -0,0 +1,16 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'echoe'
4
+
5
# Gem packaging configuration; the gem name and version are handed to Echoe directly.
Echoe.new('documentalist', '0.1.0') do |gem|
  gem.description              = "Ruby interface that talks to OpenOffice"
  gem.url                      = "http://github.com/davout/documentalist"
  gem.author                   = "David FRANCOIS"
  gem.email                    = "david.francois@webflows.fr"
  gem.ignore_pattern           = ["tmp/*", "script/*"]
  gem.test_pattern             = "test/**/*.rb"
  gem.development_dependencies = []
  gem.runtime_dependencies     = ['zip >=2.0.2']
end

# Load every extra rake task found in ./tasks, in sorted order.
task_glob = "#{File.dirname(__FILE__)}/tasks/*.rake"
Dir[task_glob].sort.each do |task_file|
  load task_file
end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{documentalist}
5
+ s.version = "0.1.0"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["David FRANCOIS"]
9
+ s.date = %q{2010-06-08}
10
+ s.description = %q{Ruby interface that talks to OpenOffice}
11
+ s.email = %q{david.francois@webflows.fr}
12
+ s.extra_rdoc_files = ["README.rdoc", "lib/DocumentConverter.py", "lib/documentalist.rb", "lib/open_office/server.rb"]
13
+ s.files = ["README.rdoc", "Rakefile", "documentalist.gemspec", "init.rb", "lib/DocumentConverter.py", "lib/documentalist.rb", "lib/open_office/server.rb", "rails/init.rb", "test/documentalist_test.rb", "test/fixtures/fixture.odt", "test/open_office_test.rb", "Manifest"]
14
+ s.homepage = %q{http://github.com/davout/documentalist}
15
+ s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Documentalist", "--main", "README.rdoc"]
16
+ s.require_paths = ["lib"]
17
+ s.rubyforge_project = %q{documentalist}
18
+ s.rubygems_version = %q{1.3.7}
19
+ s.summary = %q{Ruby interface that talks to OpenOffice}
20
+ s.test_files = ["test/documentalist_test.rb", "test/open_office_test.rb"]
21
+
22
+ if s.respond_to? :specification_version then
23
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
24
+ s.specification_version = 3
25
+
26
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
27
+ s.add_runtime_dependency(%q<zip>, [">= 2.0.2"])
28
+ else
29
+ s.add_dependency(%q<zip>, [">= 2.0.2"])
30
+ end
31
+ else
32
+ s.add_dependency(%q<zip>, [">= 2.0.2"])
33
+ end
34
+ end
data/init.rb ADDED
File without changes
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/python
2
+ #
3
+ # PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
4
+ #
5
+ # This script converts a document from one office format to another by
6
+ # connecting to an OpenOffice.org instance via Python-UNO bridge.
7
+ #
8
+ # Copyright (C) 2008 Mirko Nasato <mirko@artofsolving.com>
9
+ # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
10
+ # - or any later version.
11
+ #
12
+ DEFAULT_OPENOFFICE_PORT = 8100
13
+
14
+ import uno
15
+ from os.path import abspath, isfile, splitext
16
+ from com.sun.star.beans import PropertyValue
17
+ from com.sun.star.task import ErrorCodeIOException
18
+ from com.sun.star.connection import NoConnectException
19
+
20
+ FAMILY_TEXT = "Text"
21
+ FAMILY_SPREADSHEET = "Spreadsheet"
22
+ FAMILY_PRESENTATION = "Presentation"
23
+ FAMILY_DRAWING = "Drawing"
24
+
25
+ FILTER_MAP = {
26
+ "pdf": {
27
+ FAMILY_TEXT: "writer_pdf_Export",
28
+ FAMILY_SPREADSHEET: "calc_pdf_Export",
29
+ FAMILY_PRESENTATION: "impress_pdf_Export",
30
+ FAMILY_DRAWING: "draw_pdf_Export"
31
+ },
32
+ "html": {
33
+ FAMILY_TEXT: "HTML (StarWriter)",
34
+ FAMILY_SPREADSHEET: "HTML (StarCalc)",
35
+ FAMILY_PRESENTATION: "impress_html_Export"
36
+ },
37
+ "odt": { FAMILY_TEXT: "writer8" },
38
+ "doc": { FAMILY_TEXT: "MS Word 97" },
39
+ "rtf": { FAMILY_TEXT: "Rich Text Format" },
40
+ "txt": { FAMILY_TEXT: "Text" },
41
+ "ods": { FAMILY_SPREADSHEET: "calc8" },
42
+ "xls": { FAMILY_SPREADSHEET: "MS Excel 97" },
43
+ "odp": { FAMILY_PRESENTATION: "impress8" },
44
+ "ppt": { FAMILY_PRESENTATION: "MS PowerPoint 97" },
45
+ "swf": { FAMILY_PRESENTATION: "impress_flash_Export" }
46
+ }
47
+ # see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
48
+ # for more available filters
49
+
50
+
51
class DocumentConversionException(Exception):
    """Error raised by the converter when a document cannot be converted.

    The human-readable reason is kept in ``message`` and is also what
    ``str()`` returns.
    """

    def __init__(self, message):
        super(DocumentConversionException, self).__init__(message)
        self.message = message

    def __str__(self):
        return self.message
58
+
59
+
60
class DocumentConverter:
    """Converts documents between office formats through an OpenOffice.org
    instance reached over a UNO socket connection on localhost."""

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the OpenOffice.org instance listening on ``port``.

        Raises DocumentConversionException when the connection is refused.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve("uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            # Call-style raise is valid on both Python 2 and Python 3.
            raise DocumentConversionException("failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load ``inputFile`` and store it as ``outputFile``.

        The export filter is chosen from the extension of ``outputFile`` and
        the family of the loaded document (see FILTER_MAP).

        Raises DocumentConversionException when the input cannot be loaded,
        the output extension is unknown, or the conversion is unsupported.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        if document is None:
            # Without this check the failure would first be swallowed by the
            # AttributeError handler around refresh() and then resurface as an
            # unrelated AttributeError further down.
            raise DocumentConversionException("could not load input file: '%s'" % inputFile)

        # Everything after a successful load runs under the finally clause so
        # the document is closed even when filter lookup or export fails.
        try:
            try:
                document.refresh()
            except AttributeError:
                # Not every document type offers refresh(); ignore if absent.
                pass

            outputExt = self._getFileExt(outputFile)
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            document.close(True)

    def _filterName(self, document, outputExt):
        """Return the export filter name for ``document`` and ``outputExt``.

        Raises DocumentConversionException for an unknown extension or an
        extension that is not available for the document's family.
        """
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException("unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise DocumentConversionException("unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant matching the services ``document`` supports."""
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
            # but this further distinction doesn't seem to matter for conversions
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of ``path`` without the dot ('' if none)."""
        # splitext always returns a string for the extension, so no None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of ``path``."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Build a tuple of PropertyValue objects, one per keyword argument."""
        props = []
        for key in args:
            prop = PropertyValue()
            prop.Name = key
            prop.Value = args[key]
            props.append(prop)
        return tuple(props)
130
+
131
+
132
# Command-line entry point: python DocumentConverter.py <input-file> <output-file>
# Exit status is 255 on bad usage, 1 on a missing input file or a failed
# conversion. Written with print() calls and "except ... as" so the script
# parses on both Python 2.6+ and Python 3.
if __name__ == "__main__":
    from sys import argv, exit

    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        print("ERROR!" + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)
151
+
@@ -0,0 +1,63 @@
1
+ require 'rubygems'
2
+ require 'erb'
3
+ require 'fileutils'
4
+ require 'tmpdir'
5
+ require 'zip/zip'
6
+ require 'open_office/server'
7
+
8
# Merges data into ERB-enabled OpenDocument templates and converts documents
# through OpenOffice::Server.
module Documentalist
  # merge_template relies on Tempfile, which 'tmpdir' alone does not load.
  require 'tempfile'

  # Renders +str+ as an ERB template.
  #
  # options[:locals] may be a Hash; each key/value pair is exposed to the
  # template as an instance variable (:name => value becomes @name).
  #
  # The variables live on a throw-away context object, so values from one
  # call are not visible to the next one.
  #
  # Returns the rendered String.
  def self.merge(str, options = {})
    locals = options[:locals]
    context = Object.new

    if locals.is_a?(Hash)
      locals.each do |name, value|
        context.instance_variable_set("@#{name}", value)
      end
    end

    ERB.new(str).result(context.instance_eval { binding })
  end

  # Reads content.xml out of the OpenDocument archive +odt_file+ and turns the
  # XML-escaped ERB delimiters (&lt;% and %&gt;) back into real ERB tags.
  #
  # Returns the resulting XML as a String.
  def self.get_contents(odt_file)
    contents = ""
    Zip::ZipFile.open(odt_file) { |zip| contents = zip.read("content.xml") }
    contents.gsub("&lt;%", "<%").gsub("%&gt;", "%>")
  end

  # Merges options[:locals] into the OpenDocument file +template+.
  #
  # options[:to] is the path of the output file:
  # * same extension as the template: the merged copy is moved there;
  # * different extension: the merged copy is converted to that path with
  #   OpenOffice::Server.convert and then removed;
  # * omitted: the template itself is replaced by the merged copy.
  def self.merge_template(template, options = {})
    tmp_contents = Tempfile.new("officer-contents")

    begin
      # Render the template's content.xml into a temporary file
      tmp_contents.write(merge(get_contents(template), :locals => options[:locals]))
      tmp_contents.close

      # Copy the template so we can merge the data into the copy
      tmp_merged_template = File.join(Dir.tmpdir, "merged-template-#{rand(10**9)}#{File.extname(template)}")
      FileUtils.cp(template, tmp_merged_template)

      # Stuff the merged content.xml into the OpenDocument zip
      Zip::ZipFile.open(tmp_merged_template) do |zip|
        zip.replace("content.xml", tmp_contents.path)
        zip.commit
      end
    ensure
      # Remove the merged content.xml even when one of the steps above failed
      tmp_contents.close unless tmp_contents.closed?
      tmp_contents.unlink
    end

    # Manages the merged file depending on the context
    if options[:to]
      if File.extname(options[:to]) == File.extname(template)
        FileUtils.mv(tmp_merged_template, options[:to])
      else
        begin
          # Server.convert takes an options hash, not a positional destination
          OpenOffice::Server.convert(tmp_merged_template, :to => options[:to])
        ensure
          FileUtils.rm_f(tmp_merged_template)
        end
      end
    else
      FileUtils.rm(template)
      FileUtils.mv(tmp_merged_template, template)
    end
  end

  # Converts the file +from+ into the file +to+ through OpenOffice::Server.
  def self.convert(from, to)
    OpenOffice::Server.convert(from, :to => to)
  end
end
@@ -0,0 +1,148 @@
1
+ require 'timeout'
2
+ require 'tmpdir'
3
+
4
module OpenOffice
  # Drives a local headless OpenOffice instance and converts documents with
  # the bundled DocumentConverter.py script.
  module Server
    # Shellwords is used to quote file paths interpolated into shell commands.
    require 'shellwords'

    # Path to the Python executable
    PYTHON_PATH = "/usr/bin/python"

    # Server can convert from the following file formats
    CONVERT_FROM = [:odt, :doc, :rtf, :docx, :txt, :html, :htm, :wps]

    # To the following formats
    CONVERT_TO = [:odt, :doc, :rtf, :pdf, :txt, :html, :htm, :wps]

    # Python conversion script path
    PY_OD_CONVERTER = File.join(File.dirname(__FILE__), "../DocumentConverter.py")

    # Maximum allowed CPU usage for an OpenOffice process
    MAX_CPU = 80

    # Server start grace time
    SERVER_START_DELAY = 4

    # Log file
    LOG_FILE = Object.const_defined?(:RAILS_ROOT) ? File.join(RAILS_ROOT, "log", "openoffice.log") : ""

    # Converts the file +origin+.
    #
    # options[:to] is either a Symbol (target extension; the result is written
    # next to +origin+ with that extension) or a String (full destination
    # path).
    #
    # Returns the destination path, or nil when no :to option is given.
    # Raises RuntimeError when +origin+ does not exist or :to is neither a
    # Symbol nor a String.
    def self.convert(origin, options = {:to => :txt})
      return unless options && options[:to]

      raise "#{origin} does not exist !" unless File.exist?(origin)
      ensure_available

      destination =
        case options[:to]
        when Symbol
          # Swap only the extension; a file without one simply gets it appended
          "#{origin.chomp(File.extname(origin))}.#{options[:to]}"
        when String
          options[:to]
        else
          raise "Can't convert #{origin} to #{options[:to]}"
        end

      quoted_destination = Shellwords.escape(destination)

      timeout(10, :attempts => 2) do
        system("#{PYTHON_PATH} #{Shellwords.escape(PY_OD_CONVERTER)} #{Shellwords.escape(origin)} #{quoted_destination} > /dev/null 2>&1")

        # HACK : sometimes text files get saved in ISO-8859-1 instead of regular UTF-8, so we force
        # a conversion if it's the case. `file -b` leaves the file name out of the output so a
        # path containing "ISO" cannot trigger a false match.
        if destination =~ /\.txt$/ && `file -b #{quoted_destination}` =~ /ISO/
          temp_file = Shellwords.escape(File.join(Dir.tmpdir, "tmp_iconv_#{rand(10**9)}.txt"))
          # The redirection target must directly follow ">"; the previous command had
          # "&&" there, which is a shell syntax error, so the re-encoding never ran.
          system("iconv --from-code ISO-8859-1 --to-code UTF-8 #{quoted_destination} > #{temp_file} && mv #{temp_file} #{quoted_destination}")
        end
      end

      destination
    end

    # NOTE: a bare `private` has no effect on `def self.` methods, so the
    # helpers below have always been publicly callable and are kept that way.

    # Is OpenOffice server running?
    def self.running?
      !`pgrep office`.empty?
    end

    # Restart if running or start new instance
    def self.restart!
      kill! if running?
      start!
    end

    # Start new instance. Raises RuntimeError when one is already running or
    # when no office process shows up within two seconds. Returns nil.
    def self.start!
      raise "Already running!" if running?

      log_redirect = LOG_FILE.empty? ? "" : ">> #{Shellwords.escape(LOG_FILE)}"
      system("/usr/bin/soffice -headless -accept=\"socket,host=127.0.0.1,port=8100;urp;\" -nofirststartwizard -nologo -nocrashreport -norestore -nolockcheck -nodefault #{log_redirect} 2>&1 &")

      begin
        timeout(2) do
          until running?
            print "."
            # Poll instead of spinning: every check spawns a pgrep process
            sleep(0.1)
          end
        end
      rescue Timeout::Error
        raise "Could not start OpenOffice"
      end

      # OpenOffice needs some time to wake up
      sleep(SERVER_START_DELAY)

      nil
    end

    # Kill running instance. Raises RuntimeError when nothing is running or
    # when the processes could not be killed in time.
    def self.kill!
      raise "Not running!" unless running?

      begin
        timeout(3, :attempts => 2) do
          while running?
            system("pkill -9 office")
            sleep(0.1)
          end
        end
      rescue Timeout::Error
        raise "Mayday, mayday ! Could not kill OpenOffice !!"
      ensure
        # Remove user profile
        system("rm -rf ~/openoffice.org*")
      end
    end

    # Is the current instance stuck ? True when any office process reported by
    # `pgrep office` uses more than MAX_CPU percent CPU.
    def self.stalled?
      office_pids = pids
      return false unless office_pids

      `ps -Ao pcpu,pid,cmd`.split(/\n/).any? do |line|
        pcpu, pid = line.split
        # Compare the PID column exactly rather than searching the whole line,
        # where e.g. pid 12 would also match "8123" or a number in the command.
        office_pids.include?(pid.to_i) && pcpu.to_i > MAX_CPU
      end
    end

    # Make sure there will be an available instance
    def self.ensure_available
      start! unless running?
      restart! if stalled?
    end

    # Get OO processes pids as an Array of Integers, or nil when there are none
    def self.pids
      found = `pgrep office`.split.map { |pid| pid.to_i }
      found unless found.empty?
    end

    # Run a block with a timeout and retry if the first execution fails.
    #
    # max_time is the limit in seconds for each attempt. options[:attempts] is
    # the total number of attempts (default 1); options[:sleep] is an optional
    # pause in seconds after each timed-out attempt.
    #
    # Returns the block's value, or nil when no block is given. Re-raises
    # Timeout::Error once the attempts are used up.
    def self.timeout(max_time = 0, options = {:attempts => 1, :sleep => nil})
      return unless block_given?

      attempts = options[:attempts] || 1
      begin
        Timeout::timeout(max_time) { yield }
      rescue Timeout::Error
        attempts -= 1
        sleep(options[:sleep]) if options[:sleep]
        # "> 0" rather than "!= 0" so that :attempts => 0 cannot retry forever
        retry if attempts > 0
        raise
      end
    end

    # Can +origin+ be converted into +destination+, judging by file extensions ?
    def self.convertible?(origin, destination)
      # File.extname returns ".ext" strings while the lists hold symbols, so
      # compare on a common ".ext" string form (case-insensitively).
      sources = CONVERT_FROM.map { |format| ".#{format}" }
      targets = CONVERT_TO.map { |format| ".#{format}" }

      sources.include?(File.extname(origin).downcase) && targets.include?(File.extname(destination).downcase)
    end
  end
end
@@ -0,0 +1 @@
1
+ require 'documentalist'
@@ -0,0 +1,40 @@
1
+ require 'test/unit'
2
+ require 'documentalist'
3
+
4
# Tests for template merging and for reading content out of ODT archives.
class DocumentalistTest < Test::Unit::TestCase
  FIXTURES_DIR = File.join(File.dirname(__FILE__), "fixtures")
  ODT_FIXTURE = File.join(FIXTURES_DIR, "fixture.odt")

  # Locals must be reachable as instance variables inside the ERB template.
  def test_merge
    template = "<%= @var1 %><%= 1.upto(3).map{ |n| n.to_s }.join %><%= @var2 %>"

    merged = Documentalist.merge(template, :locals => {
        :var1 => "test",
        :var2 => "working?"
      }
    )

    assert_equal "test123working?", merged, "Merge wasn't performed correctly"
  end

  # content.xml must be readable and its escaped ERB delimiters restored.
  def test_read_zipped_odt
    contents = Documentalist.get_contents(ODT_FIXTURE)

    assert_match(/Hello/, contents)
    assert_match(/thing/, contents)
    assert_no_match(/%&gt;/, contents)
    assert_no_match(/&lt;%=/, contents)
  end

  # Merging into an ODT template must produce an ODT containing the merged value.
  def test_odt_merge
    result = File.join(FIXTURES_DIR, "result.odt")

    begin
      Documentalist.merge_template(ODT_FIXTURE,
        :locals => {:thing => "world"},
        :to => result
      )

      # assert_match actually inspects the contents; a plain `assert /world/, ...`
      # always passes because a Regexp is truthy.
      assert_match(/world/, Documentalist.get_contents(result))
    ensure
      # Do not leave the generated file behind when an assertion fails
      File.delete(result) if File.exist?(result)
    end
  end
end
Binary file
@@ -0,0 +1,21 @@
1
+ require 'test/unit'
2
+ require 'documentalist'
3
+ require 'fileutils'
4
+ require 'tmpdir'
5
+
6
# End-to-end check of a conversion through Documentalist.convert.
class OpenOfficeTest < Test::Unit::TestCase
  # Converts the ODT fixture to a randomly named PDF in the temp directory,
  # checks that the file was produced, then removes it again.
  def test_open_office
    pdf_path = File.join(Dir.tmpdir, "fixture#{rand(10**9)}.pdf")
    odt_path = File.join(File.dirname(__FILE__), "fixtures", "fixture.odt")

    Documentalist.convert(odt_path, pdf_path)
    assert File.exist?(pdf_path)

    FileUtils.rm(pdf_path)
    assert !File.exist?(pdf_path)
  end
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: documentalist
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - David FRANCOIS
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-06-08 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: zip
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 11
30
+ segments:
31
+ - 2
32
+ - 0
33
+ - 2
34
+ version: 2.0.2
35
+ type: :runtime
36
+ version_requirements: *id001
37
+ description: Ruby interface that talks to OpenOffice
38
+ email: david.francois@webflows.fr
39
+ executables: []
40
+
41
+ extensions: []
42
+
43
+ extra_rdoc_files:
44
+ - README.rdoc
45
+ - lib/DocumentConverter.py
46
+ - lib/documentalist.rb
47
+ - lib/open_office/server.rb
48
+ files:
49
+ - README.rdoc
50
+ - Rakefile
51
+ - documentalist.gemspec
52
+ - init.rb
53
+ - lib/DocumentConverter.py
54
+ - lib/documentalist.rb
55
+ - lib/open_office/server.rb
56
+ - rails/init.rb
57
+ - test/documentalist_test.rb
58
+ - test/fixtures/fixture.odt
59
+ - test/open_office_test.rb
60
+ - Manifest
61
+ has_rdoc: true
62
+ homepage: http://github.com/davout/documentalist
63
+ licenses: []
64
+
65
+ post_install_message:
66
+ rdoc_options:
67
+ - --line-numbers
68
+ - --inline-source
69
+ - --title
70
+ - Documentalist
71
+ - --main
72
+ - README.rdoc
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ none: false
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ hash: 3
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ hash: 11
90
+ segments:
91
+ - 1
92
+ - 2
93
+ version: "1.2"
94
+ requirements: []
95
+
96
+ rubyforge_project: documentalist
97
+ rubygems_version: 1.3.7
98
+ signing_key:
99
+ specification_version: 3
100
+ summary: Ruby interface that talks to OpenOffice
101
+ test_files:
102
+ - test/documentalist_test.rb
103
+ - test/open_office_test.rb