documentalist 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest +4 -0
- data/README.rdoc +13 -1
- data/Rakefile +10 -14
- data/config/default.yml +2 -2
- data/documentalist.gemspec +5 -5
- data/init.rb +1 -0
- data/lib/backends/net_pbm.rb +6 -5
- data/lib/backends/odf_merge.rb +2 -0
- data/lib/backends/open_office.rb +28 -7
- data/lib/backends/pdf_tools.rb +4 -3
- data/lib/backends/wkhtmltopdf.rb +11 -0
- data/lib/dependencies.rb +21 -0
- data/lib/documentalist.rb +53 -36
- data/rails/init.rb +3 -3
- data/test/documentalist_test.rb +17 -3
- data/test/fixtures/fixture_002.html +13 -0
- data/test/rails_integration_test.rb +3 -1
- data/test/test_helper.rb +4 -0
- data/test/wkhtmltopdf_test.rb +21 -0
- metadata +11 -4
data/Manifest
CHANGED
@@ -32,6 +32,8 @@ lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-cli-2.2.2-s
|
|
32
32
|
lib/backends/open_office/bridges/pyodconverter.py
|
33
33
|
lib/backends/open_office/server.rb
|
34
34
|
lib/backends/pdf_tools.rb
|
35
|
+
lib/backends/wkhtmltopdf.rb
|
36
|
+
lib/dependencies.rb
|
35
37
|
lib/documentalist.rb
|
36
38
|
lib/tasks/tasks.rb
|
37
39
|
rails/config/documentalist.yml.tpl
|
@@ -39,10 +41,12 @@ rails/init.rb
|
|
39
41
|
rails/initialize_configuration.rb
|
40
42
|
test/documentalist_test.rb
|
41
43
|
test/fixtures/fixture_001.odt
|
44
|
+
test/fixtures/fixture_002.html
|
42
45
|
test/net_pbm_test.rb
|
43
46
|
test/odf_merge_test.rb
|
44
47
|
test/open_office_test.rb
|
45
48
|
test/pdf_tools_test.rb
|
46
49
|
test/rails_integration_test.rb
|
47
50
|
test/test_helper.rb
|
51
|
+
test/wkhtmltopdf_test.rb
|
48
52
|
Manifest
|
data/README.rdoc
CHANGED
@@ -1,3 +1,15 @@
|
|
1
1
|
= Documentalist
|
2
|
+
Rails gem for easily managing documents, converting them from one format to another,
|
3
|
+
and merging data into ODF templates.
|
2
4
|
|
3
|
-
|
5
|
+
= Checking for external dependencies
|
6
|
+
Documentalist assembles various moving parts that are necessary to perform the actual
|
7
|
+
conversions; you can run the documentalist:backends:checks task to see if these
|
8
|
+
dependencies are met on your system; if that isn't the case, it will give you some tips
|
9
|
+
on how to fix it.
|
10
|
+
|
11
|
+
= Examples
|
12
|
+
Documentalist.convert('/home/somefile.doc', :to => '/home/someotherfile.pdf')
|
13
|
+
|
14
|
+
= Installation
|
15
|
+
Require the gem in your Rails or Ruby application and you should be good to go!
|
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ require 'rake'
|
|
5
5
|
require 'echoe' rescue nil
|
6
6
|
|
7
7
|
if Object.const_defined? :Echoe
|
8
|
-
Echoe.new('documentalist', '0.1.
|
8
|
+
Echoe.new('documentalist', '0.1.2') do |p|
|
9
9
|
p.description = "The smooth document management experience, usable as a Rails gem plugin or standalone in any ruby application"
|
10
10
|
p.url = "http://github.com/davout/documentalist"
|
11
11
|
p.author = "David FRANCOIS"
|
@@ -14,19 +14,15 @@ if Object.const_defined? :Echoe
|
|
14
14
|
p.test_pattern = "test/**/*.rb"
|
15
15
|
p.development_dependencies = ['flexmock >=0.8.6']
|
16
16
|
p.runtime_dependencies = ['zip >=2.0.2', 'SystemTimer >=1.2']
|
17
|
+
end
|
18
|
+
end
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
# pkill
|
26
|
-
# grep
|
27
|
-
# file
|
28
|
-
# which
|
29
|
-
# jod converter
|
30
|
-
# open office 3
|
20
|
+
namespace :documentalist do
|
21
|
+
namespace :backends do
|
22
|
+
desc "Checks that the required dependencies are met for the different backends"
|
23
|
+
task :checks do
|
24
|
+
require File.join(File.dirname(__FILE__), "init")
|
25
|
+
Documentalist.check_dependencies
|
26
|
+
end
|
31
27
|
end
|
32
28
|
end
|
data/config/default.yml
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# Default configuration in case none is provided
|
2
|
-
|
2
|
+
log_level: debug
|
3
3
|
|
4
4
|
python:
|
5
5
|
path: /usr/bin/python
|
@@ -12,6 +12,6 @@ open_office:
|
|
12
12
|
bridge: JOD
|
13
13
|
max_cpu: 80
|
14
14
|
max_startup_time: 3
|
15
|
-
wakeup_time:
|
15
|
+
wakeup_time: 2
|
16
16
|
max_conversion_attempts: 3
|
17
17
|
max_conversion_time: 5
|
data/documentalist.gemspec
CHANGED
@@ -2,22 +2,22 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{documentalist}
|
5
|
-
s.version = "0.1.
|
5
|
+
s.version = "0.1.2"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["David FRANCOIS"]
|
9
|
-
s.date = %q{2010-
|
9
|
+
s.date = %q{2010-07-04}
|
10
10
|
s.description = %q{The smooth document management experience, usable as a Rails gem plugin or standalone in any ruby application}
|
11
11
|
s.email = %q{david.francois@webflows.fr}
|
12
|
-
s.extra_rdoc_files = ["README.rdoc", "lib/backends/net_pbm.rb", "lib/backends/odf_merge.rb", "lib/backends/open_office.rb", "lib/backends/open_office/bridges/jodconverter-2.2.2/ChangeLog.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/LICENSE.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/README.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/jodconverter-2.2.2-javadoc.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-commons-io.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-openoffice.org.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-slf4j.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-xstream.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/document-formats.xml", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/DEPENDENCIES.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-cli-1.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-io-1.4.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-cli-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/juh-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jurt-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/ridl-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-api-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-jdk14-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/unoil-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/xstream-1.3.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-2.2.2-sources.jar", 
"lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-cli-2.2.2-sources.jar", "lib/backends/open_office/bridges/pyodconverter.py", "lib/backends/open_office/server.rb", "lib/backends/pdf_tools.rb", "lib/documentalist.rb", "lib/tasks/tasks.rb"]
|
13
|
-
s.files = ["README.rdoc", "Rakefile", "config/default.yml", "documentalist.gemspec", "init.rb", "lib/backends/net_pbm.rb", "lib/backends/odf_merge.rb", "lib/backends/open_office.rb", "lib/backends/open_office/bridges/jodconverter-2.2.2/ChangeLog.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/LICENSE.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/README.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/jodconverter-2.2.2-javadoc.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-commons-io.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-openoffice.org.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-slf4j.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-xstream.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/document-formats.xml", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/DEPENDENCIES.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-cli-1.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-io-1.4.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-cli-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/juh-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jurt-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/ridl-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-api-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-jdk14-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/unoil-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/xstream-1.3.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-2.2.2-sources.jar", 
"lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-cli-2.2.2-sources.jar", "lib/backends/open_office/bridges/pyodconverter.py", "lib/backends/open_office/server.rb", "lib/backends/pdf_tools.rb", "lib/documentalist.rb", "lib/tasks/tasks.rb", "rails/config/documentalist.yml.tpl", "rails/init.rb", "rails/initialize_configuration.rb", "test/documentalist_test.rb", "test/fixtures/fixture_001.odt", "test/net_pbm_test.rb", "test/odf_merge_test.rb", "test/open_office_test.rb", "test/pdf_tools_test.rb", "test/rails_integration_test.rb", "test/test_helper.rb", "Manifest"]
|
12
|
+
s.extra_rdoc_files = ["README.rdoc", "lib/backends/net_pbm.rb", "lib/backends/odf_merge.rb", "lib/backends/open_office.rb", "lib/backends/open_office/bridges/jodconverter-2.2.2/ChangeLog.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/LICENSE.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/README.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/jodconverter-2.2.2-javadoc.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-commons-io.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-openoffice.org.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-slf4j.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-xstream.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/document-formats.xml", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/DEPENDENCIES.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-cli-1.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-io-1.4.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-cli-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/juh-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jurt-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/ridl-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-api-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-jdk14-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/unoil-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/xstream-1.3.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-2.2.2-sources.jar", 
"lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-cli-2.2.2-sources.jar", "lib/backends/open_office/bridges/pyodconverter.py", "lib/backends/open_office/server.rb", "lib/backends/pdf_tools.rb", "lib/backends/wkhtmltopdf.rb", "lib/dependencies.rb", "lib/documentalist.rb", "lib/tasks/tasks.rb"]
|
13
|
+
s.files = ["README.rdoc", "Rakefile", "config/default.yml", "documentalist.gemspec", "init.rb", "lib/backends/net_pbm.rb", "lib/backends/odf_merge.rb", "lib/backends/open_office.rb", "lib/backends/open_office/bridges/jodconverter-2.2.2/ChangeLog.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/LICENSE.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/README.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/jodconverter-2.2.2-javadoc.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-commons-io.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-openoffice.org.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-slf4j.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-xstream.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/document-formats.xml", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/DEPENDENCIES.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-cli-1.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-io-1.4.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-cli-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/juh-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jurt-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/ridl-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-api-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-jdk14-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/unoil-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/xstream-1.3.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-2.2.2-sources.jar", 
"lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-cli-2.2.2-sources.jar", "lib/backends/open_office/bridges/pyodconverter.py", "lib/backends/open_office/server.rb", "lib/backends/pdf_tools.rb", "lib/backends/wkhtmltopdf.rb", "lib/dependencies.rb", "lib/documentalist.rb", "lib/tasks/tasks.rb", "rails/config/documentalist.yml.tpl", "rails/init.rb", "rails/initialize_configuration.rb", "test/documentalist_test.rb", "test/fixtures/fixture_001.odt", "test/fixtures/fixture_002.html", "test/net_pbm_test.rb", "test/odf_merge_test.rb", "test/open_office_test.rb", "test/pdf_tools_test.rb", "test/rails_integration_test.rb", "test/test_helper.rb", "test/wkhtmltopdf_test.rb", "Manifest"]
|
14
14
|
s.homepage = %q{http://github.com/davout/documentalist}
|
15
15
|
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Documentalist", "--main", "README.rdoc"]
|
16
16
|
s.require_paths = ["lib"]
|
17
17
|
s.rubyforge_project = %q{documentalist}
|
18
18
|
s.rubygems_version = %q{1.3.7}
|
19
19
|
s.summary = %q{The smooth document management experience, usable as a Rails gem plugin or standalone in any ruby application}
|
20
|
-
s.test_files = ["test/open_office_test.rb", "test/odf_merge_test.rb", "test/documentalist_test.rb", "test/rails_integration_test.rb", "test/test_helper.rb", "test/pdf_tools_test.rb", "test/net_pbm_test.rb"]
|
20
|
+
s.test_files = ["test/wkhtmltopdf_test.rb", "test/open_office_test.rb", "test/odf_merge_test.rb", "test/documentalist_test.rb", "test/rails_integration_test.rb", "test/test_helper.rb", "test/pdf_tools_test.rb", "test/net_pbm_test.rb"]
|
21
21
|
|
22
22
|
if s.respond_to? :specification_version then
|
23
23
|
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
data/init.rb
CHANGED
@@ -0,0 +1 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w{lib documentalist})
|
data/lib/backends/net_pbm.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
|
-
# To change this template, choose Tools | Templates
|
2
|
-
# and open the template in the editor.
|
3
|
-
|
4
1
|
module Documentalist
|
5
2
|
module NetPBM
|
6
|
-
|
7
|
-
|
3
|
+
include Documentalist::Dependencies
|
4
|
+
|
5
|
+
depends_on_binaries! "ppmtojpeg" => "install netpbm package"
|
6
|
+
|
7
|
+
def self.convert(file, options)
|
8
|
+
system "ppmtojpeg #{file} > #{options[:to]}"
|
8
9
|
end
|
9
10
|
end
|
10
11
|
end
|
data/lib/backends/odf_merge.rb
CHANGED
data/lib/backends/open_office.rb
CHANGED
@@ -1,20 +1,40 @@
|
|
1
1
|
module Documentalist
|
2
2
|
module OpenOffice
|
3
|
+
include Documentalist::Dependencies
|
4
|
+
|
5
|
+
depends_on_binaries! "ps" => "use Documentalist in a Posix compliant OS",
|
6
|
+
Documentalist.config[:open_office][:path] => "install Open Office and correctly configure the path to its binary",
|
7
|
+
"pkill" => "install pkill binary and make it available through the PATH",
|
8
|
+
"pgrep" => "install pgrep binary and make it available through the PATH",
|
9
|
+
"java" => "install java",
|
10
|
+
"python" => "install python",
|
11
|
+
"file" => "install file binary",
|
12
|
+
"iconv" => "install iconv binary"
|
13
|
+
|
3
14
|
# Converts documents
|
4
15
|
def self.convert(origin, options)
|
16
|
+
Documentalist.logger.debug("Going to convert #{origin} to #{options[:to]}")
|
17
|
+
|
5
18
|
# See how to make OpenOffice startup as smooth as possible and not on first conversion
|
6
19
|
# OO auto-start option if in Rails app ?
|
7
20
|
Server.ensure_available
|
8
21
|
|
22
|
+
# TODO : manage multi OO instances : http://code.google.com/p/jodconverter/wiki/GettingStarted
|
23
|
+
|
9
24
|
Documentalist.timeout(Documentalist.config[:open_office][:max_conversion_time], :attempts => Documentalist.config[:open_office][:max_conversion_attempts]) do
|
10
25
|
if Documentalist.config[:open_office][:bridge] == 'JOD'
|
11
|
-
|
12
|
-
|
13
|
-
system("#{Documentalist.config[:java][:path]} -jar #{File.join(File.dirname(__FILE__), %w{open_office bridges jodconverter-2.2.2 lib jodconverter-cli-2.2.2.jar})} #{origin} #{options[:to]}")
|
26
|
+
command = "#{Documentalist.config[:java][:path]} -jar #{File.join(File.dirname(__FILE__), %w{open_office bridges jodconverter-2.2.2 lib jodconverter-cli-2.2.2.jar})} #{origin} #{options[:to]}"
|
14
27
|
elsif Documentalist.config[:open_office][:bridge] == 'PYOD'
|
15
|
-
|
28
|
+
command = "#{Documentalist.config[:python][:path]} #{File.join(File.dirname(__FILE__), %w{open_office bridges pyodconverter.py})} #{origin} #{options[:to]}"
|
16
29
|
end
|
17
30
|
|
31
|
+
if Documentalist.config[:log_file] and !Documentalist.config[:log_file].empty?
|
32
|
+
command += " >> #{Documentalist.config[:log_file]} 2>&1"
|
33
|
+
end
|
34
|
+
|
35
|
+
Documentalist.logger.debug("Going to run #{Documentalist.config[:open_office][:bridge]} bridge with command -- #{command}")
|
36
|
+
system(command)
|
37
|
+
|
18
38
|
self.convert_txt_to_utf8(options[:to]) if options[:to_format] == :txt
|
19
39
|
|
20
40
|
options[:to]
|
@@ -37,17 +57,18 @@ module Documentalist
|
|
37
57
|
|
38
58
|
# Restart if running or start new instance
|
39
59
|
def self.restart!
|
60
|
+
Documentalist.logger.debug("Restarting OpenOffice instance...")
|
40
61
|
(kill! if running?) and start!
|
62
|
+
Documentalist.logger.debug("...done !")
|
41
63
|
end
|
42
64
|
|
43
65
|
# Start new instance
|
44
66
|
def self.start!
|
67
|
+
Documentalist.logger.debug("Starting OpenOffice instance...")
|
45
68
|
raise "Already running!" if running?
|
46
69
|
|
47
|
-
log_path = Documentalist.config[:log_file] || "/dev/null"
|
48
|
-
|
49
70
|
command_line = "#{Documentalist.config[:open_office][:path]} -headless -accept=\"socket,host=127.0.0.1,port=8100\;urp\;\" -nofirststartwizard -nologo -nocrashreport -norestore -nolockcheck -nodefault"
|
50
|
-
command_line << " >> #{
|
71
|
+
command_line << " >> #{Documentalist.config[:log_file]} 2>&1"
|
51
72
|
command_line << " &"
|
52
73
|
|
53
74
|
system(command_line)
|
data/lib/backends/pdf_tools.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
-
# To change this template, choose Tools | Templates
|
2
|
-
# and open the template in the editor.
|
3
|
-
|
4
1
|
module Documentalist
|
5
2
|
module PdfTools
|
3
|
+
include Documentalist::Dependencies
|
4
|
+
|
5
|
+
depends_on_binaries! "pdftotext" => "install pdftools package"
|
6
|
+
|
6
7
|
def self.convert(origin, options)
|
7
8
|
if system("pdftotext #{origin} #{options[:destination]} > /dev/null 2>&1")
|
8
9
|
options[:destination]
|
data/lib/dependencies.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module Documentalist
|
2
|
+
module Dependencies
|
3
|
+
def self.included(base)
|
4
|
+
base.extend ClassMethods
|
5
|
+
end
|
6
|
+
|
7
|
+
module ClassMethods
|
8
|
+
def check_binary_dependency(binary, tip)
|
9
|
+
puts "Checking for presence of #{binary}... #{`which #{binary}`.empty? ? "Failed, you might want to #{tip}" : "OK"}"
|
10
|
+
end
|
11
|
+
|
12
|
+
def check_dependencies
|
13
|
+
@bin_dependencies.each { |k,v| check_binary_dependency(k,v) } if @bin_dependencies
|
14
|
+
end
|
15
|
+
|
16
|
+
def depends_on_binaries!(h)
|
17
|
+
@bin_dependencies = h
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/documentalist.rb
CHANGED
@@ -2,11 +2,8 @@ require 'rubygems'
|
|
2
2
|
require 'yaml'
|
3
3
|
require 'system_timer'
|
4
4
|
require 'logger'
|
5
|
-
|
6
|
-
|
7
|
-
Dir.glob(File.join(File.dirname(__FILE__), 'backends', '*.rb')).each do |backend|
|
8
|
-
require backend
|
9
|
-
end
|
5
|
+
require 'kconv'
|
6
|
+
require File.join(File.dirname(__FILE__),'dependencies')
|
10
7
|
|
11
8
|
module Documentalist
|
12
9
|
@@config = {}
|
@@ -36,12 +33,11 @@ module Documentalist
|
|
36
33
|
end
|
37
34
|
|
38
35
|
BACKENDS = {
|
39
|
-
OpenOffice => {[:odt, :doc, :rtf, :docx, :txt, :html, :htm, :wps] => [:odt, :doc, :rtf, :pdf, :txt, :html, :htm, :wps]},
|
40
|
-
NetPBM => {:ppm => [:jpg, :jpeg]},
|
41
|
-
PdfTools => {:pdf => :txt},
|
42
|
-
|
43
36
|
# Find a better pattern to pick backend, this one smells pretty bad
|
44
|
-
|
37
|
+
:WkHtmlToPdf => {[:html, :htm] => :pdf},
|
38
|
+
:OpenOffice => {[:odt, :doc, :rtf, :docx, :txt, :wps] => [:odt, :doc, :rtf, :pdf, :txt, :html, :htm, :wps]},
|
39
|
+
:NetPBM => {:ppm => [:jpg, :jpeg]},
|
40
|
+
:PdfTools => {:pdf => :txt},
|
45
41
|
}
|
46
42
|
|
47
43
|
# Finds the relevant server to perform the conversion
|
@@ -49,7 +45,7 @@ module Documentalist
|
|
49
45
|
origin = origin.to_s.gsub(/.*\./, "").to_sym
|
50
46
|
destination = destination.to_s.gsub(/.*\./, "").to_sym
|
51
47
|
|
52
|
-
BACKENDS.detect do |s, conversions|
|
48
|
+
BACKENDS.map { |b| [send(:const_get, b[0]), b[1]] }.detect do |s, conversions|
|
53
49
|
conversions.keys.flatten.include?(origin) and conversions.values.flatten.include?(destination)
|
54
50
|
end.to_a.first
|
55
51
|
end
|
@@ -58,57 +54,57 @@ module Documentalist
|
|
58
54
|
def self.convert(file, options={})
|
59
55
|
raise "#{file} does not exist !" unless File.exist?(file)
|
60
56
|
|
61
|
-
|
62
|
-
raise Documentalist::Error.new("No destination or format was given")
|
63
|
-
end
|
64
|
-
|
65
|
-
# Convert to plain text by default
|
66
|
-
options[:to_format] = options[:to_format] ? options[:to_format].to_sym : :txt
|
67
|
-
|
68
|
-
unless options[:to]
|
57
|
+
if options[:to_format]
|
69
58
|
options[:to] = file.gsub(/#{"\\" + File.extname(file)}$/, ".#{options[:to_format].to_s}")
|
59
|
+
elsif options[:to]
|
60
|
+
options[:to_format] = File.extname(options[:to]).gsub(/\./, "").to_sym
|
61
|
+
else
|
62
|
+
raise Documentalist::Error.new("No destination or format was given")
|
70
63
|
end
|
71
64
|
|
72
65
|
options[:from_format] = File.extname(file).gsub(/\./, "").to_sym
|
73
66
|
|
74
67
|
backend = backend_for_conversion(options[:from_format], options[:to_format])
|
75
|
-
|
68
|
+
backend.convert(file, options)
|
76
69
|
|
77
|
-
yield(
|
78
|
-
|
70
|
+
yield(options[:to]) if block_given?
|
71
|
+
options[:to]
|
79
72
|
end
|
80
73
|
|
81
74
|
def self.extract_text(file)
|
82
|
-
converted = convert(file, :
|
75
|
+
converted = convert(file, :to_format => :txt)
|
76
|
+
|
83
77
|
if converted and File.exist?(converted)
|
84
|
-
text = File.open(converted).read
|
78
|
+
text = Kconv.toutf8(File.open(converted).read)
|
85
79
|
FileUtils.rm(converted)
|
86
|
-
|
87
|
-
yield(extracted_text) if block_given?
|
80
|
+
yield(text) if block_given?
|
88
81
|
text
|
89
82
|
end
|
90
83
|
end
|
91
84
|
|
92
85
|
def self.extract_images(file)
|
93
|
-
temp_dir = File.join(
|
86
|
+
temp_dir = File.join(Dir.tmpdir, rand(10**9).to_s)
|
94
87
|
|
95
88
|
if File.extname(file) == '.pdf'
|
96
89
|
temp_file = File.join(temp_dir, File.basename(file))
|
97
90
|
|
98
|
-
|
99
|
-
|
91
|
+
FileUtils.mkdir_p temp_dir
|
92
|
+
FileUtils.cp file, temp_file
|
93
|
+
|
94
|
+
system "pdfimages #{temp_file} '#{File.join(temp_dir, "img")}'"
|
100
95
|
|
101
96
|
Dir.glob(File.join(temp_dir, "*.ppm")).each do |ppm_image|
|
102
|
-
|
97
|
+
#raise ppm_image
|
98
|
+
Documentalist.convert(ppm_image, :to_format => :jpeg)
|
103
99
|
end
|
104
100
|
else
|
105
|
-
convert file, :
|
101
|
+
Documentalist.convert file, :to_format => :html
|
106
102
|
end
|
107
103
|
|
108
|
-
|
104
|
+
image_files = Dir.glob(File.join(temp_dir, "*.{jpg,jpeg,bmp,tif,tiff,gif,png}"))
|
109
105
|
|
110
|
-
yield(
|
111
|
-
|
106
|
+
yield(image_files) if block_given?
|
107
|
+
image_files
|
112
108
|
end
|
113
109
|
|
114
110
|
# Runs a block with a system-enforced timeout and optionally retry with an
|
@@ -130,8 +126,10 @@ module Documentalist
|
|
130
126
|
end
|
131
127
|
end
|
132
128
|
|
129
|
+
# Returns the logger object used to log documentalist operations
|
133
130
|
def self.logger
|
134
|
-
unless @@logger
|
131
|
+
unless @@logger
|
132
|
+
Documentalist.config[:log_file] ||= File.join(File.dirname(File.expand_path(__FILE__)), %w{.. documentalist.log})
|
135
133
|
@@logger = Logger.new(Documentalist.config[:log_file])
|
136
134
|
@@logger.level = Logger.const_get(config[:log_level] ? config[:log_level].upcase : "WARN")
|
137
135
|
end
|
@@ -139,6 +137,20 @@ module Documentalist
|
|
139
137
|
@@logger
|
140
138
|
end
|
141
139
|
|
140
|
+
# Checks the dependencies for backends
|
141
|
+
def self.check_dependencies
|
142
|
+
puts "Checking backends system dependencies"
|
143
|
+
|
144
|
+
Documentalist.constants.each do |backend|
|
145
|
+
backend = Documentalist.const_get backend.to_sym
|
146
|
+
|
147
|
+
if backend.respond_to? :check_dependencies
|
148
|
+
puts "Checking dependencies for #{backend.to_s}"
|
149
|
+
backend.send :check_dependencies
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
142
154
|
# Returns a new hash with recursively symbolized keys
|
143
155
|
def self.symbolize(hash)
|
144
156
|
hash.each_key do |key|
|
@@ -148,4 +160,9 @@ module Documentalist
|
|
148
160
|
end
|
149
161
|
|
150
162
|
class Error < RuntimeError; end
|
151
|
-
end
|
163
|
+
end
|
164
|
+
|
165
|
+
# Require all backends
|
166
|
+
Dir.glob(File.join(File.dirname(__FILE__), 'backends', '*.rb')).each do |backend|
|
167
|
+
require backend
|
168
|
+
end
|
data/rails/init.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
|
3
3
|
require File.join(File.dirname(__FILE__), %w{initialize_configuration})
|
4
|
-
require File.join(File.dirname(__FILE__), %w{..
|
4
|
+
require File.join(File.dirname(__FILE__), %w{.. init})
|
5
5
|
|
6
6
|
# Load configuration from Rails.root/config/documentalist.yml
|
7
7
|
Documentalist.config_from_yaml! File.join(RAILS_ROOT, %w{config documentalist.yml}), :section => RAILS_ENV
|
8
8
|
|
9
9
|
# Set a default for the logfile if it hasn't been provided by the configuration file
|
10
|
-
unless Documentalist.config[:
|
11
|
-
Documentalist.config[:
|
10
|
+
unless Documentalist.config[:log_file]
|
11
|
+
Documentalist.config[:log_file] = File.join(RAILS_ROOT, "log", "documentalist-#{RAILS_ENV}.log")
|
12
12
|
end
|
data/test/documentalist_test.rb
CHANGED
@@ -18,8 +18,8 @@ class DocumentalistTest < Test::Unit::TestCase
|
|
18
18
|
:d => "e"
|
19
19
|
}
|
20
20
|
}
|
21
|
-
|
22
|
-
assert_equal Documentalist.send(:symbolize, hash),
|
21
|
+
|
22
|
+
assert_equal Documentalist.send(:symbolize, hash),
|
23
23
|
symbolized,
|
24
24
|
"Hash wasn't properly symbolized"
|
25
25
|
end
|
@@ -44,6 +44,11 @@ class DocumentalistTest < Test::Unit::TestCase
|
|
44
44
|
log_file = File.join(Dir.tmpdir, "#{rand(10 ** 9).to_s}.log")
|
45
45
|
|
46
46
|
Documentalist.config[:log_file] = log_file
|
47
|
+
Documentalist.config[:log_level] = 'warn'
|
48
|
+
|
49
|
+
# Reset logger
|
50
|
+
Documentalist.send :class_variable_set, :@@logger, nil
|
51
|
+
|
47
52
|
assert !File.exists?(log_file), "Log file already exists"
|
48
53
|
|
49
54
|
Documentalist.logger
|
@@ -56,8 +61,17 @@ class DocumentalistTest < Test::Unit::TestCase
|
|
56
61
|
assert_difference("File.size(\"#{log_file}\")", nil, "Nothing should have been written") do
|
57
62
|
Documentalist.logger.warn("This message should be written !")
|
58
63
|
end
|
59
|
-
|
64
|
+
|
65
|
+
# Reset logger
|
66
|
+
Documentalist.send :class_variable_set, :@@logger, nil
|
67
|
+
|
60
68
|
FileUtils.rm(log_file)
|
61
69
|
assert !File.exists?(log_file), "Log file hasn't been removed properly"
|
62
70
|
end
|
71
|
+
|
72
|
+
def test_extract_text
|
73
|
+
assert_match /thing/,
|
74
|
+
Documentalist.extract_text(fixture_001),
|
75
|
+
"Text was not properly extracted for fixture 001"
|
76
|
+
end
|
63
77
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml">
|
4
|
+
<head>
|
5
|
+
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
|
6
|
+
<title>Title</title>
|
7
|
+
</head>
|
8
|
+
<body>
|
9
|
+
<p>
|
10
|
+
Test content
|
11
|
+
</p>
|
12
|
+
</body>
|
13
|
+
</html>
|
@@ -28,11 +28,13 @@ class RailsIntegrationTest < Test::Unit::TestCase
|
|
28
28
|
assert File.exists?(File.join(RAILS_ROOT, %w{config documentalist.yml})),
|
29
29
|
"Configuration file did not get copied properly"
|
30
30
|
|
31
|
-
assert_equal Documentalist.config[:
|
31
|
+
assert_equal Documentalist.config[:log_file], File.join(RAILS_ROOT, "log", "documentalist-#{RAILS_ENV}.log")
|
32
32
|
|
33
33
|
# Delete fake RAILS_ROOT
|
34
34
|
FileUtils.rm_rf tmp_dir
|
35
35
|
|
36
|
+
# Reset logger
|
37
|
+
|
36
38
|
# Check that we cleaned our mess up
|
37
39
|
assert !File.exist?(File.join(RAILS_ROOT, %w{config documentalist.yml})), "Temporary file hasn't been removed properly"
|
38
40
|
end
|
data/test/test_helper.rb
CHANGED
@@ -7,6 +7,10 @@ def fixture_001
|
|
7
7
|
File.join(File.dirname(__FILE__), "fixtures/fixture_001.odt")
|
8
8
|
end
|
9
9
|
|
10
|
+
def fixture_002
|
11
|
+
File.join(File.dirname(__FILE__), "fixtures/fixture_002.html")
|
12
|
+
end
|
13
|
+
|
10
14
|
class Test::Unit::TestCase
|
11
15
|
def assert_difference(code, difference = 0, message = nil)
|
12
16
|
message = "Returned values were equal" unless message
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class WkHtmlToPdfTest < Test::Unit::TestCase
|
4
|
+
def test_right_backend_is_picked
|
5
|
+
assert_equal Documentalist.backend_for_conversion("test.html", "test.pdf"),
|
6
|
+
Documentalist::WkHtmlToPdf,
|
7
|
+
"Wrong backend picked"
|
8
|
+
end
|
9
|
+
|
10
|
+
def test_conversion
|
11
|
+
temp_file = File.join(Dir.tmpdir, "#{rand(10**9)}.pdf")
|
12
|
+
|
13
|
+
Documentalist.convert(fixture_002, :to => temp_file)
|
14
|
+
assert File.exists?(temp_file), "No converted PDF created"
|
15
|
+
|
16
|
+
assert_match /Test content/, Documentalist.extract_text(temp_file)
|
17
|
+
|
18
|
+
FileUtils.rm temp_file
|
19
|
+
assert !File.exists?(temp_file), "We didn't clean up properly"
|
20
|
+
end
|
21
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: documentalist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 1
|
9
|
-
-
|
10
|
-
version: 0.1.
|
9
|
+
- 2
|
10
|
+
version: 0.1.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- David FRANCOIS
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-07-04 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -102,6 +102,8 @@ extra_rdoc_files:
|
|
102
102
|
- lib/backends/open_office/bridges/pyodconverter.py
|
103
103
|
- lib/backends/open_office/server.rb
|
104
104
|
- lib/backends/pdf_tools.rb
|
105
|
+
- lib/backends/wkhtmltopdf.rb
|
106
|
+
- lib/dependencies.rb
|
105
107
|
- lib/documentalist.rb
|
106
108
|
- lib/tasks/tasks.rb
|
107
109
|
files:
|
@@ -139,6 +141,8 @@ files:
|
|
139
141
|
- lib/backends/open_office/bridges/pyodconverter.py
|
140
142
|
- lib/backends/open_office/server.rb
|
141
143
|
- lib/backends/pdf_tools.rb
|
144
|
+
- lib/backends/wkhtmltopdf.rb
|
145
|
+
- lib/dependencies.rb
|
142
146
|
- lib/documentalist.rb
|
143
147
|
- lib/tasks/tasks.rb
|
144
148
|
- rails/config/documentalist.yml.tpl
|
@@ -146,12 +150,14 @@ files:
|
|
146
150
|
- rails/initialize_configuration.rb
|
147
151
|
- test/documentalist_test.rb
|
148
152
|
- test/fixtures/fixture_001.odt
|
153
|
+
- test/fixtures/fixture_002.html
|
149
154
|
- test/net_pbm_test.rb
|
150
155
|
- test/odf_merge_test.rb
|
151
156
|
- test/open_office_test.rb
|
152
157
|
- test/pdf_tools_test.rb
|
153
158
|
- test/rails_integration_test.rb
|
154
159
|
- test/test_helper.rb
|
160
|
+
- test/wkhtmltopdf_test.rb
|
155
161
|
- Manifest
|
156
162
|
has_rdoc: true
|
157
163
|
homepage: http://github.com/davout/documentalist
|
@@ -194,6 +200,7 @@ signing_key:
|
|
194
200
|
specification_version: 3
|
195
201
|
summary: The smooth document management experience, usable as a Rails gem plugin or standalone in any ruby application
|
196
202
|
test_files:
|
203
|
+
- test/wkhtmltopdf_test.rb
|
197
204
|
- test/open_office_test.rb
|
198
205
|
- test/odf_merge_test.rb
|
199
206
|
- test/documentalist_test.rb
|