documentalist 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest CHANGED
@@ -32,6 +32,8 @@ lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-cli-2.2.2-s
32
32
  lib/backends/open_office/bridges/pyodconverter.py
33
33
  lib/backends/open_office/server.rb
34
34
  lib/backends/pdf_tools.rb
35
+ lib/backends/wkhtmltopdf.rb
36
+ lib/dependencies.rb
35
37
  lib/documentalist.rb
36
38
  lib/tasks/tasks.rb
37
39
  rails/config/documentalist.yml.tpl
@@ -39,10 +41,12 @@ rails/init.rb
39
41
  rails/initialize_configuration.rb
40
42
  test/documentalist_test.rb
41
43
  test/fixtures/fixture_001.odt
44
+ test/fixtures/fixture_002.html
42
45
  test/net_pbm_test.rb
43
46
  test/odf_merge_test.rb
44
47
  test/open_office_test.rb
45
48
  test/pdf_tools_test.rb
46
49
  test/rails_integration_test.rb
47
50
  test/test_helper.rb
51
+ test/wkhtmltopdf_test.rb
48
52
  Manifest
data/README.rdoc CHANGED
@@ -1,3 +1,15 @@
1
1
  = Documentalist
2
+ Rails gem for easily managing documents, converting them from one format to another,
3
+ and merging data into ODF templates.
2
4
 
3
- Rails gem for talking to OpenOffice and merging data into OpenDocument templates using an ERB-like syntax.
5
+ = Checking for external dependencies
6
+ Documentalist assembles various moving parts that are necessary to perform the actual
7
+ conversions, you can run the documentalist:backends:checks task to see if these
8
+ dependencies are met on your system; if they aren't, it will give you some tips
9
+ on how to fix it.
10
+
11
+ = Examples
12
+ Documentalist.convert('/home/somefile.doc', :to => '/home/someotherfile.pdf')
13
+
14
+ = Installation
15
+ Require the gem in your Rails or Ruby application and you should be good to go !
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ require 'rake'
5
5
  require 'echoe' rescue nil
6
6
 
7
7
  if Object.const_defined? :Echoe
8
- Echoe.new('documentalist', '0.1.1') do |p|
8
+ Echoe.new('documentalist', '0.1.2') do |p|
9
9
  p.description = "The smooth document management experience, usable as a Rails gem plugin or standalone in any ruby application"
10
10
  p.url = "http://github.com/davout/documentalist"
11
11
  p.author = "David FRANCOIS"
@@ -14,19 +14,15 @@ if Object.const_defined? :Echoe
14
14
  p.test_pattern = "test/**/*.rb"
15
15
  p.development_dependencies = ['flexmock >=0.8.6']
16
16
  p.runtime_dependencies = ['zip >=2.0.2', 'SystemTimer >=1.2']
17
+ end
18
+ end
17
19
 
18
- # TODO : Enforce some dependencies and don't make backend available if
19
- # the dependency is not met
20
- #
21
- # wkhtmltopdf
22
- # iconv
23
- # pgrep
24
- # ps
25
- # pkill
26
- # grep
27
- # file
28
- # which
29
- # jod converter
30
- # open office 3
20
+ namespace :documentalist do
21
+ namespace :backends do
22
+ desc "Checks that the required dependencies are met for the different backends"
23
+ task :checks do
24
+ require File.join(File.dirname(__FILE__), "init")
25
+ Documentalist.check_dependencies
26
+ end
31
27
  end
32
28
  end
data/config/default.yml CHANGED
@@ -1,5 +1,5 @@
1
1
  # Default configuration in case none is provided
2
- log_path: ~/documentalist.log
2
+ log_level: debug
3
3
 
4
4
  python:
5
5
  path: /usr/bin/python
@@ -12,6 +12,6 @@ open_office:
12
12
  bridge: JOD
13
13
  max_cpu: 80
14
14
  max_startup_time: 3
15
- wakeup_time: 3
15
+ wakeup_time: 2
16
16
  max_conversion_attempts: 3
17
17
  max_conversion_time: 5
@@ -2,22 +2,22 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{documentalist}
5
- s.version = "0.1.1"
5
+ s.version = "0.1.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["David FRANCOIS"]
9
- s.date = %q{2010-06-30}
9
+ s.date = %q{2010-07-04}
10
10
  s.description = %q{The smooth document management experience, usable as a Rails gem plugin or standalone in any ruby application}
11
11
  s.email = %q{david.francois@webflows.fr}
12
- s.extra_rdoc_files = ["README.rdoc", "lib/backends/net_pbm.rb", "lib/backends/odf_merge.rb", "lib/backends/open_office.rb", "lib/backends/open_office/bridges/jodconverter-2.2.2/ChangeLog.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/LICENSE.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/README.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/jodconverter-2.2.2-javadoc.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-commons-io.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-openoffice.org.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-slf4j.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-xstream.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/document-formats.xml", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/DEPENDENCIES.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-cli-1.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-io-1.4.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-cli-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/juh-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jurt-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/ridl-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-api-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-jdk14-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/unoil-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/xstream-1.3.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-2.2.2-sources.jar", 
"lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-cli-2.2.2-sources.jar", "lib/backends/open_office/bridges/pyodconverter.py", "lib/backends/open_office/server.rb", "lib/backends/pdf_tools.rb", "lib/documentalist.rb", "lib/tasks/tasks.rb"]
13
- s.files = ["README.rdoc", "Rakefile", "config/default.yml", "documentalist.gemspec", "init.rb", "lib/backends/net_pbm.rb", "lib/backends/odf_merge.rb", "lib/backends/open_office.rb", "lib/backends/open_office/bridges/jodconverter-2.2.2/ChangeLog.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/LICENSE.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/README.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/jodconverter-2.2.2-javadoc.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-commons-io.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-openoffice.org.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-slf4j.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-xstream.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/document-formats.xml", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/DEPENDENCIES.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-cli-1.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-io-1.4.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-cli-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/juh-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jurt-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/ridl-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-api-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-jdk14-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/unoil-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/xstream-1.3.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-2.2.2-sources.jar", 
"lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-cli-2.2.2-sources.jar", "lib/backends/open_office/bridges/pyodconverter.py", "lib/backends/open_office/server.rb", "lib/backends/pdf_tools.rb", "lib/documentalist.rb", "lib/tasks/tasks.rb", "rails/config/documentalist.yml.tpl", "rails/init.rb", "rails/initialize_configuration.rb", "test/documentalist_test.rb", "test/fixtures/fixture_001.odt", "test/net_pbm_test.rb", "test/odf_merge_test.rb", "test/open_office_test.rb", "test/pdf_tools_test.rb", "test/rails_integration_test.rb", "test/test_helper.rb", "Manifest"]
12
+ s.extra_rdoc_files = ["README.rdoc", "lib/backends/net_pbm.rb", "lib/backends/odf_merge.rb", "lib/backends/open_office.rb", "lib/backends/open_office/bridges/jodconverter-2.2.2/ChangeLog.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/LICENSE.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/README.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/jodconverter-2.2.2-javadoc.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-commons-io.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-openoffice.org.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-slf4j.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-xstream.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/document-formats.xml", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/DEPENDENCIES.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-cli-1.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-io-1.4.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-cli-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/juh-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jurt-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/ridl-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-api-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-jdk14-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/unoil-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/xstream-1.3.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-2.2.2-sources.jar", 
"lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-cli-2.2.2-sources.jar", "lib/backends/open_office/bridges/pyodconverter.py", "lib/backends/open_office/server.rb", "lib/backends/pdf_tools.rb", "lib/backends/wkhtmltopdf.rb", "lib/dependencies.rb", "lib/documentalist.rb", "lib/tasks/tasks.rb"]
13
+ s.files = ["README.rdoc", "Rakefile", "config/default.yml", "documentalist.gemspec", "init.rb", "lib/backends/net_pbm.rb", "lib/backends/odf_merge.rb", "lib/backends/open_office.rb", "lib/backends/open_office/bridges/jodconverter-2.2.2/ChangeLog.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/LICENSE.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/README.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/jodconverter-2.2.2-javadoc.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-commons-io.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-openoffice.org.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-slf4j.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/docs/third-party-licenses/license-xstream.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/document-formats.xml", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/DEPENDENCIES.txt", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-cli-1.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/commons-io-1.4.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jodconverter-cli-2.2.2.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/juh-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/jurt-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/ridl-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-api-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/slf4j-jdk14-1.5.6.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/unoil-3.0.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/lib/xstream-1.3.1.jar", "lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-2.2.2-sources.jar", 
"lib/backends/open_office/bridges/jodconverter-2.2.2/src/jodconverter-cli-2.2.2-sources.jar", "lib/backends/open_office/bridges/pyodconverter.py", "lib/backends/open_office/server.rb", "lib/backends/pdf_tools.rb", "lib/backends/wkhtmltopdf.rb", "lib/dependencies.rb", "lib/documentalist.rb", "lib/tasks/tasks.rb", "rails/config/documentalist.yml.tpl", "rails/init.rb", "rails/initialize_configuration.rb", "test/documentalist_test.rb", "test/fixtures/fixture_001.odt", "test/fixtures/fixture_002.html", "test/net_pbm_test.rb", "test/odf_merge_test.rb", "test/open_office_test.rb", "test/pdf_tools_test.rb", "test/rails_integration_test.rb", "test/test_helper.rb", "test/wkhtmltopdf_test.rb", "Manifest"]
14
14
  s.homepage = %q{http://github.com/davout/documentalist}
15
15
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Documentalist", "--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
17
17
  s.rubyforge_project = %q{documentalist}
18
18
  s.rubygems_version = %q{1.3.7}
19
19
  s.summary = %q{The smooth document management experience, usable as a Rails gem plugin or standalone in any ruby application}
20
- s.test_files = ["test/open_office_test.rb", "test/odf_merge_test.rb", "test/documentalist_test.rb", "test/rails_integration_test.rb", "test/test_helper.rb", "test/pdf_tools_test.rb", "test/net_pbm_test.rb"]
20
+ s.test_files = ["test/wkhtmltopdf_test.rb", "test/open_office_test.rb", "test/odf_merge_test.rb", "test/documentalist_test.rb", "test/rails_integration_test.rb", "test/test_helper.rb", "test/pdf_tools_test.rb", "test/net_pbm_test.rb"]
21
21
 
22
22
  if s.respond_to? :specification_version then
23
23
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
data/init.rb CHANGED
@@ -0,0 +1 @@
1
+ require File.join(File.dirname(__FILE__), %w{lib documentalist})
@@ -1,10 +1,11 @@
1
- # To change this template, choose Tools | Templates
2
- # and open the template in the editor.
3
-
4
1
  module Documentalist
5
2
  module NetPBM
6
- def convert
7
- system("cd #{temp_dir} && ppmtojpeg #{ppm_image} > #{ppm_image.gsub(/ppm$/, "jpg")}")
3
+ include Documentalist::Dependencies
4
+
5
+ depends_on_binaries! "ppmtojpeg" => "install netpbm package"
6
+
7
+ def self.convert(file, options)
8
+ system "ppmtojpeg #{file} > #{options[:to]}"
8
9
  end
9
10
  end
10
11
  end
@@ -10,6 +10,8 @@ module Documentalist
10
10
 
11
11
  # This module provides open document merge functionality
12
12
  module ODFMerge
13
+ include Documentalist::Dependencies
14
+
13
15
  def self.merge_string(string, options = {})
14
16
  locals = options[:locals]
15
17
 
@@ -1,20 +1,40 @@
1
1
  module Documentalist
2
2
  module OpenOffice
3
+ include Documentalist::Dependencies
4
+
5
+ depends_on_binaries! "ps" => "use Documentalist in a Posix compliant OS",
6
+ Documentalist.config[:open_office][:path] => "install Open Office and correctly configure the path to its binary",
7
+ "pkill" => "install pkill binary and make it available through the PATH",
8
+ "pgrep" => "install pgrep binary and make it available through the PATH",
9
+ "java" => "install java",
10
+ "python" => "install python",
11
+ "file" => "install file binary",
12
+ "iconv" => "install iconv binary"
13
+
3
14
  # Converts documents
4
15
  def self.convert(origin, options)
16
+ Documentalist.logger.debug("Going to convert #{origin} to #{options[:to]}")
17
+
5
18
  # See how to make OpenOffice startup as smooth as possible and not on first conversion
6
19
  # OO auto-start option if in Rails app ?
7
20
  Server.ensure_available
8
21
 
22
+ # TODO : manage multi OO instances : http://code.google.com/p/jodconverter/wiki/GettingStarted
23
+
9
24
  Documentalist.timeout(Documentalist.config[:open_office][:max_conversion_time], :attempts => Documentalist.config[:open_office][:max_conversion_attempts]) do
10
25
  if Documentalist.config[:open_office][:bridge] == 'JOD'
11
- # TODO : manage multi ooo instances : http://code.google.com/p/jodconverter/wiki/GettingStarted
12
-
13
- system("#{Documentalist.config[:java][:path]} -jar #{File.join(File.dirname(__FILE__), %w{open_office bridges jodconverter-2.2.2 lib jodconverter-cli-2.2.2.jar})} #{origin} #{options[:to]}")
26
+ command = "#{Documentalist.config[:java][:path]} -jar #{File.join(File.dirname(__FILE__), %w{open_office bridges jodconverter-2.2.2 lib jodconverter-cli-2.2.2.jar})} #{origin} #{options[:to]}"
14
27
  elsif Documentalist.config[:open_office][:bridge] == 'PYOD'
15
- system("#{Documentalist.config[:python][:path]} #{File.join(File.dirname(__FILE__), %w{open_office bridges pyodconverter.py})} #{origin} #{options[:to]}")
28
+ command = "#{Documentalist.config[:python][:path]} #{File.join(File.dirname(__FILE__), %w{open_office bridges pyodconverter.py})} #{origin} #{options[:to]}"
16
29
  end
17
30
 
31
+ if Documentalist.config[:log_file] and !Documentalist.config[:log_file].empty?
32
+ command += " >> #{Documentalist.config[:log_file]} 2>&1"
33
+ end
34
+
35
+ Documentalist.logger.debug("Going to run #{Documentalist.config[:open_office][:bridge]} bridge with command -- #{command}")
36
+ system(command)
37
+
18
38
  self.convert_txt_to_utf8(options[:to]) if options[:to_format] == :txt
19
39
 
20
40
  options[:to]
@@ -37,17 +57,18 @@ module Documentalist
37
57
 
38
58
  # Restart if running or start new instance
39
59
  def self.restart!
60
+ Documentalist.logger.debug("Restarting OpenOffice instance...")
40
61
  (kill! if running?) and start!
62
+ Documentalist.logger.debug("...done !")
41
63
  end
42
64
 
43
65
  # Start new instance
44
66
  def self.start!
67
+ Documentalist.logger.debug("Starting OpenOffice instance...")
45
68
  raise "Already running!" if running?
46
69
 
47
- log_path = Documentalist.config[:log_file] || "/dev/null"
48
-
49
70
  command_line = "#{Documentalist.config[:open_office][:path]} -headless -accept=\"socket,host=127.0.0.1,port=8100\;urp\;\" -nofirststartwizard -nologo -nocrashreport -norestore -nolockcheck -nodefault"
50
- command_line << " >> #{log_path} 2>&1"
71
+ command_line << " >> #{Documentalist.config[:log_file]} 2>&1"
51
72
  command_line << " &"
52
73
 
53
74
  system(command_line)
@@ -1,8 +1,9 @@
1
- # To change this template, choose Tools | Templates
2
- # and open the template in the editor.
3
-
4
1
  module Documentalist
5
2
  module PdfTools
3
+ include Documentalist::Dependencies
4
+
5
+ depends_on_binaries! "pdftotext" => "install pdftools package"
6
+
6
7
  def self.convert(origin, options)
7
8
  if system("pdftotext #{origin} #{options[:destination]} > /dev/null 2>&1")
8
9
  options[:destination]
@@ -0,0 +1,11 @@
1
+ module Documentalist
2
+ module WkHtmlToPdf
3
+ include Documentalist::Dependencies
4
+
5
+ depends_on_binaries! "wkhtmltopdf" => "install wkhtmltopdf package"
6
+
7
+ def self.convert(file, options)
8
+ system "wkhtmltopdf -q #{file} #{options[:to]}"
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,21 @@
1
+ module Documentalist
2
+ module Dependencies
3
+ def self.included(base)
4
+ base.extend ClassMethods
5
+ end
6
+
7
+ module ClassMethods
8
+ def check_binary_dependency(binary, tip)
9
+ puts "Checking for presence of #{binary}... #{`which #{binary}`.empty? ? "Failed, you might want to #{tip}" : "OK"}"
10
+ end
11
+
12
+ def check_dependencies
13
+ @bin_dependencies.each { |k,v| check_binary_dependency(k,v) } if @bin_dependencies
14
+ end
15
+
16
+ def depends_on_binaries!(h)
17
+ @bin_dependencies = h
18
+ end
19
+ end
20
+ end
21
+ end
data/lib/documentalist.rb CHANGED
@@ -2,11 +2,8 @@ require 'rubygems'
2
2
  require 'yaml'
3
3
  require 'system_timer'
4
4
  require 'logger'
5
-
6
- # Require all backends
7
- Dir.glob(File.join(File.dirname(__FILE__), 'backends', '*.rb')).each do |backend|
8
- require backend
9
- end
5
+ require 'kconv'
6
+ require File.join(File.dirname(__FILE__),'dependencies')
10
7
 
11
8
  module Documentalist
12
9
  @@config = {}
@@ -36,12 +33,11 @@ module Documentalist
36
33
  end
37
34
 
38
35
  BACKENDS = {
39
- OpenOffice => {[:odt, :doc, :rtf, :docx, :txt, :html, :htm, :wps] => [:odt, :doc, :rtf, :pdf, :txt, :html, :htm, :wps]},
40
- NetPBM => {:ppm => [:jpg, :jpeg]},
41
- PdfTools => {:pdf => :txt},
42
-
43
36
  # Find a better pattern to pick backend, this one smells pretty bad
44
- # WkHTML2PDF => {[:html, :htm] => :pdf}
37
+ :WkHtmlToPdf => {[:html, :htm] => :pdf},
38
+ :OpenOffice => {[:odt, :doc, :rtf, :docx, :txt, :wps] => [:odt, :doc, :rtf, :pdf, :txt, :html, :htm, :wps]},
39
+ :NetPBM => {:ppm => [:jpg, :jpeg]},
40
+ :PdfTools => {:pdf => :txt},
45
41
  }
46
42
 
47
43
  # Finds the relevant server to perform the conversion
@@ -49,7 +45,7 @@ module Documentalist
49
45
  origin = origin.to_s.gsub(/.*\./, "").to_sym
50
46
  destination = destination.to_s.gsub(/.*\./, "").to_sym
51
47
 
52
- BACKENDS.detect do |s, conversions|
48
+ BACKENDS.map { |b| [send(:const_get, b[0]), b[1]] }.detect do |s, conversions|
53
49
  conversions.keys.flatten.include?(origin) and conversions.values.flatten.include?(destination)
54
50
  end.to_a.first
55
51
  end
@@ -58,57 +54,57 @@ module Documentalist
58
54
  def self.convert(file, options={})
59
55
  raise "#{file} does not exist !" unless File.exist?(file)
60
56
 
61
- unless options[:to] or options[:to_format]
62
- raise Documentalist::Error.new("No destination or format was given")
63
- end
64
-
65
- # Convert to plain text by default
66
- options[:to_format] = options[:to_format] ? options[:to_format].to_sym : :txt
67
-
68
- unless options[:to]
57
+ if options[:to_format]
69
58
  options[:to] = file.gsub(/#{"\\" + File.extname(file)}$/, ".#{options[:to_format].to_s}")
59
+ elsif options[:to]
60
+ options[:to_format] = File.extname(options[:to]).gsub(/\./, "").to_sym
61
+ else
62
+ raise Documentalist::Error.new("No destination or format was given")
70
63
  end
71
64
 
72
65
  options[:from_format] = File.extname(file).gsub(/\./, "").to_sym
73
66
 
74
67
  backend = backend_for_conversion(options[:from_format], options[:to_format])
75
- converted = backend.convert(file, options)
68
+ backend.convert(file, options)
76
69
 
77
- yield(converted) if block_given?
78
- converted
70
+ yield(options[:to]) if block_given?
71
+ options[:to]
79
72
  end
80
73
 
81
74
  def self.extract_text(file)
82
- converted = convert(file, :to => :txt)
75
+ converted = convert(file, :to_format => :txt)
76
+
83
77
  if converted and File.exist?(converted)
84
- text = File.open(converted).read.toutf8
78
+ text = Kconv.toutf8(File.open(converted).read)
85
79
  FileUtils.rm(converted)
86
-
87
- yield(extracted_text) if block_given?
80
+ yield(text) if block_given?
88
81
  text
89
82
  end
90
83
  end
91
84
 
92
85
  def self.extract_images(file)
93
- temp_dir = File.join(CONVERSIONS_PATH, (Time.new.to_f*100_000).to_i.to_s)
86
+ temp_dir = File.join(Dir.tmpdir, rand(10**9).to_s)
94
87
 
95
88
  if File.extname(file) == '.pdf'
96
89
  temp_file = File.join(temp_dir, File.basename(file))
97
90
 
98
- system "mkdir #{temp_dir} && cp #{file} #{temp_file}"
99
- system "cd #{temp_dir} && pdfimages #{temp_file} 'img'"
91
+ FileUtils.mkdir_p temp_dir
92
+ FileUtils.cp file, temp_file
93
+
94
+ system "pdfimages #{temp_file} '#{File.join(temp_dir, "img")}'"
100
95
 
101
96
  Dir.glob(File.join(temp_dir, "*.ppm")).each do |ppm_image|
102
- Documentalist.convert(ppm_image, :to => :jpeg)
97
+ #raise ppm_image
98
+ Documentalist.convert(ppm_image, :to_format => :jpeg)
103
99
  end
104
100
  else
105
- convert file, :to => :html, :directory => temp_dir
101
+ Documentalist.convert file, :to_format => :html
106
102
  end
107
103
 
108
- image_file_names = Dir.glob(File.join(temp_dir, "*.{jpg,jpeg,bmp,tif,tiff,gif,png}"))
104
+ image_files = Dir.glob(File.join(temp_dir, "*.{jpg,jpeg,bmp,tif,tiff,gif,png}"))
109
105
 
110
- yield(image_file_names) if block_given?
111
- image_file_names
106
+ yield(image_files) if block_given?
107
+ image_files
112
108
  end
113
109
 
114
110
  # Runs a block with a system-enforced timeout and optionally retry with an
@@ -130,8 +126,10 @@ module Documentalist
130
126
  end
131
127
  end
132
128
 
129
+ # Returns the logger object used to log documentalist operations
133
130
  def self.logger
134
- unless @@logger
131
+ unless @@logger
132
+ Documentalist.config[:log_file] ||= File.join(File.dirname(File.expand_path(__FILE__)), %w{.. documentalist.log})
135
133
  @@logger = Logger.new(Documentalist.config[:log_file])
136
134
  @@logger.level = Logger.const_get(config[:log_level] ? config[:log_level].upcase : "WARN")
137
135
  end
@@ -139,6 +137,20 @@ module Documentalist
139
137
  @@logger
140
138
  end
141
139
 
140
+ # Checks the dependencies for backends
141
+ def self.check_dependencies
142
+ puts "Checking backends system dependencies"
143
+
144
+ Documentalist.constants.each do |backend|
145
+ backend = Documentalist.const_get backend.to_sym
146
+
147
+ if backend.respond_to? :check_dependencies
148
+ puts "Checking dependencies for #{backend.to_s}"
149
+ backend.send :check_dependencies
150
+ end
151
+ end
152
+ end
153
+
142
154
  # Returns a new hash with recursively symbolized keys
143
155
  def self.symbolize(hash)
144
156
  hash.each_key do |key|
@@ -148,4 +160,9 @@ module Documentalist
148
160
  end
149
161
 
150
162
  class Error < RuntimeError; end
151
- end
163
+ end
164
+
165
+ # Require all backends
166
+ Dir.glob(File.join(File.dirname(__FILE__), 'backends', '*.rb')).each do |backend|
167
+ require backend
168
+ end
data/rails/init.rb CHANGED
@@ -1,12 +1,12 @@
1
1
  require 'yaml'
2
2
 
3
3
  require File.join(File.dirname(__FILE__), %w{initialize_configuration})
4
- require File.join(File.dirname(__FILE__), %w{.. lib documentalist})
4
+ require File.join(File.dirname(__FILE__), %w{.. init})
5
5
 
6
6
  # Load configuration from Rails.root/config/documentalist.yml
7
7
  Documentalist.config_from_yaml! File.join(RAILS_ROOT, %w{config documentalist.yml}), :section => RAILS_ENV
8
8
 
9
9
  # Set a default for the logfile if it hasn't been provided by the configuration file
10
- unless Documentalist.config[:logfile]
11
- Documentalist.config[:logfile] = File.join(RAILS_ROOT, %w{log documentalist-#{RAILS_ENV}.log})
10
+ unless Documentalist.config[:log_file]
11
+ Documentalist.config[:log_file] = File.join(RAILS_ROOT, "log", "documentalist-#{RAILS_ENV}.log")
12
12
  end
@@ -18,8 +18,8 @@ class DocumentalistTest < Test::Unit::TestCase
18
18
  :d => "e"
19
19
  }
20
20
  }
21
-
22
- assert_equal Documentalist.send(:symbolize, hash),
21
+
22
+ assert_equal Documentalist.send(:symbolize, hash),
23
23
  symbolized,
24
24
  "Hash wasn't properly symbolized"
25
25
  end
@@ -44,6 +44,11 @@ class DocumentalistTest < Test::Unit::TestCase
44
44
  log_file = File.join(Dir.tmpdir, "#{rand(10 ** 9).to_s}.log")
45
45
 
46
46
  Documentalist.config[:log_file] = log_file
47
+ Documentalist.config[:log_level] = 'warn'
48
+
49
+ # Reset logger
50
+ Documentalist.send :class_variable_set, :@@logger, nil
51
+
47
52
  assert !File.exists?(log_file), "Log file already exists"
48
53
 
49
54
  Documentalist.logger
@@ -56,8 +61,17 @@ class DocumentalistTest < Test::Unit::TestCase
56
61
  assert_difference("File.size(\"#{log_file}\")", nil, "Nothing should have been written") do
57
62
  Documentalist.logger.warn("This message should be written !")
58
63
  end
59
-
64
+
65
+ # Reset logger
66
+ Documentalist.send :class_variable_set, :@@logger, nil
67
+
60
68
  FileUtils.rm(log_file)
61
69
  assert !File.exists?(log_file), "Log file hasn't been removed properly"
62
70
  end
71
+
72
+ def test_extract_text
73
+ assert_match /thing/,
74
+ Documentalist.extract_text(fixture_001),
75
+ "Text was not properly extracted for fixture 001"
76
+ end
63
77
  end
@@ -0,0 +1,13 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3
+ <html xmlns="http://www.w3.org/1999/xhtml">
4
+ <head>
5
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
6
+ <title>Title</title>
7
+ </head>
8
+ <body>
9
+ <p>
10
+ Test content
11
+ </p>
12
+ </body>
13
+ </html>
@@ -28,11 +28,13 @@ class RailsIntegrationTest < Test::Unit::TestCase
28
28
  assert File.exists?(File.join(RAILS_ROOT, %w{config documentalist.yml})),
29
29
  "Configuration file did not get copied properly"
30
30
 
31
- assert_equal Documentalist.config[:logfile], File.join(RAILS_ROOT, %w{log documentalist-#{RAILS_ENV}.log})
31
+ assert_equal Documentalist.config[:log_file], File.join(RAILS_ROOT, "log", "documentalist-#{RAILS_ENV}.log")
32
32
 
33
33
  # Delete fake RAILS_ROOT
34
34
  FileUtils.rm_rf tmp_dir
35
35
 
36
+ # Reset logger
37
+
36
38
  # Check that we cleaned our mess up
37
39
  assert !File.exist?(File.join(RAILS_ROOT, %w{config documentalist.yml})), "Temporary file hasn't been removed properly"
38
40
  end
data/test/test_helper.rb CHANGED
@@ -7,6 +7,10 @@ def fixture_001
7
7
  File.join(File.dirname(__FILE__), "fixtures/fixture_001.odt")
8
8
  end
9
9
 
10
+ def fixture_002
11
+ File.join(File.dirname(__FILE__), "fixtures/fixture_002.html")
12
+ end
13
+
10
14
  class Test::Unit::TestCase
11
15
  def assert_difference(code, difference = 0, message = nil)
12
16
  message = "Returned values were equal" unless message
@@ -0,0 +1,21 @@
1
+ require 'test_helper'
2
+
3
+ class WkHtmlToPdfTest < Test::Unit::TestCase
4
+ def test_right_backend_is_picked
5
+ assert_equal Documentalist.backend_for_conversion("test.html", "test.pdf"),
6
+ Documentalist::WkHtmlToPdf,
7
+ "Wrong backend picked"
8
+ end
9
+
10
+ def test_conversion
11
+ temp_file = File.join(Dir.tmpdir, "#{rand(10**9)}.pdf")
12
+
13
+ Documentalist.convert(fixture_002, :to => temp_file)
14
+ assert File.exists?(temp_file), "No converted PDF created"
15
+
16
+ assert_match /Test content/, Documentalist.extract_text(temp_file)
17
+
18
+ FileUtils.rm temp_file
19
+ assert !File.exists?(temp_file), "We didn't clean up properly"
20
+ end
21
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: documentalist
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 31
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- - 1
10
- version: 0.1.1
9
+ - 2
10
+ version: 0.1.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - David FRANCOIS
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-06-30 00:00:00 +02:00
18
+ date: 2010-07-04 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -102,6 +102,8 @@ extra_rdoc_files:
102
102
  - lib/backends/open_office/bridges/pyodconverter.py
103
103
  - lib/backends/open_office/server.rb
104
104
  - lib/backends/pdf_tools.rb
105
+ - lib/backends/wkhtmltopdf.rb
106
+ - lib/dependencies.rb
105
107
  - lib/documentalist.rb
106
108
  - lib/tasks/tasks.rb
107
109
  files:
@@ -139,6 +141,8 @@ files:
139
141
  - lib/backends/open_office/bridges/pyodconverter.py
140
142
  - lib/backends/open_office/server.rb
141
143
  - lib/backends/pdf_tools.rb
144
+ - lib/backends/wkhtmltopdf.rb
145
+ - lib/dependencies.rb
142
146
  - lib/documentalist.rb
143
147
  - lib/tasks/tasks.rb
144
148
  - rails/config/documentalist.yml.tpl
@@ -146,12 +150,14 @@ files:
146
150
  - rails/initialize_configuration.rb
147
151
  - test/documentalist_test.rb
148
152
  - test/fixtures/fixture_001.odt
153
+ - test/fixtures/fixture_002.html
149
154
  - test/net_pbm_test.rb
150
155
  - test/odf_merge_test.rb
151
156
  - test/open_office_test.rb
152
157
  - test/pdf_tools_test.rb
153
158
  - test/rails_integration_test.rb
154
159
  - test/test_helper.rb
160
+ - test/wkhtmltopdf_test.rb
155
161
  - Manifest
156
162
  has_rdoc: true
157
163
  homepage: http://github.com/davout/documentalist
@@ -194,6 +200,7 @@ signing_key:
194
200
  specification_version: 3
195
201
  summary: The smooth document management experience, usable as a Rails gem plugin or standalone in any ruby application
196
202
  test_files:
203
+ - test/wkhtmltopdf_test.rb
197
204
  - test/open_office_test.rb
198
205
  - test/odf_merge_test.rb
199
206
  - test/documentalist_test.rb