proselytism 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +49 -25
- data/lib/generators/proselytism/templates/config.yml +1 -0
- data/lib/generators/proselytism/templates/initializer.rb +9 -2
- data/lib/proselytism.rb +1 -1
- data/lib/proselytism/converter.rb +5 -6
- data/lib/proselytism/converters/open_office.rb +15 -13
- data/lib/proselytism/engine.rb +3 -2
- data/lib/proselytism/logger.rb +31 -0
- data/lib/proselytism/proselytism.rb +3 -1
- data/lib/proselytism/version.rb +1 -1
- data/spec/fixtures/001-latin.txt +63 -0
- data/spec/open_office_spec.rb +10 -0
- data/spec/spec_helper.rb +1 -1
- metadata +7 -7
- data/lib/proselytism/shared.rb +0 -22
- data/spec/shared_spec.rb +0 -26
data/README.md
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
# Proselytism
|
2
2
|
|
3
|
-
Document converter, text and image extractor using OpenOffice headless server, pdf_tools and net_pbm
|
3
|
+
Document converter, text and image extractor using OpenOffice headless server (JOD or PYOD converter), pdf_tools and net_pbm
|
4
|
+
|
5
|
+
Handled formats for document conversion : odt, doc, rtf, sxw, docx, txt, html, htm, wps, pdf
|
4
6
|
|
5
7
|
## Note
|
6
8
|
|
7
|
-
This gem has been originally written
|
8
|
-
It should be framework agnostic and has been tested on Ubuntu and MacOSX.
|
9
|
+
This gem has been originally written as a RoR 3.2 engine running on Ruby 1.8.7.
|
9
10
|
|
10
|
-
|
11
|
+
It is framework agnostic and has been tested on Ubuntu and MacOSX.
|
11
12
|
|
12
13
|
## Installation
|
13
14
|
|
@@ -16,28 +17,45 @@ Install the required external librairies :
|
|
16
17
|
# aptitude install netpbm
|
17
18
|
# aptitude install xpdf
|
18
19
|
# aptitude install libreoffice
|
19
|
-
|
20
|
+
|
20
21
|
Add this line to your application's Gemfile:
|
21
22
|
|
22
|
-
gem 'proselytism'
|
23
|
+
gem 'proselytism'
|
24
|
+
|
25
|
+
Note: for Ruby 1.9, use the 1.9 branch
|
26
|
+
|
27
|
+
gem 'proselytism', :git => "git://github.com/itkin/proselytism.git", :branch => "1.9"
|
23
28
|
|
24
29
|
And then execute:
|
25
30
|
|
26
31
|
$ bundle
|
27
32
|
|
28
|
-
|
33
|
+
Configure the gem:
|
29
34
|
|
30
|
-
|
31
|
-
$ rails g proselytism:initializer
|
35
|
+
- With a YAML config file:
|
32
36
|
|
33
|
-
As an engine, Proselytism automatically load and autoconfig with /config/proselytism.yml if it exists
|
34
|
-
You can override these configurations params with an initializer. This is especially usefull when you want a custom log file
|
35
|
-
|
36
37
|
```ruby
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
$ rails g proselytism:config
|
39
|
+
```
|
40
|
+
|
41
|
+
As an engine, Proselytism automatically loads /config/proselytism.yml (if the file exists) and sets its config params depending on the current Rails env.
|
42
|
+
|
43
|
+
- With an initializer (optional for Rails App) :
|
44
|
+
|
45
|
+
You can override the configuration file params by adding a custom initializer to /config/initializers .
|
46
|
+
By default Proselytism logs to a separate log file. If you want to use the Rails logger instead:
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
#/config/initializers/proselytism.rb
|
50
|
+
Proselytism.config do |config|
|
51
|
+
config.logger = Rails.logger
|
52
|
+
end
|
53
|
+
```
|
54
|
+
|
55
|
+
To generate a full config initializer:
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
$ rails g proselytism:initializer
|
41
59
|
```
|
42
60
|
|
43
61
|
## Usage
|
@@ -54,32 +72,38 @@ Proselytism.extract_images source_file_path do |image_files_paths|
|
|
54
72
|
end
|
55
73
|
```
|
56
74
|
|
57
|
-
Proselytism
|
58
|
-
- If you pass a block to the method the folders are automatically deleted after the block is yield, so use or copy the file content within the block
|
59
|
-
- If you don't pass a block, don't forget to safely remove
|
75
|
+
Proselytism creates its converted files in temporary folders.
|
76
|
+
- If you pass a block to the method above, the folders are automatically deleted once the block has been executed, so use or copy the file content within the block
|
77
|
+
- If you don't pass a block, the mentioned folder and its content remain permanently, so don't forget to safely remove the folder yourself
|
60
78
|
|
61
79
|
```ruby
|
62
80
|
pdf_file_path = Proselytism.convert source_file_path, :to => :pdf
|
81
|
+
#my code
|
63
82
|
FileUtils.remove_entry_secure File.dirname(pdf_file_path)
|
64
83
|
```
|
65
84
|
|
66
|
-
## Add your own
|
85
|
+
## Add your own converters
|
67
86
|
|
68
87
|
Add your own converter by extending Proselytism::Converters::Base
|
69
|
-
- Your converter will be automatically selected and used related to the
|
88
|
+
- Your converter will be automatically selected and used according to the params given to the :from and :to methods
|
70
89
|
- Add a perform method which
|
71
|
-
-
|
72
|
-
-
|
73
|
-
|
90
|
+
- calls the execute method with your custom command
|
91
|
+
- returns the converted file(s) path(s)
|
92
|
+
|
93
|
+
Proselytism::Converters::Base takes care of
|
94
|
+
- raising an error (if the command execution fails)
|
95
|
+
- logging the command output
|
74
96
|
|
75
97
|
```ruby
|
76
98
|
class MyConverter < Proselytism::Converters::Base
|
99
|
+
class Error < parent::Base::Error; end
|
100
|
+
|
77
101
|
from :ext1, :ext2
|
78
102
|
to :ext3, :ext4
|
79
103
|
|
80
104
|
def perform(origin, options={})
|
81
105
|
destination = destination_file_path(origin, options)
|
82
|
-
command = "
|
106
|
+
command = "mycommand #{origin} #{destination} 2>&1"
|
83
107
|
execute command
|
84
108
|
destination
|
85
109
|
end
|
@@ -25,6 +25,13 @@ Proselytism.config do |config|
|
|
25
25
|
#Path where conversions are done (by default, the system temp dir)
|
26
26
|
#config.tmp_path = File.expand_path("../tmp", __FILE__)
|
27
27
|
|
28
|
-
#
|
29
|
-
#config.
|
28
|
+
#Log level: By default env log level
|
29
|
+
#config.log_level = Rails.logger.level
|
30
|
+
|
31
|
+
#Log path :
|
32
|
+
#config.log_path = File.join(Rails.root, 'log', "proselytism.log")
|
33
|
+
|
34
|
+
#Logger instance
|
35
|
+
#config.logger = Proselytism::BufferedLogger.new Proselytism.config.log_path, Proselytism.config.log_level
|
36
|
+
|
30
37
|
end
|
data/lib/proselytism.rb
CHANGED
@@ -6,14 +6,13 @@ module Proselytism
|
|
6
6
|
module Converters
|
7
7
|
class Base
|
8
8
|
include ::Singleton
|
9
|
-
include Proselytism::Shared
|
10
9
|
class_attribute :from, :to, :subclasses
|
11
10
|
|
12
11
|
class Error < Exception; end
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
|
14
|
+
delegate :config, :log, :to => Proselytism
|
15
|
+
|
17
16
|
|
18
17
|
def destination_file_path(origin, options={})
|
19
18
|
if options[:dest]
|
@@ -25,11 +24,11 @@ module Proselytism
|
|
25
24
|
|
26
25
|
#call perform logging duration and potential errors
|
27
26
|
def convert(file_path, options={})
|
28
|
-
log :debug, "
|
27
|
+
log :debug, "#{self.class.name} converted #{file_path} to :#{options[:to]}" do
|
29
28
|
begin
|
30
29
|
perform(file_path, options)
|
31
30
|
rescue Error => e
|
32
|
-
log :error, e.message
|
31
|
+
log :error, "#{e.class.name} #{e.message}\n#{e.backtrace}\n"
|
33
32
|
raise e
|
34
33
|
end
|
35
34
|
end
|
@@ -28,16 +28,21 @@ class Proselytism::Converters::OpenOffice < Proselytism::Converters::Base
|
|
28
28
|
destination
|
29
29
|
end
|
30
30
|
|
31
|
-
|
32
|
-
#
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
31
|
+
# For an unknown reason, OpenOffice sometimes converts to ISO-8859-1,
|
32
|
+
# post process to ensure a conversion in UTF-8 when :to => :txt
|
33
|
+
def perform_with_ensure_utf8(origin, options={})
|
34
|
+
destination = perform_without_ensure_utf8(origin, options)
|
35
|
+
if options[:to].to_s == "txt" and `file #{destination}` =~ /ISO/
|
36
|
+
#lookup_on = Iconv.new('ASCII//TRANSLIT','UTF-8').iconv(str).upcase.strip.gsub(/'/, " ")
|
37
|
+
#log :warn, "***OOO has converted file in "
|
38
|
+
tmp_iconv_file = "#{destination}-tmp_iconv.txt"
|
39
|
+
execute("iconv --from-code ISO-8859-1 --to-code UTF-8 #{destination} > #{tmp_iconv_file} && mv #{tmp_iconv_file} #{destination}")
|
38
40
|
end
|
41
|
+
destination
|
39
42
|
end
|
40
43
|
|
44
|
+
alias_method_chain :perform, :ensure_utf8
|
45
|
+
|
41
46
|
def server
|
42
47
|
Server.instance
|
43
48
|
end
|
@@ -45,12 +50,9 @@ class Proselytism::Converters::OpenOffice < Proselytism::Converters::Base
|
|
45
50
|
|
46
51
|
class Server
|
47
52
|
include Singleton
|
48
|
-
include Proselytism::Shared
|
49
53
|
class Error < Proselytism::Converters::OpenOffice::Error; end
|
50
54
|
|
51
|
-
|
52
|
-
Proselytism.config
|
53
|
-
end
|
55
|
+
delegate :config, :log, :to => Proselytism
|
54
56
|
|
55
57
|
# Run a block with a timeout and retry if the first execution fails
|
56
58
|
def perform(&block)
|
@@ -108,9 +110,9 @@ class Proselytism::Converters::OpenOffice < Proselytism::Converters::Base
|
|
108
110
|
begin
|
109
111
|
Timeout::timeout(3) do
|
110
112
|
loop do
|
111
|
-
system("killall -9 soffice
|
113
|
+
system("killall -9 soffice > /dev/null 2>&1")
|
114
|
+
system("killall -9 soffice.bin > /dev/null 2>&1")
|
112
115
|
break unless running?
|
113
|
-
sleep(0.2)
|
114
116
|
end
|
115
117
|
end
|
116
118
|
rescue Timeout::Error
|
data/lib/proselytism/engine.rb
CHANGED
@@ -11,13 +11,14 @@ module Proselytism
|
|
11
11
|
params[Rails.env].each do |k, v|
|
12
12
|
config.send "#{k}=", v
|
13
13
|
end
|
14
|
-
Proselytism.config.logger = nil
|
15
14
|
end
|
16
15
|
end
|
17
16
|
end
|
18
17
|
|
19
18
|
ActiveSupport.on_load :after_initialize do |app|
|
20
|
-
Proselytism.config.
|
19
|
+
Proselytism.config.log_level ||= Rails.logger.level
|
20
|
+
Proselytism.config.log_path ||= File.join(Rails.root, 'log', "proselytism.log")
|
21
|
+
Proselytism.config.logger ||= Proselytism::BufferedLogger.new Proselytism.config.log_path, Proselytism.config.log_level
|
21
22
|
end
|
22
23
|
|
23
24
|
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Proselytism
|
2
|
+
|
3
|
+
class Logger < ActiveSupport::BufferedLogger
|
4
|
+
class Formatter
|
5
|
+
def call(severity, time, progname, msg)
|
6
|
+
formatted_time = time.strftime("%Y-%m-%d %H:%M:%S.") << time.usec.to_s[0..2].rjust(3)
|
7
|
+
"#{formatted_time} [#{severity}][pid:#{$$}] #{msg.strip}\n"
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def initialize(log, level = DEBUG)
|
12
|
+
super(log, level)
|
13
|
+
@log.formatter = Formatter.new
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.log(severity, message = nil, &block)
|
19
|
+
if config.logger
|
20
|
+
start_time = Time.now
|
21
|
+
if block_given?
|
22
|
+
result = yield
|
23
|
+
config.logger.send(severity, "#{message} in #{((Time.now - start_time)*1000).to_i} ms")
|
24
|
+
else
|
25
|
+
config.logger.send(severity, message.strip)
|
26
|
+
end
|
27
|
+
block_given? ? result : true
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
require "active_support/core_ext/module/attribute_accessors"
|
2
2
|
|
3
3
|
module Proselytism
|
4
|
-
extend Shared
|
5
4
|
mattr_accessor :config
|
6
5
|
|
7
6
|
def self.config(&block)
|
@@ -25,6 +24,9 @@ module Proselytism
|
|
25
24
|
end
|
26
25
|
end
|
27
26
|
|
27
|
+
|
28
|
+
|
29
|
+
|
28
30
|
# Finds the relevant converter
|
29
31
|
def self.get_converter(origin, destination)
|
30
32
|
Converters::Base.subclasses.detect do |converter|
|
data/lib/proselytism/version.rb
CHANGED
@@ -0,0 +1,63 @@
|
|
1
|
+
K�BIR BRAHIM
|
2
|
+
|
3
|
+
1 rue Pierre Bonnard
|
4
|
+
8 Pavillon Beethoven 62300 Lens
|
5
|
+
N� le�: 29/08/1973 � Calais
|
6
|
+
Nationalit�: Fran�aise
|
7
|
+
T�l�: 03/62/90/95/31 Portable�: 06/61/10/48/63
|
8
|
+
Email�: brahim62100@hotmail.fr
|
9
|
+
Vie maritale 3 enfants
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
EXPERIENCES PROFESSIONNELLES
|
14
|
+
|
15
|
+
|
16
|
+
Novembre 2008-Avril 2009� : Atlantique Automatisme Incendie
|
17
|
+
Monteur tuyauterie (Manpower)
|
18
|
+
|
19
|
+
Juin 2008-Octobre 2008 : Soci�t� d?Etude Fabrication Montage
|
20
|
+
Monteur (Manpower)
|
21
|
+
|
22
|
+
Avril 2008-Mai 2008 : Ateliers Bois
|
23
|
+
Monteur Charpentes m�talliques (Inter 5)
|
24
|
+
|
25
|
+
D�cembre 2007-F�vrier 2008� : Soci�t� d?Etude Fabrication Montage
|
26
|
+
Monteur (Manpower)
|
27
|
+
|
28
|
+
Aout 1998-Novembre 2007 : France Montage Fabrication
|
29
|
+
Monteur
|
30
|
+
|
31
|
+
Octobre 1996-Juillet 1998� : France Montage Fabrication
|
32
|
+
Monteur (Adia Interim)
|
33
|
+
|
34
|
+
D�cembre 1995-Septembre 1996�: Service Militaire
|
35
|
+
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
FORMATIONS ET DIPLOMES
|
40
|
+
|
41
|
+
|
42
|
+
2005�: Littoral Formation (Dunkerque)
|
43
|
+
CACES 3 Chariot El�vateur
|
44
|
+
CACES 9 Engins de Manutention
|
45
|
+
CACES 3B Cat�gorie de PEMP
|
46
|
+
|
47
|
+
1995�: Lyc�e Pierre de Coubertin � Calais
|
48
|
+
BTS Transport et Logistique (niveau)
|
49
|
+
|
50
|
+
1993�: Lyc�e Pierre de Coubertin � Calais
|
51
|
+
Bac G2 Techniques Quantitatives de Gestion
|
52
|
+
|
53
|
+
1988�: Coll�ge Lucien Vadez � Calais
|
54
|
+
BEPC
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
INFORMATIONS COMPLEMENTAIRES
|
59
|
+
|
60
|
+
|
61
|
+
Permis B + voiture personnel
|
62
|
+
Loisirs�: Lecture,sport,sorties p�destres
|
63
|
+
Maitrise de l?outil informatique�: Internet,Word,Excel
|
data/spec/open_office_spec.rb
CHANGED
@@ -55,6 +55,16 @@ describe Proselytism::Converters::OpenOffice.instance do
|
|
55
55
|
subject.perform fixture_path("002.doc"), :dir => tmp_dir, :to => :txt
|
56
56
|
}.should change(self, :tmp_dir_file_count).by 1
|
57
57
|
end
|
58
|
+
it "should ensure destination file encoding is utf8" do
|
59
|
+
FileUtils.cp(fixture_path("001-latin.txt"), tmp_path("001-latin.txt"))
|
60
|
+
subject.should_receive(:perform_without_ensure_utf8).
|
61
|
+
with(fixture_path("001.doc"), :dir => tmp_dir, :to => :txt).
|
62
|
+
and_return(tmp_path("001-latin.txt"))
|
63
|
+
subject.perform fixture_path("001.doc"), :dir => tmp_dir, :to => :txt do |converted_file|
|
64
|
+
`file #{converted_file}`.should match 'UTF-8'
|
65
|
+
File.read(converted_file).should match('é')
|
66
|
+
end
|
67
|
+
end
|
58
68
|
|
59
69
|
it "should not freeze" do
|
60
70
|
3.times do |j|
|
data/spec/spec_helper.rb
CHANGED
@@ -15,7 +15,7 @@ Proselytism.config do |config|
|
|
15
15
|
config.oo_conversion_max_time = 4 #seconds
|
16
16
|
|
17
17
|
config.tmp_path = File.expand_path("../tmp", __FILE__)
|
18
|
-
config.logger =
|
18
|
+
config.logger = Proselytism::Logger.new(File.expand_path("../tmp/log", __FILE__), 0)
|
19
19
|
end
|
20
20
|
|
21
21
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proselytism
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Itkin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-03-
|
18
|
+
date: 2013-03-08 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: activesupport
|
@@ -125,12 +125,13 @@ files:
|
|
125
125
|
- lib/proselytism/converters/pdf_to_text.rb
|
126
126
|
- lib/proselytism/converters/ppm_to_jpeg.rb
|
127
127
|
- lib/proselytism/engine.rb
|
128
|
+
- lib/proselytism/logger.rb
|
128
129
|
- lib/proselytism/proselytism.rb
|
129
|
-
- lib/proselytism/shared.rb
|
130
130
|
- lib/proselytism/version.rb
|
131
131
|
- proselytism.gemspec
|
132
132
|
- spec/.DS_Store
|
133
133
|
- spec/base_converter_spec.rb
|
134
|
+
- spec/fixtures/001-latin.txt
|
134
135
|
- spec/fixtures/001.doc
|
135
136
|
- spec/fixtures/001.pdf
|
136
137
|
- spec/fixtures/001.txt
|
@@ -142,7 +143,6 @@ files:
|
|
142
143
|
- spec/pdf_images_spec.rb
|
143
144
|
- spec/pdf_to_text_spec.rb
|
144
145
|
- spec/proselytism_spec.rb
|
145
|
-
- spec/shared_spec.rb
|
146
146
|
- spec/spec_helper.rb
|
147
147
|
homepage: https://github.com/itkin/proselytism.git
|
148
148
|
licenses: []
|
@@ -180,6 +180,7 @@ summary: document converter and plain text extractor
|
|
180
180
|
test_files:
|
181
181
|
- spec/.DS_Store
|
182
182
|
- spec/base_converter_spec.rb
|
183
|
+
- spec/fixtures/001-latin.txt
|
183
184
|
- spec/fixtures/001.doc
|
184
185
|
- spec/fixtures/001.pdf
|
185
186
|
- spec/fixtures/001.txt
|
@@ -191,5 +192,4 @@ test_files:
|
|
191
192
|
- spec/pdf_images_spec.rb
|
192
193
|
- spec/pdf_to_text_spec.rb
|
193
194
|
- spec/proselytism_spec.rb
|
194
|
-
- spec/shared_spec.rb
|
195
195
|
- spec/spec_helper.rb
|
data/lib/proselytism/shared.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
module Proselytism
|
2
|
-
module Shared
|
3
|
-
|
4
|
-
def log(severity, message = nil)
|
5
|
-
config.logger.send(severity, message) if config.logger
|
6
|
-
end
|
7
|
-
|
8
|
-
def log_with_time(severity, message = nil, &block)
|
9
|
-
start_time = Time.now
|
10
|
-
delay = nil
|
11
|
-
if block_given?
|
12
|
-
result = yield
|
13
|
-
delay = "(#{((Time.now - start_time)*1000).to_i} ms) "
|
14
|
-
end
|
15
|
-
message= "** Proselytism #{start_time.strftime("%Y-%m-%d %H:%M:%S")} #{delay}: " + message.to_s
|
16
|
-
log_without_time(severity, message)
|
17
|
-
block_given? ? result : true
|
18
|
-
end
|
19
|
-
alias_method_chain :log, :time
|
20
|
-
|
21
|
-
end
|
22
|
-
end
|
data/spec/shared_spec.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe Proselytism do
|
4
|
-
context "log" do
|
5
|
-
it "should log with class and time data" do
|
6
|
-
subject.config.logger.should_receive(:debug).with do |message|
|
7
|
-
message.should match /Proselytism/
|
8
|
-
message.should match /io/
|
9
|
-
message.should match Time.now.strftime("%Y-%m-%d")
|
10
|
-
end
|
11
|
-
subject.log(:debug, 'io').should be_true
|
12
|
-
end
|
13
|
-
it "should log delay when a block is passed" do
|
14
|
-
subject.config.logger.should_receive(:debug).with do |message|
|
15
|
-
message.should match /Proselytism/
|
16
|
-
message.should match /io/
|
17
|
-
message.should match /([\d:]+)/
|
18
|
-
end
|
19
|
-
subject.log :debug , 'io' do
|
20
|
-
sleep(0.5)
|
21
|
-
false
|
22
|
-
end.should be_false
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
26
|
-
end
|