xtotxt 0.5 → 0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/xtotxt.rb +23 -19
- data/spec/xtotxt_spec.rb +6 -0
- metadata +4 -4
data/lib/xtotxt.rb
CHANGED
@@ -3,8 +3,8 @@ require 'yaml'
|
|
3
3
|
class XtotxtError < StandardError; end
|
4
4
|
|
5
5
|
class Xtotxt
|
6
|
-
VERSION = 0.5
|
7
|
-
SUPPORTED_EXTENSIONS = %w{pdf doc docx odt rtf html}
|
6
|
+
VERSION = 0.6
|
7
|
+
SUPPORTED_EXTENSIONS = %w{txt pdf doc docx odt rtf html}
|
8
8
|
|
9
9
|
@@config_file_name = "xtotxt.yml"
|
10
10
|
@@dirs_to_check = %w{. ~ /etc}
|
@@ -23,42 +23,46 @@ class Xtotxt
|
|
23
23
|
@@ext = @ext_default
|
24
24
|
end
|
25
25
|
|
26
|
-
def convert(input_file_name)
|
27
|
-
|
26
|
+
def convert(input_file_name,tmp_dir = "/tmp",retain_output=false)
|
27
|
+
dot_ext = File.extname(input_file_name)
|
28
|
+
file_ext = dot_ext.slice(1,dot_ext.length)
|
29
|
+
raise XtotxtError.new("not a supported document extension: #{file_ext}") unless SUPPORTED_EXTENSIONS.member?(file_ext)
|
28
30
|
|
29
|
-
|
31
|
+
file_base = File.basename(input_file_name, file_ext)
|
30
32
|
|
31
|
-
|
33
|
+
output_file_name = "#{tmp_dir}/#{file_base}txt"
|
32
34
|
|
33
|
-
|
34
|
-
|
35
|
-
|
35
|
+
command_line = case file_ext
|
36
|
+
when "txt"
|
37
|
+
"cp -p #{input_file_name} #{output_file_name}"
|
36
38
|
when "pdf"
|
37
|
-
"#{@ext[:pdf]} #{input_file_name}"
|
39
|
+
"#{@ext[:pdf]} #{input_file_name} - > #{output_file_name}"
|
38
40
|
when "doc"
|
39
|
-
"#{@ext[:doc]}
|
41
|
+
"#{@ext[:doc]} #{input_file_name} > #{output_file_name}"
|
40
42
|
when "docx"
|
41
|
-
"#{@ext[:docx]} #{input_file_name}"
|
43
|
+
"#{@ext[:docx]} #{input_file_name} #{output_file_name}"
|
42
44
|
when "odt":
|
43
|
-
"#{@ext[:odt]} #{input_file_name} --output=#{
|
45
|
+
"#{@ext[:odt]} #{input_file_name} --output=#{output_file_name}"
|
44
46
|
when "rtf":
|
45
|
-
"#{@ext[:rtf]} --text #{input_file_name} > #{
|
47
|
+
"#{@ext[:rtf]} --text #{input_file_name} > #{output_file_name}"
|
46
48
|
when "html":
|
47
|
-
"#{@ext[:html]} -o #{
|
49
|
+
"#{@ext[:html]} -o #{output_file_name} #{input_file_name}"
|
48
50
|
else
|
49
|
-
raise XtotxtError.new("have no way to convert #{
|
51
|
+
raise XtotxtError.new("have no way to convert #{file_ext} yet")
|
50
52
|
end
|
51
53
|
|
52
54
|
#puts "executing: #{command_line}"
|
53
55
|
|
54
|
-
command_output = `#{command_line} 2>/dev/null`
|
56
|
+
command_output = `#{command_line} 2>/dev/null` if command_line and not command_line.empty?
|
55
57
|
text = if $? == 0
|
56
|
-
File.read(
|
58
|
+
File.read(output_file_name)
|
57
59
|
else
|
58
60
|
raise XtotxtError.new("Failed to convert #{input_file_name}. Exit status: #{$?.exitstatus}. Output: #{command_output}")
|
59
61
|
end
|
60
62
|
|
61
|
-
|
63
|
+
File.delete(output_file_name) unless retain_output
|
64
|
+
|
65
|
+
case file_ext
|
62
66
|
when "rtf"
|
63
67
|
skip_unrtf_header(text)
|
64
68
|
else
|
data/spec/xtotxt_spec.rb
CHANGED
@@ -34,6 +34,12 @@ describe Xtotxt do
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
+
it "converts a text document correctly" do
|
38
|
+
text = @x.convert("#{@input_prefix}.txt")
|
39
|
+
|
40
|
+
text.strip.should == @text
|
41
|
+
end
|
42
|
+
|
37
43
|
it "converts a pdf document correctly" do
|
38
44
|
text = @x.convert("#{@input_prefix}.pdf")
|
39
45
|
|
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xtotxt
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 5
|
9
|
-
version: "0.5"
|
8
|
+
- 6
|
9
|
+
version: "0.6"
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Alexy Khrabrov
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-10-03 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|