xtotxt 0.5 → 0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/xtotxt.rb +23 -19
  2. data/spec/xtotxt_spec.rb +6 -0
  3. metadata +4 -4
data/lib/xtotxt.rb CHANGED
@@ -3,8 +3,8 @@ require 'yaml'
3
3
  class XtotxtError < StandardError; end
4
4
 
5
5
  class Xtotxt
6
- VERSION = 0.5
7
- SUPPORTED_EXTENSIONS = %w{pdf doc docx odt rtf html}
6
+ VERSION = 0.6
7
+ SUPPORTED_EXTENSIONS = %w{txt pdf doc docx odt rtf html}
8
8
 
9
9
  @@config_file_name = "xtotxt.yml"
10
10
  @@dirs_to_check = %w{. ~ /etc}
@@ -23,42 +23,46 @@ class Xtotxt
23
23
  @@ext = @ext_default
24
24
  end
25
25
 
26
- def convert(input_file_name)
27
- path_list = input_file_name.split(".")
26
+ def convert(input_file_name,tmp_dir = "/tmp",retain_output=false)
27
+ dot_ext = File.extname(input_file_name)
28
+ file_ext = dot_ext.slice(1,dot_ext.length)
29
+ raise XtotxtError.new("not a supported document extension: #{file_ext}") unless SUPPORTED_EXTENSIONS.member?(file_ext)
28
30
 
29
- ext = path_list.pop
31
+ file_base = File.basename(input_file_name, file_ext)
30
32
 
31
- raise XtotxtError.new("not a supported document extension: #{ext}") unless SUPPORTED_EXTENSIONS.member?(ext)
33
+ output_file_name = "#{tmp_dir}/#{file_base}txt"
32
34
 
33
- output_file = (path_list << "txt").join(".")
34
-
35
- command_line = case ext
35
+ command_line = case file_ext
36
+ when "txt"
37
+ "cp -p #{input_file_name} #{output_file_name}"
36
38
  when "pdf"
37
- "#{@ext[:pdf]} #{input_file_name}"
39
+ "#{@ext[:pdf]} #{input_file_name} - > #{output_file_name}"
38
40
  when "doc"
39
- "#{@ext[:doc]} > #{output_file} #{input_file_name}"
41
+ "#{@ext[:doc]} #{input_file_name} > #{output_file_name}"
40
42
  when "docx"
41
- "#{@ext[:docx]} #{input_file_name}"
43
+ "#{@ext[:docx]} #{input_file_name} #{output_file_name}"
42
44
  when "odt":
43
- "#{@ext[:odt]} #{input_file_name} --output=#{output_file}"
45
+ "#{@ext[:odt]} #{input_file_name} --output=#{output_file_name}"
44
46
  when "rtf":
45
- "#{@ext[:rtf]} --text #{input_file_name} > #{output_file}"
47
+ "#{@ext[:rtf]} --text #{input_file_name} > #{output_file_name}"
46
48
  when "html":
47
- "#{@ext[:html]} -o #{output_file} #{input_file_name}"
49
+ "#{@ext[:html]} -o #{output_file_name} #{input_file_name}"
48
50
  else
49
- raise XtotxtError.new("have no way to convert #{ext} yet")
51
+ raise XtotxtError.new("have no way to convert #{file_ext} yet")
50
52
  end
51
53
 
52
54
  #puts "executing: #{command_line}"
53
55
 
54
- command_output = `#{command_line} 2>/dev/null`
56
+ command_output = `#{command_line} 2>/dev/null` if command_line and not command_line.empty?
55
57
  text = if $? == 0
56
- File.read(output_file)
58
+ File.read(output_file_name)
57
59
  else
58
60
  raise XtotxtError.new("Failed to convert #{input_file_name}. Exit status: #{$?.exitstatus}. Output: #{command_output}")
59
61
  end
60
62
 
61
- case ext
63
+ File.delete(output_file_name) unless retain_output
64
+
65
+ case file_ext
62
66
  when "rtf"
63
67
  skip_unrtf_header(text)
64
68
  else
data/spec/xtotxt_spec.rb CHANGED
@@ -34,6 +34,12 @@ describe Xtotxt do
34
34
  end
35
35
  end
36
36
 
37
+ it "converts a text document correctly" do
38
+ text = @x.convert("#{@input_prefix}.txt")
39
+
40
+ text.strip.should == @text
41
+ end
42
+
37
43
  it "converts a pdf document correctly" do
38
44
  text = @x.convert("#{@input_prefix}.pdf")
39
45
 
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xtotxt
3
3
  version: !ruby/object:Gem::Version
4
- hash: 1
4
+ hash: 7
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 5
9
- version: "0.5"
8
+ - 6
9
+ version: "0.6"
10
10
  platform: ruby
11
11
  authors:
12
12
  - Alexy Khrabrov
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-09-30 00:00:00 -07:00
17
+ date: 2011-10-03 00:00:00 -07:00
18
18
  default_executable:
19
19
  dependencies: []
20
20