docpdftotext 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 esilverberg
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,26 @@
1
+ = DocPdfToText
2
+
3
+ Author:: Eric Silverberg (http://www.ericsilverberg.com)
4
+ Copyright:: Copyright (c) 2009 Eric Silverberg
5
+ License:: MIT (Go Beavers!)
6
+ Git:: http://github.com/esilverberg/DocPdfToText/tree/master
7
+
8
+ This gem enables you to interact with document conversion libraries through Rails to convert .doc, .docx, and .pdf files into text.
9
+
10
+ == Requirements
11
+ * Antiword: http://www.winfield.demon.nl/
12
+ * pdf-reader: http://github.com/yob/pdf-reader
13
+ * OdfConverter: http://www.oooninja.com/2008/01/convert-openxml-docx-etc-in-linux-using.html
14
+ * Openoffice-headless: http://wiki.alfresco.com/wiki/Running_OpenOffice_From_Terminal
15
+ * DocumentConverter.py (included): http://artofsolving.com/opensource/pyodconverter
16
+
17
+ == Example Usage
18
+ Including DocPdfToText adds several methods to your model. The only one you will normally need to call is file_to_txt, shown below:
19
+
20
+ include DocPdfToText
21
+ ...
22
+ puts file_to_txt(test_file)
23
+
24
+ == Copyright
25
+
26
+ Copyright (c) 2009 esilverberg. See LICENSE for details.
@@ -0,0 +1,33 @@
1
+ require 'test_helper'
2
+ require 'tempfile'
3
+
4
+ class DocPdfToTextTest < Test::Unit::TestCase
5
+ include DocPdfToText
6
+
7
+ should "Convert a docx file" do
8
+ test_file = File.join(File.dirname(__FILE__), "test.docx")
9
+ assert(file_to_txt(test_file).length > 0)
10
+ end
11
+
12
+ should "Convert a doc file" do
13
+ test_file = File.join(File.dirname(__FILE__), "test.doc")
14
+ assert(file_to_txt(test_file).length > 0)
15
+ end
16
+
17
+ should "Convert a pdf file" do
18
+ test_file = File.join(File.dirname(__FILE__), "test.pdf")
19
+ assert(file_to_txt(test_file).length > 0)
20
+ end
21
+
22
+ should "raise invalid file format" do
23
+ assert_raise ArgumentError do
24
+ test_file = File.join(File.dirname(__FILE__), "test.pdf")
25
+ doc_to_txt(test_file)
26
+ end
27
+
28
+ assert_raise ArgumentError do
29
+ test_file = File.join(File.dirname(__FILE__), "test.doc")
30
+ pdf_to_txt(test_file)
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'docpdftotext'
8
+
9
+ class Test::Unit::TestCase
10
+ end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: docpdftotext
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - esilverberg
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-23 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: thoughtbot-shoulda
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: pdf-reader
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description: wrappers for libraries to convert documents into text
36
+ email: eric@ericsilverberg.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.rdoc
44
+ files:
45
+ - LICENSE
46
+ - README.rdoc
47
+ has_rdoc: true
48
+ homepage: http://github.com/esilverberg/docpdftotext
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --charset=UTF-8
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.3.5
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: Convert word to text in ruby
75
+ test_files:
76
+ - test/test_helper.rb
77
+ - test/docpdftotext_test.rb