pdftohtmlr 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/MIT-LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2009 kitplummer@gmail.com
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
data/README.textile ADDED
@@ -0,0 +1,20 @@
1
+ h1. pdftohtmlr
2
+
3
+ Wrapper around the command line tool pdftohtml which converts PDF to HTML, go figure.
4
+
5
+ This gem was inspired by the MiniMagick gem - which does the same thing for ImageMagick (thanks Corey).
6
+
7
+ h1. requirements
8
+
9
+ Just pdftohtml and Ruby (1.8.6+ as far as I know).
10
+
11
+ h1. using
12
+ <pre>
13
+ <code>
14
+ file = PDFToHTMLR::PdfFile.new([Path to Source PDF],
15
+ [Target File (not implemented yet)],
16
+ [user password],
17
+ [owner password])
18
+ doc = file.convert
19
+ </code>
20
+ </pre>
data/Rakefile ADDED
@@ -0,0 +1,68 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+ require 'rake/rdoctask'
4
+ require 'rake/packagetask'
5
+ require 'rake/gempackagetask'
6
+
7
+ $:.unshift(File.dirname(__FILE__) + "/lib")
8
+ require 'pdftohtmlr'
9
+
10
# Package identity shared by the gem specification and the packaging task.
PKG_NAME = 'pdftohtmlr'
PKG_VERSION = PDFToHTMLR::VERSION
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

desc 'Default: run unit tests.'
task :default => :test

desc "Clean generated files"
task :clean do
  rm FileList['test/output/*.png']
  rm_rf 'pkg'
  rm_rf 'doc'
  # The rdoc task below writes into 'rdoc', so that directory must go too.
  rm_rf 'rdoc'
end

desc 'Test the pdftohtmlr gem.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

desc 'Generate documentation for the pdftohtmlr gem.'
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'pdftohtmlr'
  # Each flag is its own argv element; a single space-joined string would be
  # handed to rdoc as one unknown option.
  rdoc.options << '--line-numbers' << '--inline-source'
  # The gem ships README.textile (see s.files below); there is no plain README.
  rdoc.rdoc_files.include('README.textile')
  rdoc.rdoc_files.include('lib/**/*.rb')
end


# Create compressed packages
spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PKG_NAME
  s.summary = "Convert PDF documents to HTML."
  s.description = %q{Uses command-line pdftohtml tools to convert PDF files to HTML.}
  s.version = PKG_VERSION

  s.author = "Kit Plummer"
  s.email = "kitplummer@gmail.com"
  s.rubyforge_project = PKG_NAME
  s.homepage = "http://github.com/kitplummer/pdftohtmlr"

  s.has_rdoc = true
  s.requirements << 'none'
  s.require_path = 'lib'
  # Must name the library file under lib/ (lib/pdftohtmlr.rb); the previous
  # value 'pdftohtml' did not correspond to any file in the gem.
  s.autorequire = PKG_NAME

  # Ship the top-level docs plus everything under lib/ and test/, leaving out
  # Subversion metadata and any PNG files found under test/.
  s.files = [ "Rakefile", "README.textile", "MIT-LICENSE" ]
  s.files = s.files + Dir.glob( "lib/**/*" ).delete_if { |item| item.include?( "\.svn" ) }
  s.files = s.files + Dir.glob( "test/**/*" ).delete_if { |item| item.include?( "\.svn" ) || item.include?("\.png") }
end

# Build the .gem and a .zip archive (no tarball) from the specification above.
Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = false
  p.need_zip = true
end
data/lib/pdftohtmlr.rb ADDED
@@ -0,0 +1,53 @@
1
+ require 'rubygems'
2
+ require 'open3'
3
+
4
# Thin wrapper around the pdftohtml command line tool.
module PDFToHTMLR
  # Raised for an invalid input path, for a pdftohtml process that cannot be
  # started, and whenever pdftohtml writes anything to its standard error.
  class PDFToHTMLRError < RuntimeError; end
  VERSION = '0.2.0'

  # A PDF document on disk that can be converted to HTML.
  class PdfFile
    attr_reader :path, :target, :user_pwd, :owner_pwd

    # input_path::  path of the source PDF; must exist on disk.
    # target_path:: stored in +target+ but not used by +convert+.
    # user_pwd::    user password for the PDF, or nil.
    # owner_pwd::   owner password for the PDF, or nil; only used when
    #               user_pwd is nil.
    #
    # Raises PDFToHTMLRError when input_path is nil or does not exist.
    def initialize(input_path, target_path, user_pwd, owner_pwd)
      @path = input_path
      @target = target_path
      @user_pwd = user_pwd
      @owner_pwd = owner_pwd

      # check to make sure file is legit
      unless @path && File.exist?(@path)
        raise PDFToHTMLRError, "invalid file path"
      end
    end

    # Runs pdftohtml against the file and returns what it wrote to standard
    # output as a String.
    #
    # Raises PDFToHTMLRError when pdftohtml writes anything to standard error
    # or when the process cannot be started.
    def convert
      output = ""
      errors = ""

      # Passing the command as separate arguments bypasses the shell, so
      # paths and passwords containing spaces or shell metacharacters reach
      # pdftohtml unchanged instead of being split or executed.
      Open3.popen3(*command) do |stdin, stdout, stderr|
        stdin.close
        # Drain stderr concurrently: reading the two pipes one after the
        # other can block forever once the unread pipe fills up.
        stderr_reader = Thread.new { stderr.read }
        output = stdout.read
        errors = stderr_reader.value
      end

      raise PDFToHTMLRError, errors unless errors.empty?
      output
    rescue SystemCallError => e
      raise PDFToHTMLRError, "unable to run pdftohtml: #{e.message}"
    end

    private

    # Builds the pdftohtml argument vector. The user password takes
    # precedence over the owner password when both are set.
    def command
      argv = ["pdftohtml", "-stdout"]
      if @user_pwd
        argv.push("-upw", @user_pwd.to_s)
      elsif @owner_pwd
        argv.push("-opw", @owner_pwd.to_s)
      end
      argv << @path.to_s
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require 'test/unit'
2
+ require File.join(File.dirname(__FILE__), '../lib/pdftohtmlr')
3
+
4
# Unit tests for PDFToHTMLR::PdfFile. Expected output is produced by invoking
# pdftohtml directly, so the tool must be installed for the tests to pass.
class PdfFileTest < Test::Unit::TestCase
  include PDFToHTMLR

  CURRENT_DIR = File.dirname(File.expand_path(__FILE__)) + "/"
  TEST_PDF_PATH = CURRENT_DIR + "test.pdf"
  # NOTE(review): presumably protected with the user password "user", as
  # test_valid_pwd_pdffile expects -- confirm against the fixture.
  TEST_PWD_PDF_PATH = CURRENT_DIR + "test_pw.pdf"
  TEST_BAD_PATH = "blah.pdf"
  # An existing file that is not a PDF: this test file itself.
  TEST_NON_PDF = CURRENT_DIR + "pdftohtmlr_test.rb"

  def test_pdffile_new
    assert PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
  end

  # The file exists, so construction succeeds; the error is expected from
  # the conversion itself.
  def test_invalid_pdffile
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_NON_PDF, ".", nil, nil).convert
    end
  end

  # A missing file is rejected by the constructor.
  def test_bad_pdffile_new
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_BAD_PATH, ".", nil, nil)
    end
  end

  def test_string_from_pdffile
    file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
    assert_instance_of String, file.convert
    assert_equal `pdftohtml -stdout #{TEST_PDF_PATH}`, file.convert
  end

  def test_invalid_pwd_pdffile
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_PWD_PDF_PATH, ".", "blah", nil).convert
    end
  end

  def test_valid_pwd_pdffile
    file = PdfFile.new(TEST_PWD_PDF_PATH, ".", "user", nil)
    assert_instance_of String, file.convert
    assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`,
                 file.convert
  end
end
data/test/test.pdf ADDED
Binary file
data/test/test_pw.pdf ADDED
Binary file
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdftohtmlr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Kit Plummer
8
+ autorequire: pdftohtml
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-10 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Uses command-line pdftohtml tools to convert PDF files to HTML.
17
+ email: kitplummer@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - Rakefile
26
+ - README.textile
27
+ - MIT-LICENSE
28
+ - lib/pdftohtmlr.rb
29
+ - test/pdftohtmlr_test.rb
30
+ - test/test.pdf
31
+ - test/test_pw.pdf
32
+ has_rdoc: true
33
+ homepage: http://github.com/kitplummer/pdftohtmlr
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options: []
38
+
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ version:
53
+ requirements:
54
+ - none
55
+ rubyforge_project: pdftohtmlr
56
+ rubygems_version: 1.3.5
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: Convert PDF documents to HTML.
60
+ test_files: []
61
+