pdftohtmlr 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +21 -0
- data/README.textile +20 -0
- data/Rakefile +68 -0
- data/lib/pdftohtmlr.rb +53 -0
- data/test/pdftohtmlr_test.rb +52 -0
- data/test/test.pdf +0 -0
- data/test/test_pw.pdf +0 -0
- metadata +61 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright (c) 2009 kitplummer@gmail.com
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
data/README.textile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
h1. pdftohtmlr
|
2
|
+
|
3
|
+
Wrapper around the command line tool pdftohtml which converts PDF to HTML, go figure.
|
4
|
+
|
5
|
+
This gem was inspired by the MiniMagick gem - which does the same thing for ImageMagick (thanks Corey).
|
6
|
+
|
7
|
+
h1. requirements
|
8
|
+
|
9
|
+
Just pdftohtml and Ruby (1.8.6+ as far as I know).
|
10
|
+
|
11
|
+
h1. using
|
12
|
+
<pre>
|
13
|
+
<code>
|
14
|
+
file = PdfFile.new([Path to Source PDF],
|
15
|
+
[Target File (not implemented yet)],
|
16
|
+
[user password],
|
17
|
+
[owner password])
|
18
|
+
doc = file.convert
|
19
|
+
</code>
|
20
|
+
</pre>
|
data/Rakefile
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
require 'rake/packagetask'
|
5
|
+
require 'rake/gempackagetask'
|
6
|
+
|
7
|
+
$:.unshift(File.dirname(__FILE__) + "/lib")
|
8
|
+
require 'pdftohtmlr'
|
9
|
+
|
10
|
+
# Build script for the pdftohtmlr gem: runs the unit tests, generates RDoc
# and packages the gem. The version comes from the library itself so it is
# declared in exactly one place.
PKG_NAME = 'pdftohtmlr'
PKG_VERSION = PDFToHTMLR::VERSION
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

desc 'Default: run unit tests.'
task :default => :test

desc "Clean generated files"
task :clean do
  rm FileList['test/output/*.png']
  rm_rf 'pkg'
  rm_rf 'doc'
  # The :rdoc task below writes to 'rdoc', so remove that directory as well.
  rm_rf 'rdoc'
end

desc 'Test the pdftohtmlr gem.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

desc 'Generate documentation for the pdftohtmlr gem.'
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'pdftohtmlr'
  # One array element per flag; a single string containing a space would be
  # handed to rdoc as one (unknown) option.
  rdoc.options << '--line-numbers' << '--inline-source'
  # The README shipped with the gem is README.textile (see s.files below).
  rdoc.rdoc_files.include('README.textile')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Create compressed packages
spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PKG_NAME
  s.summary = "Convert PDF documents to HTML."
  s.description = %q{Uses command-line pdftohtml tools to convert PDF files to HTML.}
  s.version = PKG_VERSION

  s.author = "Kit Plummer"
  s.email = "kitplummer@gmail.com"
  s.rubyforge_project = PKG_NAME
  s.homepage = "http://github.com/kitplummer/pdftohtmlr"

  s.has_rdoc = true
  s.requirements << 'none'
  s.require_path = 'lib'
  # Must name a file under lib/; the library file is lib/pdftohtmlr.rb.
  s.autorequire = 'pdftohtmlr'

  s.files = [ "Rakefile", "README.textile", "MIT-LICENSE" ]
  # Ship everything under lib/ and test/ except VCS residue and PNG output.
  s.files += Dir.glob("lib/**/*").reject { |item| item.include?(".svn") }
  s.files += Dir.glob("test/**/*").reject { |item| item.include?(".svn") || item.include?(".png") }
end

Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = false
  p.need_zip = true
end
|
data/lib/pdftohtmlr.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'open3'
|
3
|
+
|
4
|
+
# Thin wrapper around the +pdftohtml+ command line tool.
module PDFToHTMLR
  # Raised when the source file does not exist or when pdftohtml reports a
  # problem (anything written to its standard error, or a failure to start it).
  class PDFToHTMLRError < RuntimeError; end

  VERSION = '0.2.0'

  # A PDF document on disk that can be converted to HTML.
  class PdfFile
    attr_reader :path, :target, :user_pwd, :owner_pwd

    # input_path:: path of the source PDF; must exist.
    # target_path:: stored but not used by #convert.
    # user_pwd:: user password for the PDF, or nil.
    # owner_pwd:: owner password for the PDF, or nil.
    #
    # Raises PDFToHTMLRError when input_path does not exist.
    def initialize(input_path, target_path, user_pwd, owner_pwd)
      @path = input_path
      @target = target_path
      @user_pwd = user_pwd
      @owner_pwd = owner_pwd

      # check to make sure file is legit
      raise PDFToHTMLRError, "invalid file path" unless File.exist?(@path)
    end

    # Runs pdftohtml on the file and returns the HTML it writes to standard
    # output as a String.
    #
    # Raises PDFToHTMLRError when pdftohtml writes anything to standard error
    # or cannot be started at all.
    def convert
      begin
        # capture3 drains stdout and stderr concurrently, so a chatty stderr
        # cannot block the child while we are still waiting on stdout.
        output, errors, _status = Open3.capture3(*command)
      rescue SystemCallError => e
        # e.g. pdftohtml is not installed; keep the single documented error type.
        raise PDFToHTMLRError, e.message
      end

      raise PDFToHTMLRError, errors.to_s unless errors.empty?

      output
    end

    private

    # Builds the argument vector for pdftohtml. Passing separate arguments
    # (instead of one interpolated string) bypasses the shell, so spaces and
    # shell metacharacters in the path or passwords are passed through
    # literally rather than being interpreted.
    def command
      argv = ['pdftohtml', '-stdout']
      argv.push('-upw', @user_pwd.to_s) if @user_pwd
      argv.push('-opw', @owner_pwd.to_s) if @owner_pwd
      argv << @path.to_s
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require File.join(File.dirname(__FILE__), '../lib/pdftohtmlr')
|
3
|
+
|
4
|
+
# Exercises PDFToHTMLR::PdfFile against the fixture PDFs stored next to this
# file. The conversion tests shell out to the real pdftohtml binary, so it
# must be available on PATH.
class PdfFileTest < Test::Unit::TestCase
  include PDFToHTMLR

  CURRENT_DIR = File.dirname(File.expand_path(__FILE__)) + "/"
  TEST_PDF_PATH = CURRENT_DIR + "test.pdf"
  TEST_PWD_PDF_PATH = CURRENT_DIR + "test_pw.pdf"
  TEST_BAD_PATH = "blah.pdf"
  TEST_NON_PDF = CURRENT_DIR + "pdftohtmlr_test.rb"

  # An existing file is accepted by the constructor.
  def test_pdffile_new
    assert PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
  end

  # A file that exists but is not a PDF fails at conversion time.
  def test_invalid_pdffile
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_NON_PDF, ".", nil, nil).convert
    end
  end

  # A path that does not exist is rejected by the constructor.
  def test_bad_pdffile_new
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_BAD_PATH, ".", nil, nil)
    end
  end

  # Conversion returns exactly what pdftohtml prints for the same file.
  def test_string_from_pdffile
    html = PdfFile.new(TEST_PDF_PATH, ".", nil, nil).convert
    assert_kind_of String, html
    assert_equal reference_html(TEST_PDF_PATH), html
  end

  # Supplying the wrong user password makes the conversion fail.
  def test_invalid_pwd_pdffile
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_PWD_PDF_PATH, ".", "blah", nil).convert
    end
  end

  # With the user password "user" the protected fixture converts normally.
  def test_valid_pwd_pdffile
    html = PdfFile.new(TEST_PWD_PDF_PATH, ".", "user", nil).convert
    assert_kind_of String, html
    assert_equal reference_html("-upw", "user", TEST_PWD_PDF_PATH), html
  end

  private

  # Runs pdftohtml directly with the given arguments and returns its standard
  # output. Arguments are passed as a list so that a checkout path containing
  # spaces does not break the command.
  def reference_html(*args)
    IO.popen(["pdftohtml", "-stdout"] + args) { |io| io.read }
  end
end
|
data/test/test.pdf
ADDED
Binary file
|
data/test/test_pw.pdf
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pdftohtmlr
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Kit Plummer
|
8
|
+
autorequire: pdftohtml
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-10 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Uses command-line pdftohtml tools to convert PDF files to HTML.
|
17
|
+
email: kitplummer@gmail.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- Rakefile
|
26
|
+
- README.textile
|
27
|
+
- MIT-LICENSE
|
28
|
+
- lib/pdftohtmlr.rb
|
29
|
+
- test/pdftohtmlr_test.rb
|
30
|
+
- test/test.pdf
|
31
|
+
- test/test_pw.pdf
|
32
|
+
has_rdoc: true
|
33
|
+
homepage: http://github.com/kitplummer/pdftohtmlr
|
34
|
+
licenses: []
|
35
|
+
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
version:
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: "0"
|
52
|
+
version:
|
53
|
+
requirements:
|
54
|
+
- none
|
55
|
+
rubyforge_project: pdftohtmlr
|
56
|
+
rubygems_version: 1.3.5
|
57
|
+
signing_key:
|
58
|
+
specification_version: 3
|
59
|
+
summary: Convert PDF documents to HTML.
|
60
|
+
test_files: []
|
61
|
+
|