pdftohtmlr 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.textile CHANGED
@@ -15,15 +15,16 @@ h1. install
15
15
  <pre><code>gem install pdftohtmlr</code></pre>
16
16
 
17
17
  h1. using
18
+ "gist examples":http://gist.github.com/254556.js?file=pdftohtmlr_example.rb
19
+
18
20
  <pre><code lang="ruby">require 'pdftohtmlr'
19
21
  require 'nokogiri'
20
- file = PdfFile.new([Path to Source PDF],
21
- [Target File (not implemented yet)],
22
- [user password],
23
- [owner password])
22
+ file = PdfFilePath.new([Path to Source PDF])
24
23
  string = file.convert
25
24
  doc = file.convert_to_document()</code></pre>
26
25
 
26
+ See included test cases for more usage examples, including passwords and URL fetching.
27
+
27
28
  h1. license
28
29
 
29
- MIT
30
+ MIT (See included MIT-LICENSE)
data/Rakefile CHANGED
@@ -18,7 +18,7 @@ desc "Clean generated files"
18
18
  task :clean do
19
19
  rm FileList['test/output/*.png']
20
20
  rm_rf 'pkg'
21
- rm_rf 'doc'
21
+ rm_rf 'rdoc'
22
22
  end
23
23
 
24
24
  desc 'Test the pdftohtmlr gem.'
@@ -32,8 +32,8 @@ desc 'Generate documentation for the pdftohtmlr gem.'
32
32
  Rake::RDocTask.new(:rdoc) do |rdoc|
33
33
  rdoc.rdoc_dir = 'rdoc'
34
34
  rdoc.title = 'pdftohtmlr'
35
- rdoc.options << '--line-numbers --inline-source'
36
- rdoc.rdoc_files.include('README')
35
+ rdoc.options << '--line-numbers'
36
+ rdoc.rdoc_files.include('README.textile')
37
37
  rdoc.rdoc_files.include('lib/**/*.rb')
38
38
  end
39
39
 
data/lib/pdftohtmlr.rb CHANGED
@@ -12,13 +12,16 @@
12
12
  require 'rubygems'
13
13
  require 'open3'
14
14
  require 'nokogiri'
15
+ require 'uri'
16
+ require 'open-uri'
17
+ require 'tempfile'
15
18
 
16
19
  module PDFToHTMLR
17
20
 
18
21
  # Simple local error abstraction
19
22
  class PDFToHTMLRError < RuntimeError; end
20
23
 
21
- VERSION = '0.3.0'
24
+ VERSION = '0.3.1'
22
25
 
23
26
  # Provides facilities for converting PDFs to HTML from Ruby code.
24
27
  class PdfFile
@@ -27,17 +30,11 @@ module PDFToHTMLR
27
30
  attr :user_pwd
28
31
  attr :owner_pwd
29
32
 
30
- def initialize(input_path, target_path, user_pwd, owner_pwd)
33
+ def initialize(input_path, target_path=nil, user_pwd=nil, owner_pwd=nil)
31
34
  @path = input_path
32
35
  @target = target_path
33
36
  @user_pwd = user_pwd
34
- @owner_pwd = owner_pwd
35
-
36
- # check to make sure file is legit
37
- if (!File.exist?(@path))
38
- raise PDFToHTMLRError, "invalid file path"
39
- end
40
-
37
+ @owner_pwd = owner_pwd
41
38
  end
42
39
 
43
40
  # Convert the PDF document to HTML. Returns a string
@@ -60,7 +57,7 @@ module PDFToHTMLR
60
57
  end
61
58
 
62
59
  if (errors != "")
63
- raise PDFToHTMLRError, errors.to_s
60
+ raise PDFToHTMLRError, errors.first.to_s.chomp
64
61
  else
65
62
  return output
66
63
  end
@@ -72,4 +69,34 @@ module PDFToHTMLR
72
69
  end
73
70
 
74
71
  end
72
+
73
+ # Handle a string-based local path as input, extends PdfFile
74
+ class PdfFilePath < PdfFile
75
+ def initialize(input_path, target_path=nil, user_pwd=nil, owner_pwd=nil)
76
+ # check to make sure file is legit
77
+ if (!File.exist?(input_path))
78
+ raise PDFToHTMLRError, "invalid file path"
79
+ end
80
+
81
+ super(input_path, target_path, user_pwd, owner_pwd)
82
+
83
+ end
84
+ end
85
+
86
+ # Handle a URI as a remote path to a PDF, extends PdfFile
87
+ class PdfFileUrl < PdfFile
88
+ def initialize(input_url, target_path=nil, user_pwd=nil, owner_pwd=nil)
89
+ # check that the URL is well-formed before fetching it
90
+ begin
91
+ if ((input_url =~ URI::regexp).nil?)
92
+ raise PDFToHTMLRError, "invalid file url"
93
+ end
94
+ tempfile = Tempfile.new('pdftohtmlr')
95
+ File.open(tempfile.path, 'w') {|f| f.write(open(input_url).read) }
96
+ super(tempfile.path, target_path, user_pwd, owner_pwd)
97
+ rescue => bang
98
+ raise PDFToHTMLRError, bang.to_s
99
+ end
100
+ end
101
+ end
75
102
  end
@@ -9,53 +9,92 @@ class PdfFileTest < Test::Unit::TestCase
9
9
  TEST_PWD_PDF_PATH = CURRENT_DIR + "test_pw.pdf"
10
10
  TEST_BAD_PATH = "blah.pdf"
11
11
  TEST_NON_PDF = CURRENT_DIR + "pdftohtmlr_test.rb"
12
-
12
+ TEST_URL_PDF =
13
+ "http://github.com/kitplummer/pdftohtmlr/raw/master/test/test.pdf"
14
+ TEST_URL_NON_PDF =
15
+ "http://github.com/kitplummer/pdftohtmlr/raw/master/test/pdftohtmlr_test.rb"
13
16
  def test_pdffile_new
14
- file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
17
+ file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
15
18
  assert file
16
19
  end
17
20
 
18
21
  def test_invalid_pdffile
19
22
  e = assert_raise PDFToHTMLRError do
20
- file = PdfFile.new(TEST_NON_PDF, ".", nil, nil)
23
+ file = PdfFilePath.new(TEST_NON_PDF, ".", nil, nil)
21
24
  file.convert
22
25
  end
26
+ assert_equal "Error: May not be a PDF file (continuing anyway)", e.to_s
23
27
  end
24
28
 
25
29
  def test_bad_pdffile_new
26
- assert_raise PDFToHTMLRError do
27
- file = PdfFile.new(TEST_BAD_PATH, ".", nil, nil)
30
+ e = assert_raise PDFToHTMLRError do
31
+ file = PdfFilePath.new(TEST_BAD_PATH, ".", nil, nil)
28
32
  end
33
+ assert_equal "invalid file path", e.to_s
29
34
  end
30
35
 
31
36
  def test_string_from_pdffile
32
- file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
37
+ file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
33
38
  assert_equal "String", file.convert().class.to_s
34
39
  assert_equal `pdftohtml -stdout #{TEST_PDF_PATH}`, file.convert()
35
40
  end
36
41
 
37
42
  def test_invalid_pwd_pdffile
38
- assert_raise PDFToHTMLRError do
39
- file = PdfFile.new(TEST_PWD_PDF_PATH, ".", "blah", nil)
43
+ e = assert_raise PDFToHTMLRError do
44
+ file = PdfFilePath.new(TEST_PWD_PDF_PATH, ".", "blah", nil)
40
45
  file.convert
41
46
  end
47
+ assert_equal "Error: Incorrect password", e.to_s
42
48
  end
43
49
 
44
50
  def test_valid_pwd_pdffile
45
- file = PdfFile.new(TEST_PWD_PDF_PATH, ".", "user", nil)
51
+ file = PdfFilePath.new(TEST_PWD_PDF_PATH, ".", "user", nil)
46
52
  assert_equal "String", file.convert().class.to_s
47
53
  assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`,
48
54
  file.convert()
49
55
  end
50
56
 
51
57
  def test_return_document
52
- file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
58
+ file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
53
59
  assert_equal "Nokogiri::HTML::Document",
54
60
  file.convert_to_document().class.to_s
55
61
  assert_equal Nokogiri::HTML.parse(
56
- `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`
62
+ `pdftohtml -stdout #{TEST_PDF_PATH}`
57
63
  ).css('body').first.to_s,
58
64
  file.convert_to_document().css('body').first.to_s
59
65
  end
60
66
 
67
+ def test_invalid_URL_pdffile
68
+ e = assert_raise PDFToHTMLRError do
69
+ file = PdfFileUrl.new("blah", ".", nil, nil)
70
+ end
71
+ assert_equal "invalid file url", e.to_s
72
+ end
73
+
74
+ def test_invalid_URL_resource_pdffile
75
+ e = assert_raise PDFToHTMLRError do
76
+ file = PdfFileUrl.new("http://github.com/kitplummer/blah", ".", nil, nil)
77
+ end
78
+ assert_equal "404 Not Found", e.to_s
79
+ end
80
+
81
+ def test_invalid_URL_pdf_pdffile
82
+ e = assert_raise PDFToHTMLRError do
83
+ file = PdfFileUrl.new(TEST_URL_NON_PDF, ".", nil, nil)
84
+ file.convert
85
+ end
86
+ assert_equal "Error: May not be a PDF file (continuing anyway)", e.to_s
87
+ end
88
+
89
+ def test_valid_URL_pdffile
90
+ # http://github.com/kitplummer/pdftohtmlr/raw/master/test/test.pdf
91
+ file = PdfFileUrl.new(TEST_URL_PDF, ".", nil, nil)
92
+ assert_equal "String", file.convert().class.to_s
93
+ assert_equal `pdftohtml -stdout #{TEST_PDF_PATH}`, file.convert()
94
+ end
95
+
96
+ def test_args
97
+ file = PdfFileUrl.new(TEST_URL_PDF)
98
+ assert_equal "String", file.convert().class.to_s
99
+ end
61
100
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdftohtmlr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kit Plummer
@@ -9,7 +9,7 @@ autorequire: pdftohtml
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-13 00:00:00 -07:00
12
+ date: 2009-12-14 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15