pdftohtmlr 0.4 → 0.4.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -55,7 +55,7 @@ spec = Gem::Specification.new do |s|
55
55
  s.requirements << 'none'
56
56
  s.require_path = 'lib'
57
57
  s.autorequire = 'pdftohtml'
58
-
58
+ s.add_dependency("nokogiri", ">= 1.3.3")
59
59
  s.files = [ "Rakefile", "README.textile", "MIT-LICENSE" ]
60
60
  s.files = s.files + Dir.glob( "lib/**/*" ).delete_if { |item| item.include?( "\.svn" ) }
61
61
  s.files = s.files + Dir.glob( "test/**/*" ).delete_if { |item| item.include?( "\.svn" ) || item.include?("\.png") }
data/lib/pdftohtmlr.rb CHANGED
@@ -20,7 +20,7 @@ module PDFToHTMLR
20
20
  # Simple local error abstraction
21
21
  class PDFToHTMLRError < RuntimeError; end
22
22
 
23
- VERSION = '0.4'
23
+ VERSION = '0.4.1'
24
24
 
25
25
  # Provides facilities for converting PDFs to HTML from Ruby code.
26
26
  class PdfFile
@@ -28,7 +28,8 @@ module PDFToHTMLR
28
28
  attr :target
29
29
  attr :user_pwd
30
30
  attr :owner_pwd
31
-
31
+ attr :format
32
+
32
33
  def initialize(input_path, target_path=nil, user_pwd=nil, owner_pwd=nil)
33
34
  @path = input_path
34
35
  @target = target_path
@@ -40,12 +41,13 @@ module PDFToHTMLR
40
41
  def convert()
41
42
  errors = ""
42
43
  output = ""
44
+
43
45
  if @user_pwd
44
- cmd = "pdftohtml -stdout -upw #{@user_pwd}" + ' "' + @path + '"'
46
+ cmd = "pdftohtml -stdout #{@format} -upw #{@user_pwd}" + ' "' + @path + '"'
45
47
  elsif @owner_pwd
46
- cmd = "pdftohtml -stdout -opw #{@owner_pwd}" + ' "' + @path + '"'
48
+ cmd = "pdftohtml -stdout #{@format} -opw #{@owner_pwd}" + ' "' + @path + '"'
47
49
  else
48
- cmd = "pdftohtml -stdout" + ' "' + @path + '"'
50
+ cmd = "pdftohtml -stdout #{@format}" + ' "' + @path + '"'
49
51
  end
50
52
 
51
53
  output = `#{cmd} 2>&1`
@@ -63,6 +65,16 @@ module PDFToHTMLR
63
65
  def convert_to_document()
64
66
  Nokogiri::HTML.parse(convert())
65
67
  end
68
+
69
+ def convert_to_xml()
70
+ @format = "-xml"
71
+ convert()
72
+ end
73
+
74
+ def convert_to_xml_document()
75
+ @format = "-xml"
76
+ Nokogiri::XML.parse(convert())
77
+ end
66
78
  end
67
79
 
68
80
  # Handle a string-based local path as input, extends PdfFile
@@ -64,6 +64,21 @@ class PdfFileTest < Test::Unit::TestCase
64
64
  file.convert_to_document().css('body').first.to_s
65
65
  end
66
66
 
67
+ def test_return_xml
68
+ file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
69
+ assert_equal "String", file.convert_to_xml().class.to_s
70
+ end
71
+
72
+ def test_return_xml_document
73
+ file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
74
+ assert_equal "Nokogiri::XML::Document",
75
+ file.convert_to_xml_document().class.to_s
76
+ assert_equal Nokogiri::XML.parse(
77
+ `pdftohtml -stdout -xml "#{TEST_PDF_PATH}"`
78
+ ).css('text').first.to_s,
79
+ file.convert_to_document().css('text').first.to_s
80
+ end
81
+
67
82
  def test_invalid_URL_pdffile
68
83
  e = assert_raise PDFToHTMLRError do
69
84
  file = PdfFileUrl.new("blah", ".", nil, nil)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdftohtmlr
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.4"
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kit Plummer
@@ -11,8 +11,17 @@ cert_chain: []
11
11
 
12
12
  date: 2009-12-18 00:00:00 -07:00
13
13
  default_executable:
14
- dependencies: []
15
-
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.3.3
24
+ version:
16
25
  description: Uses command-line pdftohtml tools to convert PDF files to HTML.
17
26
  email: kitplummer@gmail.com
18
27
  executables: []