pdftohtmlr 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.textile CHANGED
@@ -8,13 +8,22 @@ h1. requirements
8
8
 
9
9
  Just pdftohtml and Ruby (1.8.6+ as far as I know).
10
10
 
11
+ h1. install
12
+
13
+ "http://gemcutter.org/gems/pdftohtmlr":http://gemcutter.org/gems/pdftohtmlr
14
+
15
+ <pre><code>gem install pdftohtmlr</code></pre>
16
+
11
17
  h1. using
12
- <pre>
13
- <code>
18
+ <pre><code lang="ruby">require 'pdftohtmlr'
19
+ require 'nokogiri'
14
20
  file = PdfFile.new([Path to Source PDF],
15
21
  [Target File (not implemented yet)],
16
22
  [user password],
17
23
  [owner password])
18
- doc = file.convert
19
- </code>
20
- </pre>
24
+ string = file.convert
25
+ doc = file.convert_to_document()</code></pre>
26
+
27
+ h1. license
28
+
29
+ MIT
data/lib/pdftohtmlr.rb CHANGED
@@ -1,10 +1,26 @@
1
+ # The library has a single method for converting PDF files into HTML. The
2
+ # method currently takes in the source path, and either/both the user and owner
3
+ # passwords set on the source PDF document. The convert method returns the
4
+ # HTML as a string for further manipulation or loading into a Document.
5
+ #
6
+ # Requires that pdftohtml be installed and on the path
7
+ #
8
+ # Author:: Kit Plummer (mailto:kitplummer@gmail.com)
9
+ # Copyright:: Copyright (c) 2009 Kit Plummer
10
+ # License:: MIT
11
+
1
12
  require 'rubygems'
2
13
  require 'open3'
14
+ require 'nokogiri'
3
15
 
4
16
  module PDFToHTMLR
17
+
18
+ # Simple local error abstraction
5
19
  class PDFToHTMLRError < RuntimeError; end
6
- VERSION = '0.2.0'
20
+
21
+ VERSION = '0.3.0'
7
22
 
23
+ # Provides facilities for converting PDFs to HTML from Ruby code.
8
24
  class PdfFile
9
25
  attr :path
10
26
  attr :target
@@ -24,6 +40,7 @@ module PDFToHTMLR
24
40
 
25
41
  end
26
42
 
43
+ # Convert the PDF document to HTML. Returns a string
27
44
  def convert()
28
45
  errors = ""
29
46
  output = ""
@@ -49,5 +66,10 @@ module PDFToHTMLR
49
66
  end
50
67
  end
51
68
 
69
+ # Convert the PDF document to HTML. Returns a Nokogiri::HTML::Document
70
+ def convert_to_document()
71
+ Nokogiri::HTML.parse(convert())
72
+ end
73
+
52
74
  end
53
75
  end
@@ -20,7 +20,6 @@ class PdfFileTest < Test::Unit::TestCase
20
20
  file = PdfFile.new(TEST_NON_PDF, ".", nil, nil)
21
21
  file.convert
22
22
  end
23
- puts e
24
23
  end
25
24
 
26
25
  def test_bad_pdffile_new
@@ -48,5 +47,15 @@ class PdfFileTest < Test::Unit::TestCase
48
47
  assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`,
49
48
  file.convert()
50
49
  end
51
-
50
+
51
+ def test_return_document
52
+ file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
53
+ assert_equal "Nokogiri::HTML::Document",
54
+ file.convert_to_document().class.to_s
55
+ assert_equal Nokogiri::HTML.parse(
56
+ `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`
57
+ ).css('body').first.to_s,
58
+ file.convert_to_document().css('body').first.to_s
59
+ end
60
+
52
61
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdftohtmlr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kit Plummer
@@ -9,7 +9,7 @@ autorequire: pdftohtml
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-10 00:00:00 -07:00
12
+ date: 2009-12-13 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15