pdftohtmlr 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.textile CHANGED
@@ -8,13 +8,22 @@ h1. requirements
8
8
 
9
9
  Just pdftohtml and Ruby (1.8.6+ as far as I know).
10
10
 
11
+ h1. install
12
+
13
+ "http://gemcutter.org/gems/pdftohtmlr":http://gemcutter.org/gems/pdftohtmlr
14
+
15
+ <pre><code>gem install pdftohtmlr</code></pre>
16
+
11
17
  h1. using
12
- <pre>
13
- <code>
18
+ <pre><code lang="ruby">require 'pdftohtmlr'
19
+ require 'nokogiri'
14
20
  file = PdfFile.new([Path to Source PDF],
15
21
  [Target File (not implemented yet)],
16
22
  [user password],
17
23
  [owner password])
18
- doc = file.convert
19
- </code>
20
- </pre>
24
+ string = file.convert
25
+ doc = file.convert_to_document()</code></pre>
26
+
27
+ h1. license
28
+
29
+ MIT
data/lib/pdftohtmlr.rb CHANGED
@@ -1,10 +1,26 @@
1
+ # The library has a single method for converting PDF files into HTML. The
2
+ # method currently takes in the source path, and either/both the user and owner
3
+ # passwords set on the source PDF document. The convert method returns the
4
+ # HTML as a string for further manipulation or loading into a Document.
5
+ #
6
+ # Requires that pdftohtml be installed and on the path
7
+ #
8
+ # Author:: Kit Plummer (mailto:kitplummer@gmail.com)
9
+ # Copyright:: Copyright (c) 2009 Kit Plummer
10
+ # License:: MIT
11
+
1
12
  require 'rubygems'
2
13
  require 'open3'
14
+ require 'nokogiri'
3
15
 
4
16
  module PDFToHTMLR
17
+
18
+ # Simple local error abstraction
5
19
  class PDFToHTMLRError < RuntimeError; end
6
- VERSION = '0.2.0'
20
+
21
+ VERSION = '0.3.0'
7
22
 
23
+ # Provides facilities for converting PDFs to HTML from Ruby code.
8
24
  class PdfFile
9
25
  attr :path
10
26
  attr :target
@@ -24,6 +40,7 @@ module PDFToHTMLR
24
40
 
25
41
  end
26
42
 
43
+ # Convert the PDF document to HTML. Returns a string
27
44
  def convert()
28
45
  errors = ""
29
46
  output = ""
@@ -49,5 +66,10 @@ module PDFToHTMLR
49
66
  end
50
67
  end
51
68
 
69
+ # Convert the PDF document to HTML. Returns a Nokogiri::HTML::Document
70
+ def convert_to_document()
71
+ Nokogiri::HTML.parse(convert())
72
+ end
73
+
52
74
  end
53
75
  end
@@ -20,7 +20,6 @@ class PdfFileTest < Test::Unit::TestCase
20
20
  file = PdfFile.new(TEST_NON_PDF, ".", nil, nil)
21
21
  file.convert
22
22
  end
23
- puts e
24
23
  end
25
24
 
26
25
  def test_bad_pdffile_new
@@ -48,5 +47,15 @@ class PdfFileTest < Test::Unit::TestCase
48
47
  assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`,
49
48
  file.convert()
50
49
  end
51
-
50
+
51
+ def test_return_document
52
+ file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
53
+ assert_equal "Nokogiri::HTML::Document",
54
+ file.convert_to_document().class.to_s
55
+ assert_equal Nokogiri::HTML.parse(
56
+ `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`
57
+ ).css('body').first.to_s,
58
+ file.convert_to_document().css('body').first.to_s
59
+ end
60
+
52
61
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdftohtmlr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kit Plummer
@@ -9,7 +9,7 @@ autorequire: pdftohtml
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-10 00:00:00 -07:00
12
+ date: 2009-12-13 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15