pdftohtmlr 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +14 -5
- data/lib/pdftohtmlr.rb +23 -1
- data/test/pdftohtmlr_test.rb +11 -2
- metadata +2 -2
data/README.textile
CHANGED
@@ -8,13 +8,22 @@ h1. requirements
|
|
8
8
|
|
9
9
|
Just pdftohtml and Ruby (1.8.6+ as far as I know).
|
10
10
|
|
11
|
+
h1. install
|
12
|
+
|
13
|
+
"http://gemcutter.org/gems/pdftohtmlr":http://gemcutter.org/gems/pdftohtmlr
|
14
|
+
|
15
|
+
<pre><code>gem install pdftohtmlr</code></pre>
|
16
|
+
|
11
17
|
h1. using
|
12
|
-
<pre>
|
13
|
-
|
18
|
+
<pre><code lang="ruby">require 'pdftohtmlr'
|
19
|
+
require 'nokogiri'
|
14
20
|
file = PdfFile.new([Path to Source PDF],
|
15
21
|
[Target File (not implemented yet)],
|
16
22
|
[user password],
|
17
23
|
[owner password])
|
18
|
-
|
19
|
-
</code>
|
20
|
-
|
24
|
+
string = file.convert
|
25
|
+
doc = file.convert_to_document()</code></pre>
|
26
|
+
|
27
|
+
h1. license
|
28
|
+
|
29
|
+
MIT
|
data/lib/pdftohtmlr.rb
CHANGED
@@ -1,10 +1,26 @@
|
|
1
|
+
# The library has a single method for converting PDF files into HTML. The
|
2
|
+
# method currently takes in the source path, and either/both the user and owner
|
3
|
+
# passwords set on the source PDF document. The convert method returns the
|
4
|
+
# HTML as a string for further manipulation or loading into a Document.
|
5
|
+
#
|
6
|
+
# Requires that pdftohtml be installed and on the path
|
7
|
+
#
|
8
|
+
# Author:: Kit Plummer (mailto:kitplummer@gmail.com)
|
9
|
+
# Copyright:: Copyright (c) 2009 Kit Plummer
|
10
|
+
# License:: MIT
|
11
|
+
|
1
12
|
require 'rubygems'
|
2
13
|
require 'open3'
|
14
|
+
require 'nokogiri'
|
3
15
|
|
4
16
|
module PDFToHTMLR
|
17
|
+
|
18
|
+
# Simple local error abstraction
|
5
19
|
class PDFToHTMLRError < RuntimeError; end
|
6
|
-
|
20
|
+
|
21
|
+
VERSION = '0.3.0'
|
7
22
|
|
23
|
+
# Provides facilities for converting PDFs to HTML from Ruby code.
|
8
24
|
class PdfFile
|
9
25
|
attr :path
|
10
26
|
attr :target
|
@@ -24,6 +40,7 @@ module PDFToHTMLR
|
|
24
40
|
|
25
41
|
end
|
26
42
|
|
43
|
+
# Convert the PDF document to HTML. Returns a string
|
27
44
|
def convert()
|
28
45
|
errors = ""
|
29
46
|
output = ""
|
@@ -49,5 +66,10 @@ module PDFToHTMLR
|
|
49
66
|
end
|
50
67
|
end
|
51
68
|
|
69
|
+
# Convert the PDF document to HTML. Returns a Nokogiri::HTML::Document
|
70
|
+
def convert_to_document()
|
71
|
+
Nokogiri::HTML.parse(convert())
|
72
|
+
end
|
73
|
+
|
52
74
|
end
|
53
75
|
end
|
data/test/pdftohtmlr_test.rb
CHANGED
@@ -20,7 +20,6 @@ class PdfFileTest < Test::Unit::TestCase
|
|
20
20
|
file = PdfFile.new(TEST_NON_PDF, ".", nil, nil)
|
21
21
|
file.convert
|
22
22
|
end
|
23
|
-
puts e
|
24
23
|
end
|
25
24
|
|
26
25
|
def test_bad_pdffile_new
|
@@ -48,5 +47,15 @@ class PdfFileTest < Test::Unit::TestCase
|
|
48
47
|
assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`,
|
49
48
|
file.convert()
|
50
49
|
end
|
51
|
-
|
50
|
+
|
51
|
+
def test_return_document
|
52
|
+
file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
|
53
|
+
assert_equal "Nokogiri::HTML::Document",
|
54
|
+
file.convert_to_document().class.to_s
|
55
|
+
assert_equal Nokogiri::HTML.parse(
|
56
|
+
`pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`
|
57
|
+
).css('body').first.to_s,
|
58
|
+
file.convert_to_document().css('body').first.to_s
|
59
|
+
end
|
60
|
+
|
52
61
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdftohtmlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kit Plummer
|
@@ -9,7 +9,7 @@ autorequire: pdftohtml
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-13 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|