pdftohtmlr 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +14 -5
- data/lib/pdftohtmlr.rb +23 -1
- data/test/pdftohtmlr_test.rb +11 -2
- metadata +2 -2
data/README.textile
CHANGED
@@ -8,13 +8,22 @@ h1. requirements
|
|
8
8
|
|
9
9
|
Just pdftohtml and Ruby (1.8.6+ as far as I know).
|
10
10
|
|
11
|
+
h1. install
|
12
|
+
|
13
|
+
"http://gemcutter.org/gems/pdftohtmlr":http://gemcutter.org/gems/pdftohtmlr
|
14
|
+
|
15
|
+
<pre><code>gem install pdftohtmlr</code></pre>
|
16
|
+
|
11
17
|
h1. using
|
12
|
-
<pre>
|
13
|
-
|
18
|
+
<pre><code lang="ruby">require 'pdftohtmlr'
|
19
|
+
require 'nokogiri'
|
14
20
|
file = PdfFile.new([Path to Source PDF],
|
15
21
|
[Target File (not implemented yet)],
|
16
22
|
[user password],
|
17
23
|
[owner password])
|
18
|
-
|
19
|
-
</code>
|
20
|
-
|
24
|
+
string = file.convert
|
25
|
+
doc = file.convert_to_document()</code></pre>
|
26
|
+
|
27
|
+
h1. license
|
28
|
+
|
29
|
+
MIT
|
data/lib/pdftohtmlr.rb
CHANGED
@@ -1,10 +1,26 @@
|
|
1
|
+
# The library has a single method for converting PDF files into HTML. The
|
2
|
+
# method currently takes in the source path, and either/both the user and owner
|
3
|
+
# passwords set on the source PDF document. The convert method returns the
|
4
|
+
# HTML as a string for further manipulation or loading into a Document.
|
5
|
+
#
|
6
|
+
# Requires that pdftohtml be installed and on the path
|
7
|
+
#
|
8
|
+
# Author:: Kit Plummer (mailto:kitplummer@gmail.com)
|
9
|
+
# Copyright:: Copyright (c) 2009 Kit Plummer
|
10
|
+
# License:: MIT
|
11
|
+
|
1
12
|
require 'rubygems'
|
2
13
|
require 'open3'
|
14
|
+
require 'nokogiri'
|
3
15
|
|
4
16
|
module PDFToHTMLR
|
17
|
+
|
18
|
+
# Simple local error abstraction
|
5
19
|
class PDFToHTMLRError < RuntimeError; end
|
6
|
-
|
20
|
+
|
21
|
+
VERSION = '0.3.0'
|
7
22
|
|
23
|
+
# Provides facilities for converting PDFs to HTML from Ruby code.
|
8
24
|
class PdfFile
|
9
25
|
attr :path
|
10
26
|
attr :target
|
@@ -24,6 +40,7 @@ module PDFToHTMLR
|
|
24
40
|
|
25
41
|
end
|
26
42
|
|
43
|
+
# Convert the PDF document to HTML. Returns a string
|
27
44
|
def convert()
|
28
45
|
errors = ""
|
29
46
|
output = ""
|
@@ -49,5 +66,10 @@ module PDFToHTMLR
|
|
49
66
|
end
|
50
67
|
end
|
51
68
|
|
69
|
+
# Convert the PDF document to HTML. Returns a Nokogiri::HTML::Document
|
70
|
+
def convert_to_document()
|
71
|
+
Nokogiri::HTML.parse(convert())
|
72
|
+
end
|
73
|
+
|
52
74
|
end
|
53
75
|
end
|
data/test/pdftohtmlr_test.rb
CHANGED
@@ -20,7 +20,6 @@ class PdfFileTest < Test::Unit::TestCase
|
|
20
20
|
file = PdfFile.new(TEST_NON_PDF, ".", nil, nil)
|
21
21
|
file.convert
|
22
22
|
end
|
23
|
-
puts e
|
24
23
|
end
|
25
24
|
|
26
25
|
def test_bad_pdffile_new
|
@@ -48,5 +47,15 @@ class PdfFileTest < Test::Unit::TestCase
|
|
48
47
|
assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`,
|
49
48
|
file.convert()
|
50
49
|
end
|
51
|
-
|
50
|
+
|
51
|
+
def test_return_document
|
52
|
+
file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
|
53
|
+
assert_equal "Nokogiri::HTML::Document",
|
54
|
+
file.convert_to_document().class.to_s
|
55
|
+
assert_equal Nokogiri::HTML.parse(
|
56
|
+
`pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`
|
57
|
+
).css('body').first.to_s,
|
58
|
+
file.convert_to_document().css('body').first.to_s
|
59
|
+
end
|
60
|
+
|
52
61
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdftohtmlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kit Plummer
|
@@ -9,7 +9,7 @@ autorequire: pdftohtml
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-13 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|