kristin 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/kristin.rb +31 -15
- data/lib/kristin/version.rb +1 -1
- data/spec/kristin_spec.rb +16 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea0935cdc2721d6950c396f8c1d99e8a191ed605
|
4
|
+
data.tar.gz: dbd8823558a32823e4d69dd085549656591b920b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59665731c9121dcf98996d8cd64d07c2d4c31e29207af37d8124d075e19704451ed3b2d325689a2f28a2ba4f118e13b40ef876edeb04bc28f6c4c1be1f14a48d
|
7
|
+
data.tar.gz: 5c2dbe84b0e9a78d63b7219efba63b80fd15b4cec68058dba9e6844745f5ad03d0fc9cc9983ec06d3ea99005d24f6ab4509577e67de68bb0c73042c2cedb6df7
|
data/README.md
CHANGED
@@ -27,6 +27,9 @@ require 'kristin'
|
|
27
27
|
# Converts document.pdf to document.html
|
28
28
|
# This requires that the pdf2htmlEX command is present in your PATH.
|
29
29
|
Kristin.convert('document.pdf', 'document.html')
|
30
|
+
|
31
|
+
# You can also convert a source file directly from an URL
|
32
|
+
Kristin.convert('http://myserver.com/123/document.pdf', 'document.html')
|
30
33
|
```
|
31
34
|
|
32
35
|
## Contributing
|
data/lib/kristin.rb
CHANGED
@@ -1,34 +1,28 @@
|
|
1
1
|
require "kristin/version"
|
2
|
+
require 'open-uri'
|
3
|
+
require "net/http"
|
2
4
|
|
3
5
|
module Kristin
|
4
6
|
def self.convert(source, target)
|
5
|
-
|
6
|
-
raise IOError, "Source file (#{source}) does not exist." if not File.exists?(source)
|
7
7
|
raise IOError, "Can't find pdf2htmlex executable in PATH" if not command_available?
|
8
|
-
|
9
|
-
cmd = "#{pdf2htmlex_command} #{source} #{target}"
|
8
|
+
src = determine_source(source)
|
9
|
+
cmd = "#{pdf2htmlex_command} #{src} #{target}"
|
10
10
|
|
11
11
|
`#{cmd}`
|
12
|
-
|
13
12
|
## TODO: Grab error message from pdf2htmlex and raise a better error
|
14
|
-
raise IOError, "Could not convert #{source}" if $?.exitstatus != 0
|
13
|
+
raise IOError, "Could not convert #{src}" if $?.exitstatus != 0
|
15
14
|
end
|
16
15
|
|
17
16
|
private
|
18
17
|
|
19
18
|
def self.command_available?
|
20
|
-
|
19
|
+
pdf2htmlex_command
|
21
20
|
end
|
22
21
|
|
23
22
|
def self.pdf2htmlex_command
|
24
23
|
cmd = nil
|
25
|
-
if which("pdf2htmlex")
|
26
|
-
|
27
|
-
elsif which("pdf2htmlEX")
|
28
|
-
cmd = "pdf2htmlEX"
|
29
|
-
end
|
30
|
-
|
31
|
-
cmd
|
24
|
+
cmd = "pdf2htmlex" if which("pdf2htmlex")
|
25
|
+
cmd = "pdf2htmlEX" if which("pdf2htmlEX")
|
32
26
|
end
|
33
27
|
|
34
28
|
def self.which(cmd)
|
@@ -36,11 +30,33 @@ module Kristin
|
|
36
30
|
ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
|
37
31
|
exts.each do |ext|
|
38
32
|
exe = File.join(path, "#{cmd}#{ext}")
|
39
|
-
|
40
33
|
return exe if File.executable? exe
|
41
34
|
end
|
42
35
|
end
|
43
36
|
|
44
37
|
return nil
|
45
38
|
end
|
39
|
+
|
40
|
+
def self.random_source_name
|
41
|
+
rand(16**16).to_s(16)
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.download_file(source)
|
45
|
+
tmp_file = "/tmp/#{random_source_name}.pdf"
|
46
|
+
File.open(tmp_file, "wb") do |saved_file|
|
47
|
+
open(source, 'rb') do |read_file|
|
48
|
+
saved_file.write(read_file.read)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
tmp_file
|
53
|
+
end
|
54
|
+
|
55
|
+
def self.determine_source(source)
|
56
|
+
is_file = File.exists?(source) && !File.directory?(source)
|
57
|
+
is_http = (URI(source).scheme == "http" || URI(source).scheme == "https") && Net::HTTP.get_response(URI(source)).is_a?(Net::HTTPSuccess)
|
58
|
+
raise IOError, "Source (#{source}) is neither a file nor an URL." unless is_file || is_http
|
59
|
+
|
60
|
+
is_file ? source : download_file(source)
|
61
|
+
end
|
46
62
|
end
|
data/lib/kristin/version.rb
CHANGED
data/spec/kristin_spec.rb
CHANGED
@@ -34,5 +34,21 @@ describe Kristin do
|
|
34
34
|
it "should raise error if pdf is not a real pdf" do
|
35
35
|
lambda { Kristin.convert(@no_pdf, "nonsense.html") }.should raise_error(IOError)
|
36
36
|
end
|
37
|
+
|
38
|
+
it "should convert a pdf from an URL" do
|
39
|
+
target = @target_path + "/from_url.html"
|
40
|
+
Kristin.convert("https://www.filepicker.io/api/file/vR0btUfRQiCF9ntRkW6Q", target)
|
41
|
+
File.exists?(target).should == true
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should raise an error if URL does not exist" do
|
45
|
+
target = @target_path + "/from_url.html"
|
46
|
+
lambda { Kristin.convert("https://www.filepicker.io/api/file/donotexist.pdf", target) }.should raise_error(IOError)
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should raise an error if URL file is not a real pdf" do
|
50
|
+
target = @target_path + "/from_url.html"
|
51
|
+
lambda { Kristin.convert("https://www.filepicker.io/api/file/agxKeTfQSWKvMR4CDXMq", target) }.should raise_error(IOError)
|
52
|
+
end
|
37
53
|
end
|
38
54
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kristin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Nyström
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-03-
|
11
|
+
date: 2013-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|