kristin 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/kristin.rb +31 -15
- data/lib/kristin/version.rb +1 -1
- data/spec/kristin_spec.rb +16 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea0935cdc2721d6950c396f8c1d99e8a191ed605
|
4
|
+
data.tar.gz: dbd8823558a32823e4d69dd085549656591b920b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59665731c9121dcf98996d8cd64d07c2d4c31e29207af37d8124d075e19704451ed3b2d325689a2f28a2ba4f118e13b40ef876edeb04bc28f6c4c1be1f14a48d
|
7
|
+
data.tar.gz: 5c2dbe84b0e9a78d63b7219efba63b80fd15b4cec68058dba9e6844745f5ad03d0fc9cc9983ec06d3ea99005d24f6ab4509577e67de68bb0c73042c2cedb6df7
|
data/README.md
CHANGED
@@ -27,6 +27,9 @@ require 'kristin'
|
|
27
27
|
# Converts document.pdf to document.html
|
28
28
|
# This requires that the pdf2htmlEX command is present in your PATH.
|
29
29
|
Kristin.convert('document.pdf', 'document.html')
|
30
|
+
|
31
|
+
# You can also convert a source file directly from a URL
|
32
|
+
Kristin.convert('http://myserver.com/123/document.pdf', 'document.html')
|
30
33
|
```
|
31
34
|
|
32
35
|
## Contributing
|
data/lib/kristin.rb
CHANGED
@@ -1,34 +1,28 @@
|
|
1
1
|
require "kristin/version"
|
2
|
+
require 'open-uri'
|
3
|
+
require "net/http"
|
2
4
|
|
3
5
|
module Kristin
|
4
6
|
def self.convert(source, target)
|
5
|
-
|
6
|
-
raise IOError, "Source file (#{source}) does not exist." if not File.exists?(source)
|
7
7
|
raise IOError, "Can't find pdf2htmlex executable in PATH" if not command_available?
|
8
|
-
|
9
|
-
cmd = "#{pdf2htmlex_command} #{
|
8
|
+
src = determine_source(source)
|
9
|
+
cmd = "#{pdf2htmlex_command} #{src} #{target}"
|
10
10
|
|
11
11
|
`#{cmd}`
|
12
|
-
|
13
12
|
## TODO: Grab error message from pdf2htmlex and raise a better error
|
14
|
-
raise IOError, "Could not convert #{
|
13
|
+
raise IOError, "Could not convert #{src}" if $?.exitstatus != 0
|
15
14
|
end
|
16
15
|
|
17
16
|
private
|
18
17
|
|
19
18
|
def self.command_available?
|
20
|
-
|
19
|
+
pdf2htmlex_command
|
21
20
|
end
|
22
21
|
|
23
22
|
def self.pdf2htmlex_command
|
24
23
|
cmd = nil
|
25
|
-
if which("pdf2htmlex")
|
26
|
-
|
27
|
-
elsif which("pdf2htmlEX")
|
28
|
-
cmd = "pdf2htmlEX"
|
29
|
-
end
|
30
|
-
|
31
|
-
cmd
|
24
|
+
cmd = "pdf2htmlex" if which("pdf2htmlex")
|
25
|
+
cmd = "pdf2htmlEX" if which("pdf2htmlEX")
|
32
26
|
end
|
33
27
|
|
34
28
|
def self.which(cmd)
|
@@ -36,11 +30,33 @@ module Kristin
|
|
36
30
|
ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
|
37
31
|
exts.each do |ext|
|
38
32
|
exe = File.join(path, "#{cmd}#{ext}")
|
39
|
-
|
40
33
|
return exe if File.executable? exe
|
41
34
|
end
|
42
35
|
end
|
43
36
|
|
44
37
|
return nil
|
45
38
|
end
|
39
|
+
|
40
|
+
def self.random_source_name
|
41
|
+
rand(16**16).to_s(16)
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.download_file(source)
|
45
|
+
tmp_file = "/tmp/#{random_source_name}.pdf"
|
46
|
+
File.open(tmp_file, "wb") do |saved_file|
|
47
|
+
open(source, 'rb') do |read_file|
|
48
|
+
saved_file.write(read_file.read)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
tmp_file
|
53
|
+
end
|
54
|
+
|
55
|
+
def self.determine_source(source)
|
56
|
+
is_file = File.exists?(source) && !File.directory?(source)
|
57
|
+
is_http = (URI(source).scheme == "http" || URI(source).scheme == "https") && Net::HTTP.get_response(URI(source)).is_a?(Net::HTTPSuccess)
|
58
|
+
raise IOError, "Source (#{source}) is neither a file nor an URL." unless is_file || is_http
|
59
|
+
|
60
|
+
is_file ? source : download_file(source)
|
61
|
+
end
|
46
62
|
end
|
data/lib/kristin/version.rb
CHANGED
data/spec/kristin_spec.rb
CHANGED
@@ -34,5 +34,21 @@ describe Kristin do
|
|
34
34
|
it "should raise error if pdf is not a real pdf" do
|
35
35
|
lambda { Kristin.convert(@no_pdf, "nonsense.html") }.should raise_error(IOError)
|
36
36
|
end
|
37
|
+
|
38
|
+
it "should convert a pdf from an URL" do
|
39
|
+
target = @target_path + "/from_url.html"
|
40
|
+
Kristin.convert("https://www.filepicker.io/api/file/vR0btUfRQiCF9ntRkW6Q", target)
|
41
|
+
File.exists?(target).should == true
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should raise an error if URL does not exist" do
|
45
|
+
target = @target_path + "/from_url.html"
|
46
|
+
lambda { Kristin.convert("https://www.filepicker.io/api/file/donotexist.pdf", target) }.should raise_error(IOError)
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should raise an error if URL file is not a real pdf" do
|
50
|
+
target = @target_path + "/from_url.html"
|
51
|
+
lambda { Kristin.convert("https://www.filepicker.io/api/file/agxKeTfQSWKvMR4CDXMq", target) }.should raise_error(IOError)
|
52
|
+
end
|
37
53
|
end
|
38
54
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kristin
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Nyström
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-03-
|
11
|
+
date: 2013-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|