pdftohtml 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: da84e67f928a6e91206566e21239cd303aa5f9ac
4
+ data.tar.gz: 198671059eb9c471297e3449ef6e6fd2387a9b2a
5
+ SHA512:
6
+ metadata.gz: 5367455bb059fdb6fbde1507cdcf9a0135206ab447a5ec557b0924e811c847a4e3e32e9c5b98739944980ead617e2cd8caf7a14bdd7b2b6d3f08fb9daa25bcb5
7
+ data.tar.gz: 6b1c56570ddb89825b2137ad5c77ea873a0454033ad8e88e3d4895e52294af89c35ef30ff2246dae415f4cf8c77a96149a297ec85faf12018cb4a899d17ec5e0
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ .idea
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Resolve gems from the public RubyGems index.
source "https://rubygems.org"

# Dependencies are declared once, in pdftohtml.gemspec.
gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Paul Duncan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,8 @@
1
+ # PDF To HTML
2
+
3
+ A simple wrapper around Poppler's `pdftohtml` command-line utility.
4
+ Allows conversion of PDF files into HTML documents.
5
+
6
+ ## License
7
+
8
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
# Loads the build / install / release tasks provided by Bundler.
require "bundler/gem_tasks"

# The previous default pointed at a :spec task that nothing in this Rakefile
# defines (no RSpec rake task is loaded), so a bare `rake` aborted with
# "Don't know how to build task 'spec'". Default to building the gem instead.
task :default => :build
data/lib/pdftohtml.rb ADDED
@@ -0,0 +1,36 @@
1
# PDF To HTML
# by Eresse <eresse@eresse.net>

# External Includes
require 'fileutils'
require 'tmpdir'

# PDF To HTML Module
#
# Thin wrapper around poppler's `pdftohtml` command-line tool. The methods
# can be called on the module itself (`Pdftohtml.convert(path)`) or mixed
# into another class with `include Pdftohtml`.
module Pdftohtml
  # Make the methods callable on the module while keeping them public
  # instance methods for code that mixes the module in.
  extend self

  # Matches the per-page documents written by `pdftohtml -c` for the
  # "output" prefix and captures the page number.
  PAGE_FILE = /\Aoutput-(\d+)\.html\z/

  # Convert PDF to HTML
  #
  # Runs `pdftohtml -c -i <pdf_file> <tmpdir>/output` and reads back every
  # generated `output-<n>.html` page. The frameset and outline documents
  # (`output.html`, `output_ind.html`) are ignored. The scratch directory is
  # always removed, even when an error is raised.
  #
  # @param pdf_file [String, #to_s] path to the PDF document to convert
  # @return [Hash{Integer => Array<String>}] page number => lines of that
  #   page's HTML (as returned by File.readlines), ordered by page number
  # @raise [Errno::ENOENT] if pdf_file is not an existing file, if the
  #   `pdftohtml` executable cannot be found, or if no page was produced
  def convert(pdf_file)
    # An absolute path keeps a relative name starting with "-" from being
    # parsed as a command-line option by pdftohtml.
    source = File.absolute_path(pdf_file.to_s)
    raise Errno::ENOENT, source unless File.file?(source)

    # Dir.mktmpdir creates a private, uniquely named directory and removes it
    # when the block exits, whether normally or through an exception.
    Dir.mktmpdir('pdftohtml-') do |out_path|
      # Run pdftohtml
      # Array form: the arguments are passed verbatim without going through a
      # shell, so quotes or `$(...)` in the file name are harmless. The tool's
      # standard output is read and discarded.
      IO.popen(['pdftohtml', '-c', '-i', source, File.join(out_path, 'output')], &:read)

      # Load up Document Pages
      pages = Dir.entries(out_path).each_with_object({}) do |name, found|
        number = name[PAGE_FILE, 1]
        found[number.to_i] = File.readlines(File.join(out_path, name)) if number
      end

      # Same exception class the conversion has always surfaced on failure.
      raise Errno::ENOENT, "pdftohtml produced no pages for #{source}" if pages.empty?

      pages.sort.to_h
    end
  end
end
data/lib/pdftohtml/version.rb ADDED
@@ -0,0 +1,9 @@
1
+ # PDF To HTML
2
+ # by Eresse <eresse@eresse.net>
3
+
4
+ # PDF To HTML Module
5
module Pdftohtml
  # Version
  # Release number of the gem, read by the gemspec. Frozen so that no caller
  # can mutate the shared string in place.
  VERSION = '0.1.0'.freeze
end
data/pdftohtml.gemspec ADDED
@@ -0,0 +1,24 @@
1
# coding: utf-8
# Gem specification for pdftohtml.

# Put this checkout's lib/ directory on the load path so that the version
# file below resolves.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'pdftohtml/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name    = 'pdftohtml'
  spec.version = Pdftohtml::VERSION
  spec.authors = ['Eresse']
  spec.email   = ['eresse@eresse.net']

  # Description
  spec.summary     = 'Simple PDF Table Data Extractor'
  spec.description = "Simplistic wrapper around poppler's pdftohtml utility"
  spec.homepage    = 'http://redmine.eresse.net/projects/pdftohtml'
  spec.license     = 'MIT'

  # Package every file tracked by git except test-only directories.
  tracked = `git ls-files -z`.split("\x0")
  spec.files = tracked.reject do |path|
    path.match(%r{^(test|spec|features)/})
  end

  # Executables are whatever lives under exe/, listed by base name.
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # Development-only tooling
  spec.add_development_dependency 'bundler', '~> 1.12'
  spec.add_development_dependency 'rake', '~> 10.0'
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdftohtml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Eresse
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-12-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.12'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.12'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: Simplistic wrapper around poppler's pdftohtml utility
42
+ email:
43
+ - eresse@eresse.net
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - Gemfile
50
+ - LICENSE.txt
51
+ - README.md
52
+ - Rakefile
53
+ - lib/pdftohtml.rb
54
+ - lib/pdftohtml/version.rb
55
+ - pdftohtml.gemspec
56
+ homepage: http://redmine.eresse.net/projects/pdftohtml
57
+ licenses:
58
+ - MIT
59
+ metadata: {}
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubyforge_project:
76
+ rubygems_version: 2.5.1
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: Simple PDF Table Data Extractor
80
+ test_files: []