pdftotext 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e5891b96ef5f0d107fd61fc981a15a34949bb7f2
4
+ data.tar.gz: c8b860266e848e94b3189e086aa9bef47c90332d
5
+ SHA512:
6
+ metadata.gz: 4e2b1edae3551f6438bfe5bb4116c9e02be22638fcb33d11c05b4a6c244a2312c67aafe7394234738116893f9f3688a69a60c949d1bade70e4ff086edfbb5b73
7
+ data.tar.gz: 317d262619e24bd2a2a6ca770010f782b596710422e399f73d30fbc7c2a2a71a654767f670c56f809cfccc8c1fbfc165d7c3a2fd5782cbb9cc779688dce027d1
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.gem
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+ sudo: required
3
+
4
+ cache:
5
+ - bundler
6
+ - apt
7
+
8
+ before_script:
9
+ - sudo apt-get update
10
+ - sudo apt-get install -y poppler-utils
11
+
12
+ install:
13
+ - gem install bundler --version '1.10.6'
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pdftotext.gemspec
4
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Ben Balter
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,24 @@
1
+ # Pdftotext
2
+
3
+ *A Ruby wrapper for the `pdftotext` command line library*
4
+
5
+ [![Build Status](https://travis-ci.org/benbalter/pdftotext.svg)](https://travis-ci.org/benbalter/pdftotext)
6
+
7
+ ## Installation
8
+
9
+ 1. You must first install [Poppler](http://poppler.freedesktop.org/). On OS X, this can be done with `brew install poppler` if you have Homebrew installed.
10
+ 2. Add `gem 'pdftotext'` to your project's Gemfile
11
+ 3. `bundle install`
12
+
13
+ ## Usage
14
+
15
+ ```ruby
16
+ text = Pdftotext.text('path-to.pdf')
17
+ => "The text of the PDF"
18
+
19
+ pages = Pdftotext.pages('path-to.pdf')
20
+ pages.first.number
21
+ => 1
22
+ pages.first.text
23
+ => "The text of the PDF"
24
+ ```
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "pdftotext"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ require "pry"
11
+ Pry.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,21 @@
1
+ require "cliver"
2
+ require "open3"
3
+ require "pdftotext/version"
4
+ require "pdftotext/document"
5
+ require "pdftotext/cli"
6
+ require "pdftotext/page"
7
+
8
# Top-level convenience API: each helper builds a Document for the given
# path and delegates to it; `cli` hands out one shared CLI instance.
module Pdftotext
  class << self
    # Returns whatever Document#text yields for the file at +path+.
    # +options+ is forwarded untouched.
    def text(path, options={})
      document = Document.new(path)
      document.text(options)
    end

    # Returns whatever Document#pages yields for the file at +path+.
    # +options+ is forwarded untouched.
    def pages(path, options={})
      document = Document.new(path)
      document.pages(options)
    end

    # Lazily builds the CLI wrapper and reuses it on later calls.
    def cli
      @cli ||= CLI.new
    end
  end
end
@@ -0,0 +1,35 @@
1
module Pdftotext
  # Runs the external `pdftotext` executable and translates option hashes
  # into command-line flags for it.
  class CLI

    # Options applied to every invocation unless the caller overrides them.
    DEFAULT_OPTIONS = {
      :layout => true
    }.freeze

    # Invokes the binary with the given positional arguments.
    #
    # args - positional command-line arguments (Document passes the input
    #        path and the output path), optionally followed by a Hash of
    #        options. The Hash is merged over DEFAULT_OPTIONS and converted
    #        to flags, which are appended after the positional arguments.
    #
    # Returns the combined stdout/stderr output of the command.
    # Raises RuntimeError (with the command line and its output) when the
    # command does not exit successfully.
    def run_command(*args)
      # Only treat the last argument as options when it really is a Hash;
      # otherwise a call without options would lose its final path.
      options = args.last.is_a?(Hash) ? args.pop : {}
      argv = args + options_to_args(DEFAULT_OPTIONS.merge(options))
      output, status = Open3.capture2e(bin_path, *argv)
      raise "Command `#{bin_path} #{argv.join(" ")}` failed: #{output}" unless status.success?
      output
    end

    private

    # Path to the `pdftotext` executable as resolved by Cliver (memoized).
    def bin_path
      @bin_path ||= Cliver.detect!('pdftotext')
    end

    # Converts an options Hash into an argv fragment:
    #
    #   nil / false -> option omitted
    #   true        -> "-key"
    #   other       -> "-key", "value" (two separate argv elements)
    #
    # The value must be its own element because the command is executed
    # without a shell, so "-key value" would reach the program as a single
    # unrecognized token.
    def options_to_args(options)
      options.flat_map do |key, value|
        case value
        when nil, false then []
        when true then ["-#{key}"]
        else ["-#{key}", value.to_s]
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
require "tempfile"

module Pdftotext
  # A PDF file on disk whose text can be extracted through Pdftotext.cli.
  class Document
    # Absolute path of the PDF, expanded when the document is created.
    attr_reader :path

    # path - location of the PDF; expanded with File.expand_path.
    def initialize(path)
      @path = File.expand_path(path)
    end

    # Extracts the document's text.
    #
    # options - Hash forwarded to CLI#run_command.
    #
    # Returns the contents the command wrote to a temporary output file.
    # Any error raised by the command propagates to the caller.
    def text(options={})
      # A fresh tempfile per call: once a Tempfile has been close!-d its
      # path is nil, so it cannot be reused for a later extraction.
      tempfile = Tempfile.new(['pdftotext', '.txt'])
      begin
        Pdftotext.cli.run_command path, tempfile.path, options
        tempfile.read
      ensure
        # Remove the temporary file even when the command fails.
        tempfile.close!
      end
    end

    # Extracts the text and splits it on form-feed characters.
    #
    # options - Hash forwarded to #text.
    #
    # Returns an Array of Page objects numbered from 1 in document order.
    def pages(options={})
      text(options).split("\f").map.with_index(1) do |page_text, number|
        Page.new(text: page_text, number: number)
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module Pdftotext
  # Simple read-only holder pairing a chunk of text with a page number.
  class Page
    # Text stored under the :text key at construction time.
    attr_reader :text
    # Value stored under the :number key at construction time.
    attr_reader :number

    # options - object indexed with :text and :number (Document#pages
    #           passes a Hash containing both keys).
    def initialize(options)
      @number = options[:number]
      @text = options[:text]
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Pdftotext
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'pdftotext/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "pdftotext"
8
+ spec.version = Pdftotext::VERSION
9
+ spec.authors = ["Ben Balter"]
10
+ spec.email = ["ben.balter@github.com"]
11
+
12
+ spec.summary = "A Ruby wrapper for the `pdftotext` command line library"
13
+ spec.homepage = "https://github.com/benbalter/pdftotext"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
+ spec.bindir = "exe"
18
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "cliver"
22
+ spec.add_development_dependency "bundler", "~> 1.10"
23
+ spec.add_development_dependency "rake", "~> 10.0"
24
+ spec.add_development_dependency "rspec"
25
+ spec.add_development_dependency "pry"
26
+ end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdftotext
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ben Balter
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-12-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: cliver
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.10'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.10'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description:
84
+ email:
85
+ - ben.balter@github.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".rspec"
92
+ - ".travis.yml"
93
+ - Gemfile
94
+ - LICENSE.txt
95
+ - README.md
96
+ - Rakefile
97
+ - bin/console
98
+ - bin/setup
99
+ - lib/pdftotext.rb
100
+ - lib/pdftotext/cli.rb
101
+ - lib/pdftotext/document.rb
102
+ - lib/pdftotext/page.rb
103
+ - lib/pdftotext/version.rb
104
+ - pdftotext.gemspec
105
+ homepage: https://github.com/benbalter/pdftotext
106
+ licenses:
107
+ - MIT
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.5.1
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: A Ruby wrapper for the `pdftotext` command line library
129
+ test_files: []