pdftotext 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e5891b96ef5f0d107fd61fc981a15a34949bb7f2
4
+ data.tar.gz: c8b860266e848e94b3189e086aa9bef47c90332d
5
+ SHA512:
6
+ metadata.gz: 4e2b1edae3551f6438bfe5bb4116c9e02be22638fcb33d11c05b4a6c244a2312c67aafe7394234738116893f9f3688a69a60c949d1bade70e4ff086edfbb5b73
7
+ data.tar.gz: 317d262619e24bd2a2a6ca770010f782b596710422e399f73d30fbc7c2a2a71a654767f670c56f809cfccc8c1fbfc165d7c3a2fd5782cbb9cc779688dce027d1
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.gem
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+ sudo: required
3
+
4
+ cache:
5
+ - bundler
6
+ - apt
7
+
8
+ before_script:
9
+ - sudo apt-get update
10
+ - sudo apt-get install -y poppler-utils
11
+
12
+ install:
13
+ - gem install bundler --version '1.10.6'
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pdftotext.gemspec
4
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Ben Balter
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,24 @@
1
+ # Pdftotext
2
+
3
+ *A Ruby wrapper for the `pdftotext` command line library*
4
+
5
+ [![Build Status](https://travis-ci.org/benbalter/pdftotext.svg)](https://travis-ci.org/benbalter/pdftotext)
6
+
7
+ ## Installation
8
+
9
+ 1. You must first install [Poppler](http://poppler.freedesktop.org/). On OS X, this can be done with `brew install poppler` if you have Homebrew installed.
10
+ 2. Add `gem 'pdftotext'` to your project's Gemfile
11
+ 3. `bundle install`
12
+
13
+ ## Usage
14
+
15
+ ```ruby
16
+ text = Pdftotext.text('path-to.pdf')
17
+ => "The text of the PDF"
18
+
19
+ pages = Pdftotext.pages('path-to.pdf')
20
+ pages.first.number
21
+ => 1
22
+ pages.first.text
23
+ => "The text of the PDF"
24
+ ```
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "pdftotext"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ require "pry"
11
+ Pry.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,21 @@
1
+ require "cliver"
2
+ require "open3"
3
+ require "pdftotext/version"
4
+ require "pdftotext/document"
5
+ require "pdftotext/cli"
6
+ require "pdftotext/page"
7
+
8
# Public entry points for extracting text from PDF files by way of the
# `pdftotext` command line tool.
module Pdftotext
  class << self
    # Extracts the text of the PDF at +path+.
    #
    # path    - String path to the PDF file.
    # options - Hash of command line options handed to Document#text.
    #
    # Returns whatever Document#text returns for that file.
    def text(path, options = {})
      document = Document.new(path)
      document.text(options)
    end

    # Extracts the PDF at +path+ page by page.
    #
    # path    - String path to the PDF file.
    # options - Hash of command line options handed to Document#pages.
    #
    # Returns whatever Document#pages returns for that file.
    def pages(path, options = {})
      document = Document.new(path)
      document.pages(options)
    end

    # Returns the CLI instance shared by the whole module; it is built on
    # first use and memoized afterwards.
    def cli
      @cli ||= CLI.new
    end
  end
end
@@ -0,0 +1,35 @@
1
module Pdftotext
  # Builds the argument list for the `pdftotext` executable and runs it.
  class CLI
    # Options merged into every invocation unless the caller overrides them.
    # Frozen so that no caller can alter the defaults for everyone else.
    DEFAULT_OPTIONS = {
      :layout => true
    }.freeze

    # Runs the `pdftotext` executable.
    #
    # args - positional command line arguments, optionally followed by a
    #        Hash of options (see #options_to_args). When the last argument
    #        is not a Hash, only DEFAULT_OPTIONS are applied.
    #
    # Returns the combined stdout/stderr of the command as a String.
    # Raises RuntimeError when the command does not exit successfully.
    def run_command(*args)
      options = args.last.is_a?(Hash) ? args.pop : {}
      argv = args + options_to_args(DEFAULT_OPTIONS.merge(options))
      output, status = Open3.capture2e(bin_path, *argv)
      raise "Command `#{bin_path} #{argv.join(" ")}` failed: #{output}" unless status.success?
      output
    end

    private

    # Path of the `pdftotext` executable, looked up once through Cliver and
    # memoized.
    def bin_path
      @bin_path ||= Cliver.detect!('pdftotext')
    end

    # Converts an options Hash into an Array of command line arguments.
    #
    # * `true`          => a bare switch ("-layout")
    # * `false` / `nil` => omitted
    # * anything else   => the switch followed by the value as its own argv
    #   element ("-f", "2"). No shell is involved, so "-f 2" as one string
    #   would reach the program as a single, unknown switch.
    #
    # Returns an Array of Strings.
    def options_to_args(options)
      options.each_with_object([]) do |(key, value), args|
        next if value.nil? || value == false
        args << "-#{key}"
        args << value.to_s unless value == true
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
require "tempfile"

module Pdftotext
  # A PDF file on disk whose text can be extracted with `pdftotext`.
  class Document
    # Absolute path of the PDF file.
    attr_reader :path

    # path - String path to the PDF file; expanded to an absolute path.
    def initialize(path)
      @path = File.expand_path(path)
    end

    # Extracts the text of the whole document.
    #
    # A fresh temporary output file is used for every call and is always
    # removed afterwards, so the method can be called repeatedly and does
    # not leave files behind when the command fails.
    #
    # options - Hash of command line options passed to CLI#run_command.
    #
    # Returns the extracted text as a String.
    # Raises whatever CLI#run_command raises when the command fails.
    def text(options={})
      output = Tempfile.new(['pdftotext', '.txt'])
      begin
        Pdftotext.cli.run_command path, output.path, options
        output.read
      ensure
        output.close!
      end
    end

    # Extracts the text of the document split into pages. Pages are
    # delimited by form feed characters in the extracted text and are
    # numbered starting at 1.
    #
    # options - Hash of command line options passed to CLI#run_command.
    #
    # Returns an Array of Page objects.
    def pages(options={})
      text(options).split("\f").each_with_index.map do |page_text, index|
        Page.new text: page_text, number: index + 1
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module Pdftotext
  # Value object for a single page of extracted text.
  class Page
    # The page's text and its page number, as supplied to the constructor.
    attr_reader :text, :number

    # options - Hash-like object read through `[]`:
    #           :text   - the text of the page
    #           :number - the page number
    def initialize(options)
      @text, @number = options[:text], options[:number]
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Pdftotext
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'pdftotext/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "pdftotext"
8
+ spec.version = Pdftotext::VERSION
9
+ spec.authors = ["Ben Balter"]
10
+ spec.email = ["ben.balter@github.com"]
11
+
12
+ spec.summary = "A Ruby wrapper for the `pdftotext` command line library"
13
+ spec.homepage = "https://github.com/benbalter/pdftotext"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
+ spec.bindir = "exe"
18
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "cliver"
22
+ spec.add_development_dependency "bundler", "~> 1.10"
23
+ spec.add_development_dependency "rake", "~> 10.0"
24
+ spec.add_development_dependency "rspec"
25
+ spec.add_development_dependency "pry"
26
+ end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdftotext
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ben Balter
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-12-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: cliver
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.10'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.10'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description:
84
+ email:
85
+ - ben.balter@github.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".rspec"
92
+ - ".travis.yml"
93
+ - Gemfile
94
+ - LICENSE.txt
95
+ - README.md
96
+ - Rakefile
97
+ - bin/console
98
+ - bin/setup
99
+ - lib/pdftotext.rb
100
+ - lib/pdftotext/cli.rb
101
+ - lib/pdftotext/document.rb
102
+ - lib/pdftotext/page.rb
103
+ - lib/pdftotext/version.rb
104
+ - pdftotext.gemspec
105
+ homepage: https://github.com/benbalter/pdftotext
106
+ licenses:
107
+ - MIT
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.5.1
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: A Ruby wrapper for the `pdftotext` command line library
129
+ test_files: []