pdf_extract 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1 @@
1
+ jruby-1.7.3
data/Gemfile ADDED
@@ -0,0 +1,21 @@
1
# Fetch gems over TLS; the plain-HTTP endpoint allows tampering in transit.
source "https://rubygems.org"
# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Runtime dependencies are only declared for JRuby.
platform :jruby do
  # gem 'tabula-extractor',github: "jazzido/tabula-extractor", :require => "tabula"
  gem 'tesseract-ocr'
  gem 'docsplit'
  gem "bundler", ">= 1.0.0"
end

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "jeweler", "~> 1.8.4"
  gem "rake", ">= 10.0.3"

  # NOTE: test/helper.rb requires 'shoulda'; re-enable the next line (and run
  # `bundle install`) before running the test task.
  # gem "shoulda", ">= 0"
  # gem "rdoc", "~> 3.12"
end
@@ -0,0 +1,70 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ addressable (2.3.5)
5
+ builder (3.2.2)
6
+ call-me (0.0.2.3)
7
+ refining
8
+ docsplit (0.7.2)
9
+ faraday (0.8.7)
10
+ multipart-post (~> 1.1)
11
+ ffi (1.9.0-java)
12
+ ffi-extra (0.1.0)
13
+ ffi
14
+ ffi-inline (0.0.4.3)
15
+ ffi (>= 0.4.0)
16
+ git (1.2.5)
17
+ github_api (0.10.1)
18
+ addressable
19
+ faraday (~> 0.8.1)
20
+ hashie (>= 1.2)
21
+ multi_json (~> 1.4)
22
+ nokogiri (~> 1.5.2)
23
+ oauth2
24
+ hashie (2.0.5)
25
+ highline (1.6.19)
26
+ httpauth (0.2.0)
27
+ iso-639 (0.1.0)
28
+ jeweler (1.8.6)
29
+ builder
30
+ bundler (~> 1.0)
31
+ git (>= 1.2.5)
32
+ github_api (= 0.10.1)
33
+ highline (>= 1.6.15)
34
+ nokogiri (= 1.5.10)
35
+ rake
36
+ rdoc
37
+ json (1.8.0-java)
38
+ jwt (0.1.8)
39
+ multi_json (>= 1.5)
40
+ multi_json (1.7.7)
41
+ multi_xml (0.5.4)
42
+ multipart-post (1.2.0)
43
+ nokogiri (1.5.10-java)
44
+ oauth2 (0.9.2)
45
+ faraday (~> 0.8)
46
+ httpauth (~> 0.2)
47
+ jwt (~> 0.1.4)
48
+ multi_json (~> 1.0)
49
+ multi_xml (~> 0.5)
50
+ rack (~> 1.2)
51
+ rack (1.5.2)
52
+ rake (10.1.0)
53
+ rdoc (4.0.1)
54
+ json (~> 1.4)
55
+ refining (0.0.5.5)
56
+ tesseract-ocr (0.1.5)
57
+ call-me
58
+ ffi-extra
59
+ ffi-inline
60
+ iso-639
61
+
62
+ PLATFORMS
63
+ java
64
+
65
+ DEPENDENCIES
66
+ bundler (>= 1.0.0)
67
+ docsplit
68
+ jeweler (~> 1.8.4)
69
+ rake (>= 10.0.3)
70
+ tesseract-ocr
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 noah pryor
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ = pdf_extract
2
+
3
+ pdf_extract pulls page text, OCR'd image regions and tables out of PDF files. It is designed to run on JRuby.
4
+
5
+ == Contributing to pdf_extract
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
9
+ * Fork the project.
10
+ * Start a feature/bugfix branch.
11
+ * Commit and push until you are happy with your contribution.
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if changing them is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2013 noah pryor. See LICENSE.txt for
18
+ further details.
19
+
@@ -0,0 +1,45 @@
1
# encoding: utf-8

require 'rubygems'
require 'bundler'

# Activate the default and development gem groups; on failure print the
# Bundler error and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

# Gem packaging tasks (the gemspec is generated from this block).
require 'jeweler'
Jeweler::Tasks.new do |spec|
  # spec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  spec.name = "pdf_extract"
  spec.homepage = "http://github.com/noahpryor/pdf_extract"
  spec.license = "MIT"
  spec.summary = "PDF extraction. Best with jRuby"
  spec.description = "description yo "
  spec.email = "noah@noahpryor.com"
  spec.authors = ["noah pryor"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib' << 'test'
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

task :default => :test

# `rake rdoc` builds API docs titled with the contents of the VERSION file, if present.
require 'rdoc/task'
Rake::RDocTask.new do |doc|
  release = File.exist?('VERSION') ? File.read('VERSION') : ""

  doc.rdoc_dir = 'rdoc'
  doc.title = "pdf_extract #{release}"
  doc.rdoc_files.include('README*')
  doc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.5.0
Binary file
Binary file
Binary file
@@ -0,0 +1,246 @@
1
+ require 'java'
2
+ require 'tesseract'
3
+ require 'docsplit'
4
+ #require 'image_voodoo'
5
+ require 'json'
6
+ require 'open-uri'
7
require 'shellwords'

# Extracts the configured items (OCR regions and tables) from a single PDF page.
#
# The +page+ hash given to #initialize is read with symbol keys:
# [:image_path] path of the rendered page image (passed along for OCR items)
# [:pdf_path]   path of the source PDF (passed to tabula for table items)
# [:items]      array of item hashes, each with +:name+, +:kind+ and +:dimensions+
# [:page]       page number handed to tabula; defaults to 1
#
# After #process, #results maps every processed item's +:name+ to its value:
# a String for 'ocr' items, an array of rows (arrays of cell strings) for
# 'table' items.
class PageExtractor
  attr_accessor :page, :results, :items, :image_path, :pdf_path, :page_num

  # @param page [Hash] page description, see the class comment
  def initialize(page)
    @image_path = page[:image_path]
    @pdf_path   = page[:pdf_path]
    @items      = page[:items]
    # Plain `||` instead of `||=`: the default must not be written back into
    # the caller's hash.
    @page_num   = page[:page] || 1
    @results    = {}
  end

  # Runs the extractor matching each item's +:kind+ ('ocr' or 'table').
  # Items of any other kind are skipped; a missing item list is treated as empty.
  def process
    Array(items).each do |item|
      case item[:kind]
      when 'ocr'   then extract_ocr(item)
      when 'table' then extract_table(item)
      end
    end
  end

  # Stores the OCR text for +item+ under its name.
  def extract_ocr(item)
    @results[item[:name]] = ocr_text(crop_image(item[:dimensions]))
  end

  # FIXME: cropping is disabled. The ImageVoodoo call below was already
  # commented out, so +d+ is ignored, no file is written, and only the fixed
  # output name is returned. OCR therefore runs on whatever "CR.png" exists.
  #
  # @param d [Hash] region with :x1, :y1, :x2, :y2 (currently unused)
  # @return [String] the name the cropped image would be saved under
  def crop_image(d)
    # ImageVoodoo.with_image(image_path) do |img|
    #   img.with_crop(d[:x1], d[:y1], d[:x2], d[:y2]) { |img2| img2.save "CR.png" }
    # end
    "CR.png"
  end

  # Stores the table found in the item's region under the item's name.
  def extract_table(item)
    table = run_tabula(item[:dimensions])
    @results[item[:name]] = lines_to_array(table)
  end

  # Shells out to the +tabula+ command for the given region of the PDF page.
  # Every argument is shell-escaped so paths containing spaces or shell
  # metacharacters cannot break or extend the command.
  #
  # @param d [Hash] region with :x1, :y1, :x2, :y2
  # @return [String] whatever tabula prints on stdout
  def run_tabula(d)
    area = [d[:y1], d[:x1], d[:y2], d[:x2]].join(", ")
    command = ["tabula", "--area=#{area}", pdf_path.to_s, "--page=#{page_num}"]
    `#{Shellwords.join(command)}`
  end

  # Splits comma-separated text into rows of cells. This is a plain split on
  # ",": quoted fields are not interpreted.
  #
  # @param table [String] one row per line
  # @return [Array<Array<String>>]
  def lines_to_array(table)
    table.lines.map(&:chomp).map { |line| line.split(",") }
  end

  # Runs Tesseract over a whole image file.
  #
  # @param image_path [String] image to recognise
  # @param blacklist [String] characters Tesseract must not emit
  # @param language [Symbol] Tesseract language code
  # @return [String] recognised text with surrounding whitespace removed
  def ocr_text(image_path, blacklist = '|', language = :eng)
    engine = Tesseract::Engine.new do |config|
      config.language  = language
      config.blacklist = blacklist
    end
    engine.text_for(image_path).strip
  end
end
69
+
70
# Core extension used to normalise schema hashes.
class Hash
  # Converts every top-level key that responds to +to_sym+ into a Symbol, in
  # place. Keys that cannot be converted (e.g. Integer, nil) are kept as they
  # are. Nested hashes are not touched.
  #
  # Uses an explicit +respond_to?+ check rather than an inline +rescue+, so
  # unrelated errors are not silently swallowed.
  #
  # @return [Hash] self
  def symbolize_keys!
    keys.each do |key|
      symbol = key.respond_to?(:to_sym) ? key.to_sym : key
      self[symbol] = delete(key)
    end
    self
  end
end
78
+
79
require 'fileutils'

# Drives extraction for one PDF described by a schema hash (see
# PDFextract.example_schema for the expected shape).
#
# A working directory named after the current Unix time is created in the
# current working directory; an existing directory of that name is deleted
# first. The source PDF is copied or downloaded into it as "temp-file.pdf".
# #process removes the working directory again when it finishes.
class PDFextract
  attr_accessor :file_path, :results
  attr_accessor :options, :text_dir, :base_dir
  attr_accessor :image_dir, :output_dir, :pages

  # @param schema [Hash] extraction schema; string or symbol keys are accepted
  #   at every nesting level. The hash passed in is not modified.
  #   [:file_url]  URL to download the PDF from (takes precedence), or
  #   [:file_path] local path of the PDF
  #   [:options]   optional flags (+:remove_protection+, +:extract_all_text+)
  #   [:pages]     optional array of page descriptions
  # @raise [ArgumentError] when neither :file_url nor :file_path is given
  def initialize(schema)
    schema = deep_symbolize(schema)
    unless schema[:file_url] || schema[:file_path]
      raise ArgumentError, "schema must include :file_url or :file_path"
    end

    # The directory names must be known before setup_folders creates them.
    @base_dir   = Time.now.to_i.to_s
    @text_dir   = File.join(@base_dir, 'text_files')
    @image_dir  = File.join(@base_dir, 'image_files')
    @output_dir = File.join(@base_dir, 'output')
    setup_folders(@base_dir)

    @file_path =
      if schema[:file_url]
        get_file_from_url(schema[:file_url])
      else
        get_file_from_path(schema[:file_path])
      end

    @options = schema[:options] || {}
    # Pages are independent of :options; they used to be dropped whenever
    # :options was absent.
    @pages   = schema[:pages] || []
    @results = {}
  end

  # Recreates +folder_name+ from scratch and creates the text, image and
  # output sub-directories inside it.
  def setup_folders(folder_name)
    FileUtils.rm_rf(folder_name) if Dir.exist?(folder_name)
    [folder_name, text_dir, image_dir, output_dir].compact.each do |dir|
      FileUtils.mkdir_p(dir)
    end
  end

  # Downloads +file_url+ into the working directory.
  #
  # URI#open (provided by open-uri) is used instead of Kernel#open so that a
  # string such as "| command" or a local path can never be executed or read.
  #
  # @return [String] path of the downloaded copy
  def get_file_from_url(file_url)
    target = File.join(@base_dir, "temp-file.pdf")
    data = URI.parse(file_url).open { |remote| remote.read }
    # Binary mode: PDF bytes must not go through newline/encoding conversion.
    File.open(target, "wb") { |file| file.write(data) }
    target
  end

  # Copies the PDF at +path+ into the working directory.
  #
  # @return [String] path of the copy
  def get_file_from_path(path)
    new_path = File.join(@base_dir, "temp-file.pdf")
    FileUtils.cp(path, new_path)
    new_path
  end

  # Runs the whole pipeline: page images, optional full-text extraction, then
  # the per-page items. The working directory is removed afterwards, even
  # when a step raises.
  #
  # @return [Hash] the collected results
  def process
    remove_protection if options[:remove_protection] == true
    results[:images] = pdf_to_image_files("all")
    results[:text] = convert_to_text if options[:extract_all_text] == true
    process_pages
    results
  ensure
    cleanup
  end

  # Deletes the working directory and everything in it.
  def cleanup
    FileUtils.rm_rf(base_dir) if base_dir
  end

  # Placeholder: removing PDF protection is not implemented yet.
  def remove_protection
    # TODO
  end

  # Runs a PageExtractor for every configured page and stores its results
  # under the page's +:page+ number. For pages matched by "page_num" the
  # rendered image path and the PDF path are filled in first.
  def process_pages
    pages.each do |page|
      # Read outside the `if` so pages with another :match value are still
      # keyed by their page number rather than all landing under nil.
      page_num = page[:page]
      if page[:match] == "page_num"
        page[:image_path] = File.join(image_dir, "temp-file_#{page_num}.png")
        page[:pdf_path]   = file_path
      end
      extractor = PageExtractor.new(page)
      extractor.process
      results[page_num] = extractor.results
    end
  end

  # Extracts text files via Docsplit and loads them.
  #
  # @return [Hash{String => String}] file contents keyed by the part of the
  #   file name after the last "_" (the whole base name when there is none)
  def convert_to_text(pages = "all")
    pdf_to_text_files(pages)
    Dir.glob(File.join(text_dir, "*.txt")).each_with_object({}) do |file, text|
      # Only the base name is inspected; directory names may contain "_" too.
      page_num = File.basename(file, ".txt").split("_").last
      text[page_num] = File.read(file)
    end
  end

  # Renders page images via Docsplit.
  #
  # @return [Array<String>] paths of the PNG files in the image directory
  def convert_to_image(pages = "all")
    pdf_to_image_files(pages)
    Dir.glob(File.join(image_dir, "*.png"))
  end

  # Renders the PDF to PNG files in the image directory.
  # NOTE(review): +pages+ is accepted but not forwarded to Docsplit - confirm
  # the page-range format Docsplit.extract_images expects before passing it on.
  def pdf_to_image_files(pages)
    Docsplit.extract_images(file_path, :output => image_dir, :format => [:png])
  end

  # Writes text files for the requested pages into the text directory.
  def pdf_to_text_files(pages)
    Docsplit.extract_text(file_path, :output => text_dir, :pages => pages)
  end

  # OCRs a region of the image at +page_path+ and also stores the text in
  # +dimensions[:result]+.
  # FIXME: the selected region is hard-coded (1, 34, 59, 281); the coordinates
  # in +dimensions+ are not used.
  #
  # @return [String] recognised text
  def extract_with_ocr(page_path, dimensions)
    engine = Tesseract::Engine.new(language: :eng)
    engine.image = page_path
    engine.select 1, 34, 59, 281
    text = engine.text.strip
    dimensions[:result] = text
    text
  end

  # OCRs the rectangle described by +coords+ inside the image at +image_path+.
  #
  # @param coords [Hash] string-keyed corners "x1", "y1", "x2", "y2"
  # @return [String] recognised text with surrounding whitespace removed
  def self.extract_ocr(image_path, coords)
    x = coords["x1"]
    y = coords["y1"]
    width  = coords["x2"] - x
    height = coords["y2"] - y
    engine = Tesseract::Engine.new(language: :eng)
    engine.image = image_path
    engine.select x, y, width, height
    engine.text.strip
  end

  # @return [Hash] a sample schema showing every supported key
  def self.example_schema
    {
      file_path: "test_files/dream-may.pdf",
      options: {
        remove_protection: false,
        password: nil,
        extract_all_text: true,
        extract_text: []
      },
      pages: [{
        match: "page_num",
        page: 1,
        items: [
          {
            name: 'title',
            kind: 'ocr', # alternative is kind table
            dimensions: {
              x1: 10,
              x2: 282,
              y1: 50,
              y2: 100
            }
          },
          {
            name: 'units_table',
            kind: 'table',
            dimensions: {
              x1: 0,
              x2: 265.73,
              y1: 184.94,
              y2: 233.84
            }
          }
        ]
      }]
    }
  end

  private

  # Returns a copy of +value+ in which every hash key that responds to
  # +to_sym+ is a Symbol, recursing through nested hashes and arrays.
  def deep_symbolize(value)
    case value
    when Hash
      value.each_with_object({}) do |(key, inner), copy|
        copy[key.respond_to?(:to_sym) ? key.to_sym : key] = deep_symbolize(inner)
      end
    when Array
      value.map { |inner| deep_symbolize(inner) }
    else
      value
    end
  end
end
242
# (A stray debug print used to run here on every `require`; library code must not write to stdout when loaded.)
243
+ #coords = '[{"x1":59,"y1":55,"x2":237,"y2":95,"width":178,"height":40,"id":0,"page":1}]'
244
+ #parsed = JSON.parse(coords)
245
+ #puts parsed[0]
246
+ #puts PDFextract.extract_ocr("document_560_1.pdf",parsed[0])
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,72 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "pdf_extract"
8
+ s.version = "0.5.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["noah pryor"]
12
+ s.date = "2013-07-16"
13
+ s.description = "description yo "
14
+ s.email = "noah@noahpryor.com"
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".ruby-version",
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "LICENSE.txt",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "lib/document.pdf",
29
+ "lib/document_1.png",
30
+ "lib/document_560_1.png",
31
+ "lib/pdf_extract.rb",
32
+ "pdf_extract-0.1.0.gem",
33
+ "pdf_extract-0.2.0.gem",
34
+ "pdf_extract-0.2.1.gem",
35
+ "pdf_extract-0.2.2.gem",
36
+ "pdf_extract-0.3.0.gem",
37
+ "pdf_extract-0.3.1.gem",
38
+ "pdf_extract.gemspec",
39
+ "test/helper.rb",
40
+ "test/test_pdf_extract.rb"
41
+ ]
42
+ s.homepage = "http://github.com/noahpryor/pdf_extract"
43
+ s.licenses = ["MIT"]
44
+ s.require_paths = ["lib"]
45
+ s.rubygems_version = "1.8.24"
46
+ s.summary = "PDF extraction. Best with jRuby"
47
+
48
+ if s.respond_to? :specification_version then
49
+ s.specification_version = 3
50
+
51
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
52
+ s.add_runtime_dependency(%q<tesseract-ocr>, [">= 0"])
53
+ s.add_runtime_dependency(%q<docsplit>, [">= 0"])
54
+ s.add_runtime_dependency(%q<bundler>, [">= 1.0.0"])
55
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
56
+ s.add_development_dependency(%q<rake>, [">= 10.0.3"])
57
+ else
58
+ s.add_dependency(%q<tesseract-ocr>, [">= 0"])
59
+ s.add_dependency(%q<docsplit>, [">= 0"])
60
+ s.add_dependency(%q<bundler>, [">= 1.0.0"])
61
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
62
+ s.add_dependency(%q<rake>, [">= 10.0.3"])
63
+ end
64
+ else
65
+ s.add_dependency(%q<tesseract-ocr>, [">= 0"])
66
+ s.add_dependency(%q<docsplit>, [">= 0"])
67
+ s.add_dependency(%q<bundler>, [">= 1.0.0"])
68
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
69
+ s.add_dependency(%q<rake>, [">= 10.0.3"])
70
+ end
71
+ end
72
+
@@ -0,0 +1,18 @@
1
# Shared test bootstrap: activates the bundled gems, loads the test
# frameworks and puts lib/ and test/ on the load path before requiring the
# library under test.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Report what is missing and exit with Bundler's own status code.
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'test/unit'
require 'shoulda'

# lib/ is unshifted first, then test/, so test/ ends up at the front.
test_dir = File.dirname(__FILE__)
[File.join(test_dir, '..', 'lib'), test_dir].each { |dir| $LOAD_PATH.unshift(dir) }
require 'pdf_extract'

# Reopened so shared helpers can be added for every test case.
class Test::Unit::TestCase
end
@@ -0,0 +1,7 @@
1
require 'helper'

# Smoke tests for the pieces of the library that need no PDF, OCR engine or
# external command. They replace the generated placeholder that always
# flunked, which made the default rake task fail unconditionally.
class TestPdfExtract < Test::Unit::TestCase
  should "expose an example schema whose first page is matched by page number" do
    schema = PDFextract.example_schema
    assert_equal "page_num", schema[:pages].first[:match]
    assert_equal %w[ocr table], schema[:pages].first[:items].map { |item| item[:kind] }
  end

  should "split comma-separated table output into rows of cells" do
    extractor = PageExtractor.new(:items => [])
    assert_equal [%w[a b], %w[c d]], extractor.lines_to_array("a,b\nc,d\n")
  end
end
+ end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdf_extract
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - noah pryor
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-07-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: tesseract-ocr
16
+ version_requirements: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: !binary |-
21
+ MA==
22
+ none: false
23
+ requirement: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: !binary |-
28
+ MA==
29
+ none: false
30
+ prerelease: false
31
+ type: :runtime
32
+ - !ruby/object:Gem::Dependency
33
+ name: docsplit
34
+ version_requirements: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: !binary |-
39
+ MA==
40
+ none: false
41
+ requirement: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: !binary |-
46
+ MA==
47
+ none: false
48
+ prerelease: false
49
+ type: :runtime
50
+ - !ruby/object:Gem::Dependency
51
+ name: bundler
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 1.0.0
57
+ none: false
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 1.0.0
63
+ none: false
64
+ prerelease: false
65
+ type: :runtime
66
+ - !ruby/object:Gem::Dependency
67
+ name: jeweler
68
+ version_requirements: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - "~>"
71
+ - !ruby/object:Gem::Version
72
+ version: 1.8.4
73
+ none: false
74
+ requirement: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - "~>"
77
+ - !ruby/object:Gem::Version
78
+ version: 1.8.4
79
+ none: false
80
+ prerelease: false
81
+ type: :development
82
+ - !ruby/object:Gem::Dependency
83
+ name: rake
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: 10.0.3
89
+ none: false
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: 10.0.3
95
+ none: false
96
+ prerelease: false
97
+ type: :development
98
+ description: ! 'description yo '
99
+ email: noah@noahpryor.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files:
103
+ - LICENSE.txt
104
+ - README.rdoc
105
+ files:
106
+ - ".document"
107
+ - ".ruby-version"
108
+ - Gemfile
109
+ - Gemfile.lock
110
+ - LICENSE.txt
111
+ - README.rdoc
112
+ - Rakefile
113
+ - VERSION
114
+ - lib/document.pdf
115
+ - lib/document_1.png
116
+ - lib/document_560_1.png
117
+ - lib/pdf_extract.rb
118
+ - pdf_extract-0.1.0.gem
119
+ - pdf_extract-0.2.0.gem
120
+ - pdf_extract-0.2.1.gem
121
+ - pdf_extract-0.2.2.gem
122
+ - pdf_extract-0.3.0.gem
123
+ - pdf_extract-0.3.1.gem
124
+ - pdf_extract.gemspec
125
+ - test/helper.rb
126
+ - test/test_pdf_extract.rb
127
+ homepage: http://github.com/noahpryor/pdf_extract
128
+ licenses:
129
+ - MIT
130
+ post_install_message:
131
+ rdoc_options: []
132
+ require_paths:
133
+ - lib
134
+ required_ruby_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ segments:
139
+ - 0
140
+ hash: 2
141
+ version: !binary |-
142
+ MA==
143
+ none: false
144
+ required_rubygems_version: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: !binary |-
149
+ MA==
150
+ none: false
151
+ requirements: []
152
+ rubyforge_project:
153
+ rubygems_version: 1.8.24
154
+ signing_key:
155
+ specification_version: 3
156
+ summary: PDF extraction. Best with jRuby
157
+ test_files: []