tabula-extractor 0.7.5-java → 0.7.6-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4391bc1af8143d2f60ed2ad11a66cba9f96c955e
4
- data.tar.gz: d0d905fbd5b2bae105a11a9fd01470921b7dd4f3
3
+ metadata.gz: 9ac7f1daa082acce10e82b94b01b31e07813ad4d
4
+ data.tar.gz: ac521bbba80d6b0571d904565cd31d9af5e7947a
5
5
  SHA512:
6
- metadata.gz: 4ef1e681e511dc074381696689b8d86915f262d4538cf107be6b8844fcbf7102f20cedcdc8964f326178095e7cd9b47d912386801e57a0e4645ee27faacff4a5
7
- data.tar.gz: 336c19a84cd2cf430e24ce0728a5f7753ca78e3b08e2f12251274da577dd2f0cfe2d6a5cf1b9df44dd09e85d9c4e5a80d2672e53632785bc70461f8ca10c3e17
6
+ metadata.gz: 0389d96e5a7a8ad20c147ed3170b922a501126211bec58b012f39662425599437f3f869002825b562cae57d44645ddb776088804f237e168b71473211a86c67a
7
+ data.tar.gz: 53dd7bd11684bf8b8ccd03ea9352140eb3dcc346b7018bee7d6b8049e7e70ee02f59b83e360d97ddf5b7f211a1dccbb83694aab8cb016c7a4ba656f46a37c4c4
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  tabula-extractor
2
2
  ================
3
3
 
4
- [![Build Status](https://travis-ci.org/jazzido/tabula-extractor.png)](https://travis-ci.org/jazzido/tabula-extractor)
4
+ [![Build Status](https://travis-ci.org/tabulapdf/tabula-extractor.png)](https://travis-ci.org/tabulapdf/tabula-extractor)
5
5
 
6
6
  Extract tables from PDF files. `tabula-extractor` is the table extraction engine that powers [Tabula](http://tabula.technology), now available as a library and command line program.
7
7
 
@@ -125,7 +125,7 @@ extractor.extract.each_with_index do |pdf_page, page_index|
125
125
  page_areas = [[250, 0, 325, 1700]]
126
126
 
127
127
  page_areas.each do |page_area|
128
- out << pdf_page.get_area(page_area).make_table.to_csv
128
+ out << pdf_page.get_area(page_area).get_table.to_csv
129
129
  out << "\n\n"
130
130
  end
131
131
 
@@ -155,7 +155,7 @@ extractor.extract.each_with_index do |pdf_page, page_index|
155
155
  vertical_rulings = vertical_ruling_locations.map{|n| Tabula::Ruling.new(0, n * scale_factor, 0, 1000)}
156
156
 
157
157
  page_areas.each do |page_area|
158
- out << pdf_page.get_area(page_area).make_table(:vertical_rulings => vertical_rulings).to_csv
158
+ out << pdf_page.get_area(page_area).get_table(:vertical_rulings => vertical_rulings).to_csv
159
159
  out << "\n\n"
160
160
  end
161
161
  end
@@ -191,6 +191,10 @@ module Tabula
191
191
  end
192
192
  end
193
193
 
194
+ def has_text?
195
+ !self.texts.empty?
196
+ end
197
+
194
198
  # TODO no need for this, let's choose one name
195
199
  def ruling_lines
196
200
  get_ruling_lines!
@@ -258,7 +262,7 @@ module Tabula
258
262
  :height => self.height,
259
263
  :number => self.number,
260
264
  :rotation => self.rotation,
261
- :texts => self.texts
265
+ :hasText => self.has_text?
262
266
  }.to_json(options)
263
267
  end
264
268
 
@@ -239,7 +239,8 @@ module Tabula
239
239
  end
240
240
 
241
241
  def finite?
242
- top != ::Float::INFINITY && left != ::Float::INFINITY && bottom != ::Float::INFINITY && right != ::Float::INFINITY
242
+ top != ::Float::INFINITY && left != ::Float::INFINITY && bottom != ::Float::INFINITY && right != ::Float::INFINITY &&
243
+ !top.nan? && !left.nan? && !bottom.nan? && !right.nan?
243
244
  end
244
245
 
245
246
  ##
@@ -371,26 +371,37 @@ module Tabula
371
371
 
372
372
 
373
373
  class PagesInfoExtractor
374
- def initialize(pdf_filename, password='')
375
- @pdf_filename = pdf_filename
376
- @pdf_file = Extraction.openPDF(pdf_filename, password)
374
+ def initialize(pdf_file_path, password='')
375
+ @pdf_filename = pdf_file_path
376
+ @pdf_file = Extraction.openPDF(pdf_file_path, password)
377
377
  @all_pages = @pdf_file.getDocumentCatalog.getAllPages
378
+
379
+ @extractor = Tabula::Extraction::ObjectExtractor.new(pdf_file_path, :all )
378
380
  end
379
381
 
380
382
  def pages
383
+ found_page_with_texts = false
381
384
  Enumerator.new do |y|
382
385
  begin
383
386
  @all_pages.each_with_index do |page, i|
384
387
  contents = page.getContents
385
388
 
386
- y.yield Tabula::Page.new(@pdf_filename,
389
+ if found_page_with_texts
390
+ page = Tabula::Page.new(@pdf_filename,
387
391
  page.findCropBox.width,
388
392
  page.findCropBox.height,
389
393
  page.getRotation.to_i,
390
394
  i+1) #remember, these are one-indexed
395
+ else
396
+ page = @extractor.extract_page(i+1)
397
+ found_page_with_texts = page.has_text?
398
+ end
399
+
400
+ y.yield page
391
401
  end
392
402
  ensure
393
403
  @pdf_file.close
404
+ @extractor.close!
394
405
  end
395
406
  end
396
407
  end
@@ -1,3 +1,3 @@
1
1
  module Tabula
2
- VERSION = '0.7.5'
2
+ VERSION = '0.7.6'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tabula-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.5
4
+ version: 0.7.6
5
5
  platform: java
6
6
  authors:
7
7
  - Manuel Aristarán
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-09-29 00:00:00.000000000 Z
13
+ date: 2015-01-31 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler
@@ -94,7 +94,6 @@ files:
94
94
  - .travis.yml
95
95
  - AUTHORS.md
96
96
  - Gemfile
97
- - Gemfile.lock
98
97
  - LICENSE.md
99
98
  - NOTICE.txt
100
99
  - README.md
@@ -147,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
147
146
  version: '0'
148
147
  requirements: []
149
148
  rubyforge_project:
150
- rubygems_version: 2.4.1
149
+ rubygems_version: 2.1.9
151
150
  signing_key:
152
151
  specification_version: 4
153
152
  summary: extract tables from PDF files
@@ -1,39 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- tabula-extractor (0.7.5-java)
5
- trollop (~> 2.0)
6
-
7
- GEM
8
- remote: http://rubygems.org/
9
- specs:
10
- coderay (1.1.0)
11
- columnize (0.8.9)
12
- ffi (1.9.5-java)
13
- method_source (0.8.2)
14
- minitest (5.4.2)
15
- pry (0.10.1-java)
16
- coderay (~> 1.1.0)
17
- method_source (~> 0.8.1)
18
- slop (~> 3.4)
19
- spoon (~> 0.0)
20
- rake (10.3.2)
21
- ruby-debug (0.10.4)
22
- columnize (>= 0.1)
23
- ruby-debug-base (~> 0.10.4.0)
24
- ruby-debug-base (0.10.4-java)
25
- slop (3.6.0)
26
- spoon (0.0.4)
27
- ffi
28
- trollop (2.0)
29
-
30
- PLATFORMS
31
- java
32
-
33
- DEPENDENCIES
34
- bundler (>= 1.3.4)
35
- minitest
36
- pry
37
- rake
38
- ruby-debug
39
- tabula-extractor!