tabula-extractor 0.7.0-java → 0.7.1-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fa69052647e565cd996f92f1c73e6d00deceea54
4
- data.tar.gz: 7d76ccc9b445e9138f65920cd0a401fbee3ababf
3
+ metadata.gz: fea5751c9b78705fbfda50be1e29011d40249b04
4
+ data.tar.gz: 2d7bb156a073636467b34e44b3f8b6a364920354
5
5
  SHA512:
6
- metadata.gz: d17ad7e967407711d60d9b30e8d231ab9adef313b8ede84e1960e35f24c5374f472a500e7f7148cc485408f796f1d7a67e96393d0fc5331e0f3f88971dad76c4
7
- data.tar.gz: 84121390715280e86fb3ff1dd82533863dfabbb0b27582fb0d3bf9ad088f8d05e4c8f7dfbcfd2f621c232accd6145968e263a4e0e081431cf941939974999b20
6
+ metadata.gz: d8d1f1932397b599b3d724bb4e8ca5fd73f516e198a391625031ab83f8e5f0a2743d51dcc02953ee5a53c0664330c8e8e1f5309f72493e9f2e232068b06085a1
7
+ data.tar.gz: e292d6320db6a42799418b6651f63a88446f85c6787659597e77b9f7c38e698b69a10c78edbdeea49baedc0d92f77602b847aef90ab1f8f2ab10356493dbb397
data/bin/tabula CHANGED
@@ -6,6 +6,10 @@ require_relative '../lib/tabula'
6
6
  FORMATS = ['CSV', 'TSV', 'HTML', 'JSON']
7
7
 
8
8
  def parse_pages_arg(pages_arg)
9
+ if(pages_arg == 'all')
10
+ return :all
11
+ end
12
+
9
13
  ranges = pages_arg.split(',').map(&:strip)
10
14
  pages = []
11
15
  ranges.each do |range|
@@ -32,7 +36,7 @@ Usage:
32
36
  where [options] are:
33
37
  EOS
34
38
 
35
- opt :pages, 'Comma separated list of ranges. Examples: --pages 1-3,5-7 or --pages 3. Default is --pages 1', :default => '1', :type => String
39
+ opt :pages, 'Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. Default is --pages 1', :default => '1', :type => String
36
40
  opt :area, 'Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. Default is entire page', :type => String, :default => nil
37
41
  opt :columns, 'X coordinates of column boundaries. Example --columns 10.1,20.2,30.3', :default => nil, :type => String
38
42
  opt :password, 'Password to decrypt document. Default is empty', :default => ''
@@ -38,5 +38,16 @@ module Tabula
38
38
  end
39
39
  output.strip
40
40
  end
41
+
42
+ def to_json(*a)
43
+ {
44
+ 'json_class' => self.class.name,
45
+ 'text' => text,
46
+ 'top' => top,
47
+ 'left' => left,
48
+ 'width' => width,
49
+ 'height' => height
50
+ }.to_json(*a)
51
+ end
41
52
  end
42
53
  end
@@ -6,7 +6,6 @@ module Tabula
6
6
  # subclasses must define cells, vertical_ruling_lines, horizontal_ruling_lines accessors; ruling_lines reader
7
7
  module HasCells
8
8
 
9
- IS_TABULAR_HEURISTIC_RATIO = 0.8
10
9
  ANOTHER_MAGIC_NUMBER = 0.75
11
10
 
12
11
  def is_tabular?
@@ -83,11 +83,7 @@ module Tabula
83
83
 
84
84
  #for API backwards-compatibility reasons, this returns an array of arrays.
85
85
  def make_table(options={})
86
- get_table(options).lines.map do |l|
87
- l.text_elements.map! do |te|
88
- te || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil)
89
- end
90
- end.sort_by { |l| l.map { |te| te.top or 0 }.max }
86
+ get_table(options).rows
91
87
  end
92
88
 
93
89
  # returns the Spreadsheets; creating them if they're not memoized
@@ -241,13 +237,17 @@ module Tabula
241
237
  end
242
238
 
243
239
  lines_to_points.each do |l, p1_p2|
244
- l.java_send :setLine, [java.awt.geom.Point2D, java.awt.geom.Point2D], p1_p2[0],
245
- p1_p2[1]
240
+ l.java_send :setLine, [java.awt.geom.Point2D, java.awt.geom.Point2D], p1_p2[0], p1_p2[1]
246
241
  end
247
242
  end
248
243
 
249
244
  def collapse_oriented_rulings(lines)
250
245
  # lines must all be of one orientation (i.e. horizontal, vertical)
246
+
247
+ if lines.empty?
248
+ return []
249
+ end
250
+
251
251
  lines.sort! {|a, b| a.position != b.position ? a.position <=> b.position : a.start <=> b.start }
252
252
 
253
253
  lines = lines.inject([lines.shift]) do |memo, next_line|
@@ -262,7 +262,6 @@ module Tabula
262
262
  memo << next_line
263
263
  end
264
264
  end
265
- lines
266
265
  end
267
266
  end
268
267
 
@@ -1,9 +1,13 @@
1
1
  module Tabula
2
2
  # a counterpart of Table, to be sure.
3
3
  # not sure yet what their relationship ought to be.
4
+
5
+ # both Table and Spreadsheet should implement `cells`, `rows`, `cols`, `extraction_method`
6
+
4
7
  class Spreadsheet < ZoneEntity
5
8
  include Tabula::HasCells
6
9
  attr_accessor :cells, :vertical_ruling_lines, :horizontal_ruling_lines, :cells_resolved
10
+ attr_reader :extraction_method, :page
7
11
 
8
12
  def initialize(top, left, width, height, page, cells, vertical_ruling_lines, horizontal_ruling_lines) #, lines)
9
13
  super(top, left, width, height)
@@ -11,6 +15,7 @@ module Tabula
11
15
  @page = page
12
16
  @vertical_ruling_lines = vertical_ruling_lines
13
17
  @horizontal_ruling_lines = horizontal_ruling_lines
18
+ @extraction_method = "spreadsheet"
14
19
  end
15
20
 
16
21
  def ruling_lines
@@ -88,5 +93,18 @@ module Tabula
88
93
  Tabula::Writers.TSV(rows, out)
89
94
  out.string
90
95
  end
96
+
97
+ def to_json(*a)
98
+ {
99
+ 'json_class' => self.class.name,
100
+ 'extraction_method' => @extraction_method,
101
+ 'data' => rows,
102
+ }.to_json(*a)
103
+ end
104
+
105
+ def +(other)
106
+ raise ArgumentError unless other.page == @page
107
+ Spreadsheet.new(nil, nil, nil, nil, @page, @cells + other.cells, nil, nil )
108
+ end
91
109
  end
92
110
  end
@@ -1,9 +1,11 @@
1
1
  module Tabula
2
2
  class Table
3
- attr_reader :lines
3
+ attr_reader :extraction_method
4
+ attr_accessor :lines
4
5
  def initialize(line_count, separators)
5
6
  @separators = separators
6
7
  @lines = (0...line_count).inject([]) { |m| m << Line.new }
8
+ @extraction_method = "original"
7
9
  end
8
10
 
9
11
  def add_text_element(text_element, i, j)
@@ -28,22 +30,27 @@ module Tabula
28
30
  end
29
31
 
30
32
  def cols
31
- self.rpad!
32
- lines.map(&:text_elements).transpose
33
+ rows.transpose
33
34
  end
34
35
 
35
36
  def rows
36
37
  self.rpad!
37
- lines.map(&:text_elements)
38
+ lines.map do |l|
39
+ l.text_elements.map! do |te|
40
+ te || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil)
41
+ end
42
+ end.sort_by { |l| l.map { |te| te.top || 0 }.max }
38
43
  end
39
44
 
40
45
  # create a new Table object from an array of arrays, representing a list of rows in a spreadsheet
41
46
  # probably only used for testing
42
47
  def self.new_from_array(array_of_rows)
43
48
  t = Table.new(array_of_rows.size, [])
49
+ @extraction_method = "testing"
44
50
  array_of_rows.each_with_index do |row, index|
45
- t.lines[index].text_elements = row.map{|cell| TextElement.new(nil, nil, nil, nil, nil, nil, cell, nil)}
51
+ t.lines[index].text_elements = row.each_with_index.map{|cell, inner_index| TextElement.new(index, inner_index, 1, 1, nil, nil, cell, nil)}
46
52
  end
53
+ t.rpad!
47
54
  t
48
55
  end
49
56
 
@@ -77,5 +84,13 @@ module Tabula
77
84
  self.lines.zip(other.lines).all? { |my, yours| my == yours }
78
85
 
79
86
  end
87
+
88
+ def to_json(*a)
89
+ {
90
+ 'json_class' => self.class.name,
91
+ 'extraction_method' => @extraction_method,
92
+ 'data' => rows,
93
+ }.to_json(*a)
94
+ end
80
95
  end
81
96
  end
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  java_import org.apache.pdfbox.pdfparser.PDFParser
2
3
  java_import org.apache.pdfbox.util.TextPosition
3
4
  java_import org.apache.pdfbox.pdmodel.PDDocument
@@ -198,7 +199,12 @@ module Tabula
198
199
 
199
200
  def processTextPosition(text)
200
201
  c = text.getCharacter
201
- h = c == ' ' ? text.getWidthDirAdj.round(2) : text.getHeightDir.round(2)
202
+ h = text.getHeightDir.round(2)
203
+
204
+ if c == ' ' || c == ' ' # replace non-breaking space for space
205
+ c = ' '
206
+ h = text.getWidthDirAdj.round(2)
207
+ end
202
208
 
203
209
  te = Tabula::TextElement.new(text.getYDirAdj.round(2) - h,
204
210
  text.getXDirAdj.round(2),
@@ -28,7 +28,8 @@ module Tabula
28
28
  options = {
29
29
  :password => '',
30
30
  :detect_ruling_lines => true,
31
- :vertical_rulings => []
31
+ :vertical_rulings => [],
32
+ :extraction_method => "guess",
32
33
  }.merge(options)
33
34
 
34
35
  if area.instance_of?(Array)
@@ -41,32 +42,51 @@ module Tabula
41
42
  page = [page]
42
43
  end
43
44
 
44
- page_obj = Extraction::ObjectExtractor.new(pdf_path,
45
+ pdf_page = Extraction::ObjectExtractor.new(pdf_path,
45
46
  page,
46
47
  options[:password]) \
47
48
  .extract.next
48
49
 
49
- use_detected_lines = false
50
- if options[:detect_ruling_lines] && options[:vertical_rulings].empty?
51
- detected_vertical_rulings = Ruling.crop_rulings_to_area(page_obj.vertical_ruling_lines,
52
- area)
50
+ if ["spreadsheet", "original"].include? options[:extraction_method]
51
+ use_spreadsheet_extraction_method = options[:extraction_method] == "spreadsheet"
52
+ else
53
+ use_spreadsheet_extraction_method = pdf_page.is_tabular?
54
+ end
53
55
 
54
- # only use lines if at least 80% of them cover at least 90%
55
- # of the height of area of interest
56
+ if use_spreadsheet_extraction_method
57
+ table = pdf_page.get_area(area).spreadsheets.inject(&:+)
58
+ else
59
+ use_detected_lines = false
60
+ if options[:detect_ruling_lines] && options[:vertical_rulings].empty?
61
+ detected_vertical_rulings = Ruling.crop_rulings_to_area(pdf_page.vertical_ruling_lines,
62
+ area)
56
63
 
57
- # TODO this heuristic SUCKS
58
- # what if only a couple columns is delimited with vertical rulings?
59
- # ie: https://www.dropbox.com/s/lpydler5c3pn408/S2MNCEbirdisland.pdf (see 7th column)
60
- # idea: detect columns without considering rulings, detect vertical rulings
61
- # calculate ratio and try to come up with a threshold
62
- use_detected_lines = detected_vertical_rulings.size > 2 \
63
- && (detected_vertical_rulings.count { |vl|
64
- vl.height / area.height > 0.9
65
- } / detected_vertical_rulings.size.to_f) >= 0.8
64
+ # only use lines if at least 80% of them cover at least 90%
65
+ # of the height of area of interest
66
66
 
67
- end
67
+ # TODO this heuristic SUCKS
68
+ # what if only a couple columns is delimited with vertical rulings?
69
+ # ie: https://www.dropbox.com/s/lpydler5c3pn408/S2MNCEbirdisland.pdf (see 7th column)
70
+ # idea: detect columns without considering rulings, detect vertical rulings
71
+ # calculate ratio and try to come up with a threshold
72
+ use_detected_lines = detected_vertical_rulings.size > 2 \
73
+ && (detected_vertical_rulings.count { |vl|
74
+ vl.height / area.height > 0.9
75
+ } / detected_vertical_rulings.size.to_f) >= 0.8
68
76
 
69
- page_obj.get_area(area).make_table(:vertical_rulings => use_detected_lines ? detected_vertical_rulings : options[:vertical_rulings])
77
+ end
70
78
 
79
+ table = pdf_page.get_area(area).get_table(:vertical_rulings => use_detected_lines ? detected_vertical_rulings : options[:vertical_rulings])
80
+
81
+ # fixes up the table a little bit, replacing nils with empty TextElements
82
+ # and sorting the lines.
83
+ table.lines.each do |l|
84
+ l.text_elements = l.text_elements.map do |te|
85
+ te || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil)
86
+ end
87
+ end
88
+ table.lines.sort_by! { |l| l.text_elements.map { |te| te.top or 0 }.max }
89
+ table
90
+ end
71
91
  end
72
92
  end
@@ -1,3 +1,3 @@
1
1
  module Tabula
2
- VERSION = '0.7.0'
2
+ VERSION = '0.7.1'
3
3
  end
data/test/tests.rb CHANGED
@@ -4,10 +4,14 @@ require 'minitest/autorun'
4
4
 
5
5
  require_relative '../lib/tabula'
6
6
 
7
+ def table_to_array(table)
8
+ lines_to_array(table.rows)
9
+ end
10
+
7
11
  def lines_to_array(lines)
8
- lines.map { |l|
12
+ lines.map do |l|
9
13
  l.map { |te| te.text.strip }
10
- }
14
+ end
11
15
  end
12
16
 
13
17
  def lines_to_table(lines)
@@ -15,7 +19,7 @@ def lines_to_table(lines)
15
19
  end
16
20
 
17
21
 
18
- # I don't want to pollute the "real" clasend a funny inspect method. Just for testing comparisons.
22
+ # I don't want to pollute the "real" class with a funny inspect method. Just for testing comparisons.
19
23
  module Tabula
20
24
  class Table
21
25
  def inspect
@@ -27,7 +31,7 @@ end
27
31
  module Tabula
28
32
  class Line
29
33
  def inspect
30
- @text_elements.map(&:text).inspect
34
+ @text_elements.map{|te| te.nil? ? '' : te.text}.inspect
31
35
  end
32
36
  end
33
37
  end
@@ -173,7 +177,7 @@ end
173
177
  class TestExtractor < Minitest::Test
174
178
 
175
179
  def test_table_extraction_1
176
- table = lines_to_array Tabula.extract_table(File.expand_path('data/gre.pdf', File.dirname(__FILE__)),
180
+ table = table_to_array Tabula.extract_table(File.expand_path('data/gre.pdf', File.dirname(__FILE__)),
177
181
  1,
178
182
  [107.1, 57.9214, 394.5214, 290.7],
179
183
  :detect_ruling_lines => false)
@@ -184,7 +188,7 @@ class TestExtractor < Minitest::Test
184
188
  end
185
189
 
186
190
  def test_diputados_voting_record
187
- table = lines_to_array Tabula.extract_table(File.expand_path('data/argentina_diputados_voting_record.pdf', File.dirname(__FILE__)),
191
+ table = table_to_array Tabula.extract_table(File.expand_path('data/argentina_diputados_voting_record.pdf', File.dirname(__FILE__)),
188
192
  1,
189
193
  [269.875, 12.75, 790.5, 561])
190
194
 
@@ -199,14 +203,13 @@ class TestExtractor < Minitest::Test
199
203
  # and a solution for half-x-height-offset lines.
200
204
  pdf_file_path = File.expand_path('data/frx_2012_disclosure.pdf', File.dirname(__FILE__))
201
205
 
202
- table = lines_to_table Tabula.extract_table(pdf_file_path,
206
+ table = Tabula.extract_table(pdf_file_path,
203
207
  1,
204
208
  [106.01, 48.09, 227.31, 551.89],
205
209
  :detect_ruling_lines => true)
206
210
 
207
211
  expected = Tabula::Table.new_from_array([["AANONSEN, DEBORAH, A", "", "STATEN ISLAND, NY", "MEALS", "$85.00"], ["TOTAL", "", "", "", "$85.00"], ["AARON, CAREN, T", "", "RICHMOND, VA", "EDUCATIONAL ITEMS", "$78.80"], ["AARON, CAREN, T", "", "RICHMOND, VA", "MEALS", "$392.45"], ["TOTAL", "", "", "", "$471.25"], ["AARON, JOHN", "", "CLARKSVILLE, TN", "MEALS", "$20.39"], ["TOTAL", "", "", "", "$20.39"], ["AARON, JOSHUA, N", "", "WEST GROVE, PA", "MEALS", "$310.33"], ["", "REGIONAL PULMONARY & SLEEP"], ["AARON, JOSHUA, N", "", "WEST GROVE, PA", "SPEAKING FEES", "$4,700.00"], ["", "MEDICINE"], ["TOTAL", "", "", "", "$5,010.33"], ["AARON, MAUREEN, M", "", "MARTINSVILLE, VA", "MEALS", "$193.67"], ["TOTAL", "", "", "", "$193.67"], ["AARON, MICHAEL, L", "", "WEST ISLIP, NY", "MEALS", "$19.50"], ["TOTAL", "", "", "", "$19.50"], ["AARON, MICHAEL, R", "", "BROOKLYN, NY", "MEALS", "$65.92"]])
208
212
 
209
-
210
213
  assert_equal expected, table
211
214
  end
212
215
 
@@ -259,7 +262,7 @@ class TestExtractor < Minitest::Test
259
262
 
260
263
  # TODO Spaces inserted in words - fails
261
264
  def test_bo_page24
262
- table = lines_to_array Tabula.extract_table(File.expand_path('data/bo_page24.pdf', File.dirname(__FILE__)),
265
+ table = table_to_array Tabula.extract_table(File.expand_path('data/bo_page24.pdf', File.dirname(__FILE__)),
263
266
  1,
264
267
  [425.625, 53.125, 575.714, 810.535],
265
268
  :detect_ruling_lines => false)
@@ -312,7 +315,7 @@ class TestExtractor < Minitest::Test
312
315
 
313
316
  vertical_rulings = [47,147,256,310,375,431,504].map{|n| Tabula::Ruling.new(0, n, 0, 1000)}
314
317
 
315
- table = lines_to_array Tabula.extract_table(File.expand_path('data/campaign_donors.pdf', File.dirname(__FILE__)),
318
+ table = table_to_array Tabula.extract_table(File.expand_path('data/campaign_donors.pdf', File.dirname(__FILE__)),
316
319
  1,
317
320
  [255.57,40.43,398.76,557.35],
318
321
  :vertical_rulings => vertical_rulings)
@@ -321,12 +324,12 @@ class TestExtractor < Minitest::Test
321
324
  end
322
325
 
323
326
  def test_get_spacing_and_merging_right
324
- table = lines_to_array Tabula.extract_table(File.expand_path('data/strongschools.pdf', File.dirname(__FILE__)),
327
+ table = table_to_array Tabula.extract_table(File.expand_path('data/strongschools.pdf', File.dirname(__FILE__)),
325
328
  1,
326
329
  [52.32857142857143,15.557142857142859,128.70000000000002,767.9571428571429],
327
330
  :detect_ruling_lines => true)
328
331
 
329
- expected = [["Last Name", "First Name", "Address", "City", "State", "Zip", "Occupation", "Employer", "Date", "Amount"], ["Lidstad", "Dick & Peg", "62 Mississippi River Blvd N", "Saint Paul", "MN", "55104", "retired", "", "10/12/2012", "60.00"], ["Strom", "Pam", "1229 Hague Ave", "St. Paul", "MN", "55104", "", "", "9/12/2012", "60.00"], ["Seeba", "Louise & Paul", "1399 Sheldon St", "Saint Paul", "MN", "55108", "BOE", "City of Saint Paul", "10/12/2012", "60.00"], ["Schumacher / Bales", "Douglas L. / Patricia ", "948 County Rd. D W", "Saint Paul", "MN", "55126", "", "", "10/13/2012", "60.00"], ["Abrams", "Marjorie", "238 8th St east", "St Paul", "MN", "55101", "Retired", "Retired", "8/8/2012", "75.00"], ["Crouse / Schroeder", "Abigail / Jonathan", "1545 Branston St.", "Saint Paul", "MN", "55108", "", "", "10/6/2012", "75.00"]]
332
+ expected = [["Last Name", "First Name", "Address", "City", "State", "Zip", "Occupation", "Employer", "Date", "Amount"], ["Lidstad", "Dick & Peg", "62 Mississippi River Blvd N", "Saint Paul", "MN", "55104", "retired", "", "10/12/2012", "60.00"], ["Strom", "Pam", "1229 Hague Ave", "St. Paul", "MN", "55104", "", "", "9/12/2012", "60.00"], ["Seeba", "Louise & Paul", "1399 Sheldon St", "Saint Paul", "MN", "55108", "BOE", "City of Saint Paul", "10/12/2012", "60.00"], ["Schumacher / Bales", "Douglas L. / Patricia", "948 County Rd. D W", "Saint Paul", "MN", "55126", "", "", "10/13/2012", "60.00"], ["Abrams", "Marjorie", "238 8th St east", "St Paul", "MN", "55101", "Retired", "Retired", "8/8/2012", "75.00"], ["Crouse / Schroeder", "Abigail / Jonathan", "1545 Branston St.", "Saint Paul", "MN", "55108", "", "", "10/6/2012", "75.00"]]
330
333
 
331
334
  assert_equal expected, table
332
335
 
@@ -539,7 +542,7 @@ class TestIsTabularHeuristic < Minitest::Test
539
542
  extractor = Tabula::Extraction::ObjectExtractor.new(path, [1])
540
543
  page = extractor.extract.first
541
544
  page.get_ruling_lines!
542
- assert page.is_tabular?
545
+ assert page.is_tabular?, "failed on file #{f}"
543
546
  end
544
547
  end
545
548
 
@@ -549,11 +552,8 @@ class TestIsTabularHeuristic < Minitest::Test
549
552
  extractor = Tabula::Extraction::ObjectExtractor.new(path, [1])
550
553
  page = extractor.extract.first
551
554
  page.get_ruling_lines!
552
- assert !page.is_tabular?
555
+ assert !page.is_tabular?, "failed on file #{f}"
553
556
  end
554
557
  end
555
558
 
556
-
557
-
558
-
559
559
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tabula-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: java
6
6
  authors:
7
7
  - Manuel Aristarán
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-01-07 00:00:00.000000000 Z
13
+ date: 2014-01-18 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: minitest
@@ -162,6 +162,7 @@ files:
162
162
  - test/heuristic-test-set/original/S2MNCEbirdisland.pdf
163
163
  - test/heuristic-test-set/original/bo_page24.pdf
164
164
  - test/heuristic-test-set/original/campaign_donors.pdf
165
+ - test/heuristic-test-set/original/cs076pct.pdf
165
166
  - test/heuristic-test-set/spreadsheet/47008204D_USA.page4.pdf
166
167
  - test/heuristic-test-set/spreadsheet/GSK_2012_Q4.page437.pdf
167
168
  - test/heuristic-test-set/spreadsheet/strongschools.pdf
@@ -189,7 +190,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
190
  version: '0'
190
191
  requirements: []
191
192
  rubyforge_project:
192
- rubygems_version: 2.1.9
193
+ rubygems_version: 2.2.1
193
194
  signing_key:
194
195
  specification_version: 4
195
196
  summary: extract tables from PDF files
@@ -217,6 +218,7 @@ test_files:
217
218
  - test/heuristic-test-set/original/S2MNCEbirdisland.pdf
218
219
  - test/heuristic-test-set/original/bo_page24.pdf
219
220
  - test/heuristic-test-set/original/campaign_donors.pdf
221
+ - test/heuristic-test-set/original/cs076pct.pdf
220
222
  - test/heuristic-test-set/spreadsheet/47008204D_USA.page4.pdf
221
223
  - test/heuristic-test-set/spreadsheet/GSK_2012_Q4.page437.pdf
222
224
  - test/heuristic-test-set/spreadsheet/strongschools.pdf