tabula-extractor 0.7.0-java → 0.7.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fa69052647e565cd996f92f1c73e6d00deceea54
4
- data.tar.gz: 7d76ccc9b445e9138f65920cd0a401fbee3ababf
3
+ metadata.gz: fea5751c9b78705fbfda50be1e29011d40249b04
4
+ data.tar.gz: 2d7bb156a073636467b34e44b3f8b6a364920354
5
5
  SHA512:
6
- metadata.gz: d17ad7e967407711d60d9b30e8d231ab9adef313b8ede84e1960e35f24c5374f472a500e7f7148cc485408f796f1d7a67e96393d0fc5331e0f3f88971dad76c4
7
- data.tar.gz: 84121390715280e86fb3ff1dd82533863dfabbb0b27582fb0d3bf9ad088f8d05e4c8f7dfbcfd2f621c232accd6145968e263a4e0e081431cf941939974999b20
6
+ metadata.gz: d8d1f1932397b599b3d724bb4e8ca5fd73f516e198a391625031ab83f8e5f0a2743d51dcc02953ee5a53c0664330c8e8e1f5309f72493e9f2e232068b06085a1
7
+ data.tar.gz: e292d6320db6a42799418b6651f63a88446f85c6787659597e77b9f7c38e698b69a10c78edbdeea49baedc0d92f77602b847aef90ab1f8f2ab10356493dbb397
data/bin/tabula CHANGED
@@ -6,6 +6,10 @@ require_relative '../lib/tabula'
6
6
  FORMATS = ['CSV', 'TSV', 'HTML', 'JSON']
7
7
 
8
8
  def parse_pages_arg(pages_arg)
9
+ if(pages_arg == 'all')
10
+ return :all
11
+ end
12
+
9
13
  ranges = pages_arg.split(',').map(&:strip)
10
14
  pages = []
11
15
  ranges.each do |range|
@@ -32,7 +36,7 @@ Usage:
32
36
  where [options] are:
33
37
  EOS
34
38
 
35
- opt :pages, 'Comma separated list of ranges. Examples: --pages 1-3,5-7 or --pages 3. Default is --pages 1', :default => '1', :type => String
39
+ opt :pages, 'Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. Default is --pages 1', :default => '1', :type => String
36
40
  opt :area, 'Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. Default is entire page', :type => String, :default => nil
37
41
  opt :columns, 'X coordinates of column boundaries. Example --columns 10.1,20.2,30.3', :default => nil, :type => String
38
42
  opt :password, 'Password to decrypt document. Default is empty', :default => ''
@@ -38,5 +38,16 @@ module Tabula
38
38
  end
39
39
  output.strip
40
40
  end
41
+
42
+ def to_json(*a)
43
+ {
44
+ 'json_class' => self.class.name,
45
+ 'text' => text,
46
+ 'top' => top,
47
+ 'left' => left,
48
+ 'width' => width,
49
+ 'height' => height
50
+ }.to_json(*a)
51
+ end
41
52
  end
42
53
  end
@@ -6,7 +6,6 @@ module Tabula
6
6
  # subclasses must define cells, vertical_ruling_lines, horizontal_ruling_lines accessors; ruling_lines reader
7
7
  module HasCells
8
8
 
9
- IS_TABULAR_HEURISTIC_RATIO = 0.8
10
9
  ANOTHER_MAGIC_NUMBER = 0.75
11
10
 
12
11
  def is_tabular?
@@ -83,11 +83,7 @@ module Tabula
83
83
 
84
84
  #for API backwards-compatibility reasons, this returns an array of arrays.
85
85
  def make_table(options={})
86
- get_table(options).lines.map do |l|
87
- l.text_elements.map! do |te|
88
- te || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil)
89
- end
90
- end.sort_by { |l| l.map { |te| te.top or 0 }.max }
86
+ get_table(options).rows
91
87
  end
92
88
 
93
89
  # returns the Spreadsheets; creating them if they're not memoized
@@ -241,13 +237,17 @@ module Tabula
241
237
  end
242
238
 
243
239
  lines_to_points.each do |l, p1_p2|
244
- l.java_send :setLine, [java.awt.geom.Point2D, java.awt.geom.Point2D], p1_p2[0],
245
- p1_p2[1]
240
+ l.java_send :setLine, [java.awt.geom.Point2D, java.awt.geom.Point2D], p1_p2[0], p1_p2[1]
246
241
  end
247
242
  end
248
243
 
249
244
  def collapse_oriented_rulings(lines)
250
245
  # lines must all be of one orientation (i.e. horizontal, vertical)
246
+
247
+ if lines.empty?
248
+ return []
249
+ end
250
+
251
251
  lines.sort! {|a, b| a.position != b.position ? a.position <=> b.position : a.start <=> b.start }
252
252
 
253
253
  lines = lines.inject([lines.shift]) do |memo, next_line|
@@ -262,7 +262,6 @@ module Tabula
262
262
  memo << next_line
263
263
  end
264
264
  end
265
- lines
266
265
  end
267
266
  end
268
267
 
@@ -1,9 +1,13 @@
1
1
  module Tabula
2
2
  # a counterpart of Table, to be sure.
3
3
  # not sure yet what their relationship ought to be.
4
+
5
+ # the both should implement `cells`, `rows`, `cols`, `extraction_method`
6
+
4
7
  class Spreadsheet < ZoneEntity
5
8
  include Tabula::HasCells
6
9
  attr_accessor :cells, :vertical_ruling_lines, :horizontal_ruling_lines, :cells_resolved
10
+ attr_reader :extraction_method, :page
7
11
 
8
12
  def initialize(top, left, width, height, page, cells, vertical_ruling_lines, horizontal_ruling_lines) #, lines)
9
13
  super(top, left, width, height)
@@ -11,6 +15,7 @@ module Tabula
11
15
  @page = page
12
16
  @vertical_ruling_lines = vertical_ruling_lines
13
17
  @horizontal_ruling_lines = horizontal_ruling_lines
18
+ @extraction_method = "spreadsheet"
14
19
  end
15
20
 
16
21
  def ruling_lines
@@ -88,5 +93,18 @@ module Tabula
88
93
  Tabula::Writers.TSV(rows, out)
89
94
  out.string
90
95
  end
96
+
97
+ def to_json(*a)
98
+ {
99
+ 'json_class' => self.class.name,
100
+ 'extraction_method' => @extraction_method,
101
+ 'data' => rows,
102
+ }.to_json(*a)
103
+ end
104
+
105
+ def +(other)
106
+ raise ArgumentError unless other.page == @page
107
+ Spreadsheet.new(nil, nil, nil, nil, @page, @cells + other.cells, nil, nil )
108
+ end
91
109
  end
92
110
  end
@@ -1,9 +1,11 @@
1
1
  module Tabula
2
2
  class Table
3
- attr_reader :lines
3
+ attr_reader :extraction_method
4
+ attr_accessor :lines
4
5
  def initialize(line_count, separators)
5
6
  @separators = separators
6
7
  @lines = (0...line_count).inject([]) { |m| m << Line.new }
8
+ @extraction_method = "original"
7
9
  end
8
10
 
9
11
  def add_text_element(text_element, i, j)
@@ -28,22 +30,27 @@ module Tabula
28
30
  end
29
31
 
30
32
  def cols
31
- self.rpad!
32
- lines.map(&:text_elements).transpose
33
+ rows.transpose
33
34
  end
34
35
 
35
36
  def rows
36
37
  self.rpad!
37
- lines.map(&:text_elements)
38
+ lines.map do |l|
39
+ l.text_elements.map! do |te|
40
+ te || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil)
41
+ end
42
+ end.sort_by { |l| l.map { |te| te.top || 0 }.max }
38
43
  end
39
44
 
40
45
  # create a new Table object from an array of arrays, representing a list of rows in a spreadsheet
41
46
  # probably only used for testing
42
47
  def self.new_from_array(array_of_rows)
43
48
  t = Table.new(array_of_rows.size, [])
49
+ @extraction_method = "testing"
44
50
  array_of_rows.each_with_index do |row, index|
45
- t.lines[index].text_elements = row.map{|cell| TextElement.new(nil, nil, nil, nil, nil, nil, cell, nil)}
51
+ t.lines[index].text_elements = row.each_with_index.map{|cell, inner_index| TextElement.new(index, inner_index, 1, 1, nil, nil, cell, nil)}
46
52
  end
53
+ t.rpad!
47
54
  t
48
55
  end
49
56
 
@@ -77,5 +84,13 @@ module Tabula
77
84
  self.lines.zip(other.lines).all? { |my, yours| my == yours }
78
85
 
79
86
  end
87
+
88
+ def to_json(*a)
89
+ {
90
+ 'json_class' => self.class.name,
91
+ 'extraction_method' => @extraction_method,
92
+ 'data' => rows,
93
+ }.to_json(*a)
94
+ end
80
95
  end
81
96
  end
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  java_import org.apache.pdfbox.pdfparser.PDFParser
2
3
  java_import org.apache.pdfbox.util.TextPosition
3
4
  java_import org.apache.pdfbox.pdmodel.PDDocument
@@ -198,7 +199,12 @@ module Tabula
198
199
 
199
200
  def processTextPosition(text)
200
201
  c = text.getCharacter
201
- h = c == ' ' ? text.getWidthDirAdj.round(2) : text.getHeightDir.round(2)
202
+ h = text.getHeightDir.round(2)
203
+
204
+ if c == ' ' || c == ' ' # replace non-breaking space for space
205
+ c = ' '
206
+ h = text.getWidthDirAdj.round(2)
207
+ end
202
208
 
203
209
  te = Tabula::TextElement.new(text.getYDirAdj.round(2) - h,
204
210
  text.getXDirAdj.round(2),
@@ -28,7 +28,8 @@ module Tabula
28
28
  options = {
29
29
  :password => '',
30
30
  :detect_ruling_lines => true,
31
- :vertical_rulings => []
31
+ :vertical_rulings => [],
32
+ :extraction_method => "guess",
32
33
  }.merge(options)
33
34
 
34
35
  if area.instance_of?(Array)
@@ -41,32 +42,51 @@ module Tabula
41
42
  page = [page]
42
43
  end
43
44
 
44
- page_obj = Extraction::ObjectExtractor.new(pdf_path,
45
+ pdf_page = Extraction::ObjectExtractor.new(pdf_path,
45
46
  page,
46
47
  options[:password]) \
47
48
  .extract.next
48
49
 
49
- use_detected_lines = false
50
- if options[:detect_ruling_lines] && options[:vertical_rulings].empty?
51
- detected_vertical_rulings = Ruling.crop_rulings_to_area(page_obj.vertical_ruling_lines,
52
- area)
50
+ if ["spreadsheet", "original"].include? options[:extraction_method]
51
+ use_spreadsheet_extraction_method = options[:extraction_method] == "spreadsheet"
52
+ else
53
+ use_spreadsheet_extraction_method = pdf_page.is_tabular?
54
+ end
53
55
 
54
- # only use lines if at least 80% of them cover at least 90%
55
- # of the height of area of interest
56
+ if use_spreadsheet_extraction_method
57
+ table = pdf_page.get_area(area).spreadsheets.inject(&:+)
58
+ else
59
+ use_detected_lines = false
60
+ if options[:detect_ruling_lines] && options[:vertical_rulings].empty?
61
+ detected_vertical_rulings = Ruling.crop_rulings_to_area(pdf_page.vertical_ruling_lines,
62
+ area)
56
63
 
57
- # TODO this heuristic SUCKS
58
- # what if only a couple columns is delimited with vertical rulings?
59
- # ie: https://www.dropbox.com/s/lpydler5c3pn408/S2MNCEbirdisland.pdf (see 7th column)
60
- # idea: detect columns without considering rulings, detect vertical rulings
61
- # calculate ratio and try to come up with a threshold
62
- use_detected_lines = detected_vertical_rulings.size > 2 \
63
- && (detected_vertical_rulings.count { |vl|
64
- vl.height / area.height > 0.9
65
- } / detected_vertical_rulings.size.to_f) >= 0.8
64
+ # only use lines if at least 80% of them cover at least 90%
65
+ # of the height of area of interest
66
66
 
67
- end
67
+ # TODO this heuristic SUCKS
68
+ # what if only a couple columns is delimited with vertical rulings?
69
+ # ie: https://www.dropbox.com/s/lpydler5c3pn408/S2MNCEbirdisland.pdf (see 7th column)
70
+ # idea: detect columns without considering rulings, detect vertical rulings
71
+ # calculate ratio and try to come up with a threshold
72
+ use_detected_lines = detected_vertical_rulings.size > 2 \
73
+ && (detected_vertical_rulings.count { |vl|
74
+ vl.height / area.height > 0.9
75
+ } / detected_vertical_rulings.size.to_f) >= 0.8
68
76
 
69
- page_obj.get_area(area).make_table(:vertical_rulings => use_detected_lines ? detected_vertical_rulings : options[:vertical_rulings])
77
+ end
70
78
 
79
+ table = pdf_page.get_area(area).get_table(:vertical_rulings => use_detected_lines ? detected_vertical_rulings : options[:vertical_rulings])
80
+
81
+ # fixes up the table a little bit, replacing nils with empty TextElements
82
+ # and sorting the lines.
83
+ table.lines.each do |l|
84
+ l.text_elements = l.text_elements.map do |te|
85
+ te || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil)
86
+ end
87
+ end
88
+ table.lines.sort_by! { |l| l.text_elements.map { |te| te.top or 0 }.max }
89
+ table
90
+ end
71
91
  end
72
92
  end
@@ -1,3 +1,3 @@
1
1
  module Tabula
2
- VERSION = '0.7.0'
2
+ VERSION = '0.7.1'
3
3
  end
data/test/tests.rb CHANGED
@@ -4,10 +4,14 @@ require 'minitest/autorun'
4
4
 
5
5
  require_relative '../lib/tabula'
6
6
 
7
+ def table_to_array(table)
8
+ lines_to_array(table.rows)
9
+ end
10
+
7
11
  def lines_to_array(lines)
8
- lines.map { |l|
12
+ lines.map do |l|
9
13
  l.map { |te| te.text.strip }
10
- }
14
+ end
11
15
  end
12
16
 
13
17
  def lines_to_table(lines)
@@ -15,7 +19,7 @@ def lines_to_table(lines)
15
19
  end
16
20
 
17
21
 
18
- # I don't want to pollute the "real" clasend a funny inspect method. Just for testing comparisons.
22
+ # I don't want to pollute the "real" class with a funny inspect method. Just for testing comparisons.
19
23
  module Tabula
20
24
  class Table
21
25
  def inspect
@@ -27,7 +31,7 @@ end
27
31
  module Tabula
28
32
  class Line
29
33
  def inspect
30
- @text_elements.map(&:text).inspect
34
+ @text_elements.map{|te| te.nil? ? '' : te.text}.inspect
31
35
  end
32
36
  end
33
37
  end
@@ -173,7 +177,7 @@ end
173
177
  class TestExtractor < Minitest::Test
174
178
 
175
179
  def test_table_extraction_1
176
- table = lines_to_array Tabula.extract_table(File.expand_path('data/gre.pdf', File.dirname(__FILE__)),
180
+ table = table_to_array Tabula.extract_table(File.expand_path('data/gre.pdf', File.dirname(__FILE__)),
177
181
  1,
178
182
  [107.1, 57.9214, 394.5214, 290.7],
179
183
  :detect_ruling_lines => false)
@@ -184,7 +188,7 @@ class TestExtractor < Minitest::Test
184
188
  end
185
189
 
186
190
  def test_diputados_voting_record
187
- table = lines_to_array Tabula.extract_table(File.expand_path('data/argentina_diputados_voting_record.pdf', File.dirname(__FILE__)),
191
+ table = table_to_array Tabula.extract_table(File.expand_path('data/argentina_diputados_voting_record.pdf', File.dirname(__FILE__)),
188
192
  1,
189
193
  [269.875, 12.75, 790.5, 561])
190
194
 
@@ -199,14 +203,13 @@ class TestExtractor < Minitest::Test
199
203
  # and a solution for half-x-height-offset lines.
200
204
  pdf_file_path = File.expand_path('data/frx_2012_disclosure.pdf', File.dirname(__FILE__))
201
205
 
202
- table = lines_to_table Tabula.extract_table(pdf_file_path,
206
+ table = Tabula.extract_table(pdf_file_path,
203
207
  1,
204
208
  [106.01, 48.09, 227.31, 551.89],
205
209
  :detect_ruling_lines => true)
206
210
 
207
211
  expected = Tabula::Table.new_from_array([["AANONSEN, DEBORAH, A", "", "STATEN ISLAND, NY", "MEALS", "$85.00"], ["TOTAL", "", "", "", "$85.00"], ["AARON, CAREN, T", "", "RICHMOND, VA", "EDUCATIONAL ITEMS", "$78.80"], ["AARON, CAREN, T", "", "RICHMOND, VA", "MEALS", "$392.45"], ["TOTAL", "", "", "", "$471.25"], ["AARON, JOHN", "", "CLARKSVILLE, TN", "MEALS", "$20.39"], ["TOTAL", "", "", "", "$20.39"], ["AARON, JOSHUA, N", "", "WEST GROVE, PA", "MEALS", "$310.33"], ["", "REGIONAL PULMONARY & SLEEP"], ["AARON, JOSHUA, N", "", "WEST GROVE, PA", "SPEAKING FEES", "$4,700.00"], ["", "MEDICINE"], ["TOTAL", "", "", "", "$5,010.33"], ["AARON, MAUREEN, M", "", "MARTINSVILLE, VA", "MEALS", "$193.67"], ["TOTAL", "", "", "", "$193.67"], ["AARON, MICHAEL, L", "", "WEST ISLIP, NY", "MEALS", "$19.50"], ["TOTAL", "", "", "", "$19.50"], ["AARON, MICHAEL, R", "", "BROOKLYN, NY", "MEALS", "$65.92"]])
208
212
 
209
-
210
213
  assert_equal expected, table
211
214
  end
212
215
 
@@ -259,7 +262,7 @@ class TestExtractor < Minitest::Test
259
262
 
260
263
  # TODO Spaces inserted in words - fails
261
264
  def test_bo_page24
262
- table = lines_to_array Tabula.extract_table(File.expand_path('data/bo_page24.pdf', File.dirname(__FILE__)),
265
+ table = table_to_array Tabula.extract_table(File.expand_path('data/bo_page24.pdf', File.dirname(__FILE__)),
263
266
  1,
264
267
  [425.625, 53.125, 575.714, 810.535],
265
268
  :detect_ruling_lines => false)
@@ -312,7 +315,7 @@ class TestExtractor < Minitest::Test
312
315
 
313
316
  vertical_rulings = [47,147,256,310,375,431,504].map{|n| Tabula::Ruling.new(0, n, 0, 1000)}
314
317
 
315
- table = lines_to_array Tabula.extract_table(File.expand_path('data/campaign_donors.pdf', File.dirname(__FILE__)),
318
+ table = table_to_array Tabula.extract_table(File.expand_path('data/campaign_donors.pdf', File.dirname(__FILE__)),
316
319
  1,
317
320
  [255.57,40.43,398.76,557.35],
318
321
  :vertical_rulings => vertical_rulings)
@@ -321,12 +324,12 @@ class TestExtractor < Minitest::Test
321
324
  end
322
325
 
323
326
  def test_get_spacing_and_merging_right
324
- table = lines_to_array Tabula.extract_table(File.expand_path('data/strongschools.pdf', File.dirname(__FILE__)),
327
+ table = table_to_array Tabula.extract_table(File.expand_path('data/strongschools.pdf', File.dirname(__FILE__)),
325
328
  1,
326
329
  [52.32857142857143,15.557142857142859,128.70000000000002,767.9571428571429],
327
330
  :detect_ruling_lines => true)
328
331
 
329
- expected = [["Last Name", "First Name", "Address", "City", "State", "Zip", "Occupation", "Employer", "Date", "Amount"], ["Lidstad", "Dick & Peg", "62 Mississippi River Blvd N", "Saint Paul", "MN", "55104", "retired", "", "10/12/2012", "60.00"], ["Strom", "Pam", "1229 Hague Ave", "St. Paul", "MN", "55104", "", "", "9/12/2012", "60.00"], ["Seeba", "Louise & Paul", "1399 Sheldon St", "Saint Paul", "MN", "55108", "BOE", "City of Saint Paul", "10/12/2012", "60.00"], ["Schumacher / Bales", "Douglas L. / Patricia ", "948 County Rd. D W", "Saint Paul", "MN", "55126", "", "", "10/13/2012", "60.00"], ["Abrams", "Marjorie", "238 8th St east", "St Paul", "MN", "55101", "Retired", "Retired", "8/8/2012", "75.00"], ["Crouse / Schroeder", "Abigail / Jonathan", "1545 Branston St.", "Saint Paul", "MN", "55108", "", "", "10/6/2012", "75.00"]]
332
+ expected = [["Last Name", "First Name", "Address", "City", "State", "Zip", "Occupation", "Employer", "Date", "Amount"], ["Lidstad", "Dick & Peg", "62 Mississippi River Blvd N", "Saint Paul", "MN", "55104", "retired", "", "10/12/2012", "60.00"], ["Strom", "Pam", "1229 Hague Ave", "St. Paul", "MN", "55104", "", "", "9/12/2012", "60.00"], ["Seeba", "Louise & Paul", "1399 Sheldon St", "Saint Paul", "MN", "55108", "BOE", "City of Saint Paul", "10/12/2012", "60.00"], ["Schumacher / Bales", "Douglas L. / Patricia", "948 County Rd. D W", "Saint Paul", "MN", "55126", "", "", "10/13/2012", "60.00"], ["Abrams", "Marjorie", "238 8th St east", "St Paul", "MN", "55101", "Retired", "Retired", "8/8/2012", "75.00"], ["Crouse / Schroeder", "Abigail / Jonathan", "1545 Branston St.", "Saint Paul", "MN", "55108", "", "", "10/6/2012", "75.00"]]
330
333
 
331
334
  assert_equal expected, table
332
335
 
@@ -539,7 +542,7 @@ class TestIsTabularHeuristic < Minitest::Test
539
542
  extractor = Tabula::Extraction::ObjectExtractor.new(path, [1])
540
543
  page = extractor.extract.first
541
544
  page.get_ruling_lines!
542
- assert page.is_tabular?
545
+ assert page.is_tabular?, "failed on file #{f}"
543
546
  end
544
547
  end
545
548
 
@@ -549,11 +552,8 @@ class TestIsTabularHeuristic < Minitest::Test
549
552
  extractor = Tabula::Extraction::ObjectExtractor.new(path, [1])
550
553
  page = extractor.extract.first
551
554
  page.get_ruling_lines!
552
- assert !page.is_tabular?
555
+ assert !page.is_tabular?, "failed on file #{f}"
553
556
  end
554
557
  end
555
558
 
556
-
557
-
558
-
559
559
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tabula-extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: java
6
6
  authors:
7
7
  - Manuel Aristarán
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2014-01-07 00:00:00.000000000 Z
13
+ date: 2014-01-18 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: minitest
@@ -162,6 +162,7 @@ files:
162
162
  - test/heuristic-test-set/original/S2MNCEbirdisland.pdf
163
163
  - test/heuristic-test-set/original/bo_page24.pdf
164
164
  - test/heuristic-test-set/original/campaign_donors.pdf
165
+ - test/heuristic-test-set/original/cs076pct.pdf
165
166
  - test/heuristic-test-set/spreadsheet/47008204D_USA.page4.pdf
166
167
  - test/heuristic-test-set/spreadsheet/GSK_2012_Q4.page437.pdf
167
168
  - test/heuristic-test-set/spreadsheet/strongschools.pdf
@@ -189,7 +190,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
190
  version: '0'
190
191
  requirements: []
191
192
  rubyforge_project:
192
- rubygems_version: 2.1.9
193
+ rubygems_version: 2.2.1
193
194
  signing_key:
194
195
  specification_version: 4
195
196
  summary: extract tables from PDF files
@@ -217,6 +218,7 @@ test_files:
217
218
  - test/heuristic-test-set/original/S2MNCEbirdisland.pdf
218
219
  - test/heuristic-test-set/original/bo_page24.pdf
219
220
  - test/heuristic-test-set/original/campaign_donors.pdf
221
+ - test/heuristic-test-set/original/cs076pct.pdf
220
222
  - test/heuristic-test-set/spreadsheet/47008204D_USA.page4.pdf
221
223
  - test/heuristic-test-set/spreadsheet/GSK_2012_Q4.page437.pdf
222
224
  - test/heuristic-test-set/spreadsheet/strongschools.pdf