tabula-extractor 0.7.0-java → 0.7.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/tabula +5 -1
- data/lib/tabula/entities/cell.rb +11 -0
- data/lib/tabula/entities/has_cells.rb +0 -1
- data/lib/tabula/entities/page.rb +7 -8
- data/lib/tabula/entities/spreadsheet.rb +18 -0
- data/lib/tabula/entities/table.rb +20 -5
- data/lib/tabula/extraction.rb +7 -1
- data/lib/tabula/table_extractor.rb +39 -19
- data/lib/tabula/version.rb +1 -1
- data/test/heuristic-test-set/original/cs076pct.pdf +0 -0
- data/test/tests.rb +17 -17
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fea5751c9b78705fbfda50be1e29011d40249b04
|
4
|
+
data.tar.gz: 2d7bb156a073636467b34e44b3f8b6a364920354
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8d1f1932397b599b3d724bb4e8ca5fd73f516e198a391625031ab83f8e5f0a2743d51dcc02953ee5a53c0664330c8e8e1f5309f72493e9f2e232068b06085a1
|
7
|
+
data.tar.gz: e292d6320db6a42799418b6651f63a88446f85c6787659597e77b9f7c38e698b69a10c78edbdeea49baedc0d92f77602b847aef90ab1f8f2ab10356493dbb397
|
data/bin/tabula
CHANGED
@@ -6,6 +6,10 @@ require_relative '../lib/tabula'
|
|
6
6
|
FORMATS = ['CSV', 'TSV', 'HTML', 'JSON']
|
7
7
|
|
8
8
|
def parse_pages_arg(pages_arg)
|
9
|
+
if(pages_arg == 'all')
|
10
|
+
return :all
|
11
|
+
end
|
12
|
+
|
9
13
|
ranges = pages_arg.split(',').map(&:strip)
|
10
14
|
pages = []
|
11
15
|
ranges.each do |range|
|
@@ -32,7 +36,7 @@ Usage:
|
|
32
36
|
where [options] are:
|
33
37
|
EOS
|
34
38
|
|
35
|
-
opt :pages, 'Comma separated list of ranges. Examples: --pages 1-3,5-7 or --pages
|
39
|
+
opt :pages, 'Comma separated list of ranges, or all. Examples: --pages 1-3,5-7, --pages 3 or --pages all. Default is --pages 1', :default => '1', :type => String
|
36
40
|
opt :area, 'Portion of the page to analyze (top,left,bottom,right). Example: --area 269.875,12.75,790.5,561. Default is entire page', :type => String, :default => nil
|
37
41
|
opt :columns, 'X coordinates of column boundaries. Example --columns 10.1,20.2,30.3', :default => nil, :type => String
|
38
42
|
opt :password, 'Password to decrypt document. Default is empty', :default => ''
|
data/lib/tabula/entities/cell.rb
CHANGED
data/lib/tabula/entities/page.rb
CHANGED
@@ -83,11 +83,7 @@ module Tabula
|
|
83
83
|
|
84
84
|
#for API backwards-compatibility reasons, this returns an array of arrays.
|
85
85
|
def make_table(options={})
|
86
|
-
get_table(options).
|
87
|
-
l.text_elements.map! do |te|
|
88
|
-
te || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil)
|
89
|
-
end
|
90
|
-
end.sort_by { |l| l.map { |te| te.top or 0 }.max }
|
86
|
+
get_table(options).rows
|
91
87
|
end
|
92
88
|
|
93
89
|
# returns the Spreadsheets; creating them if they're not memoized
|
@@ -241,13 +237,17 @@ module Tabula
|
|
241
237
|
end
|
242
238
|
|
243
239
|
lines_to_points.each do |l, p1_p2|
|
244
|
-
l.java_send :setLine, [java.awt.geom.Point2D, java.awt.geom.Point2D], p1_p2[0],
|
245
|
-
p1_p2[1]
|
240
|
+
l.java_send :setLine, [java.awt.geom.Point2D, java.awt.geom.Point2D], p1_p2[0], p1_p2[1]
|
246
241
|
end
|
247
242
|
end
|
248
243
|
|
249
244
|
def collapse_oriented_rulings(lines)
|
250
245
|
# lines must all be of one orientation (i.e. horizontal, vertical)
|
246
|
+
|
247
|
+
if lines.empty?
|
248
|
+
return []
|
249
|
+
end
|
250
|
+
|
251
251
|
lines.sort! {|a, b| a.position != b.position ? a.position <=> b.position : a.start <=> b.start }
|
252
252
|
|
253
253
|
lines = lines.inject([lines.shift]) do |memo, next_line|
|
@@ -262,7 +262,6 @@ module Tabula
|
|
262
262
|
memo << next_line
|
263
263
|
end
|
264
264
|
end
|
265
|
-
lines
|
266
265
|
end
|
267
266
|
end
|
268
267
|
|
@@ -1,9 +1,13 @@
|
|
1
1
|
module Tabula
|
2
2
|
# a counterpart of Table, to be sure.
|
3
3
|
# not sure yet what their relationship ought to be.
|
4
|
+
|
5
|
+
# the both should implement `cells`, `rows`, `cols`, `extraction_method`
|
6
|
+
|
4
7
|
class Spreadsheet < ZoneEntity
|
5
8
|
include Tabula::HasCells
|
6
9
|
attr_accessor :cells, :vertical_ruling_lines, :horizontal_ruling_lines, :cells_resolved
|
10
|
+
attr_reader :extraction_method, :page
|
7
11
|
|
8
12
|
def initialize(top, left, width, height, page, cells, vertical_ruling_lines, horizontal_ruling_lines) #, lines)
|
9
13
|
super(top, left, width, height)
|
@@ -11,6 +15,7 @@ module Tabula
|
|
11
15
|
@page = page
|
12
16
|
@vertical_ruling_lines = vertical_ruling_lines
|
13
17
|
@horizontal_ruling_lines = horizontal_ruling_lines
|
18
|
+
@extraction_method = "spreadsheet"
|
14
19
|
end
|
15
20
|
|
16
21
|
def ruling_lines
|
@@ -88,5 +93,18 @@ module Tabula
|
|
88
93
|
Tabula::Writers.TSV(rows, out)
|
89
94
|
out.string
|
90
95
|
end
|
96
|
+
|
97
|
+
def to_json(*a)
|
98
|
+
{
|
99
|
+
'json_class' => self.class.name,
|
100
|
+
'extraction_method' => @extraction_method,
|
101
|
+
'data' => rows,
|
102
|
+
}.to_json(*a)
|
103
|
+
end
|
104
|
+
|
105
|
+
def +(other)
|
106
|
+
raise ArgumentError unless other.page == @page
|
107
|
+
Spreadsheet.new(nil, nil, nil, nil, @page, @cells + other.cells, nil, nil )
|
108
|
+
end
|
91
109
|
end
|
92
110
|
end
|
@@ -1,9 +1,11 @@
|
|
1
1
|
module Tabula
|
2
2
|
class Table
|
3
|
-
attr_reader :
|
3
|
+
attr_reader :extraction_method
|
4
|
+
attr_accessor :lines
|
4
5
|
def initialize(line_count, separators)
|
5
6
|
@separators = separators
|
6
7
|
@lines = (0...line_count).inject([]) { |m| m << Line.new }
|
8
|
+
@extraction_method = "original"
|
7
9
|
end
|
8
10
|
|
9
11
|
def add_text_element(text_element, i, j)
|
@@ -28,22 +30,27 @@ module Tabula
|
|
28
30
|
end
|
29
31
|
|
30
32
|
def cols
|
31
|
-
|
32
|
-
lines.map(&:text_elements).transpose
|
33
|
+
rows.transpose
|
33
34
|
end
|
34
35
|
|
35
36
|
def rows
|
36
37
|
self.rpad!
|
37
|
-
lines.map
|
38
|
+
lines.map do |l|
|
39
|
+
l.text_elements.map! do |te|
|
40
|
+
te || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil)
|
41
|
+
end
|
42
|
+
end.sort_by { |l| l.map { |te| te.top || 0 }.max }
|
38
43
|
end
|
39
44
|
|
40
45
|
# create a new Table object from an array of arrays, representing a list of rows in a spreadsheet
|
41
46
|
# probably only used for testing
|
42
47
|
def self.new_from_array(array_of_rows)
|
43
48
|
t = Table.new(array_of_rows.size, [])
|
49
|
+
@extraction_method = "testing"
|
44
50
|
array_of_rows.each_with_index do |row, index|
|
45
|
-
t.lines[index].text_elements = row.map{|cell| TextElement.new(
|
51
|
+
t.lines[index].text_elements = row.each_with_index.map{|cell, inner_index| TextElement.new(index, inner_index, 1, 1, nil, nil, cell, nil)}
|
46
52
|
end
|
53
|
+
t.rpad!
|
47
54
|
t
|
48
55
|
end
|
49
56
|
|
@@ -77,5 +84,13 @@ module Tabula
|
|
77
84
|
self.lines.zip(other.lines).all? { |my, yours| my == yours }
|
78
85
|
|
79
86
|
end
|
87
|
+
|
88
|
+
def to_json(*a)
|
89
|
+
{
|
90
|
+
'json_class' => self.class.name,
|
91
|
+
'extraction_method' => @extraction_method,
|
92
|
+
'data' => rows,
|
93
|
+
}.to_json(*a)
|
94
|
+
end
|
80
95
|
end
|
81
96
|
end
|
data/lib/tabula/extraction.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
1
2
|
java_import org.apache.pdfbox.pdfparser.PDFParser
|
2
3
|
java_import org.apache.pdfbox.util.TextPosition
|
3
4
|
java_import org.apache.pdfbox.pdmodel.PDDocument
|
@@ -198,7 +199,12 @@ module Tabula
|
|
198
199
|
|
199
200
|
def processTextPosition(text)
|
200
201
|
c = text.getCharacter
|
201
|
-
h =
|
202
|
+
h = text.getHeightDir.round(2)
|
203
|
+
|
204
|
+
if c == ' ' || c == ' ' # replace non-breaking space for space
|
205
|
+
c = ' '
|
206
|
+
h = text.getWidthDirAdj.round(2)
|
207
|
+
end
|
202
208
|
|
203
209
|
te = Tabula::TextElement.new(text.getYDirAdj.round(2) - h,
|
204
210
|
text.getXDirAdj.round(2),
|
@@ -28,7 +28,8 @@ module Tabula
|
|
28
28
|
options = {
|
29
29
|
:password => '',
|
30
30
|
:detect_ruling_lines => true,
|
31
|
-
:vertical_rulings => []
|
31
|
+
:vertical_rulings => [],
|
32
|
+
:extraction_method => "guess",
|
32
33
|
}.merge(options)
|
33
34
|
|
34
35
|
if area.instance_of?(Array)
|
@@ -41,32 +42,51 @@ module Tabula
|
|
41
42
|
page = [page]
|
42
43
|
end
|
43
44
|
|
44
|
-
|
45
|
+
pdf_page = Extraction::ObjectExtractor.new(pdf_path,
|
45
46
|
page,
|
46
47
|
options[:password]) \
|
47
48
|
.extract.next
|
48
49
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
50
|
+
if ["spreadsheet", "original"].include? options[:extraction_method]
|
51
|
+
use_spreadsheet_extraction_method = options[:extraction_method] == "spreadsheet"
|
52
|
+
else
|
53
|
+
use_spreadsheet_extraction_method = pdf_page.is_tabular?
|
54
|
+
end
|
53
55
|
|
54
|
-
|
55
|
-
|
56
|
+
if use_spreadsheet_extraction_method
|
57
|
+
table = pdf_page.get_area(area).spreadsheets.inject(&:+)
|
58
|
+
else
|
59
|
+
use_detected_lines = false
|
60
|
+
if options[:detect_ruling_lines] && options[:vertical_rulings].empty?
|
61
|
+
detected_vertical_rulings = Ruling.crop_rulings_to_area(pdf_page.vertical_ruling_lines,
|
62
|
+
area)
|
56
63
|
|
57
|
-
|
58
|
-
|
59
|
-
# ie: https://www.dropbox.com/s/lpydler5c3pn408/S2MNCEbirdisland.pdf (see 7th column)
|
60
|
-
# idea: detect columns without considering rulings, detect vertical rulings
|
61
|
-
# calculate ratio and try to come up with a threshold
|
62
|
-
use_detected_lines = detected_vertical_rulings.size > 2 \
|
63
|
-
&& (detected_vertical_rulings.count { |vl|
|
64
|
-
vl.height / area.height > 0.9
|
65
|
-
} / detected_vertical_rulings.size.to_f) >= 0.8
|
64
|
+
# only use lines if at least 80% of them cover at least 90%
|
65
|
+
# of the height of area of interest
|
66
66
|
|
67
|
-
|
67
|
+
# TODO this heuristic SUCKS
|
68
|
+
# what if only a couple columns is delimited with vertical rulings?
|
69
|
+
# ie: https://www.dropbox.com/s/lpydler5c3pn408/S2MNCEbirdisland.pdf (see 7th column)
|
70
|
+
# idea: detect columns without considering rulings, detect vertical rulings
|
71
|
+
# calculate ratio and try to come up with a threshold
|
72
|
+
use_detected_lines = detected_vertical_rulings.size > 2 \
|
73
|
+
&& (detected_vertical_rulings.count { |vl|
|
74
|
+
vl.height / area.height > 0.9
|
75
|
+
} / detected_vertical_rulings.size.to_f) >= 0.8
|
68
76
|
|
69
|
-
|
77
|
+
end
|
70
78
|
|
79
|
+
table = pdf_page.get_area(area).get_table(:vertical_rulings => use_detected_lines ? detected_vertical_rulings : options[:vertical_rulings])
|
80
|
+
|
81
|
+
# fixes up the table a little bit, replacing nils with empty TextElements
|
82
|
+
# and sorting the lines.
|
83
|
+
table.lines.each do |l|
|
84
|
+
l.text_elements = l.text_elements.map do |te|
|
85
|
+
te || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil)
|
86
|
+
end
|
87
|
+
end
|
88
|
+
table.lines.sort_by! { |l| l.text_elements.map { |te| te.top or 0 }.max }
|
89
|
+
table
|
90
|
+
end
|
71
91
|
end
|
72
92
|
end
|
data/lib/tabula/version.rb
CHANGED
Binary file
|
data/test/tests.rb
CHANGED
@@ -4,10 +4,14 @@ require 'minitest/autorun'
|
|
4
4
|
|
5
5
|
require_relative '../lib/tabula'
|
6
6
|
|
7
|
+
def table_to_array(table)
|
8
|
+
lines_to_array(table.rows)
|
9
|
+
end
|
10
|
+
|
7
11
|
def lines_to_array(lines)
|
8
|
-
lines.map
|
12
|
+
lines.map do |l|
|
9
13
|
l.map { |te| te.text.strip }
|
10
|
-
|
14
|
+
end
|
11
15
|
end
|
12
16
|
|
13
17
|
def lines_to_table(lines)
|
@@ -15,7 +19,7 @@ def lines_to_table(lines)
|
|
15
19
|
end
|
16
20
|
|
17
21
|
|
18
|
-
# I don't want to pollute the "real"
|
22
|
+
# I don't want to pollute the "real" class with a funny inspect method. Just for testing comparisons.
|
19
23
|
module Tabula
|
20
24
|
class Table
|
21
25
|
def inspect
|
@@ -27,7 +31,7 @@ end
|
|
27
31
|
module Tabula
|
28
32
|
class Line
|
29
33
|
def inspect
|
30
|
-
@text_elements.map
|
34
|
+
@text_elements.map{|te| te.nil? ? '' : te.text}.inspect
|
31
35
|
end
|
32
36
|
end
|
33
37
|
end
|
@@ -173,7 +177,7 @@ end
|
|
173
177
|
class TestExtractor < Minitest::Test
|
174
178
|
|
175
179
|
def test_table_extraction_1
|
176
|
-
table =
|
180
|
+
table = table_to_array Tabula.extract_table(File.expand_path('data/gre.pdf', File.dirname(__FILE__)),
|
177
181
|
1,
|
178
182
|
[107.1, 57.9214, 394.5214, 290.7],
|
179
183
|
:detect_ruling_lines => false)
|
@@ -184,7 +188,7 @@ class TestExtractor < Minitest::Test
|
|
184
188
|
end
|
185
189
|
|
186
190
|
def test_diputados_voting_record
|
187
|
-
table =
|
191
|
+
table = table_to_array Tabula.extract_table(File.expand_path('data/argentina_diputados_voting_record.pdf', File.dirname(__FILE__)),
|
188
192
|
1,
|
189
193
|
[269.875, 12.75, 790.5, 561])
|
190
194
|
|
@@ -199,14 +203,13 @@ class TestExtractor < Minitest::Test
|
|
199
203
|
# and a solution for half-x-height-offset lines.
|
200
204
|
pdf_file_path = File.expand_path('data/frx_2012_disclosure.pdf', File.dirname(__FILE__))
|
201
205
|
|
202
|
-
table =
|
206
|
+
table = Tabula.extract_table(pdf_file_path,
|
203
207
|
1,
|
204
208
|
[106.01, 48.09, 227.31, 551.89],
|
205
209
|
:detect_ruling_lines => true)
|
206
210
|
|
207
211
|
expected = Tabula::Table.new_from_array([["AANONSEN, DEBORAH, A", "", "STATEN ISLAND, NY", "MEALS", "$85.00"], ["TOTAL", "", "", "", "$85.00"], ["AARON, CAREN, T", "", "RICHMOND, VA", "EDUCATIONAL ITEMS", "$78.80"], ["AARON, CAREN, T", "", "RICHMOND, VA", "MEALS", "$392.45"], ["TOTAL", "", "", "", "$471.25"], ["AARON, JOHN", "", "CLARKSVILLE, TN", "MEALS", "$20.39"], ["TOTAL", "", "", "", "$20.39"], ["AARON, JOSHUA, N", "", "WEST GROVE, PA", "MEALS", "$310.33"], ["", "REGIONAL PULMONARY & SLEEP"], ["AARON, JOSHUA, N", "", "WEST GROVE, PA", "SPEAKING FEES", "$4,700.00"], ["", "MEDICINE"], ["TOTAL", "", "", "", "$5,010.33"], ["AARON, MAUREEN, M", "", "MARTINSVILLE, VA", "MEALS", "$193.67"], ["TOTAL", "", "", "", "$193.67"], ["AARON, MICHAEL, L", "", "WEST ISLIP, NY", "MEALS", "$19.50"], ["TOTAL", "", "", "", "$19.50"], ["AARON, MICHAEL, R", "", "BROOKLYN, NY", "MEALS", "$65.92"]])
|
208
212
|
|
209
|
-
|
210
213
|
assert_equal expected, table
|
211
214
|
end
|
212
215
|
|
@@ -259,7 +262,7 @@ class TestExtractor < Minitest::Test
|
|
259
262
|
|
260
263
|
# TODO Spaces inserted in words - fails
|
261
264
|
def test_bo_page24
|
262
|
-
table =
|
265
|
+
table = table_to_array Tabula.extract_table(File.expand_path('data/bo_page24.pdf', File.dirname(__FILE__)),
|
263
266
|
1,
|
264
267
|
[425.625, 53.125, 575.714, 810.535],
|
265
268
|
:detect_ruling_lines => false)
|
@@ -312,7 +315,7 @@ class TestExtractor < Minitest::Test
|
|
312
315
|
|
313
316
|
vertical_rulings = [47,147,256,310,375,431,504].map{|n| Tabula::Ruling.new(0, n, 0, 1000)}
|
314
317
|
|
315
|
-
table =
|
318
|
+
table = table_to_array Tabula.extract_table(File.expand_path('data/campaign_donors.pdf', File.dirname(__FILE__)),
|
316
319
|
1,
|
317
320
|
[255.57,40.43,398.76,557.35],
|
318
321
|
:vertical_rulings => vertical_rulings)
|
@@ -321,12 +324,12 @@ class TestExtractor < Minitest::Test
|
|
321
324
|
end
|
322
325
|
|
323
326
|
def test_get_spacing_and_merging_right
|
324
|
-
table =
|
327
|
+
table = table_to_array Tabula.extract_table(File.expand_path('data/strongschools.pdf', File.dirname(__FILE__)),
|
325
328
|
1,
|
326
329
|
[52.32857142857143,15.557142857142859,128.70000000000002,767.9571428571429],
|
327
330
|
:detect_ruling_lines => true)
|
328
331
|
|
329
|
-
expected = [["Last Name", "First Name", "Address", "City", "State", "Zip", "Occupation", "Employer", "Date", "Amount"], ["Lidstad", "Dick
|
332
|
+
expected = [["Last Name", "First Name", "Address", "City", "State", "Zip", "Occupation", "Employer", "Date", "Amount"], ["Lidstad", "Dick & Peg", "62 Mississippi River Blvd N", "Saint Paul", "MN", "55104", "retired", "", "10/12/2012", "60.00"], ["Strom", "Pam", "1229 Hague Ave", "St. Paul", "MN", "55104", "", "", "9/12/2012", "60.00"], ["Seeba", "Louise & Paul", "1399 Sheldon St", "Saint Paul", "MN", "55108", "BOE", "City of Saint Paul", "10/12/2012", "60.00"], ["Schumacher / Bales", "Douglas L. / Patricia", "948 County Rd. D W", "Saint Paul", "MN", "55126", "", "", "10/13/2012", "60.00"], ["Abrams", "Marjorie", "238 8th St east", "St Paul", "MN", "55101", "Retired", "Retired", "8/8/2012", "75.00"], ["Crouse / Schroeder", "Abigail / Jonathan", "1545 Branston St.", "Saint Paul", "MN", "55108", "", "", "10/6/2012", "75.00"]]
|
330
333
|
|
331
334
|
assert_equal expected, table
|
332
335
|
|
@@ -539,7 +542,7 @@ class TestIsTabularHeuristic < Minitest::Test
|
|
539
542
|
extractor = Tabula::Extraction::ObjectExtractor.new(path, [1])
|
540
543
|
page = extractor.extract.first
|
541
544
|
page.get_ruling_lines!
|
542
|
-
assert page.is_tabular
|
545
|
+
assert page.is_tabular?, "failed on file #{f}"
|
543
546
|
end
|
544
547
|
end
|
545
548
|
|
@@ -549,11 +552,8 @@ class TestIsTabularHeuristic < Minitest::Test
|
|
549
552
|
extractor = Tabula::Extraction::ObjectExtractor.new(path, [1])
|
550
553
|
page = extractor.extract.first
|
551
554
|
page.get_ruling_lines!
|
552
|
-
assert !page.is_tabular
|
555
|
+
assert !page.is_tabular?, "failed on file #{f}"
|
553
556
|
end
|
554
557
|
end
|
555
558
|
|
556
|
-
|
557
|
-
|
558
|
-
|
559
559
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tabula-extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.1
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Manuel Aristarán
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2014-01-
|
13
|
+
date: 2014-01-18 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: minitest
|
@@ -162,6 +162,7 @@ files:
|
|
162
162
|
- test/heuristic-test-set/original/S2MNCEbirdisland.pdf
|
163
163
|
- test/heuristic-test-set/original/bo_page24.pdf
|
164
164
|
- test/heuristic-test-set/original/campaign_donors.pdf
|
165
|
+
- test/heuristic-test-set/original/cs076pct.pdf
|
165
166
|
- test/heuristic-test-set/spreadsheet/47008204D_USA.page4.pdf
|
166
167
|
- test/heuristic-test-set/spreadsheet/GSK_2012_Q4.page437.pdf
|
167
168
|
- test/heuristic-test-set/spreadsheet/strongschools.pdf
|
@@ -189,7 +190,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
189
190
|
version: '0'
|
190
191
|
requirements: []
|
191
192
|
rubyforge_project:
|
192
|
-
rubygems_version: 2.1
|
193
|
+
rubygems_version: 2.2.1
|
193
194
|
signing_key:
|
194
195
|
specification_version: 4
|
195
196
|
summary: extract tables from PDF files
|
@@ -217,6 +218,7 @@ test_files:
|
|
217
218
|
- test/heuristic-test-set/original/S2MNCEbirdisland.pdf
|
218
219
|
- test/heuristic-test-set/original/bo_page24.pdf
|
219
220
|
- test/heuristic-test-set/original/campaign_donors.pdf
|
221
|
+
- test/heuristic-test-set/original/cs076pct.pdf
|
220
222
|
- test/heuristic-test-set/spreadsheet/47008204D_USA.page4.pdf
|
221
223
|
- test/heuristic-test-set/spreadsheet/GSK_2012_Q4.page437.pdf
|
222
224
|
- test/heuristic-test-set/spreadsheet/strongschools.pdf
|