tabula-extractor 0.6.6-java → 0.7.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/AUTHORS.md +1 -0
  3. data/README.md +27 -11
  4. data/bin/tabula +61 -19
  5. data/ext/liblsd-linux32.so +0 -0
  6. data/ext/liblsd-linux64.so +0 -0
  7. data/ext/liblsd.dll +0 -0
  8. data/ext/liblsd.dylib +0 -0
  9. data/ext/liblsd64.dll +0 -0
  10. data/ext/lsd.c +137 -137
  11. data/ext/lsd.h +9 -9
  12. data/lib/tabula.rb +20 -3
  13. data/lib/tabula/core_ext.rb +261 -0
  14. data/lib/tabula/entities.rb +11 -456
  15. data/lib/tabula/entities/cell.rb +42 -0
  16. data/lib/tabula/entities/has_cells.rb +244 -0
  17. data/lib/tabula/entities/line.rb +39 -0
  18. data/lib/tabula/entities/page.rb +269 -0
  19. data/lib/tabula/entities/page_area.rb +7 -0
  20. data/lib/tabula/entities/ruling.rb +300 -0
  21. data/lib/tabula/entities/spreadsheet.rb +92 -0
  22. data/lib/tabula/entities/table.rb +81 -0
  23. data/lib/tabula/entities/text_chunk.rb +114 -0
  24. data/lib/tabula/entities/text_element.rb +112 -0
  25. data/lib/tabula/entities/zone_entity.rb +57 -0
  26. data/lib/tabula/extraction.rb +327 -0
  27. data/lib/tabula/line_segment_detector.rb +9 -7
  28. data/lib/tabula/pdf_line_extractor.rb +319 -0
  29. data/lib/tabula/pdf_render.rb +1 -5
  30. data/lib/tabula/spreadsheet_extractor.rb +52 -0
  31. data/lib/tabula/table_extractor.rb +50 -348
  32. data/lib/tabula/table_guesser.rb +21 -23
  33. data/lib/tabula/version.rb +1 -1
  34. data/lib/tabula/writers.rb +5 -6
  35. data/tabula-extractor.gemspec +1 -0
  36. data/target/pdfbox-app-2.0.0-SNAPSHOT.jar +0 -0
  37. data/test/data/47008204D_USA.page4.pdf +0 -0
  38. data/test/data/560015757GV_China.page1.pdf +0 -0
  39. data/test/data/GSK_2012_Q4.page437.pdf +0 -0
  40. data/test/data/S2MNCEbirdisland.pdf +0 -0
  41. data/test/data/campaign_donors.pdf +0 -0
  42. data/test/data/frx_2012_disclosure.tsv +88 -0
  43. data/test/data/no_tables.pdf +0 -0
  44. data/test/data/puertos1.pdf +0 -0
  45. data/test/data/spanning_cells.csv +21 -0
  46. data/test/data/spanning_cells.pdf +0 -0
  47. data/test/data/strongschools.pdf +0 -0
  48. data/{vertical_rulings_bug.pdf → test/data/vertical_rulings_bug.pdf} +0 -0
  49. data/test/data/vietnam3.pdf +0 -0
  50. data/test/heuristic-test-set/original/560015757GV_China.page1.pdf +0 -0
  51. data/test/heuristic-test-set/original/S2MNCEbirdisland.pdf +0 -0
  52. data/test/heuristic-test-set/original/bo_page24.pdf +0 -0
  53. data/test/heuristic-test-set/original/campaign_donors.pdf +0 -0
  54. data/test/heuristic-test-set/spreadsheet/47008204D_USA.page4.pdf +0 -0
  55. data/test/heuristic-test-set/spreadsheet/GSK_2012_Q4.page437.pdf +0 -0
  56. data/test/heuristic-test-set/spreadsheet/strongschools.pdf +0 -0
  57. data/test/heuristic-test-set/spreadsheet/tabla_subsidios.pdf +0 -0
  58. data/test/heuristic.rb +50 -0
  59. data/test/test_bin_tabula.sh +7 -0
  60. data/test/tests.rb +476 -63
  61. metadata +79 -28
  62. data/lib/geom/point.rb +0 -21
  63. data/lib/geom/rectangle.rb +0 -101
  64. data/lib/geom/segment.rb +0 -82
  65. data/lib/tabula/pdf_dump.rb +0 -132
  66. data/lib/tabula/whitespace.rb +0 -50
  67. data/vertical_rulings_bug.rb +0 -29
@@ -1,370 +1,72 @@
1
- require 'csv'
2
-
3
1
  module Tabula
4
- class TableExtractor
5
- attr_accessor :text_elements, :options
6
-
7
- DEFAULT_OPTIONS = {
8
- :horizontal_rulings => [],
9
- :vertical_rulings => [],
10
- :merge_words => true,
11
- :split_multiline_cells => false
12
- }
13
-
14
- def initialize(text_elements, options = {})
15
- self.text_elements = text_elements
16
- self.options = DEFAULT_OPTIONS.merge(options)
17
-
18
- if self.options[:merge_words]
19
- if self.options[:vertical_rulings]
20
- merge_words_in_a_vertical_rulings_aware_manner!(self.options[:vertical_rulings])
21
- else
22
- merge_words!
23
- end
24
- end
25
-
26
- end
27
-
28
- def get_rows
29
- hg = self.get_line_boundaries
30
- hg.sort_by(&:top).map { |r| {'top' => r.top, 'bottom' => r.bottom, 'text' => r.texts} }
31
- end
32
-
33
- # TODO finish writing this method
34
- # it should be analogous to get_line_boundaries
35
- # (ie, take into account vertical ruling lines if available)
36
- def group_by_columns
37
- columns = []
38
- tes = self.text_elements.sort_by &:left
39
-
40
- # we don't have vertical rulings
41
- if self.options[:vertical_rulings].empty?
42
- tes.each do |te|
43
- if column = columns.detect { |c| te.horizontally_overlaps?(c) }
44
- column << te
45
- else
46
- columns << Column.new(te.left, te.width, [te])
47
- end
48
- end
49
- else
50
- self.options[:vertical_rulings].sort_by! &:left
51
- 1.upto(self.options[:vertical_rulings].size - 1) do |i|
52
- left_ruling_line = self.options[:vertical_rulings][i - 1]
53
- right_ruling_line = self.options[:vertical_rulings][i]
54
- columns << Column.new(left_ruling_line.left, right_ruling_line.left - left_ruling_line.left, []) if (right_ruling_line.left - left_ruling_line.left > 10)
55
- end
56
- tes.each do |te|
57
- if column = columns.detect { |c| te.horizontally_overlaps?(c) }
58
- column << te
59
- else
60
- #puts "couldn't find a place for #{te.inspect}"
61
- #columns << Column.new(te.left, te.width, [te])
62
- end
63
- end
64
- end
65
- columns
66
- end
67
-
68
- def get_columns
69
- TableExtractor.new(text_elements).group_by_columns.map do |c|
70
- {'left' => c.left, 'right' => c.right, 'width' => c.width}
71
- end
72
- end
73
-
74
- def get_line_boundaries
75
- boundaries = []
76
-
77
- if self.options[:horizontal_rulings].empty?
78
- # we don't have rulings
79
- # iteratively grow boundaries to construct lines
80
- self.text_elements.each do |te|
81
- row = boundaries.detect { |l| l.vertically_overlaps?(te) }
82
- ze = ZoneEntity.new(te.top, te.left, te.width, te.height)
83
- if row.nil?
84
- boundaries << ze
85
- ze.texts << te.text
86
- else
87
- row.merge!(ze)
88
- row.texts << te.text
89
- end
90
- end
91
- else
92
- self.options[:horizontal_rulings].sort_by!(&:top)
93
- 1.upto(self.options[:horizontal_rulings].size - 1) do |i|
94
- above = self.options[:horizontal_rulings][i - 1]
95
- below = self.options[:horizontal_rulings][i]
96
-
97
- # construct zone between a horizontal ruling and the next
98
- ze = ZoneEntity.new(above.top,
99
- [above.left, below.left].min,
100
- [above.width, below.width].max,
101
- below.top - above.top)
102
-
103
- # skip areas shorter than some threshold
104
- # TODO: this should be the height of the shortest character, or something like that
105
- next if ze.height < 2
106
-
107
- boundaries << ze
108
- end
109
- end
110
- boundaries
111
- end
112
-
113
- private
114
-
115
- #this is where spaces come from!
116
- def merge_words!
117
- return self.text_elements if @merged # only merge once. awful hack.
118
- @merged = true
119
- current_word_index = i = 0
120
- char1 = self.text_elements[i]
121
-
122
- while i < self.text_elements.size-1 do
123
-
124
- char2 = self.text_elements[i+1]
125
-
126
- next if char2.nil? or char1.nil?
127
-
128
- if self.text_elements[current_word_index].should_merge?(char2)
129
- self.text_elements[current_word_index].merge!(char2)
130
- char1 = char2
131
- self.text_elements[i+1] = nil
132
- else
133
- # is there a space? is this within `CHARACTER_DISTANCE_THRESHOLD` points of previous char?
134
- if (char1.text != " ") and (char2.text != " ") and self.text_elements[current_word_index].should_add_space?(char2)
135
- self.text_elements[current_word_index].text += " "
136
- #self.text_elements[current_word_index].width += self.text_elements[current_word_index].width_of_space
137
- end
138
- current_word_index = i+1
139
- end
140
- i += 1
141
- end
142
- self.text_elements.compact!
143
- return self.text_elements
144
- end
145
-
146
- #this is where spaces come from!
147
- def merge_words_in_a_vertical_rulings_aware_manner!(vertical_rulings)
148
- #don't merge words across a ruling.
149
-
150
- return self.text_elements if @merged # only merge once. awful hack.
151
- @merged = true
152
- current_word_index = i = 0
153
- char1 = self.text_elements[i]
154
- vertical_ruling_locations = vertical_rulings.map &:left
155
-
156
- while i < self.text_elements.size-1 do
157
-
158
- char2 = self.text_elements[i+1]
159
-
160
- next if char2.nil? or char1.nil?
161
-
162
- if self.text_elements[current_word_index].should_merge?(char2)
163
- unless vertical_ruling_locations.map{|loc| self.text_elements[current_word_index].left < loc && char2.left > loc}.include?(true)
164
- self.text_elements[current_word_index].merge!(char2)
165
- end
166
-
167
- char1 = char2
168
- self.text_elements[i+1] = nil
169
- else
170
- # is there a space? is this within `CHARACTER_DISTANCE_THRESHOLD` points of previous char?
171
- if (char1.text != " ") and (char2.text != " ") and self.text_elements[current_word_index].should_add_space?(char2)
172
- self.text_elements[current_word_index].text += " "
173
- #self.text_elements[current_word_index].width += self.text_elements[current_word_index].width_of_space
174
- end
175
- current_word_index = i+1
176
- end
177
- i += 1
178
- end
179
- self.text_elements.compact!
180
- return self.text_elements
181
- end
182
- end
183
2
 
184
- ##
185
- # Deprecated.
186
- ##
187
- def Tabula.group_by_columns(text_elements, merge_words=false)
188
- TableExtractor.new(text_elements, :merge_words => merge_words).group_by_columns
3
+ def Tabula.merge_words(text_elements, options={})
4
+ warn 'Tabula.merge_words is DEPRECATED. Use Tabula::TextElement.merge_words instead'
5
+ TextElement.merge_words(text_elements, options)
189
6
  end
190
7
 
191
- ##
192
- # Deprecated.
193
- ##
194
- def Tabula.get_line_boundaries(text_elements)
195
- TableExtractor.new(text_elements).get_line_boundaries
196
- end
197
-
198
- ##
199
- # Deprecated.
200
- ##
201
- def Tabula.get_columns(text_elements, merge_words=true)
202
- TableExtractor.new(text_elements, :merge_words => merge_words).get_columns
203
- end
204
-
205
- ##
206
- # Deprecated.
207
- ##
208
- def Tabula.get_rows(text_elements, merge_words=true)
209
- TableExtractor.new(text_elements, :merge_words => merge_words).get_rows
210
- end
211
-
212
- def Tabula.lines_to_csv(lines)
213
- CSV.generate do |csv|
214
- lines.each do |l|
215
- csv << l.map { |c| c.text.strip }
216
- end
217
- end
218
- end
219
-
220
- ONLY_SPACES_RE = Regexp.new('^\s+$')
221
-
222
- def Tabula.group_by_lines(text_elements)
223
- lines = []
224
- text_elements.each do |te|
225
- next if te.text =~ ONLY_SPACES_RE
226
- l = lines.find { |line| line.horizontal_overlap_ratio(te) >= 0.01 }
227
- if l.nil?
228
- l = Line.new
229
- lines << l
230
- end
231
- l << te
232
- end
233
- lines
8
+ def Tabula.group_by_lines(text_chunks)
9
+ warn 'Tabula.group_by_lines is DEPRECATED. Use Tabula::TextChunk.group_by_lines instead.'
10
+ TextChunk.group_by_lines(text_chunks)
234
11
  end
235
12
 
236
13
  # Returns an array of Tabula::Line
237
- def Tabula.make_table(text_elements, options={})
238
- default_options = {:separators => []}
239
- options = default_options.merge(options)
240
-
241
- if text_elements.empty?
242
- return []
243
- end
244
-
245
- extractor = TableExtractor.new(text_elements, options).text_elements
246
- lines = group_by_lines(text_elements)
247
- top = lines[0].text_elements.map(&:top).min
248
- right = 0
249
- columns = []
250
-
251
- text_elements.sort_by(&:left).each do |te|
252
- next if te.text =~ ONLY_SPACES_RE
253
- if te.top >= top
254
- left = te.left
255
- if (left > right)
256
- columns << right
257
- right = te.right
258
- elsif te.right > right
259
- right = te.right
260
- end
261
- end
262
- end
263
-
264
- separators = columns[1..-1].sort.reverse
265
-
266
- table = Table.new(lines.count, separators)
267
- lines.each_with_index do |line, i|
268
- line.text_elements.each do |te|
269
- j = separators.find_index { |s| te.left > s } || separators.count
270
- table.add_text_element(te, i, separators.count - j)
271
- end
272
- end
273
-
274
- table.lines.map { |l|
275
- l.text_elements.map! { |te|
276
- te.nil? ? TextElement.new(nil, nil, nil, nil, nil, nil, '', nil) : te
277
- }
278
- }.sort_by { |l| l.map { |te| te.top or 0 }.max }
279
-
14
+ def Tabula.make_table(page, area, options={})
15
+ warn 'Tabula.make_table is DEPRECATED. Use Tabula::Page#make_table instead.'
16
+ page.get_area(area).make_table(options)
280
17
  end
281
18
 
19
+ # extract a table from file +pdf_path+, +page+ and +area+
20
+ #
21
+ # +page+ can be a single integer (1-based) or an array of integers
22
+ #
23
+ # ==== Options
24
+ # +:password+ - Password if encrypted PDF (default: empty)
25
+ # +:detect_ruling_lines+ - Try to detect vertical ruling lines (default: true)
26
+ # +:vertical_rulings+ - List of positions for vertical rulings. Overrides +:detect_ruling_lines+. (default: [])
27
+ def Tabula.extract_table(pdf_path, page, area, options={})
28
+ options = {
29
+ :password => '',
30
+ :detect_ruling_lines => true,
31
+ :vertical_rulings => []
32
+ }.merge(options)
282
33
 
283
- def Tabula.make_table_with_vertical_rulings(text_elements, options={})
284
- extractor = TableExtractor.new(text_elements, options)
285
-
286
- # group by lines
287
- lines = []
288
- line_boundaries = extractor.get_line_boundaries
289
-
290
- # find all the text elements
291
- # contained within each detected line (table row) boundary
292
- line_boundaries.each do |lb|
293
- line = Line.new
294
-
295
- line_members = text_elements.find_all do |te|
296
- te.vertically_overlaps?(lb)
297
- end
298
-
299
- text_elements -= line_members
300
-
301
- line_members.sort_by(&:left).each do |te|
302
- # skip text_elements that only contain spaces
303
- next if te.text =~ ONLY_SPACES_RE
304
- line << te
305
- end
306
-
307
- lines << line if line.text_elements.size > 0
34
+ if area.instance_of?(Array)
35
+ top, left, bottom, right = area
36
+ area = Tabula::ZoneEntity.new(top, left,
37
+ right - left, bottom - top)
308
38
  end
309
39
 
310
- lines.sort_by!(&:top)
311
-
312
- vertical_rulings = options[:vertical_rulings]
313
- columns = TableExtractor.new(lines.map(&:text_elements).flatten.compact.uniq, {:merge_words => options[:merge_words], :vertical_rulings => vertical_rulings}).group_by_columns.sort_by(&:left)
314
-
315
- # insert an empty cell in a given column if there's no text elements within that column's boundaries
316
- lines.each_with_index do |l, line_index|
317
- next if l.text_elements.nil?
318
- l.text_elements.compact! # TODO WHY do I have to do this?
319
- l.text_elements.uniq! # TODO WHY do I have to do this?
320
- l.text_elements.sort_by!(&:left)
321
-
322
- columns.each_with_index do |c, i|
323
- if (l.text_elements.select{|te| te && te.left >= c.left && te.right <= (c.left + c.width)}.empty?)
324
- l.text_elements.insert(i, TextElement.new(l.top, c.left, c.width, l.height, nil, 0, '', 0))
325
- end
326
- end
40
+ if page.is_a?(Integer)
41
+ page = [page]
327
42
  end
328
43
 
329
- # merge elements that are in the same column
330
- unless options[:dontmerge]
331
- lines.each_with_index do |l, line_index|
332
- next if l.text_elements.nil?
44
+ page_obj = Extraction::ObjectExtractor.new(pdf_path,
45
+ page,
46
+ options[:password]) \
47
+ .extract.next
333
48
 
334
- (0..l.text_elements.size-1).to_a.combination(2).each do |t1, t2| #don't remove a string of empty cells
335
- next if l.text_elements[t1].nil? or l.text_elements[t2].nil? or l.text_elements[t1].text.empty? or l.text_elements[t2].text.empty?
49
+ use_detected_lines = false
50
+ if options[:detect_ruling_lines] && options[:vertical_rulings].empty?
51
+ detected_vertical_rulings = Ruling.crop_rulings_to_area(page_obj.vertical_ruling_lines,
52
+ area)
336
53
 
337
- # if same column...
338
- if columns.detect { |c| c.text_elements.include? l.text_elements[t1] } \
339
- == columns.detect { |c| c.text_elements.include? l.text_elements[t2] }
340
- if l.text_elements[t1].bottom <= l.text_elements[t2].bottom
341
- l.text_elements[t1].merge!(l.text_elements[t2])
342
- l.text_elements[t2] = nil
343
- else
344
- l.text_elements[t2].merge!(l.text_elements[t1])
345
- l.text_elements[t1] = nil
346
- end
347
- end
348
- end
54
+ # only use lines if at least 80% of them cover at least 90%
55
+ # of the height of area of interest
349
56
 
350
- l.text_elements.compact!
351
- end
352
- end
57
+ # TODO this heuristic SUCKS
58
+ # what if only a couple columns is delimited with vertical rulings?
59
+ # ie: https://www.dropbox.com/s/lpydler5c3pn408/S2MNCEbirdisland.pdf (see 7th column)
60
+ # idea: detect columns without considering rulings, detect vertical rulings
61
+ # calculate ratio and try to come up with a threshold
62
+ use_detected_lines = detected_vertical_rulings.size > 2 \
63
+ && (detected_vertical_rulings.count { |vl|
64
+ vl.height / area.height > 0.9
65
+ } / detected_vertical_rulings.size.to_f) >= 0.8
353
66
 
354
- # remove duplicate lines
355
- # TODO this shouldn't have happened here, check why we have to do
356
- # this (maybe duplication is happening in the column merging phase?)
357
- (0..lines.size - 2).each do |i|
358
- next if lines[i].nil?
359
- # if any of the elements on the next line is duplicated, kill
360
- # the next line
361
- if (0..lines[i].text_elements.size-1).any? { |j| lines[i].text_elements[j] == lines[i+1].text_elements[j] }
362
- lines[i+1] = nil
363
- end
364
67
  end
365
68
 
366
- lines.compact.map do |line|
367
- line.text_elements.sort_by(&:left)
368
- end
69
+ page_obj.get_area(area).make_table(:vertical_rulings => use_detected_lines ? detected_vertical_rulings : options[:vertical_rulings])
70
+
369
71
  end
370
72
  end
@@ -1,11 +1,6 @@
1
- require 'java'
2
1
  require 'json'
3
- require_relative '../geom/point'
4
- require_relative '../geom/segment'
5
- require_relative '../geom/rectangle'
6
- require_relative './pdf_render'
7
- #CLASSPATH=:./target/javacpp.jar:./target/javacv.jar:./target/javacv-macosx-x86_64.jar:./target/PDFRenderer-0.9.1.jar
8
2
 
3
+ warn 'Tabula::TableGuesser is DEPRECATED and will be removed'
9
4
 
10
5
  module Tabula
11
6
  module TableGuesser
@@ -13,7 +8,7 @@ module Tabula
13
8
  def TableGuesser.find_and_write_rects(filename, output_dir)
14
9
  #writes to JSON the rectangles on each page in the specified PDF.
15
10
  open(File.join(output_dir, "tables.json"), 'w') do |f|
16
- f.write( JSON.dump(find_rects(filename).map{|a| a.map{|r| r.dims.map &:to_i }} ))
11
+ f.write( JSON.dump(find_rects(filename).map{|a| a.map{|r| r.dims.map(&:to_i) }} ))
17
12
  end
18
13
  end
19
14
 
@@ -50,8 +45,8 @@ module Tabula
50
45
  lines
51
46
  end
52
47
 
53
- def TableGuesser.find_lines_on_page(pdf, page_index)
54
- Tabula::LSD.detect_lines_in_pdf_page(pdf, page_index)
48
+ def TableGuesser.find_lines_on_page(pdf, page_number_zero_indexed)
49
+ Tabula::Extraction::LineExtractor.lines_in_pdf_page(pdf, page_number_zero_indexed, {:render_pdf => false})
55
50
  end
56
51
 
57
52
  def TableGuesser.find_rects_on_page(pdf, page_index)
@@ -59,9 +54,11 @@ module Tabula
59
54
  end
60
55
 
61
56
  def TableGuesser.find_rects_from_lines(lines)
62
- horizontal_lines = lines.select &:horizontal?
63
- vertical_lines = lines.select &:vertical?
64
- find_tables(vertical_lines, horizontal_lines).inject([]){|memo, next_rect| Geometry::Rectangle.unionize(memo, next_rect )}.sort_by(&:area).reverse
57
+ horizontal_lines = lines.select(&:horizontal?)
58
+ vertical_lines = lines.select(&:vertical?)
59
+ find_tables(vertical_lines, horizontal_lines).inject([]) do |memo, next_rect|
60
+ java.awt.geom.Rectangle2D::Float.unionize( memo, next_rect )
61
+ end.compact.reject{|r| r.area == 0 }.sort_by(&:area).reverse
65
62
  end
66
63
 
67
64
 
@@ -75,14 +72,14 @@ module Tabula
75
72
  end
76
73
 
77
74
  def TableGuesser.find_tables(verticals, horizontals)
78
- # /*
79
- # * Find all the rectangles in the vertical and horizontal lines given.
80
- # *
81
- # * Rectangles are deduped with hashRectangle, which considers two rectangles identical if each point rounds to the same tens place as the other.
82
- # *
83
- # * TODO: generalize this.
84
- # */
85
- corner_proximity_threshold = 0.10;
75
+ #
76
+ # Find all the rectangles in the vertical and horizontal lines given.
77
+ #
78
+ # Rectangles are deduped with hashRectangle, which considers two rectangles identical if each point rounds to the same tens place as the other.
79
+ #
80
+ # TODO: generalize this.
81
+ #
82
+ corner_proximity_threshold = 0.005;
86
83
 
87
84
  rectangles = []
88
85
  #find rectangles with one horizontal line and two vertical lines that end within $threshold to the ends of the horizontal line.
@@ -137,9 +134,10 @@ module Tabula
137
134
  #in case we eventually tolerate not-quite-vertical lines, this computes the distance in Y directly, rather than depending on the vertical lines' lengths.
138
135
  height = [left_vertical_line.bottom - left_vertical_line.top, right_vertical_line.bottom - right_vertical_line.top].max
139
136
 
140
- y = [left_vertical_line.top, right_vertical_line.top].min
137
+ top = [left_vertical_line.top, right_vertical_line.top].min
141
138
  width = horizontal_line.right - horizontal_line.left
142
- r = Geometry::Rectangle.new_by_x_y_dims(horizontal_line.left, y, width, height ) #x, y, w, h
139
+ left = horizontal_line.left
140
+ r = java.awt.geom.Rectangle2D::Float.new( left, top, width, height ) #x, y, w, h
143
141
  #rectangles.put(hashRectangle(r), r); #TODO: I don't think I need this now that I'm in Rubyland
144
142
  rectangles << r
145
143
  end
@@ -187,7 +185,7 @@ module Tabula
187
185
  y = vertical_line.top
188
186
  width = [top_horizontal_line.right - top_horizontal_line.left, bottom_horizontal_line.right - bottom_horizontal_line.right].max
189
187
  height = vertical_line.bottom - vertical_line.top
190
- r = Geometry::Rectangle.new_by_x_y_dims(x, y, width, height); #x, y, w, h
188
+ r = java.awt.geom.Rectangle2D::Float.new( x, y, width, height ) #x, y, w, h
191
189
  #rectangles.put(hashRectangle(r), r);
192
190
  rectangles << r
193
191
  end