tabula-extractor 0.7.2-java → 0.7.4-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +4 -8
- data/bin/tabula +3 -3
- data/lib/tabula.rb +9 -5
- data/lib/tabula/entities.rb +1 -0
- data/lib/tabula/entities/cell.rb +6 -4
- data/lib/tabula/entities/has_cells.rb +22 -78
- data/lib/tabula/entities/line.rb +52 -6
- data/lib/tabula/entities/page.rb +43 -50
- data/lib/tabula/entities/ruling.rb +83 -105
- data/lib/tabula/entities/spreadsheet.rb +74 -11
- data/lib/tabula/entities/table.rb +55 -37
- data/lib/tabula/entities/tabular.rb +42 -0
- data/lib/tabula/entities/text_chunk.rb +55 -52
- data/lib/tabula/entities/text_element.rb +129 -62
- data/lib/tabula/entities/zone_entity.rb +15 -6
- data/lib/tabula/extraction.rb +114 -49
- data/lib/tabula/line_segment_detector.rb +0 -5
- data/lib/tabula/table_extractor.rb +32 -37
- data/lib/tabula/version.rb +1 -1
- data/tabula-extractor.gemspec +2 -5
- metadata +13 -95
- data/ext/COPYING +0 -661
- data/ext/Makefile.OSX +0 -18
- data/ext/Makefile.defaults +0 -9
- data/ext/Makefile.linux32 +0 -11
- data/ext/Makefile.linux64 +0 -12
- data/ext/Makefile.mingw +0 -10
- data/ext/Makefile.mingw64 +0 -10
- data/ext/liblsd-linux32.so +0 -0
- data/ext/liblsd-linux64.so +0 -0
- data/ext/liblsd.def +0 -3
- data/ext/liblsd.dll +0 -0
- data/ext/liblsd.dylib +0 -0
- data/ext/liblsd64.dll +0 -0
- data/ext/lsd.c +0 -2270
- data/ext/lsd.h +0 -283
- data/test/data/47008204D_USA.page4.pdf +0 -0
- data/test/data/560015757GV_China.page1.pdf +0 -0
- data/test/data/ClinicalResearchDisclosureReport2012Q2.pdf +0 -0
- data/test/data/GSK_2012_Q4.page437.pdf +0 -0
- data/test/data/S2MNCEbirdisland.pdf +0 -0
- data/test/data/argentina_diputados_voting_record.pdf +0 -0
- data/test/data/bo_page24.pdf +0 -0
- data/test/data/campaign_donors.pdf +0 -0
- data/test/data/frx_2012_disclosure.pdf +0 -0
- data/test/data/frx_2012_disclosure.tsv +0 -88
- data/test/data/gre.pdf +0 -0
- data/test/data/no_tables.pdf +0 -0
- data/test/data/nyc_2013fiscalreporttables.pdf +0 -0
- data/test/data/puertos1.pdf +0 -0
- data/test/data/spanning_cells.csv +0 -21
- data/test/data/spanning_cells.pdf +0 -0
- data/test/data/strongschools.pdf +0 -0
- data/test/data/sydney_disclosure_contract.pdf +0 -0
- data/test/data/tabla_subsidios.pdf +0 -0
- data/test/data/vertical_rulings_bug.pdf +0 -0
- data/test/data/vietnam3.pdf +0 -0
- data/test/data/wc2012.pdf +0 -0
- data/test/heuristic-test-set/original/560015757GV_China.page1.pdf +0 -0
- data/test/heuristic-test-set/original/S2MNCEbirdisland.pdf +0 -0
- data/test/heuristic-test-set/original/bo_page24.pdf +0 -0
- data/test/heuristic-test-set/original/campaign_donors.pdf +0 -0
- data/test/heuristic-test-set/original/cs076pct.pdf +0 -0
- data/test/heuristic-test-set/spreadsheet/47008204D_USA.page4.pdf +0 -0
- data/test/heuristic-test-set/spreadsheet/GSK_2012_Q4.page437.pdf +0 -0
- data/test/heuristic-test-set/spreadsheet/strongschools.pdf +0 -0
- data/test/heuristic-test-set/spreadsheet/tabla_subsidios.pdf +0 -0
- data/test/heuristic.rb +0 -50
- data/test/test_bin_tabula.sh +0 -7
- data/test/tests.rb +0 -603
@@ -0,0 +1,42 @@
|
|
1
|
+
module Tabula
  # Mixin used to declare pseudo-interfaces: a module that includes
  # AbstractInterface gains +api_not_implemented+, which its placeholder
  # methods call to signal that an implementing class forgot to override them.
  module AbstractInterface

    # Raised when a method declared by an interface has not been overridden.
    class InterfaceNotImplementedError < NoMethodError
    end

    # Hook run when AbstractInterface is included: makes the helper available
    # both on the including module/class itself and on its instances.
    def self.included(klass)
      klass.send(:include, AbstractInterface::Methods)
      klass.send(:extend, AbstractInterface::Methods)
    end

    module Methods
      # Raise InterfaceNotImplementedError naming the calling method.
      #
      # +klass+ is the object whose class failed to implement the method
      # (the placeholders below pass +self+).
      def api_not_implemented(klass)
        # The caller label is "in `meth'" on older Rubies and
        # "in 'Owner#meth'" from Ruby 3.4 on; accept both and keep only the
        # bare method name.
        method_name = caller.first[/in [`'](?:.*[#.])?(.+)'/, 1]
        # The helper is both extended and included, so +self+ may be the
        # interface module or an instance of an implementing class.
        interface_name = is_a?(Module) ? name : self.class.name
        raise AbstractInterface::InterfaceNotImplementedError,
              "#{klass.class.name} needs to implement '#{method_name}' for interface #{interface_name}!"
      end
    end
  end


  module Tabular
    include AbstractInterface
    # this is a pseudo-interface as described here:
    # http://metabates.com/2011/02/07/building-interfaces-and-abstract-classes-in-ruby/
    # Table and Spreadsheet implement this interface, so should any class
    # intended to represent tabular data from a PDF, e.g. if another extraction
    # method were created, so that Tabula GUI and API can correctly handle
    # its data.

    # Every placeholder below raises InterfaceNotImplementedError until the
    # implementing class overrides it.
    def extraction_method; Tabular.api_not_implemented(self); end

    def page; Tabular.api_not_implemented(self); end
    def rows; Tabular.api_not_implemented(self); end
    def cols; Tabular.api_not_implemented(self); end

    def to_csv; Tabular.api_not_implemented(self); end
    def to_tsv; Tabular.api_not_implemented(self); end
    def to_a; Tabular.api_not_implemented(self); end
    def to_json; Tabular.api_not_implemented(self); end
  end
end
|
@@ -8,46 +8,71 @@ module Tabula
|
|
8
8
|
# initialize a new TextChunk from a TextElement
|
9
9
|
def self.create_from_text_element(text_element)
|
10
10
|
raise TypeError, "argument is not a TextElement" unless text_element.instance_of?(TextElement)
|
11
|
-
tc = self.new(text_element.
|
11
|
+
tc = self.new(*text_element.tlwh)
|
12
12
|
tc.text_elements = [text_element]
|
13
13
|
return tc
|
14
14
|
end
|
15
15
|
|
16
|
-
##
|
17
|
-
# group an iterable of TextChunk into a list of Line
|
18
16
|
##
# group an iterable of TextChunk into a list of Line
#
# A chunk starts a new Line when its horizontal_overlap_ratio with the
# current Line is below 0.01. Lines that look like separators (see below)
# are discarded. Returns the remaining lines after calling
# remove_sequential_spaces! on each; returns [] for empty input.
def self.group_by_lines(text_chunks)
  return [] if text_chunks.empty?

  bbwidth = text_chunks.max_by(&:right).right - text_chunks.min_by(&:left).left

  # skip lines such that:
  #  - are wider than the 90% of the width of the text_chunks bounding box
  #  - it contains a single repeated character
  # The check is made on the line being closed, not on the first line.
  separator = lambda do |line|
    line.width / bbwidth > 0.9 &&
      line.text_elements.all? { |chunk| chunk.text =~ SAME_CHAR_RE }
  end

  first_line = Line.new
  first_line << text_chunks.first

  lines = text_chunks[1..-1].inject([first_line]) do |acc, chunk|
    if acc.last.horizontal_overlap_ratio(chunk) < 0.01
      # the current line is complete: drop it if it is a separator,
      # then open a fresh line for +chunk+
      acc.pop if separator.call(acc.last)
      acc << Line.new
    end
    acc.last << chunk
    acc
  end

  # the last line never gets "closed" inside the loop, so check it here
  lines.pop if separator.call(lines.last)

  lines.map!(&:remove_sequential_spaces!)
end
|
31
44
|
|
32
45
|
##
|
33
|
-
#
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
46
|
+
# returns a list of column boundaries (x axis)
|
47
|
+
# +lines+ must be an array of lines sorted by their +top+ attribute
|
48
|
+
##
# returns a list of column boundaries (x axis)
# +lines+ must be an array of lines sorted by their +top+ attribute
#
# Column regions start as the non-blank chunks of the first line. For each
# following line, every chunk that horizontally overlaps a region is merged
# into it; chunks overlapping no region become new regions. The result is
# the +right+ edge of every region, rounded to 2 decimals and de-duplicated.
# Returns [] when +lines+ is empty.
def self.column_positions(lines)
  return [] if lines.empty?

  # chunks consisting only of whitespace never define or extend a column
  non_blank = lambda do |chunks|
    chunks.reject { |chunk| chunk.text =~ ONLY_SPACES_RE }
  end
  to_region = lambda { |chunk| Tabula::ZoneEntity.new(*chunk.tlwh) }

  regions = non_blank.call(lines.first.text_elements).map(&to_region)

  lines.drop(1).each do |line|
    unassigned = non_blank.call(line.text_elements)

    regions.each do |region|
      # decide the overlaps before merging, so growing the region does not
      # change which chunks of this line it captures
      overlapping = unassigned.select { |chunk| region.horizontally_overlaps?(chunk) }
      overlapping.each { |chunk| region.merge!(chunk) }
      unassigned -= overlapping
    end

    # whatever is left starts new columns
    regions += unassigned.map(&to_region)
  end

  regions.map { |region| region.right.round(2) }.uniq
end
|
52
77
|
|
53
78
|
##
|
@@ -59,10 +84,10 @@ module Tabula
|
|
59
84
|
|
60
85
|
def merge!(other)
|
61
86
|
if other.instance_of?(TextChunk)
|
62
|
-
if self
|
63
|
-
self.text_elements = other.text_elements + self.text_elements
|
64
|
-
else
|
87
|
+
if (self <=> other) < 0
|
65
88
|
self.text_elements = self.text_elements + other.text_elements
|
89
|
+
else
|
90
|
+
self.text_elements = other.text_elements + self.text_elements
|
66
91
|
end
|
67
92
|
end
|
68
93
|
super(other)
|
@@ -75,28 +100,6 @@ module Tabula
|
|
75
100
|
raise "Not Implemented"
|
76
101
|
end
|
77
102
|
|
78
|
-
##
|
79
|
-
# remove leading and trailing whitespace
|
80
|
-
# (changes geometry accordingly)
|
81
|
-
# TODO horrible implementation - fix.
|
82
|
-
def strip!
|
83
|
-
acc = 0
|
84
|
-
new_te = self.text_elements.drop_while { |te|
|
85
|
-
te.text == ' ' && acc += 1
|
86
|
-
}
|
87
|
-
self.left += self.text_elements.take(acc).inject(0) { |m, te| m += te.width }
|
88
|
-
self.text_elements = new_te
|
89
|
-
|
90
|
-
self.text_elements.reverse!
|
91
|
-
acc = 0
|
92
|
-
new_te = self.text_elements.drop_while { |te|
|
93
|
-
te.text == ' ' && acc += 1
|
94
|
-
}
|
95
|
-
self.right -= self.text_elements.take(acc).inject(0) { |m, te| m += te.width }
|
96
|
-
self.text_elements = new_te.reverse
|
97
|
-
self
|
98
|
-
end
|
99
|
-
|
100
103
|
def text
|
101
104
|
self.text_elements.map(&:text).join
|
102
105
|
end
|
@@ -1,4 +1,6 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
1
2
|
module Tabula
|
3
|
+
|
2
4
|
##
|
3
5
|
# a Glyph
|
4
6
|
class TextElement < ZoneEntity
|
@@ -17,8 +19,20 @@ module Tabula
|
|
17
19
|
|
18
20
|
EMPTY = TextElement.new(0, 0, 0, 0, nil, 0, '', 0)
|
19
21
|
|
22
|
+
# true when +second+ lies strictly inside the open interval
# (first - variance, first + variance)
def self.within(first, second, variance )
|
23
|
+
second < first + variance && second > first - variance
|
24
|
+
end
|
25
|
+
|
26
|
+
# true when any of these holds:
#  - y2 is within +variance+ of y1 (see +within+)
#  - y2 falls in [y1 - height1, y1]
#  - y1 falls in [y2 - height2, y2]
# merge_words calls this with bottom coordinates and heights to decide
# whether a char is still on the current line.
def self.overlap(y1, height1, y2, height2, variance=0.1)
|
27
|
+
within( y1, y2, variance) || (y2 <= y1 && y2 >= y1 - height1) \
|
28
|
+
|| (y1 <= y2 && y1 >= y2-height2)
|
29
|
+
end
|
30
|
+
|
31
|
+
|
20
32
|
##
|
21
33
|
# heuristically merge an iterable of TextElement into a list of TextChunk
|
34
|
+
# lots of ideas taken from PDFBox's PDFTextStripper.writePage
|
35
|
+
# here be dragons
|
22
36
|
def self.merge_words(text_elements, options={})
|
23
37
|
default_options = {:vertical_rulings => []}
|
24
38
|
options = default_options.merge(options)
|
@@ -28,74 +42,138 @@ module Tabula
|
|
28
42
|
|
29
43
|
text_chunks = [TextChunk.create_from_text_element(text_elements.shift)]
|
30
44
|
|
45
|
+
|
46
|
+
previousAveCharWidth = text_chunks.first.width
|
47
|
+
endOfLastTextX = text_chunks.first.right
|
48
|
+
maxYForLine = text_chunks.first.bottom
|
49
|
+
maxHeightForLine = text_chunks.first.height
|
50
|
+
minYTopForLine = text_chunks.first.top
|
51
|
+
lastWordSpacing = -1
|
52
|
+
sp = nil
|
53
|
+
|
31
54
|
text_elements.inject(text_chunks) do |chunks, char|
|
55
|
+
|
32
56
|
current_chunk = chunks.last
|
33
57
|
prev_char = current_chunk.text_elements.last
|
34
58
|
|
59
|
+
# Resets the average character width when we see a change in font
|
60
|
+
# or a change in the font size
|
61
|
+
if (char.font != prev_char.font) || (char.font_size != prev_char.font_size)
|
62
|
+
previousAveCharWidth = -1;
|
63
|
+
end
|
64
|
+
|
35
65
|
# if same char AND overlapped, skip
|
36
|
-
if prev_char.text == char.text && prev_char.overlaps_with_ratio?(char, 0.
|
37
|
-
chunks
|
66
|
+
if (prev_char.text == char.text) && prev_char.overlaps_with_ratio?(char, 0.5)
|
67
|
+
next chunks
|
68
|
+
end
|
69
|
+
|
70
|
+
# if char is a space that overlaps with the prev_char, skip
|
71
|
+
if char.text == ' ' && prev_char.left == char.left && prev_char.top == char.top
|
72
|
+
next chunks
|
73
|
+
end
|
74
|
+
|
75
|
+
# any vertical ruling goes across prev_char and char?
|
76
|
+
across_vertical_ruling = vertical_ruling_locations.any? { |loc|
|
77
|
+
prev_char.left < loc && char.left > loc
|
78
|
+
}
|
79
|
+
|
80
|
+
# Estimate the expected width of the space based on the
|
81
|
+
# space character with some margin.
|
82
|
+
wordSpacing = char.width_of_space
|
83
|
+
deltaSpace = 0
|
84
|
+
deltaSpace = if (wordSpacing.nan? || wordSpacing == 0)
|
85
|
+
::Float::MAX
|
86
|
+
elsif lastWordSpacing < 0
|
87
|
+
wordSpacing * 0.5 # 0.5 == spacingTolerance
|
88
|
+
else
|
89
|
+
((wordSpacing + lastWordSpacing) / 2.0) * 0.5
|
90
|
+
end
|
91
|
+
|
92
|
+
# Estimate the expected width of the space based on the
|
93
|
+
# average character width with some margin. This calculation does not
|
94
|
+
# make a true average (average of averages) but we found that it gave the
|
95
|
+
# best results after numerous experiments. Based on experiments we also found that
|
96
|
+
# .3 worked well.
|
97
|
+
averageCharWidth = if previousAveCharWidth < 0
|
98
|
+
char.width / char.text.size
|
99
|
+
else
|
100
|
+
(previousAveCharWidth + (char.width / char.text.size)) / 2.0
|
101
|
+
end
|
102
|
+
deltaCharWidth = averageCharWidth * 0.3 # 0.3 == averageCharTolerance
|
103
|
+
|
104
|
+
# Compares the values obtained by the average method and the wordSpacing method and picks
|
105
|
+
# the smaller number.
|
106
|
+
expectedStartOfNextWordX = -::Float::MAX
|
107
|
+
|
108
|
+
if endOfLastTextX != -1
|
109
|
+
expectedStartOfNextWordX = endOfLastTextX + [deltaCharWidth, deltaSpace].min
|
110
|
+
end
|
111
|
+
|
112
|
+
sameLine = true
|
113
|
+
if !overlap(char.bottom, char.height, maxYForLine, maxHeightForLine)
|
114
|
+
endOfLastTextX = -1
|
115
|
+
expectedStartOfNextWordX = -::Float::MAX
|
116
|
+
maxYForLine = -::Float::MAX
|
117
|
+
maxHeightForLine = -1
|
118
|
+
minYTopForLine = ::Float::MAX
|
119
|
+
sameLine = false
|
120
|
+
end
|
121
|
+
|
122
|
+
endOfLastTextX = char.right
|
123
|
+
# should we add a space?
|
124
|
+
if !across_vertical_ruling \
|
125
|
+
&& sameLine \
|
126
|
+
&& expectedStartOfNextWordX < char.left \
|
127
|
+
&& !prev_char.text.end_with?(' ')
|
128
|
+
|
129
|
+
sp = self.new(prev_char.top,
|
130
|
+
prev_char.right,
|
131
|
+
expectedStartOfNextWordX - prev_char.right,
|
132
|
+
prev_char.height,
|
133
|
+
prev_char.font,
|
134
|
+
prev_char.font_size,
|
135
|
+
' ',
|
136
|
+
prev_char.width_of_space)
|
137
|
+
current_chunk << sp
|
38
138
|
else
|
39
|
-
|
40
|
-
across_vertical_ruling = vertical_ruling_locations.any? { |loc|
|
41
|
-
prev_char.left < loc && char.left > loc
|
42
|
-
}
|
43
|
-
|
44
|
-
# should we add a space?
|
45
|
-
if (prev_char.text != " ") && (char.text != " ") \
|
46
|
-
&& !across_vertical_ruling \
|
47
|
-
&& prev_char.should_add_space?(char)
|
48
|
-
|
49
|
-
sp = self.new(prev_char.top,
|
50
|
-
prev_char.right,
|
51
|
-
prev_char.width_of_space,
|
52
|
-
prev_char.width_of_space, # width == height for spaces
|
53
|
-
prev_char.font,
|
54
|
-
prev_char.font_size,
|
55
|
-
' ',
|
56
|
-
prev_char.width_of_space)
|
57
|
-
chunks.last << sp
|
58
|
-
prev_char = sp
|
59
|
-
end
|
60
|
-
|
61
|
-
# should_merge? isn't aware of vertical rulings, so even if two text elements are close enough
|
62
|
-
# that they ought to be merged by that account.
|
63
|
-
# we still shouldn't merge them if the two elements are on opposite sides of a vertical ruling.
|
64
|
-
# Why are both of those `.left`?, you might ask. The intuition is that a letter
|
65
|
-
# that starts on the left of a vertical ruling ought to remain on the left of it.
|
66
|
-
if !across_vertical_ruling && prev_char.should_merge?(char)
|
67
|
-
chunks.last << char
|
68
|
-
else
|
69
|
-
# create a new chunk
|
70
|
-
chunks << TextChunk.create_from_text_element(char)
|
71
|
-
end
|
72
|
-
chunks
|
139
|
+
sp = nil
|
73
140
|
end
|
74
|
-
end
|
75
|
-
end
|
76
141
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
142
|
+
maxYForLine = [char.bottom, maxYForLine].max
|
143
|
+
maxHeightForLine = [maxHeightForLine, char.height].max
|
144
|
+
minYTopForLine = [minYTopForLine, char.top].min
|
145
|
+
|
146
|
+
# if sameLine
|
147
|
+
# puts "prev: #{prev_char.text} - char: #{char.text} - diff: #{char.left - prev_char.right} - space: #{[deltaCharWidth, deltaSpace].min} - spacing: #{wordSpacing} - sp: #{!sp.nil?}"
|
148
|
+
# else
|
149
|
+
# puts
|
150
|
+
# end
|
82
151
|
|
83
|
-
# more or less returns True if (tolerance <= distance < CHARACTER_DISTANCE_THRESHOLD*tolerance)
|
84
|
-
def should_add_space?(other)
|
85
|
-
raise TypeError, "argument is not a TextElement" unless other.instance_of?(TextElement)
|
86
152
|
|
87
|
-
|
153
|
+
dist = (char.left - (sp ? sp.right : prev_char.right))
|
88
154
|
|
89
|
-
|
90
|
-
|
91
|
-
|
155
|
+
if !across_vertical_ruling \
|
156
|
+
&& sameLine \
|
157
|
+
&& (dist < 0 ? current_chunk.vertically_overlaps?(char) : dist < wordSpacing)
|
158
|
+
current_chunk << char
|
159
|
+
else
|
160
|
+
# create a new chunk
|
161
|
+
chunks << TextChunk.create_from_text_element(char)
|
162
|
+
end
|
163
|
+
|
164
|
+
lastWordSpacing = wordSpacing
|
165
|
+
previousAveCharWidth = sp ? (averageCharWidth + sp.width) / 2.0 : averageCharWidth
|
166
|
+
|
167
|
+
chunks
|
168
|
+
end
|
92
169
|
end
|
93
170
|
|
94
171
|
##
|
95
172
|
# merge this TextElement with another (adjust size and text content accordingly)
|
96
173
|
def merge!(other)
|
97
174
|
raise TypeError, "argument is not a TextElement" unless other.instance_of?(TextElement)
|
98
|
-
|
175
|
+
|
176
|
+
if (self <=> other) < 0
|
99
177
|
self.text = other.text + self.text
|
100
178
|
else
|
101
179
|
self.text << other.text
|
@@ -115,16 +193,5 @@ module Tabula
|
|
115
193
|
self.text.strip == other.text.strip
|
116
194
|
end
|
117
195
|
|
118
|
-
# sort in lexicographic (reading) order
|
119
|
-
def <=>(other)
|
120
|
-
if self.vertically_overlaps?(other)
|
121
|
-
self.left <=> other.left
|
122
|
-
elsif self.top < other.top
|
123
|
-
-1
|
124
|
-
else
|
125
|
-
1
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
196
|
end
|
130
197
|
end
|
@@ -4,6 +4,7 @@ module Tabula
|
|
4
4
|
|
5
5
|
class ZoneEntity < java.awt.geom.Rectangle2D::Float
|
6
6
|
|
7
|
+
# TODO used? remove if not.
|
7
8
|
attr_accessor :texts
|
8
9
|
|
9
10
|
def initialize(top, left, width, height)
|
@@ -11,6 +12,7 @@ module Tabula
|
|
11
12
|
if left && top && width && height
|
12
13
|
self.java_send :setRect, [Java::float, Java::float, Java::float, Java::float,], left, top, width, height
|
13
14
|
end
|
15
|
+
# TODO used? remove if not.
|
14
16
|
self.texts = []
|
15
17
|
end
|
16
18
|
|
@@ -21,18 +23,21 @@ module Tabula
|
|
21
23
|
self.height = [self.bottom, other.bottom].max - top
|
22
24
|
|
23
25
|
self.java_send :setRect, [Java::float, Java::float, Java::float, Java::float,], self.left, self.top, self.width, self.height
|
26
|
+
self
|
24
27
|
end
|
25
28
|
|
26
29
|
##
|
27
30
|
# default sorting order for ZoneEntity objects
|
28
31
|
# is lexicographical (left to right, top to bottom)
|
29
32
|
# Two entities are compared by +left+ when they count as being on the same
# row: their bottoms differ by less than 0.1, or the bottom of one falls
# within the [top, bottom] span of the other. Otherwise they are compared
# by +bottom+.
def <=>(other)
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
33
|
+
yDifference = (self.bottom - other.bottom).abs
|
34
|
+
if yDifference < 0.1 ||
|
35
|
+
(other.bottom >= self.top && other.bottom <= self.bottom) ||
|
36
|
+
(self.bottom >= other.top && self.bottom <= other.bottom)
|
37
|
+
self.left <=> other.left
|
38
|
+
else
|
39
|
+
self.bottom <=> other.bottom
|
40
|
+
end
|
36
41
|
end
|
37
42
|
|
38
43
|
def to_json(options={})
|
@@ -47,6 +52,10 @@ module Tabula
|
|
47
52
|
[top, left, bottom, right]
|
48
53
|
end
|
49
54
|
|
55
|
+
# returns [top, left, width, height], i.e. the values in the argument order
# of ZoneEntity#initialize, so the result can be splatted into +new+
def tlwh
|
56
|
+
[top, left, width, height]
|
57
|
+
end
|
58
|
+
|
50
59
|
def points
|
51
60
|
[ Point2D::Float.new(left, top),
|
52
61
|
Point2D::Float.new(right, top),
|