tabula-extractor 0.7.2-java → 0.7.4-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +4 -8
- data/bin/tabula +3 -3
- data/lib/tabula.rb +9 -5
- data/lib/tabula/entities.rb +1 -0
- data/lib/tabula/entities/cell.rb +6 -4
- data/lib/tabula/entities/has_cells.rb +22 -78
- data/lib/tabula/entities/line.rb +52 -6
- data/lib/tabula/entities/page.rb +43 -50
- data/lib/tabula/entities/ruling.rb +83 -105
- data/lib/tabula/entities/spreadsheet.rb +74 -11
- data/lib/tabula/entities/table.rb +55 -37
- data/lib/tabula/entities/tabular.rb +42 -0
- data/lib/tabula/entities/text_chunk.rb +55 -52
- data/lib/tabula/entities/text_element.rb +129 -62
- data/lib/tabula/entities/zone_entity.rb +15 -6
- data/lib/tabula/extraction.rb +114 -49
- data/lib/tabula/line_segment_detector.rb +0 -5
- data/lib/tabula/table_extractor.rb +32 -37
- data/lib/tabula/version.rb +1 -1
- data/tabula-extractor.gemspec +2 -5
- metadata +13 -95
- data/ext/COPYING +0 -661
- data/ext/Makefile.OSX +0 -18
- data/ext/Makefile.defaults +0 -9
- data/ext/Makefile.linux32 +0 -11
- data/ext/Makefile.linux64 +0 -12
- data/ext/Makefile.mingw +0 -10
- data/ext/Makefile.mingw64 +0 -10
- data/ext/liblsd-linux32.so +0 -0
- data/ext/liblsd-linux64.so +0 -0
- data/ext/liblsd.def +0 -3
- data/ext/liblsd.dll +0 -0
- data/ext/liblsd.dylib +0 -0
- data/ext/liblsd64.dll +0 -0
- data/ext/lsd.c +0 -2270
- data/ext/lsd.h +0 -283
- data/test/data/47008204D_USA.page4.pdf +0 -0
- data/test/data/560015757GV_China.page1.pdf +0 -0
- data/test/data/ClinicalResearchDisclosureReport2012Q2.pdf +0 -0
- data/test/data/GSK_2012_Q4.page437.pdf +0 -0
- data/test/data/S2MNCEbirdisland.pdf +0 -0
- data/test/data/argentina_diputados_voting_record.pdf +0 -0
- data/test/data/bo_page24.pdf +0 -0
- data/test/data/campaign_donors.pdf +0 -0
- data/test/data/frx_2012_disclosure.pdf +0 -0
- data/test/data/frx_2012_disclosure.tsv +0 -88
- data/test/data/gre.pdf +0 -0
- data/test/data/no_tables.pdf +0 -0
- data/test/data/nyc_2013fiscalreporttables.pdf +0 -0
- data/test/data/puertos1.pdf +0 -0
- data/test/data/spanning_cells.csv +0 -21
- data/test/data/spanning_cells.pdf +0 -0
- data/test/data/strongschools.pdf +0 -0
- data/test/data/sydney_disclosure_contract.pdf +0 -0
- data/test/data/tabla_subsidios.pdf +0 -0
- data/test/data/vertical_rulings_bug.pdf +0 -0
- data/test/data/vietnam3.pdf +0 -0
- data/test/data/wc2012.pdf +0 -0
- data/test/heuristic-test-set/original/560015757GV_China.page1.pdf +0 -0
- data/test/heuristic-test-set/original/S2MNCEbirdisland.pdf +0 -0
- data/test/heuristic-test-set/original/bo_page24.pdf +0 -0
- data/test/heuristic-test-set/original/campaign_donors.pdf +0 -0
- data/test/heuristic-test-set/original/cs076pct.pdf +0 -0
- data/test/heuristic-test-set/spreadsheet/47008204D_USA.page4.pdf +0 -0
- data/test/heuristic-test-set/spreadsheet/GSK_2012_Q4.page437.pdf +0 -0
- data/test/heuristic-test-set/spreadsheet/strongschools.pdf +0 -0
- data/test/heuristic-test-set/spreadsheet/tabla_subsidios.pdf +0 -0
- data/test/heuristic.rb +0 -50
- data/test/test_bin_tabula.sh +0 -7
- data/test/tests.rb +0 -603
@@ -0,0 +1,42 @@
|
|
1
|
+
module Tabula
  # Helper for declaring pseudo-interfaces: modules whose methods raise until
  # an including class provides its own implementation.
  module AbstractInterface

    # Raised when a method declared by an interface has not been implemented.
    # Subclasses NoMethodError so that existing +rescue NoMethodError+ clauses
    # also catch it.
    class InterfaceNotImplementedError < NoMethodError
    end

    # Hook invoked when AbstractInterface is included somewhere. It makes
    # +api_not_implemented+ available on +klass+ both as an instance method
    # and as a singleton method (the latter is what Tabular's stubs call).
    def self.included(klass)
      klass.send(:include, AbstractInterface::Methods)
      klass.send(:extend, AbstractInterface::Methods)
    end

    module Methods
      # Raises InterfaceNotImplementedError naming the interface method that
      # was invoked.
      #
      # +klass+ is the object the unimplemented method was called on; its
      # class name is reported in the message.
      #
      # The method name is recovered from the caller's backtrace entry. The
      # pattern accepts both the "in `meth'" form and the "in 'Klass#meth'"
      # form (single-quote opening, optional owner prefix) printed by
      # Ruby >= 3.4, so the name is not lost on newer interpreters.
      def api_not_implemented(klass)
        method_name = caller.first.to_s[/in [`'](?:.*[#.])?(.+)'/, 1]
        raise AbstractInterface::InterfaceNotImplementedError,
              "#{klass.class.name} needs to implement '#{method_name}' for interface #{self.name}!"
      end
    end
  end


  module Tabular
    include AbstractInterface
    # this is a pseudo-interface as described here:
    # http://metabates.com/2011/02/07/building-interfaces-and-abstract-classes-in-ruby/
    # Table and Spreadsheet implement this interface, so should any class
    # intended to represent tabular data from a PDF, e.g. if another extraction
    # method were created, so that Tabula GUI and API can correctly handle
    # its data.
    #
    # Every method below is a stub: it raises
    # AbstractInterface::InterfaceNotImplementedError unless the including
    # class overrides it.

    def extraction_method; Tabular.api_not_implemented(self); end

    def page; Tabular.api_not_implemented(self); end
    def rows; Tabular.api_not_implemented(self); end
    def cols; Tabular.api_not_implemented(self); end

    def to_csv; Tabular.api_not_implemented(self); end
    def to_tsv; Tabular.api_not_implemented(self); end
    def to_a; Tabular.api_not_implemented(self); end
    def to_json; Tabular.api_not_implemented(self); end
  end
end
|
@@ -8,46 +8,71 @@ module Tabula
|
|
8
8
|
# initialize a new TextChunk from a TextElement
|
9
9
|
def self.create_from_text_element(text_element)
  # Only exact TextElement instances are accepted (subclasses are rejected).
  unless text_element.instance_of?(TextElement)
    raise TypeError, "argument is not a TextElement"
  end

  # The chunk starts with the element's own geometry (top, left, width,
  # height) and contains just that element.
  new(*text_element.tlwh).tap do |chunk|
    chunk.text_elements = [text_element]
  end
end
|
15
15
|
|
16
|
-
##
|
17
|
-
# group an iterable of TextChunk into a list of Line
|
18
16
|
# Groups a list of TextChunk into a list of Line.
#
# Chunks are consumed in the order given. A new Line is started whenever the
# current line's +horizontal_overlap_ratio+ with the next chunk is below 0.01.
# Lines that look like separators are dropped: those wider than 90% of the
# bounding box of all chunks whose chunks each match SAME_CHAR_RE.
#
# Returns [] for an empty +text_chunks+; otherwise the result of calling
# +remove_sequential_spaces!+ on every remaining line.
def self.group_by_lines(text_chunks)
  return [] if text_chunks.empty?

  bbwidth = text_chunks.max_by(&:right).right - text_chunks.min_by(&:left).left

  # The test is applied to the line being closed, not to the first line.
  separator = lambda do |line|
    line.width / bbwidth > 0.9 &&
      line.text_elements.all? { |chunk| chunk.text =~ SAME_CHAR_RE }
  end

  first_line = Line.new
  first_line << text_chunks.first

  lines = text_chunks[1..-1].inject([first_line]) do |acc, chunk|
    if acc.last.horizontal_overlap_ratio(chunk) < 0.01
      # The current line is complete: discard it if it is a separator,
      # then open a fresh line for this chunk.
      acc.pop if separator.call(acc.last)
      acc << Line.new
    end
    acc.last << chunk
    acc
  end

  # The final line never hits the "line complete" branch above, so check it here.
  lines.pop if separator.call(lines.last)

  lines.map!(&:remove_sequential_spaces!)
end
|
31
44
|
|
32
45
|
##
|
33
|
-
#
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
46
|
+
# returns a list of column boundaries (x axis)
|
47
|
+
# +lines+ must be an array of lines sorted by their +top+ attribute
|
48
|
+
# Returns [] when +lines+ is empty. Otherwise the result is the list of
# distinct right edges (rounded to 2 decimals) of the column regions.
def self.column_positions(lines)
  return [] if lines.empty?

  blank = lambda { |chunk| chunk.text =~ ONLY_SPACES_RE }
  to_region = lambda { |chunk| Tabula::ZoneEntity.new(*chunk.tlwh) }

  # Seed the regions with one zone per non-blank chunk of the first line.
  seed = lines.first.text_elements.reject(&blank).map(&to_region)

  regions = lines[1..-1].inject(seed) do |column_regions, line|
    unassigned = line.text_elements.reject(&blank)

    column_regions.each do |region|
      # Overlaps are determined against the region as it is before any of
      # this line's chunks are merged into it.
      overlaps = unassigned.select { |chunk| region.horizontally_overlaps?(chunk) }
      overlaps.each { |chunk| region.merge!(chunk) }
      unassigned -= overlaps
    end

    # Chunks that matched no existing region each open a new one.
    column_regions + unassigned.map(&to_region)
  end

  regions.map { |region| region.right.round(2) }.uniq
end
|
52
77
|
|
53
78
|
##
|
@@ -59,10 +84,10 @@ module Tabula
|
|
59
84
|
|
60
85
|
def merge!(other)
|
61
86
|
if other.instance_of?(TextChunk)
|
62
|
-
if self
|
63
|
-
self.text_elements = other.text_elements + self.text_elements
|
64
|
-
else
|
87
|
+
if (self <=> other) < 0
|
65
88
|
self.text_elements = self.text_elements + other.text_elements
|
89
|
+
else
|
90
|
+
self.text_elements = other.text_elements + self.text_elements
|
66
91
|
end
|
67
92
|
end
|
68
93
|
super(other)
|
@@ -75,28 +100,6 @@ module Tabula
|
|
75
100
|
raise "Not Implemented"
|
76
101
|
end
|
77
102
|
|
78
|
-
##
|
79
|
-
# remove leading and trailing whitespace
|
80
|
-
# (changes geometry accordingly)
|
81
|
-
# TODO horrible implementation - fix.
|
82
|
-
def strip!
|
83
|
-
acc = 0
|
84
|
-
new_te = self.text_elements.drop_while { |te|
|
85
|
-
te.text == ' ' && acc += 1
|
86
|
-
}
|
87
|
-
self.left += self.text_elements.take(acc).inject(0) { |m, te| m += te.width }
|
88
|
-
self.text_elements = new_te
|
89
|
-
|
90
|
-
self.text_elements.reverse!
|
91
|
-
acc = 0
|
92
|
-
new_te = self.text_elements.drop_while { |te|
|
93
|
-
te.text == ' ' && acc += 1
|
94
|
-
}
|
95
|
-
self.right -= self.text_elements.take(acc).inject(0) { |m, te| m += te.width }
|
96
|
-
self.text_elements = new_te.reverse
|
97
|
-
self
|
98
|
-
end
|
99
|
-
|
100
103
|
# Concatenated text of every element in this chunk, in stored order.
def text
  pieces = self.text_elements.collect { |element| element.text }
  pieces.join
end
|
@@ -1,4 +1,6 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
1
2
|
module Tabula
|
3
|
+
|
2
4
|
##
|
3
5
|
# a Glyph
|
4
6
|
class TextElement < ZoneEntity
|
@@ -17,8 +19,20 @@ module Tabula
|
|
17
19
|
|
18
20
|
EMPTY = TextElement.new(0, 0, 0, 0, nil, 0, '', 0)
|
19
21
|
|
22
|
+
# True when +second+ lies strictly between first - variance and
# first + variance (both bounds excluded).
def self.within(first, second, variance)
  lower_bound = first - variance
  upper_bound = first + variance
  second < upper_bound && second > lower_bound
end
|
25
|
+
|
26
|
+
# Decides whether two spans overlap. Each span is given by a y coordinate
# and a height and covers [y - height, y].
#
# The spans overlap when the two y values are within +variance+ of each
# other, or when either y value falls inside the other span.
def self.overlap(y1, height1, y2, height2, variance=0.1)
  return true if within(y1, y2, variance)
  return true if y2 <= y1 && y2 >= y1 - height1

  y1 <= y2 && y1 >= y2 - height2
end
|
30
|
+
|
31
|
+
|
20
32
|
##
|
21
33
|
# heuristically merge an iterable of TextElement into a list of TextChunk
|
34
|
+
# lots of ideas taken from PDFBox's PDFTextStripper.writePage
|
35
|
+
# here be dragons
|
22
36
|
def self.merge_words(text_elements, options={})
|
23
37
|
default_options = {:vertical_rulings => []}
|
24
38
|
options = default_options.merge(options)
|
@@ -28,74 +42,138 @@ module Tabula
|
|
28
42
|
|
29
43
|
text_chunks = [TextChunk.create_from_text_element(text_elements.shift)]
|
30
44
|
|
45
|
+
|
46
|
+
previousAveCharWidth = text_chunks.first.width
|
47
|
+
endOfLastTextX = text_chunks.first.right
|
48
|
+
maxYForLine = text_chunks.first.bottom
|
49
|
+
maxHeightForLine = text_chunks.first.height
|
50
|
+
minYTopForLine = text_chunks.first.top
|
51
|
+
lastWordSpacing = -1
|
52
|
+
sp = nil
|
53
|
+
|
31
54
|
text_elements.inject(text_chunks) do |chunks, char|
|
55
|
+
|
32
56
|
current_chunk = chunks.last
|
33
57
|
prev_char = current_chunk.text_elements.last
|
34
58
|
|
59
|
+
# Resets the average character width when we see a change in font
|
60
|
+
# or a change in the font size
|
61
|
+
if (char.font != prev_char.font) || (char.font_size != prev_char.font_size)
|
62
|
+
previousAveCharWidth = -1;
|
63
|
+
end
|
64
|
+
|
35
65
|
# if same char AND overlapped, skip
|
36
|
-
if prev_char.text == char.text && prev_char.overlaps_with_ratio?(char, 0.
|
37
|
-
chunks
|
66
|
+
if (prev_char.text == char.text) && prev_char.overlaps_with_ratio?(char, 0.5)
|
67
|
+
next chunks
|
68
|
+
end
|
69
|
+
|
70
|
+
# if char is a space that overlaps with the prev_char, skip
|
71
|
+
if char.text == ' ' && prev_char.left == char.left && prev_char.top == char.top
|
72
|
+
next chunks
|
73
|
+
end
|
74
|
+
|
75
|
+
# any vertical ruling goes across prev_char and char?
|
76
|
+
across_vertical_ruling = vertical_ruling_locations.any? { |loc|
|
77
|
+
prev_char.left < loc && char.left > loc
|
78
|
+
}
|
79
|
+
|
80
|
+
# Estimate the expected width of the space based on the
|
81
|
+
# space character with some margin.
|
82
|
+
wordSpacing = char.width_of_space
|
83
|
+
deltaSpace = 0
|
84
|
+
deltaSpace = if (wordSpacing.nan? || wordSpacing == 0)
|
85
|
+
::Float::MAX
|
86
|
+
elsif lastWordSpacing < 0
|
87
|
+
wordSpacing * 0.5 # 0.5 == spacingTolerance
|
88
|
+
else
|
89
|
+
((wordSpacing + lastWordSpacing) / 2.0) * 0.5
|
90
|
+
end
|
91
|
+
|
92
|
+
# Estimate the expected width of the space based on the
|
93
|
+
# average character width with some margin. This calculation does not
|
94
|
+
# make a true average (average of averages) but we found that it gave the
|
95
|
+
# best results after numerous experiments. Based on experiments we also found that
|
96
|
+
# .3 worked well.
|
97
|
+
averageCharWidth = if previousAveCharWidth < 0
|
98
|
+
char.width / char.text.size
|
99
|
+
else
|
100
|
+
(previousAveCharWidth + (char.width / char.text.size)) / 2.0
|
101
|
+
end
|
102
|
+
deltaCharWidth = averageCharWidth * 0.3 # 0.3 == averageCharTolerance
|
103
|
+
|
104
|
+
# Compares the values obtained by the average method and the wordSpacing method and picks
|
105
|
+
# the smaller number.
|
106
|
+
expectedStartOfNextWordX = -::Float::MAX
|
107
|
+
|
108
|
+
if endOfLastTextX != -1
|
109
|
+
expectedStartOfNextWordX = endOfLastTextX + [deltaCharWidth, deltaSpace].min
|
110
|
+
end
|
111
|
+
|
112
|
+
sameLine = true
|
113
|
+
if !overlap(char.bottom, char.height, maxYForLine, maxHeightForLine)
|
114
|
+
endOfLastTextX = -1
|
115
|
+
expectedStartOfNextWordX = -::Float::MAX
|
116
|
+
maxYForLine = -::Float::MAX
|
117
|
+
maxHeightForLine = -1
|
118
|
+
minYTopForLine = ::Float::MAX
|
119
|
+
sameLine = false
|
120
|
+
end
|
121
|
+
|
122
|
+
endOfLastTextX = char.right
|
123
|
+
# should we add a space?
|
124
|
+
if !across_vertical_ruling \
|
125
|
+
&& sameLine \
|
126
|
+
&& expectedStartOfNextWordX < char.left \
|
127
|
+
&& !prev_char.text.end_with?(' ')
|
128
|
+
|
129
|
+
sp = self.new(prev_char.top,
|
130
|
+
prev_char.right,
|
131
|
+
expectedStartOfNextWordX - prev_char.right,
|
132
|
+
prev_char.height,
|
133
|
+
prev_char.font,
|
134
|
+
prev_char.font_size,
|
135
|
+
' ',
|
136
|
+
prev_char.width_of_space)
|
137
|
+
current_chunk << sp
|
38
138
|
else
|
39
|
-
|
40
|
-
across_vertical_ruling = vertical_ruling_locations.any? { |loc|
|
41
|
-
prev_char.left < loc && char.left > loc
|
42
|
-
}
|
43
|
-
|
44
|
-
# should we add a space?
|
45
|
-
if (prev_char.text != " ") && (char.text != " ") \
|
46
|
-
&& !across_vertical_ruling \
|
47
|
-
&& prev_char.should_add_space?(char)
|
48
|
-
|
49
|
-
sp = self.new(prev_char.top,
|
50
|
-
prev_char.right,
|
51
|
-
prev_char.width_of_space,
|
52
|
-
prev_char.width_of_space, # width == height for spaces
|
53
|
-
prev_char.font,
|
54
|
-
prev_char.font_size,
|
55
|
-
' ',
|
56
|
-
prev_char.width_of_space)
|
57
|
-
chunks.last << sp
|
58
|
-
prev_char = sp
|
59
|
-
end
|
60
|
-
|
61
|
-
# should_merge? isn't aware of vertical rulings, so even if two text elements are close enough
|
62
|
-
# that they ought to be merged by that account.
|
63
|
-
# we still shouldn't merge them if the two elements are on opposite sides of a vertical ruling.
|
64
|
-
# Why are both of those `.left`?, you might ask. The intuition is that a letter
|
65
|
-
# that starts on the left of a vertical ruling ought to remain on the left of it.
|
66
|
-
if !across_vertical_ruling && prev_char.should_merge?(char)
|
67
|
-
chunks.last << char
|
68
|
-
else
|
69
|
-
# create a new chunk
|
70
|
-
chunks << TextChunk.create_from_text_element(char)
|
71
|
-
end
|
72
|
-
chunks
|
139
|
+
sp = nil
|
73
140
|
end
|
74
|
-
end
|
75
|
-
end
|
76
141
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
142
|
+
maxYForLine = [char.bottom, maxYForLine].max
|
143
|
+
maxHeightForLine = [maxHeightForLine, char.height].max
|
144
|
+
minYTopForLine = [minYTopForLine, char.top].min
|
145
|
+
|
146
|
+
# if sameLine
|
147
|
+
# puts "prev: #{prev_char.text} - char: #{char.text} - diff: #{char.left - prev_char.right} - space: #{[deltaCharWidth, deltaSpace].min} - spacing: #{wordSpacing} - sp: #{!sp.nil?}"
|
148
|
+
# else
|
149
|
+
# puts
|
150
|
+
# end
|
82
151
|
|
83
|
-
# more or less returns True if (tolerance <= distance < CHARACTER_DISTANCE_THRESHOLD*tolerance)
|
84
|
-
def should_add_space?(other)
|
85
|
-
raise TypeError, "argument is not a TextElement" unless other.instance_of?(TextElement)
|
86
152
|
|
87
|
-
|
153
|
+
dist = (char.left - (sp ? sp.right : prev_char.right))
|
88
154
|
|
89
|
-
|
90
|
-
|
91
|
-
|
155
|
+
if !across_vertical_ruling \
|
156
|
+
&& sameLine \
|
157
|
+
&& (dist < 0 ? current_chunk.vertically_overlaps?(char) : dist < wordSpacing)
|
158
|
+
current_chunk << char
|
159
|
+
else
|
160
|
+
# create a new chunk
|
161
|
+
chunks << TextChunk.create_from_text_element(char)
|
162
|
+
end
|
163
|
+
|
164
|
+
lastWordSpacing = wordSpacing
|
165
|
+
previousAveCharWidth = sp ? (averageCharWidth + sp.width) / 2.0 : averageCharWidth
|
166
|
+
|
167
|
+
chunks
|
168
|
+
end
|
92
169
|
end
|
93
170
|
|
94
171
|
##
|
95
172
|
# merge this TextElement with another (adjust size and text content accordingly)
|
96
173
|
def merge!(other)
|
97
174
|
raise TypeError, "argument is not a TextElement" unless other.instance_of?(TextElement)
|
98
|
-
|
175
|
+
|
176
|
+
if (self <=> other) < 0
|
99
177
|
self.text = other.text + self.text
|
100
178
|
else
|
101
179
|
self.text << other.text
|
@@ -115,16 +193,5 @@ module Tabula
|
|
115
193
|
self.text.strip == other.text.strip
|
116
194
|
end
|
117
195
|
|
118
|
-
# sort in lexicographic (reading) order
|
119
|
-
def <=>(other)
|
120
|
-
if self.vertically_overlaps?(other)
|
121
|
-
self.left <=> other.left
|
122
|
-
elsif self.top < other.top
|
123
|
-
-1
|
124
|
-
else
|
125
|
-
1
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
196
|
end
|
130
197
|
end
|
@@ -4,6 +4,7 @@ module Tabula
|
|
4
4
|
|
5
5
|
class ZoneEntity < java.awt.geom.Rectangle2D::Float
|
6
6
|
|
7
|
+
# TODO used? remove if not.
|
7
8
|
attr_accessor :texts
|
8
9
|
|
9
10
|
def initialize(top, left, width, height)
|
@@ -11,6 +12,7 @@ module Tabula
|
|
11
12
|
if left && top && width && height
|
12
13
|
self.java_send :setRect, [Java::float, Java::float, Java::float, Java::float,], left, top, width, height
|
13
14
|
end
|
15
|
+
# TODO used? remove if not.
|
14
16
|
self.texts = []
|
15
17
|
end
|
16
18
|
|
@@ -21,18 +23,21 @@ module Tabula
|
|
21
23
|
self.height = [self.bottom, other.bottom].max - top
|
22
24
|
|
23
25
|
self.java_send :setRect, [Java::float, Java::float, Java::float, Java::float,], self.left, self.top, self.width, self.height
|
26
|
+
self
|
24
27
|
end
|
25
28
|
|
26
29
|
##
|
27
30
|
# default sorting order for ZoneEntity objects
|
28
31
|
# is lexicographical (left to right, top to bottom)
|
29
32
|
def <=>(other)
  # Two entities count as being on the same line when their bottoms are
  # less than 0.1 apart, or when either bottom falls within the other's
  # top..bottom range.
  same_line = (self.bottom - other.bottom).abs < 0.1 ||
              (other.bottom >= self.top && other.bottom <= self.bottom) ||
              (self.bottom >= other.top && self.bottom <= other.bottom)

  # Same line: order left to right. Otherwise: order by bottom edge.
  return self.left <=> other.left if same_line

  self.bottom <=> other.bottom
end
|
37
42
|
|
38
43
|
def to_json(options={})
|
@@ -47,6 +52,10 @@ module Tabula
|
|
47
52
|
[top, left, bottom, right]
|
48
53
|
end
|
49
54
|
|
55
|
+
# Geometry as [top, left, width, height] -- the same order in which
# ZoneEntity#initialize takes its arguments, so the result can be splatted
# into a constructor call.
def tlwh
  [self.top, self.left, self.width, self.height]
end
|
58
|
+
|
50
59
|
def points
|
51
60
|
[ Point2D::Float.new(left, top),
|
52
61
|
Point2D::Float.new(right, top),
|