tabula-rb 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +39 -0
- data/CHANGELOG.md +59 -0
- data/LICENSE +21 -0
- data/README.md +176 -0
- data/Rakefile +28 -0
- data/exe/tabula +7 -0
- data/lib/tabula/algorithms/cohen_sutherland_clipping.rb +94 -0
- data/lib/tabula/algorithms/projection_profile.rb +109 -0
- data/lib/tabula/cli.rb +271 -0
- data/lib/tabula/configuration.rb +119 -0
- data/lib/tabula/core/point.rb +60 -0
- data/lib/tabula/core/rectangle.rb +218 -0
- data/lib/tabula/core/ruling.rb +303 -0
- data/lib/tabula/core/spatial_index.rb +120 -0
- data/lib/tabula/detectors/detection_algorithm.rb +34 -0
- data/lib/tabula/detectors/nurminen_detection_algorithm.rb +211 -0
- data/lib/tabula/detectors/spreadsheet_detection_algorithm.rb +142 -0
- data/lib/tabula/extractors/basic_extraction_algorithm.rb +168 -0
- data/lib/tabula/extractors/extraction_algorithm.rb +34 -0
- data/lib/tabula/extractors/spreadsheet_extraction_algorithm.rb +299 -0
- data/lib/tabula/pdf/object_extractor.rb +400 -0
- data/lib/tabula/pdf/page.rb +230 -0
- data/lib/tabula/pdf/text_stripper.rb +150 -0
- data/lib/tabula/table/cell.rb +110 -0
- data/lib/tabula/table/table.rb +184 -0
- data/lib/tabula/text/line.rb +133 -0
- data/lib/tabula/text/text_chunk.rb +185 -0
- data/lib/tabula/text/text_element.rb +120 -0
- data/lib/tabula/version.rb +5 -0
- data/lib/tabula/writers/csv_writer.rb +49 -0
- data/lib/tabula/writers/json_writer.rb +41 -0
- data/lib/tabula/writers/markdown_writer.rb +71 -0
- data/lib/tabula/writers/tsv_writer.rb +35 -0
- data/lib/tabula/writers/writer.rb +39 -0
- data/lib/tabula.rb +160 -0
- data/mise.toml +2 -0
- data/tabula-rb.gemspec +44 -0
- metadata +115 -0
data/lib/tabula/cli.rb
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'optparse'
|
|
4
|
+
|
|
5
|
+
module Tabula
  # Command-line interface for tabula.
  #
  # Parses command-line options with OptionParser, runs Tabula.extract on
  # every PDF named on the command line and writes the resulting tables in
  # the selected format to stdout or to the file given with --output.
  class CLI
    # Output formats accepted by -f/--format.
    FORMATS = %w[CSV TSV JSON MARKDOWN].freeze

    # Names of the four --area components, in the order they are supplied.
    AREA_LABELS = %w[top left bottom right].freeze

    # A plain decimal number: optional sign, digits, optional fraction.
    NUMERIC_PATTERN = /\A-?\d+(\.\d+)?\z/

    private_constant :AREA_LABELS, :NUMERIC_PATTERN

    # Convenience entry point: build a CLI and run it.
    #
    # @param args [Array<String>] command-line arguments
    # @return [Integer] process exit status (0 on success, 1 on failure)
    def self.run(args)
      new.run(args)
    end

    def initialize
      @options = default_options
    end

    # Parse +args+ and process the PDF files they name.
    #
    # @param args [Array<String>] command-line arguments
    # @return [Integer] 0 on success, 1 when any error was reported
    def run(args)
      parser = build_parser
      files = parser.parse(args)

      if @options[:help]
        puts parser
        return 0
      end

      if @options[:version]
        puts "tabula #{Tabula::VERSION}"
        return 0
      end

      if files.empty?
        warn 'Error: No PDF file specified'
        warn parser
        return 1
      end

      process_files(files)
    rescue OptionParser::ParseError => e
      # ParseError is the common superclass of InvalidOption, MissingArgument,
      # InvalidArgument, AmbiguousOption, ... so every malformed command line
      # gets the usage text, not only unknown switches.
      warn "Error: #{e.message}"
      warn parser
      1
    rescue Tabula::FileNotFoundError => e
      warn "Error: #{e.message}"
      warn 'Please check that the file path is correct and the file exists.'
      1
    rescue Tabula::InvalidOptionsError => e
      warn "Error: Invalid option - #{e.message}"
      warn 'Use --help to see available options and their valid values.'
      1
    rescue Tabula::InvalidPDFError => e
      warn "Error: Invalid PDF file - #{e.message}"
      1
    rescue Tabula::PasswordRequiredError => e
      warn "Error: PDF is password protected - #{e.message}"
      warn 'Use the -s/--password option to provide the password.'
      1
    rescue StandardError => e
      warn "Error: #{e.message}"
      warn e.backtrace.first(5).join("\n") if @options[:debug]
      1
    end

    private

    # @return [Hash] option values used when a switch is not given
    def default_options
      {
        area: nil,
        columns: nil,
        format: 'CSV',
        guess: false,
        lattice: false,
        stream: false,
        pages: nil,
        output: nil,
        password: nil,
        help: false,
        version: false,
        debug: false
      }
    end

    # Build the OptionParser; each switch stores its value in @options.
    #
    # @return [OptionParser]
    def build_parser
      OptionParser.new do |opts|
        opts.banner = 'Usage: tabula [OPTIONS] <pdf_file> [<pdf_file> ...]'
        opts.separator ''
        opts.separator 'Extract tables from PDF files'
        opts.separator ''
        opts.separator 'Options:'

        opts.on('-a', '--area AREA', 'Extraction area (top,left,bottom,right in points)') do |v|
          @options[:area] = parse_area(v)
        end

        opts.on('-c', '--columns COLUMNS', 'Column boundaries (comma-separated x coordinates)') do |v|
          @options[:columns] = parse_columns(v)
        end

        opts.on('-f', '--format FORMAT', FORMATS, "Output format: #{FORMATS.join(', ')} (default: CSV)") do |v|
          @options[:format] = v.upcase
        end

        opts.on('-g', '--guess', 'Guess table areas (use detection algorithm)') do
          @options[:guess] = true
        end

        # --lattice and --stream are mutually exclusive; the last one given wins.
        opts.on('-l', '--lattice', 'Force lattice mode (use ruling lines)') do
          @options[:lattice] = true
          @options[:stream] = false
        end

        opts.on('-t', '--stream', 'Force stream mode (use text positions)') do
          @options[:stream] = true
          @options[:lattice] = false
        end

        opts.on('-p', '--pages PAGES', "Pages to extract (e.g., '1,2,3' or '1-5' or 'all')") do |v|
          @options[:pages] = parse_pages(v)
        end

        opts.on('-o', '--output FILE', 'Output file (default: stdout)') do |v|
          @options[:output] = v
        end

        opts.on('-s', '--password PASSWORD', 'PDF password') do |v|
          @options[:password] = v
        end

        opts.on('--debug', 'Show debug information') do
          @options[:debug] = true
        end

        opts.on('-v', '--version', 'Show version') do
          @options[:version] = true
        end

        opts.on('-h', '--help', 'Show this help') do
          @options[:help] = true
        end
      end
    end

    # Parse the --area value.
    #
    # @param value [String] "top,left,bottom,right"
    # @return [Array<Float>] the four coordinates
    # @raise [Tabula::InvalidOptionsError] on a wrong count or a non-numeric part
    def parse_area(value)
      parts = value.split(',').map(&:strip)
      unless parts.size == 4
        raise Tabula::InvalidOptionsError, "Area must have 4 values: top,left,bottom,right (got #{parts.size} values)"
      end

      parts.each_with_index do |part, idx|
        next if part.match?(NUMERIC_PATTERN)

        raise Tabula::InvalidOptionsError, "Area #{AREA_LABELS[idx]} must be numeric, got '#{part}'"
      end

      parts.map(&:to_f)
    end

    # Parse the --columns value, applying the same numeric check as --area
    # so that a typo is reported instead of silently becoming 0.0.
    #
    # @param value [String] comma-separated x coordinates
    # @return [Array<Float>]
    # @raise [Tabula::InvalidOptionsError] when a part is not numeric
    def parse_columns(value)
      parts = value.split(',').map(&:strip)
      parts.each do |part|
        next if part.match?(NUMERIC_PATTERN)

        raise Tabula::InvalidOptionsError, "Column boundary must be numeric, got '#{part}'"
      end

      parts.map(&:to_f)
    end

    # Parse the --pages value.
    #
    # @param value [String] 'all', or comma-separated page numbers and ranges
    # @return [Array<Integer>, nil] sorted unique page numbers; nil for 'all'
    # @raise [Tabula::InvalidOptionsError] on malformed or empty input
    def parse_pages(value)
      return nil if value.strip.downcase == 'all'

      pages = value.split(',').flat_map do |part|
        part = part.strip
        part.include?('-') ? expand_page_range(part) : [parse_page_number(part)]
      end

      if pages.empty?
        raise Tabula::InvalidOptionsError, "No pages specified. Use format like '1,2,3' or '1-5' or 'all'"
      end

      pages.uniq.sort
    end

    # Expand a "start-end" range into the list of page numbers it covers.
    def expand_page_range(part)
      bounds = part.split('-')
      unless bounds.size == 2 && bounds.all? { |bound| bound.match?(/\A\d+\z/) }
        raise Tabula::InvalidOptionsError, "Invalid page range: '#{part}'. Use format like '1-5'"
      end

      first_page, last_page = bounds.map(&:to_i)
      if first_page <= 0 || last_page <= 0
        raise Tabula::InvalidOptionsError, "Page numbers must be positive integers, got '#{part}'"
      end
      if first_page > last_page
        raise Tabula::InvalidOptionsError, "Invalid page range: '#{part}'. Start must be less than or equal to end"
      end

      (first_page..last_page).to_a
    end

    # Convert a single page token to a positive Integer.
    def parse_page_number(part)
      unless part.match?(/\A\d+\z/)
        raise Tabula::InvalidOptionsError, "Invalid page number: '#{part}'. Page numbers must be positive integers"
      end

      page_num = part.to_i
      raise Tabula::InvalidOptionsError, "Page numbers must be positive integers, got '#{part}'" if page_num <= 0

      page_num
    end

    # Process every file, writing to --output when given, otherwise stdout.
    # A missing file is reported and skipped; the remaining files still run.
    #
    # @param files [Array<String>] paths of the PDFs to process
    # @return [Integer] 1 if any file was missing, otherwise 0
    def process_files(files)
      output_io = @options[:output] ? File.open(@options[:output], 'w') : $stdout
      had_error = false
      wrote_output = false

      begin
        files.each do |file|
          unless File.exist?(file)
            warn "Error: File not found: #{file}"
            warn 'Please check that the file path is correct and the file exists.'
            had_error = true
            next
          end

          # The blank separator is requested only once something has already
          # been written, so skipped or table-less files leave no stray lines.
          wrote_output = true if process_file(file, output_io, separator: wrote_output)
        end
      ensure
        output_io.close if @options[:output]
      end

      had_error ? 1 : 0
    end

    # Extract the tables of one file and write them to +output_io+.
    #
    # @param file [String] path of the PDF
    # @param output_io [IO] destination stream
    # @param separator [Boolean] emit a blank line before the tables
    # @return [Boolean] true when tables were written
    def process_file(file, output_io, separator: false)
      tables = Tabula.extract(file, **build_extraction_options)

      if tables.empty?
        warn "No tables found in #{file}" if @options[:debug]
        return false
      end

      output_io.puts if separator # Separate multiple files
      write_tables(tables, output_io)
      true
    end

    # Translate the parsed CLI options into keyword arguments for Tabula.extract.
    # Optional keys (:pages, :area, :columns) are included only when set.
    #
    # @return [Hash]
    def build_extraction_options
      options = {
        password: @options[:password],
        guess: @options[:guess]
      }

      # Set extraction method
      options[:method] = if @options[:lattice]
                           :lattice
                         elsif @options[:stream]
                           :stream
                         else
                           :auto
                         end

      %i[pages area columns].each do |key|
        options[key] = @options[key] if @options[key]
      end

      options
    end

    # Write +tables+ with the writer matching the selected format.
    def write_tables(tables, output_io)
      case @options[:format]
      when 'CSV'
        Writers::CSVWriter.new.write(tables, output_io)
      when 'TSV'
        Writers::TSVWriter.new.write(tables, output_io)
      when 'JSON'
        Writers::JSONWriter.new(pretty: true).write(tables, output_io)
      when 'MARKDOWN'
        Writers::MarkdownWriter.new.write(tables, output_io)
      end
    end
  end
end
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Tabula
  # Holds the tunable thresholds used during extraction.
  # Every value is a plain accessor and can be changed to tune behavior.
  class Configuration
    # --- Ruling Detection ---

    # Tolerance for determining if a ruling is horizontal or vertical (in points)
    # Lines with less than this difference in position are considered aligned
    attr_accessor :orientation_tolerance

    # Tolerance for ruling intersection detection (in points)
    attr_accessor :intersection_tolerance

    # Maximum thickness of a filled rectangle to be treated as a ruling line (in points)
    attr_accessor :ruling_thickness_threshold

    # --- Text Element Merging ---

    # Multiplier for space width when determining word boundaries
    # Lower values = more aggressive word merging
    attr_accessor :word_gap_multiplier

    # Multiplier for determining line boundaries
    attr_accessor :line_gap_multiplier

    # --- Cell Detection ---

    # Minimum number of cells required for a valid table
    attr_accessor :min_cells

    # Minimum dimension (width or height) for a valid table region (in points)
    attr_accessor :min_table_dimension

    # Tolerance for cell corner detection (in points)
    attr_accessor :cell_tolerance

    # --- Table Detection ---

    # Minimum number of rows required for table detection
    attr_accessor :min_rows

    # Overlap threshold for merging duplicate table detections
    attr_accessor :overlap_threshold

    # Threshold for determining if a table has valid row/column ratio
    attr_accessor :tabular_ratio_threshold

    # Tolerance for clustering text edges during detection (in points)
    attr_accessor :edge_clustering_tolerance

    # Padding around detected table areas (in points)
    attr_accessor :detection_padding

    # --- Rectangle Comparison ---

    # Threshold for vertical overlap comparison
    attr_accessor :vertical_comparison_threshold

    # Populate every setting with its default value.
    def initialize
      # Ruling detection
      @orientation_tolerance = 1.0
      @intersection_tolerance = 1.0
      @ruling_thickness_threshold = 8.0

      # Text element merging
      @word_gap_multiplier = 0.5
      @line_gap_multiplier = 0.5

      # Cell detection
      @min_cells = 4
      @min_table_dimension = 10.0
      @cell_tolerance = 2.0

      # Table detection
      @min_rows = 2
      @overlap_threshold = 0.9
      @tabular_ratio_threshold = 0.65
      @edge_clustering_tolerance = 8.0
      @detection_padding = 2.0

      # Rectangle comparison
      @vertical_comparison_threshold = 0.4
    end

    # Create a copy with overrides; the receiver is left untouched.
    #
    # @param overrides [Hash] setting name => new value
    # @return [Configuration] the modified copy
    # @raise [NoMethodError] if a key does not name a public setting
    def with(**overrides)
      dup.tap do |config|
        # public_send keeps the override limited to the public setters.
        overrides.each { |key, value| config.public_send("#{key}=", value) }
      end
    end
  end

  # Default configuration instance
  @default_configuration = Configuration.new

  class << self
    # Get the default configuration
    # @return [Configuration]
    def configuration
      @default_configuration
    end

    # Set the default configuration
    # @param config [Configuration]
    def configuration=(config)
      @default_configuration = config
    end

    # Configure with a block. Without a block the current configuration is
    # returned instead of raising LocalJumpError.
    #
    # @yield [Configuration] the default configuration, for in-place changes
    # @return [Object] the block's value, or the configuration when no block is given
    def configure
      return configuration unless block_given?

      yield configuration
    end
  end
end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Tabula
  # Represents a 2D point with x and y coordinates.
  # Both coordinates are always stored as Floats.
  class Point
    attr_reader :x, :y

    # @param x [#to_f] horizontal coordinate
    # @param y [#to_f] vertical coordinate
    def initialize(x, y)
      self.x = x
      self.y = y
    end

    # The setters coerce with to_f exactly like the constructor does.
    # Without this, assigning an Integer would leave the point == to its
    # Float twin but with a different #hash, breaking Hash/Set lookups.
    def x=(value)
      @x = value.to_f
    end

    def y=(value)
      @y = value.to_f
    end

    # @return [Array<Float>] [x, y]
    def to_a
      [x, y]
    end

    # Two points are equal when both coordinates are equal.
    def ==(other)
      return false unless other.is_a?(Point)

      x == other.x && y == other.y
    end
    alias eql? ==

    # Hash consistent with #== so points can be used as Hash keys.
    def hash
      [x, y].hash
    end

    # @return [Float] Euclidean distance to +other+
    def distance_to(other)
      Math.sqrt(distance_squared_to(other))
    end

    # @return [Float] squared Euclidean distance to +other+
    def distance_squared_to(other)
      ((x - other.x)**2) + ((y - other.y)**2)
    end

    # Component-wise sum with another point.
    def +(other)
      Point.new(x + other.x, y + other.y)
    end

    # Component-wise difference with another point.
    def -(other)
      Point.new(x - other.x, y - other.y)
    end

    # Scale both coordinates by the number +other+.
    def *(other)
      Point.new(x * other, y * other)
    end

    # Divide both coordinates by the number +other+.
    def /(other)
      Point.new(x / other, y / other)
    end

    def to_s
      "Point(#{x}, #{y})"
    end

    def inspect
      to_s
    end
  end
end
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Tabula
  # Represents a rectangle with position and dimensions.
  #
  # The rectangle is stored as top/left plus width/height, all Floats;
  # +bottom+ is +top + height+ and +right+ is +left + width+, so +top+ is
  # the smaller y value.
  # NOTE(review): the previous header described a bottom-left PDF origin,
  # which does not match this arithmetic - confirm the coordinate space
  # callers pass in.
  class Rectangle
    include Comparable

    # Threshold for vertical overlap comparison (40% overlap)
    VERTICAL_COMPARISON_THRESHOLD = 0.4

    attr_reader :top, :left, :width, :height

    def initialize(top, left, width, height)
      self.top = top
      self.left = left
      self.width = width
      self.height = height
    end

    # Create rectangle from bounds [top, left, bottom, right]
    def self.from_bounds(top, left, bottom, right)
      new(top, left, right - left, bottom - top)
    end

    # Create rectangle from two points (any two opposite corners)
    def self.from_points(p1, p2)
      top, bottom = [p1.y, p2.y].minmax
      left, right = [p1.x, p2.x].minmax
      from_bounds(top, left, bottom, right)
    end

    # Compute bounding box of multiple rectangles
    #
    # @return [Rectangle, nil] nil when +rectangles+ is empty
    def self.bounding_box_of(rectangles)
      return nil if rectangles.empty?

      top = rectangles.map(&:top).min
      left = rectangles.map(&:left).min
      bottom = rectangles.map(&:bottom).max
      right = rectangles.map(&:right).max

      from_bounds(top, left, bottom, right)
    end

    # The setters coerce with to_f exactly like the constructor does, so a
    # rectangle that is == to another always has the same #hash.
    def top=(value)
      @top = value.to_f
    end

    def left=(value)
      @left = value.to_f
    end

    def width=(value)
      @width = value.to_f
    end

    def height=(value)
      @height = value.to_f
    end

    def bottom
      top + height
    end

    # Move the bottom edge by changing the height; +top+ stays fixed.
    def bottom=(value)
      @height = value.to_f - top
    end

    def right
      left + width
    end

    # Move the right edge by changing the width; +left+ stays fixed.
    def right=(value)
      @width = value.to_f - left
    end

    # Alias-style accessors: x is +left+, y is +top+.
    def x
      left
    end

    def x=(value)
      self.left = value
    end

    def y
      top
    end

    def y=(value)
      self.top = value
    end

    def area
      width * height
    end

    # @return [Point] the center of the rectangle
    def center
      Point.new(left + (width / 2.0), top + (height / 2.0))
    end

    # @return [Array<Float>] [top, left, bottom, right]
    def bounds
      [top, left, bottom, right]
    end

    # @return [Array<Point>] corners in the order
    #   (left, top), (right, top), (right, bottom), (left, bottom)
    def points
      [
        Point.new(left, top),
        Point.new(right, top),
        Point.new(right, bottom),
        Point.new(left, bottom)
      ]
    end

    # Calculate vertical overlap with another rectangle (0 when disjoint)
    def vertical_overlap(other)
      [0, [bottom, other.bottom].min - [top, other.top].max].max
    end

    # Calculate horizontal overlap with another rectangle (0 when disjoint)
    def horizontal_overlap(other)
      [0, [right, other.right].min - [left, other.left].max].max
    end

    # Check if rectangles overlap vertically: the overlap, relative to the
    # smaller of the two heights, must reach +threshold+.
    def vertically_overlaps?(other, threshold = VERTICAL_COMPARISON_THRESHOLD)
      overlap = vertical_overlap(other)
      min_height = [height, other.height].min
      return false if min_height.zero?

      (overlap / min_height) >= threshold
    end

    # Check if rectangles overlap horizontally: the overlap, relative to the
    # smaller of the two widths, must exceed +threshold+.
    def horizontally_overlaps?(other, threshold = 0.0)
      overlap = horizontal_overlap(other)
      min_width = [width, other.width].min
      # NOTE(review): a zero-width rectangle with zero overlap is reported as
      # overlapping regardless of its position - confirm this is intended.
      return true if min_width.zero? && overlap.zero?
      return false if min_width.zero?

      (overlap / min_width) > threshold
    end

    # Calculate overlap ratio (intersection area / union area)
    def overlap_ratio(other)
      intersection_area = vertical_overlap(other) * horizontal_overlap(other)
      return 0.0 if intersection_area.zero?

      union_area = area + other.area - intersection_area
      return 0.0 if union_area.zero?

      intersection_area / union_area
    end

    # Check if this rectangle contains a point (edges included)
    def contains_point?(point)
      point.x.between?(left, right) && point.y.between?(top, bottom)
    end

    # Check if this rectangle fully contains another (edges may coincide)
    def contains?(other)
      left <= other.left && right >= other.right && top <= other.top && bottom >= other.bottom
    end

    # Check if this rectangle intersects another (touching edges count)
    def intersects?(other)
      !(other.left > right || other.right < left || other.top > bottom || other.bottom < top)
    end

    # Merge this rectangle with another, returning the bounding box
    def merge(other)
      Rectangle.from_bounds(
        [top, other.top].min,
        [left, other.left].min,
        [bottom, other.bottom].max,
        [right, other.right].max
      )
    end

    # Merge in place
    #
    # @return [self]
    def merge!(other)
      merged = merge(other)
      @top = merged.top
      @left = merged.left
      @width = merged.width
      @height = merged.height
      self
    end

    # Return intersection rectangle, or nil if no intersection
    def intersection(other)
      return nil unless intersects?(other)

      Rectangle.from_bounds(
        [top, other.top].max,
        [left, other.left].max,
        [bottom, other.bottom].min,
        [right, other.right].min
      )
    end

    # Rectangles are equal when position and size are all equal.
    def ==(other)
      return false unless other.is_a?(Rectangle)

      top == other.top && left == other.left && width == other.width && height == other.height
    end
    alias eql? ==

    # Hash consistent with #== so rectangles can be used as Hash keys.
    def hash
      [top, left, width, height].hash
    end

    # Always returns a plain Rectangle with the same position and size.
    def dup
      Rectangle.new(top, left, width, height)
    end

    def to_s
      "Rectangle[top=#{top}, left=#{left}, width=#{width}, height=#{height}]"
    end

    def inspect
      to_s
    end

    # Comparator for sorting by position (top to bottom, left to right).
    # Size is not considered, so <=> can be 0 for rectangles that are not ==.
    #
    # @return [Integer, nil] nil when +other+ exposes no top/left or the
    #   coordinates cannot be ordered, as Comparable expects
    def <=>(other)
      return nil unless other.respond_to?(:top) && other.respond_to?(:left)

      vertical = top <=> other.top
      return vertical unless vertical&.zero?

      left <=> other.left
    end
  end
end
|