ruby_pager 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.idea/.rakeTasks +7 -0
- data/.idea/inspectionProfiles/Project_Default.xml +6 -0
- data/.idea/misc.xml +4 -0
- data/.idea/modules.xml +8 -0
- data/.idea/ruby_pager.iml +60 -0
- data/.idea/vcs.xml +6 -0
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +101 -0
- data/LICENSE.txt +21 -0
- data/README.md +43 -0
- data/Rakefile +6 -0
- data/TODO.txt +14 -0
- data/bin/baseline_noise +43 -0
- data/bin/console +14 -0
- data/bin/line_edit +56 -0
- data/bin/page_create +51 -0
- data/bin/region_edit +55 -0
- data/bin/setup +8 -0
- data/lib/ruby_pager/application_logger.rb +15 -0
- data/lib/ruby_pager/coord.rb +50 -0
- data/lib/ruby_pager/coords.rb +81 -0
- data/lib/ruby_pager/extendmatrix2.rb +138 -0
- data/lib/ruby_pager/gaussian_noise.rb +36 -0
- data/lib/ruby_pager/histogram.rb +102 -0
- data/lib/ruby_pager/image.rb +338 -0
- data/lib/ruby_pager/image_data.rb +53 -0
- data/lib/ruby_pager/intersect.rb +33 -0
- data/lib/ruby_pager/metadata.rb +56 -0
- data/lib/ruby_pager/page.rb +167 -0
- data/lib/ruby_pager/reading_order.rb +18 -0
- data/lib/ruby_pager/text_line.rb +72 -0
- data/lib/ruby_pager/text_region.rb +130 -0
- data/lib/ruby_pager/version.rb +3 -0
- data/lib/ruby_pager/xml.rb +90 -0
- data/lib/ruby_pager.rb +18 -0
- data/no_lines.xml +14 -0
- data/no_regions.xml +11 -0
- data/one_line.xml +21 -0
- data/ruby_pager.gemspec +45 -0
- data/test.jpg +0 -0
- data/test.xml +281 -0
- metadata +286 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
require 'rmagick'
|
|
2
|
+
#require 'set'
|
|
3
|
+
#require 'ap'
|
|
4
|
+
|
|
5
|
+
module Utils
|
|
6
|
+
class Image
|
|
7
|
+
# Loads the image stored at +ex_path+ and resets the lazily computed caches.
#
# ex_path - path passed to Magick::ImageList.new.
#
# A white auxiliary canvas is also allocated; it has the same number of rows
# as the loaded image and is @increment (200) columns wider.
def initialize(ex_path)
  @logger = Utils::ApplicationLogger.instance
  @image_path = ex_path

  @logger.info("Loading image")
  @img = Magick::ImageList.new(@image_path)

  @increment = 200
  aux_columns = @img.columns + @increment
  aux_rows = @img.rows
  @img_aux = Magick::Image.new(aux_columns, aux_rows) { self.background_color = "white" }

  # Neither the intensity matrix nor the integral images exist yet.
  @intensity_calculated = false
  @integral_calculated = false
end
|
|
17
|
+
|
|
18
|
+
# Builds the intensity matrix and its normalized histogram once; later calls
# are no-ops while @intensity_calculated is set.
def calculate_intensity_matrix
  return if @intensity_calculated

  @logger.info("Calculating intensity matrix")
  # Gray ("I") channel of the loaded image.
  @intensity_matrix = obtain_image_intensity_matrix

  @logger.info("Calculating normalized intensity histogram")
  @histogram = @intensity_matrix.to_normalized_histogram

  @intensity_calculated = true
end
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Number of pixel rows of the wrapped image (delegates to @img.rows).
def rows
  @img.rows
end
|
|
32
|
+
|
|
33
|
+
# Number of pixel columns of the wrapped image (delegates to @img.columns).
def columns
  @img.columns
end
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Shows the wrapped image by delegating to @img.display and returns whatever
# that call returns.
#
# NOTE(review): the two assignments inside the block have no `self.` receiver,
# so Ruby treats them as block-local variables; they do not appear to set any
# display option. Confirm the intended option before changing this.
def display
  @img.display do
    server_name = @image_path
    delay = 1
  end
end
|
|
41
|
+
|
|
42
|
+
# Mean intensity of a rectangular window, computed from @integral_image.
#
# from_x, from_y - row / column index of the window's bottom-right pixel.
# region_height  - how many rows the window extends above from_x.
# region_width   - how many columns the window extends left of from_y.
#
# The window covers rows (from_x - region_height)..from_x and columns
# (from_y - region_width)..from_y, i.e. (region_height + 1) by
# (region_width + 1) pixels; a window that would leave the image is clipped
# at row/column 0. With no arguments the window is the whole image.
#
#   [1]-------[2]
#    |         |     sum = [4] - [2] - [3] + [1]
#    |         |     [2] / [3] are only subtracted when rows / columns exist
#   [3]-------[4]    beyond the window; [1] is added back only if both were.
#
# Returns the mean as a Float. The sum is divided by the number of pixels
# actually covered; dividing by region_height * region_width (as before)
# under-counted the window by one row and one column.
def mean_of_region(from_x=@integral_image.row_size-1,from_y=@integral_image.column_size-1,region_height=@integral_image.row_size-1,region_width=@integral_image.column_size-1)
  row_above = from_x - (region_height + 1)
  col_before = from_y - (region_width + 1)
  has_cols_before = from_y - region_width > 0
  has_rows_above = from_x - region_height > 0

  total = @integral_image[from_x, from_y]
  total -= @integral_image[from_x, col_before] if has_cols_before
  total -= @integral_image[row_above, from_y] if has_rows_above
  # The top-left corner was subtracted twice, restore it once.
  total += @integral_image[row_above, col_before] if has_cols_before && has_rows_above

  covered_rows = [region_height, from_x].min + 1
  covered_cols = [region_width, from_y].min + 1

  total.to_f / (covered_rows * covered_cols)
end
|
|
78
|
+
|
|
79
|
+
# Standard deviation of a rectangular window, computed from the integral
# image of squared intensities (@sum_of_squares_intensity_matrix).
#
# mean           - mean intensity of the same window.
# from_x, from_y - row / column index of the window's bottom-right pixel.
# region_height  - how many rows the window extends above from_x.
# region_width   - how many columns the window extends left of from_y.
#
# The window covers (region_height + 1) by (region_width + 1) pixels and is
# clipped at row/column 0; with only +mean+ given it is the whole image.
#
# Returns sqrt(|E[x^2] - mean^2|) as a Float, where E[x^2] is the windowed
# sum of squares divided by the number of pixels actually covered. The
# previous divisor, region_height * region_width, under-counted the window
# by one row and one column.
def standard_deviation_of_region(mean,from_x=@integral_image.row_size-1,from_y=@integral_image.column_size-1,region_height=@integral_image.row_size-1,region_width=@integral_image.column_size-1)
  row_above = from_x - (region_height + 1)
  col_before = from_y - (region_width + 1)
  has_cols_before = from_y - region_width > 0
  has_rows_above = from_x - region_height > 0

  total = @sum_of_squares_intensity_matrix[from_x, from_y]
  total -= @sum_of_squares_intensity_matrix[from_x, col_before] if has_cols_before
  total -= @sum_of_squares_intensity_matrix[row_above, from_y] if has_rows_above
  # The top-left corner was subtracted twice, restore it once.
  total += @sum_of_squares_intensity_matrix[row_above, col_before] if has_cols_before && has_rows_above

  pixel_count = ([region_height, from_x].min + 1) * ([region_width, from_y].min + 1)

  # abs guards against a slightly negative variance caused by rounding.
  Math.sqrt(((total.to_f / pixel_count) - mean**2).abs)
end
|
|
106
|
+
|
|
107
|
+
# Exports the "I" (intensity) channel of +img+ and arranges it as a Matrix
# with one matrix row per image row.
#
# img - object responding to export_pixels, columns and rows (default: @img).
def obtain_image_intensity_matrix(img=@img)
  flat_pixels = img.export_pixels(0, 0, img.columns, img.rows, "I")
  Matrix.rows(flat_pixels.each_slice(img.columns).to_a)
end
|
|
110
|
+
|
|
111
|
+
# Turns an intensity matrix back into an image via Magick::Image.constitute,
# using the "I" channel map.
#
# intensity_matrix - matrix responding to column_size, row_size and
#                    to_elements (default: @intensity_matrix).
def intensity_matrix_to_image(intensity_matrix=@intensity_matrix)
  @logger.info("Transforiming intensity matrix to image")
  width = intensity_matrix.column_size
  height = intensity_matrix.row_size
  Magick::Image.constitute(width, height, "I", intensity_matrix.to_elements)
end
|
|
115
|
+
|
|
116
|
+
# Writes +img+ (default: the wrapped image) to +path+ and returns the result
# of img.write.
def to_file(path,img=@img)
  @logger.info("Saving image to path: #{path}")
  img.write(path)
end
|
|
120
|
+
|
|
121
|
+
# Builds, once, the two summed-area tables used by the region statistics:
# @integral_image (running sums of intensities) and
# @sum_of_squares_intensity_matrix (running sums of squared intensities).
# Entry [x, y] of each table is the sum over rows 0..x and columns 0..y.
#
# intensity_matrix - matrix to integrate; when omitted, @intensity_matrix is
#                    used, computing it first if necessary.
#
# The default is resolved only after calculate_intensity_matrix has run. It
# used to be `@intensity_matrix` in the signature, which is evaluated before
# the body and was therefore nil on a first call.
#
# Does nothing while @integral_calculated is set.
def calculate_integral_images(intensity_matrix=nil)
  return if @integral_calculated

  calculate_intensity_matrix unless @intensity_calculated
  intensity_matrix ||= @intensity_matrix

  @logger.info("Calculating integral images")
  sums = []
  square_sums = []
  intensity_matrix.to_a.each_with_index do |row, x|
    running = 0
    running_squares = 0
    sum_row = []
    square_row = []
    row.each_with_index do |val, y|
      # Sum of the current row up to y, plus everything in the rows above.
      running += val
      running_squares += val**2
      sum_row << running + (x.zero? ? 0 : sums[x - 1][y])
      square_row << running_squares + (x.zero? ? 0 : square_sums[x - 1][y])
    end
    sums << sum_row
    square_sums << square_row
  end

  @integral_calculated = true
  @sum_of_squares_intensity_matrix = Matrix.rows(square_sums)
  @integral_image = Matrix.rows(sums)
end
|
|
145
|
+
|
|
146
|
+
# Binarizes a matrix of 0..255 intensities.
#
# threshold        - cut-off as a fraction of 255 (default 0.5).
# intensity_matrix - matrix to binarize (default: @intensity_matrix).
#
# Returns a new matrix holding 0.0 where the value is strictly below the
# scaled cut-off and 1.0 everywhere else.
def apply_threshold(threshold=0.5,intensity_matrix=@intensity_matrix)
  cutoff = threshold * 255
  intensity_matrix.map do |level|
    if level < cutoff
      0.0
    else
      1.0
    end
  end
end
|
|
150
|
+
|
|
151
|
+
# Iterative (two-means) global thresholding.
#
# intensity_matrix - matrix of 0..255 intensities (default: @intensity_matrix).
# params           - accepted for signature compatibility with the other
#                    binarizers; unused here.
#
# Steps:
#   1. pick a random initial threshold T between the whole-image mean (from
#      mean_of_region, which needs the integral image) and the midrange
#      (min + max) / 2 of the matrix;
#   2. compute the mean of the values below T and of the values at or above T;
#   3. set T to the average of the two means;
#   4. repeat from 2 until T moves by at most 0.0001.
#
# The midrange was previously computed as (max + max) / 2, i.e. simply max.
#
# Returns the matrix binarized by apply_threshold.
def simple_binarization(intensity_matrix=@intensity_matrix,*params)
  midrange = (intensity_matrix.min + intensity_matrix.max) / 2
  mean = mean_of_region # no arguments: mean of the whole image
  threshold_new = Random.new.rand([mean, midrange].min..[mean, midrange].max)

  loop do
    threshold_old = threshold_new
    sum_lower = 0
    count_lower = 0
    sum_higher = 0
    count_higher = 0

    intensity_matrix.each do |val|
      if val < threshold_old
        sum_lower += val
        count_lower += 1.0
      else
        sum_higher += val
        count_higher += 1.0
      end
    end

    # Counts are Floats, so an empty class yields NaN instead of raising.
    threshold_new = ((sum_lower / count_lower) + (sum_higher / count_higher)) / 2

    break unless (threshold_old - threshold_new).abs > 0.0001
  end

  # apply_threshold expects the cut-off as a fraction of 255.
  apply_threshold(threshold_new / 255.0, intensity_matrix)
end
|
|
184
|
+
|
|
185
|
+
# Otsu-style global thresholding driven by @histogram.
#
# intensity_matrix - matrix handed to apply_threshold (default:
#                    @intensity_matrix). The statistics come from @histogram,
#                    not from this argument.
# params           - accepted for signature compatibility; unused.
#
# Walks the histogram entries in their stored order, accumulating the share
# and weighted sum of the levels seen so far, and keeps the key that gives
# the largest between-class variance. The key stays at -1 when no entry beats
# a variance of 0.0.
#
# Returns the matrix binarized by apply_threshold at key / 255.0.
def otsu_binarization(intensity_matrix=@intensity_matrix,*params)
  share_seen = 0
  weighted_seen = 0.0
  best_variance = 0.0
  best_key = -1
  weighted_total = @histogram.inject(0.0) { |acc, (level, share)| acc + level * share }

  @histogram.each do |level, share|
    # Only populated buckets are stored, so every entry is a candidate.
    share_seen += share.to_f
    share_left = 1.0 - share_seen

    weighted_seen += (level * share).to_f
    mean_seen = weighted_seen / share_seen.to_f
    mean_left = (weighted_total - weighted_seen) / share_left.to_f
    variance = (share_seen * share_left * ((mean_seen - mean_left)**2)).to_f

    next unless variance > best_variance

    best_variance = variance
    best_key = level
  end

  apply_threshold(best_key / 255.0, intensity_matrix)
end
|
|
210
|
+
|
|
211
|
+
# Local (Niblack-style) thresholding: each pixel is compared against
# mean - 0.2 * standard deviation of a window around it.
#
# intensity_matrix - matrix to binarize (default: @intensity_matrix).
# region_height    - window height passed to the region statistics (default 30).
# region_width     - window width passed to the region statistics (default 30).
#
# For pixel [x, y] the window's bottom-right corner is placed
# (region_height - 1) / 2 rows below and (region_width - 1) / 2 columns to
# the right, clamped to the last row / column of the matrix. The statistics
# come from mean_of_region and standard_deviation_of_region.
#
# Returns a new Matrix of 0.0 (below the local threshold) and 1.0.
def niblack_binarization(intensity_matrix=@intensity_matrix,region_height=30,region_width=30)
  row_reach = (region_height - 1) / 2
  col_reach = (region_width - 1) / 2

  flags = intensity_matrix.each_with_index.map do |val, x, y|
    from_x = [x + row_reach, intensity_matrix.row_size - 1].min
    from_y = [y + col_reach, intensity_matrix.column_size - 1].min

    @logger.debug("Im on #{x} #{y} - region is going to start on #{from_x} #{from_y}")

    mean = mean_of_region(from_x, from_y, region_height, region_width).to_f
    deviation = standard_deviation_of_region(mean, from_x, from_y, region_height, region_width).to_f
    threshold = (mean - 0.2 * deviation).to_f

    val < threshold ? 0.0 : 1.0
  end

  # Re-shape the flat list of flags into rows of column_size entries.
  Matrix.rows(flags.each_slice(intensity_matrix.column_size).to_a)
end
|
|
229
|
+
|
|
230
|
+
# Binarizes the image in place with the named method and rebuilds every
# derived representation.
#
# method - name of a binarization method of this object (e.g.
#          :otsu_binarization); it is invoked through send with the current
#          intensity matrix followed by +params+.
# params - extra arguments forwarded to that method.
#
# The integral images are built before the binarizer runs, because the
# binarizers that use mean_of_region / standard_deviation_of_region read
# them. After the matrix has been replaced, the cached integral images
# describe the old matrix, so they are invalidated and rebuilt (previously
# the @integral_calculated guard skipped that rebuild).
#
# Returns the new normalized histogram.
def binarize!(method,*params)
  @logger.info("Performing #{method} binarization on image")
  calculate_intensity_matrix
  calculate_integral_images

  @intensity_matrix = send(method, @intensity_matrix, *params)

  @logger.info("IMAGE CHANGED!! Recalculating internal representation")
  @img = intensity_matrix_to_image(@intensity_matrix)
  @integral_calculated = false # cached tables belong to the previous matrix
  calculate_integral_images
  @histogram = @intensity_matrix.to_normalized_histogram
end
|
|
239
|
+
|
|
240
|
+
# Converts +intensity_matrix+ (default: @intensity_matrix) to an image and
# displays it; returns the result of that display call.
def draw_intensity_matrix(intensity_matrix=@intensity_matrix)
  @logger.info("Drawing intensity matrix")
  rendered = intensity_matrix_to_image(intensity_matrix)
  rendered.display
end
|
|
244
|
+
|
|
245
|
+
# Runs the named binarization method on @intensity_matrix (forwarding
# +params+) and displays the result without modifying the image.
def draw_binary_image(method,*params)
  @logger.info("Performing #{method} binarization on image")
  draw_intensity_matrix(send(method, @intensity_matrix, *params))
end
|
|
250
|
+
|
|
251
|
+
# Runs the named binarization method on @intensity_matrix and writes the
# result to +path+ without modifying the image.
#
# path   - destination file.
# method - name of a binarization method of this object.
# params - extra arguments forwarded to that method. They were previously
#          dropped: the method was invoked with no arguments at all, unlike
#          in draw_binary_image.
def save_binary_image(path,method,*params)
  @logger.info("Performing #{method} binarization on image")
  bin_mat = send(method, @intensity_matrix, *params)
  to_file(path, intensity_matrix_to_image(bin_mat))
end
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# Labels the connected regions of zero-valued pixels, then draws and saves
# them.
#
# neighbourhood    - forwarded to neighbour_labels_set (8 adds the diagonals).
# output_file      - path forwarded to draw_connected_regions.
# intensity_matrix - matrix to scan (default: @intensity_matrix); only
#                    entries for which zero? is true are labelled.
#
# Single scan in matrix iteration order: a pixel with no labelled neighbour
# opens a new label; otherwise it takes the first (smallest) neighbour label
# and, when several labels meet, each of them is recorded as translating to
# it. Afterwards every translation is resolved one step further and the
# regions are handed to extract_regions / draw_connected_regions.
def detect_connected_regions(neighbourhood,output_file,intensity_matrix=@intensity_matrix)
  labels = Matrix.build(intensity_matrix.row_size, intensity_matrix.column_size) { 0 }
  translation = { 0 => 0 } # label 0 stands for "no region"

  @logger.info("Detecting connected regions")
  intensity_matrix.each_with_index do |val, x, y|
    next unless val.zero?

    nearby = neighbour_labels_set(labels, x, y, neighbourhood)

    if nearby.empty?
      fresh = translation.keys.last + 1
      translation[fresh] = fresh
      labels[x, y] = fresh
      next
    end

    # Always keep the smallest label so that equivalences point downwards.
    smallest = nearby.first
    labels[x, y] = smallest
    next unless nearby.size > 1

    nearby.each { |label| translation[label] = smallest }
  end

  @logger.info("Translating region labels as per equivalence detected")
  translation.each do |label, target|
    translation[label] = translation[target] if label != target
  end

  draw_connected_regions(extract_regions(labels, translation), output_file)
end
|
|
287
|
+
|
|
288
|
+
# Collects the labels of the already-visited neighbours of pixel [x, y].
#
# label_assignment_matrix - matrix of labels assigned so far (0 = none).
# x, y                    - row / column index of the current pixel.
# neighbourhood           - 8 also inspects the two diagonals of the row
#                           above; any other value inspects only the pixel
#                           above and the pixel to the left.
#
# Neighbours outside the matrix are skipped explicitly. Without that check a
# negative index wraps around in Matrix#[], so a pixel in the first column
# picked up the last column of the previous row as a "neighbour".
#
# Returns the distinct non-zero labels as an Array sorted in ascending order
# (first, empty?, each and size behave as they did on the SortedSet used
# before; SortedSet itself is no longer part of the standard library since
# Ruby 3.0).
def neighbour_labels_set(label_assignment_matrix,x,y,neighbourhood)
  offsets = [[-1, 0], [0, -1]]
  offsets += [[-1, -1], [-1, 1]] if neighbourhood == 8

  last_row = label_assignment_matrix.row_size - 1
  last_col = label_assignment_matrix.column_size - 1

  labels = offsets.map do |row_step, col_step|
    row = x + row_step
    col = y + col_step
    next unless row.between?(0, last_row) && col.between?(0, last_col)

    label_assignment_matrix[row, col]
  end

  labels.compact.reject(&:zero?).uniq.sort
end
|
|
304
|
+
|
|
305
|
+
# Groups the labelled pixels by their translated region label.
#
# label_assignment_matrix - matrix of raw labels.
# label_translation       - hash mapping each raw label to its final label;
#                           pixels whose final label is zero are skipped.
#
# Returns a Hash from final label to an Array of [column, row] pairs, in
# matrix iteration order. The hash creates an empty Array for unknown keys.
def extract_regions(label_assignment_matrix,label_translation)
  @logger.info("Extracting regions to drawable format")
  regions = Hash.new { |store, label| store[label] = Array.new }

  label_assignment_matrix.each_with_index do |raw_label, row, col|
    final_label = label_translation[raw_label]
    next if final_label.zero?

    # Stored as [y, x] of the matrix, i.e. column first.
    regions[label_translation[raw_label]].push([col, row])
  end

  regions
end
|
|
317
|
+
|
|
318
|
+
# Paints every region onto @img, displays the image and saves it.
#
# region_list - Hash whose values are Arrays of two-element points; the
#               point's elements are passed to Magick::Draw#point in order.
# output_file - path passed to to_file.
#
# Colours are taken from a fixed palette that is rotated before each region,
# so the first region is drawn with the palette's second entry.
def draw_connected_regions(region_list,output_file)
  @logger.info("Drawing regions")
  palette = ["red","seagreen","royalblue","purple","sienna","steelblue","khaki","lightcoral","olive"]

  region_list.each_value do |pixels|
    pen = Magick::Draw.new
    pen.stroke_width = 1
    pen.fill = palette.rotate!.first
    pixels.each { |pixel| pen.point(pixel[0], pixel[1]) }
    pen.draw(@img)
  end

  @img.display
  to_file(output_file, @img)
end
|
|
333
|
+
|
|
334
|
+
private :niblack_binarization , :otsu_binarization, :simple_binarization, :apply_threshold, :calculate_integral_images
|
|
335
|
+
end
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
|
|
2
|
+
module RubyPager
|
|
3
|
+
|
|
4
|
+
# Holds the image attributes of a page element: file name, width and height.
#
# The attributes are read from, and written back to, a hash keyed by
# "@imageFilename", "@imageWidth" and "@imageHeight"; width and height are
# kept as integers in memory and as strings in the hash.
class Image_Data
  attr_reader :file_name, :width, :height

  # ex_data - hash providing the three "@image..." keys; only those keys are
  #           copied.
  def initialize(ex_data)
    @data = Hash.new
    @data["@imageFilename"] = ex_data["@imageFilename"]
    @data["@imageWidth"] = ex_data["@imageWidth"]
    @data["@imageHeight"] = ex_data["@imageHeight"]
    @file_name = @data["@imageFilename"]
    @width = @data["@imageWidth"].to_i
    @height = @data["@imageHeight"].to_i
  end

  # Raises ArgumentError unless +ex_file_name+ is a String.
  def file_name= (ex_file_name)
    raise(ArgumentError, "Got passed a non string object") unless ex_file_name.is_a?(String)
    @file_name = ex_file_name
  end

  # Raises ArgumentError unless +ex_width+ is a non-negative Integer.
  # (The check used to name Fixnum, a constant removed in Ruby 3.2.)
  def width=(ex_width)
    raise(ArgumentError, "Got passed a non integer object") unless ex_width.is_a?(Integer) && ex_width >= 0
    @width = ex_width
  end

  # Raises ArgumentError unless +ex_height+ is a non-negative Integer.
  def height=(ex_height)
    raise(ArgumentError, "Got passed a non integer object") unless ex_height.is_a?(Integer) && ex_height >= 0
    @height = ex_height
  end

  # Returns a fresh attribute hash with an empty file name and zero size.
  def self.blank_data
    res = Hash.new
    res["@imageFilename"] = ""
    res["@imageWidth"] = "0"
    res["@imageHeight"] = "0"
    return res
  end

  # Writes the current attribute values back into the internal hash and
  # returns that hash.
  def get_consolidated_data
    consolidate_data()
    return @data
  end

  private

  # Copies the in-memory attributes into @data, sizes as strings.
  def consolidate_data()
    @data["@imageFilename"] = @file_name
    @data["@imageWidth"] = @width.to_s
    @data["@imageHeight"] = @height.to_s
  end
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
|
|
2
|
+
require 'rgeo'
|
|
3
|
+
|
|
4
|
+
module RubyPager
|
|
5
|
+
# Placeholder for an intersection test between two page objects.
#
# Every method body is currently empty, so each method returns nil and the
# constructor ignores its arguments.
class Intersect
  # ex_object1, ex_object2, ex_mode - accepted but not used yet.
  def initialize(ex_object1, ex_object2, ex_mode); end

  # Not implemented: returns nil.
  def intersects?; end

  # Not implemented: returns nil.
  def overlap_percentage; end

  private

  # Not implemented: returns nil.
  def run; end

  # Not implemented: returns nil.
  def calculate_polygon_line_intersection; end

  # Not implemented: returns nil.
  def calculate_polygon_polygon_intersection; end
end
|
|
33
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
module RubyPager
|
|
2
|
+
# Wraps the "Creator", "Created" and "LastChange" entries of a metadata hash.
#
# The two timestamps are parsed into DateTime objects on construction and
# are exposed, and written back, as "%FT%T" formatted strings. The hash
# passed to the constructor is kept and updated in place by
# get_consolidated_data.
class Metadata
  attr_reader :creator

  # ex_data - hash with "Creator", "Created" and "LastChange" entries; the
  #           latter two must be parseable by DateTime.parse.
  def initialize(ex_data)
    @data = ex_data
    @creator = @data["Creator"]
    @created, @lastchange = ["Created", "LastChange"].map { |key| DateTime.parse(@data[key]) }
  end

  # Raises ArgumentError unless +ex_creator+ is exactly a String.
  def creator= (ex_creator)
    unless ex_creator.instance_of?(String)
      raise(ArgumentError, "Got passed a non string object")
    end

    @creator = ex_creator
  end

  # Creation timestamp formatted as "%FT%T".
  def created
    @created.strftime("%FT%T")
  end

  # Raises ArgumentError unless +ex_created+ is exactly a DateTime.
  def created= (ex_created)
    unless ex_created.instance_of?(DateTime)
      raise(ArgumentError, "Got passed a non DateTime object")
    end

    @created = ex_created
  end

  # Last-change timestamp formatted as "%FT%T".
  def lastchange
    @lastchange.strftime("%FT%T")
  end

  # Raises ArgumentError unless +ex_lastchange+ is exactly a DateTime.
  def lastchange= (ex_lastchange)
    unless ex_lastchange.instance_of?(DateTime)
      raise(ArgumentError, "Got passed a non DateTime object")
    end

    @lastchange = ex_lastchange
  end

  # Returns a fresh metadata hash stamped with the current time.
  def self.blank_data
    {
      "Creator" => "Ruby Page",
      "Created" => DateTime.now.strftime("%FT%T"),
      "LastChange" => DateTime.now.strftime("%FT%T")
    }
  end

  # Writes the current values back into the wrapped hash and returns it.
  def get_consolidated_data
    consolidate_data
    @data
  end

  private

  # Copies creator and both formatted timestamps into @data.
  def consolidate_data()
    @data["Creator"] = @creator
    @data["Created"] = created
    @data["LastChange"] = lastchange
  end
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
|
|
2
|
+
module RubyPager
|
|
3
|
+
|
|
4
|
+
# In-memory view of one PAGE XML document: schema attributes, metadata,
# image attributes, reading order and the text regions keyed by their id.
#
# The parsed document hash is kept in @data and is refreshed from the
# wrapped objects when the page is saved.
class Page
  attr_reader :file_name, :metadata, :image_data, :xmlns, :xmlns_xsi, :xsi_schemaLocation

  # ex_file_name - name the page is associated with (default save target).
  # ex_data      - parsed document hash with a "PcGts" root entry.
  def initialize(ex_file_name,ex_data)
    @logger = Utils::ApplicationLogger.instance
    @logger.info("Loading data from XML #{ex_file_name}")
    @file_name=ex_file_name
    @data=ex_data
    @text_regions=Hash.new
    @metadata=Metadata.new(@data["PcGts"]["Metadata"])
    load_xml_schema_data
    load_xml_image_info
    load_text_regions
    @reading_order=Reading_Order.new(@data["PcGts"]["Page"]["ReadingOrder"])
  end

  # Loads +ex_file_name+ through XML.load and wraps the result in a Page.
  def self.load_from_xml(ex_file_name)
    logger = Utils::ApplicationLogger.instance
    logger.info("Loading XML #{ex_file_name}")
    data=XML.load(ex_file_name)
    return Page.new(ex_file_name,data)
  end

  # Creates a blank page whose image attributes describe +ex_image_name+.
  #
  # The image's columns are its width and its rows are its height; the two
  # were previously written the other way round.
  def self.create_from_image(ex_image_name)
    logger = Utils::ApplicationLogger.instance
    logger.info("Generating XML for image #{ex_image_name}")
    image=Utils::Image.new(ex_image_name)
    data=self.blank_data
    data["PcGts"]["Page"]["@imageFilename"]=ex_image_name
    data["PcGts"]["Page"]["@imageWidth"]=image.columns.to_s
    data["PcGts"]["Page"]["@imageHeight"]=image.rows.to_s
    return Page.new(ex_image_name,data)
  end

  # Creates a page from blank data, associated with +ex_image_name+.
  def self.blank(ex_image_name)
    data=self.blank_data
    return Page.new(ex_image_name,data)
  end

  # Adds a text region with id +region_id+ whose outline is the rectangle
  # from 0,0 to the image's width,height.
  #
  # Points are written as "x,y" pairs, so the horizontal extent is the image
  # width and the vertical extent is the image height (previously the two
  # were exchanged, which only looked right together with the exchanged
  # width/height written by create_from_image).
  #
  # Raises ArgumentError when the id is already in use.
  def create_full_page_region(region_id)
    @logger.info("Creating full page region #{region_id}")
    data=Text_Region.blank_data
    raise(ArgumentError, "Region id #{region_id} is already in use") if @text_regions.has_key? region_id
    width=@image_data.width
    height=@image_data.height
    data["Coords"]["@points"]="0,0 0,#{height} #{width},#{height} #{width},0"
    data["@id"]=region_id
    push(Text_Region.new(0,data))
  end

  # Consolidates the page into its document hash and writes it through
  # XML.save to +ex_save_name+ (default: the page's own file name).
  def save(ex_save_name=@file_name)
    @logger.info("Saving page object #{@file_name} to #{ex_save_name}")
    consolidate_data
    XML.save(ex_save_name, @data)
  end

  # Returns the text region stored under +ex_key+.
  # Raises RangeError when no such region exists.
  def [](ex_key)
    raise(RangeError, "Index #{ex_key} is out of range") unless @text_regions.has_key? ex_key
    return @text_regions[ex_key]
  end

  # True when a region with id +ex_region_id+ exists.
  def has_region?(ex_region_id)
    return @text_regions.has_key? ex_region_id
  end

  # Yields every text region in insertion order.
  def each_region
    @text_regions.values.each {|text_region| yield text_region}
  end

  # Removes the region with id +ex_region_id+ and renumbers the rest.
  # Raises ArgumentError when the region does not exist.
  def delete(ex_region_id)
    if has_region? ex_region_id
      @logger.info("Deleting text region #{ex_region_id}")
      @text_regions.delete(ex_region_id)
      review_regions_index()
    else
      raise(ArgumentError, "Region id #{ex_region_id} does not exist so it can not be deleted")
    end
  end

  # Appends +ex_text_region+, assigning it the next index.
  # Raises ArgumentError for anything that is not exactly a
  # RubyPager::Text_Region or whose id is already in use.
  def push(ex_text_region)
    raise(ArgumentError, "Got passed a non text region object") if ex_text_region.class != RubyPager::Text_Region
    raise(ArgumentError, "Region id #{ex_text_region.id} is already in use") if @text_regions.has_key? ex_text_region.id
    ex_text_region.index=@text_regions.size
    @text_regions[ex_text_region.id]=ex_text_region
  end

  # Number of text regions.
  def size
    return @text_regions.size
  end

  # Returns a document hash for an empty page: schema attributes, blank
  # metadata, no regions, an empty reading order and a zero-sized
  # "blank.jpg" image.
  def self.blank_data
    logger = Utils::ApplicationLogger.instance
    logger.info("Creating blank XML data")
    res=Hash.new
    res["PcGts"]=Hash.new
    res["PcGts"]["Metadata"]=Metadata.blank_data
    res["PcGts"]["@xmlns:xsi"]="http://www.w3.org/2001/XMLSchema-instance"
    res["PcGts"]["@xmlns"]="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15"
    res["PcGts"]["@xsi:schemaLocation"]="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd"
    res["PcGts"]["@pcGtsId"]=""
    res["PcGts"]["Page"]=Hash.new
    res["PcGts"]["Page"]["TextRegion"]=Array.new
    res["PcGts"]["Page"]["ReadingOrder"]=Hash.new
    res["PcGts"]["Page"]["@imageFilename"]="blank.jpg"
    res["PcGts"]["Page"]["@imageWidth"]="0"
    res["PcGts"]["Page"]["@imageHeight"]="0"
    return res
  end

  private

  # Renumbers the regions 0..n-1 in insertion order.
  def review_regions_index
    @text_regions.values.each_with_index {|region,index|
      region.index=index
    }
  end

  # Builds Text_Region objects from the "TextRegion" entry, which is an
  # Array for several regions, a Hash for a single one, or absent.
  def load_text_regions
    if @data["PcGts"]["Page"]["TextRegion"]
      if @data["PcGts"]["Page"]["TextRegion"].class == Array
        region_array= @data["PcGts"]["Page"]["TextRegion"]
        region_array.each_with_index {|text_region,index |
          @text_regions[text_region["@id"]]=Text_Region.new(index,text_region)
        }
      end
      if @data["PcGts"]["Page"]["TextRegion"].class == Hash
        text_region= @data["PcGts"]["Page"]["TextRegion"]
        @text_regions[text_region["@id"]]=Text_Region.new(0,text_region)
      end
    end
  end

  # Reads the schema attributes of the root element.
  def load_xml_schema_data
    @xmlns_xsi=@data["PcGts"]["@xmlns:xsi"]
    @xmlns= @data["PcGts"]["@xmlns"]
    @xsi_schemaLocation=@data["PcGts"]["@xsi:schemaLocation"]
    @pc_gts_id=@data["PcGts"]["@pcGtsId"]
  end

  # Wraps the image attributes of the "Page" entry.
  def load_xml_image_info
    @image_data = Image_Data.new(@data["PcGts"]["Page"])
  end

  # Writes the state of every wrapped object back into @data.
  def consolidate_data
    @data["PcGts"]["Metadata"]=@metadata.get_consolidated_data
    @data["PcGts"]["@xmlns:xsi"]=@xmlns_xsi
    @data["PcGts"]["@xmlns"]=@xmlns
    @data["PcGts"]["@xsi:schemaLocation"]=@xsi_schemaLocation
    @data["PcGts"]["@pcGtsId"]=@pc_gts_id
    @data["PcGts"]["Page"]["ReadingOrder"]=@reading_order.get_consolidated_data
    img_cons = @image_data.get_consolidated_data
    @data["PcGts"]["Page"]["@imageFilename"]=img_cons["@imageFilename"]
    @data["PcGts"]["Page"]["@imageWidth"]=img_cons["@imageWidth"]
    @data["PcGts"]["Page"]["@imageHeight"]=img_cons["@imageHeight"]
    @data["PcGts"]["Page"]["TextRegion"]=Array.new
    @text_regions.values.each {|text_region|
      @data["PcGts"]["Page"]["TextRegion"].push(text_region.get_consolidated_data)
    }
  end

end
|
|
166
|
+
|
|
167
|
+
end
|