ruby_pager 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.idea/.rakeTasks +7 -0
- data/.idea/inspectionProfiles/Project_Default.xml +6 -0
- data/.idea/misc.xml +4 -0
- data/.idea/modules.xml +8 -0
- data/.idea/ruby_pager.iml +60 -0
- data/.idea/vcs.xml +6 -0
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +101 -0
- data/LICENSE.txt +21 -0
- data/README.md +43 -0
- data/Rakefile +6 -0
- data/TODO.txt +14 -0
- data/bin/baseline_noise +43 -0
- data/bin/console +14 -0
- data/bin/line_edit +56 -0
- data/bin/page_create +51 -0
- data/bin/region_edit +55 -0
- data/bin/setup +8 -0
- data/lib/ruby_pager/application_logger.rb +15 -0
- data/lib/ruby_pager/coord.rb +50 -0
- data/lib/ruby_pager/coords.rb +81 -0
- data/lib/ruby_pager/extendmatrix2.rb +138 -0
- data/lib/ruby_pager/gaussian_noise.rb +36 -0
- data/lib/ruby_pager/histogram.rb +102 -0
- data/lib/ruby_pager/image.rb +338 -0
- data/lib/ruby_pager/image_data.rb +53 -0
- data/lib/ruby_pager/intersect.rb +33 -0
- data/lib/ruby_pager/metadata.rb +56 -0
- data/lib/ruby_pager/page.rb +167 -0
- data/lib/ruby_pager/reading_order.rb +18 -0
- data/lib/ruby_pager/text_line.rb +72 -0
- data/lib/ruby_pager/text_region.rb +130 -0
- data/lib/ruby_pager/version.rb +3 -0
- data/lib/ruby_pager/xml.rb +90 -0
- data/lib/ruby_pager.rb +18 -0
- data/no_lines.xml +14 -0
- data/no_regions.xml +11 -0
- data/one_line.xml +21 -0
- data/ruby_pager.gemspec +45 -0
- data/test.jpg +0 -0
- data/test.xml +281 -0
- metadata +286 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
|
|
2
|
+
module RubyPager
  # A single indexed point whose coordinates are parsed from an "x,y" string.
  #
  # The point keeps the string it was built from and regenerates it from the
  # current x/y values whenever get_consolidated_data is called.
  class Coord
    attr_reader :id, :x, :y

    # @param ex_index [Integer] index stored as this point's id
    # @param ex_data [String] the point written as "x,y"
    # @raise [StandardError] when ex_data does not split into exactly two parts
    def initialize(ex_index, ex_data)
      @data = ex_data
      @id = ex_index
      load_coords
    end

    # Sets the id after converting it with to_i.
    #
    # @raise [StandardError] when the converted value is negative
    def id=(ex_id)
      # The message used to say "x coord" (copy/paste from x=); it now names the id.
      raise(StandardError, "Got passed a negative value to update the id") if ex_id.to_i < 0
      @id = ex_id.to_i
    end

    # Sets the x coordinate after converting it with to_i.
    #
    # @raise [StandardError] when the converted value is negative
    def x=(x_coord)
      raise(StandardError, "Got passed a negative value to update the x coord") if x_coord.to_i < 0
      @x = x_coord.to_i
    end

    # Sets the y coordinate after converting it with to_i.
    #
    # @raise [StandardError] when the converted value is negative
    def y=(y_coord)
      raise(StandardError, "Got passed a negative value to update the y coord") if y_coord.to_i < 0
      @y = y_coord.to_i
    end

    # Rebuilds the "x,y" string from the current coordinates and returns it.
    #
    # @return [String]
    def get_consolidated_data
      consolidate_data
      return @data
    end

    # Replaces y with a value drawn from a GaussianNoise generator that is
    # built with the current y and ex_std_dev, truncated with to_i.
    #
    # NOTE(review): the result is assigned directly, so the non-negative check
    # applied by y= is not repeated here - confirm whether that is intended.
    def vertical_noise(ex_std_dev)
      noise_generator = GaussianNoise.new(@y, ex_std_dev)
      @y = noise_generator.rand.to_i
    end

    private

    # Splits the stored string on "," and stores both parts as integers.
    def load_coords
      separate = @data.split(",")
      raise(StandardError, "Got passed coord data that doesn't have exactly two dimensions") if separate.size != 2
      @x = separate[0].to_i
      @y = separate[1].to_i
    end

    # Writes the current coordinates back into the stored string.
    def consolidate_data
      @data = "#{@x},#{@y}"
    end
  end
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
|
|
2
|
+
module RubyPager
  # Ordered collection of Coord points built from a whitespace separated
  # string of "x,y" pairs. Each point's id mirrors its position in the list.
  class Coords
    # @param ex_coords_string [String] whitespace separated "x,y" pairs
    def initialize(ex_coords_string)
      @data = ex_coords_string
      @points = []
      load_points
    end

    # @return [Coords] a collection built from the blank data string
    def self.blank
      Coords.new(Coords.blank_data)
    end

    # @return [Integer] number of points currently held
    def size
      @points.size
    end

    # Fetches the point at ex_index.
    #
    # @raise [RangeError] when ex_index is not between 0 and size - 1
    def [](ex_index)
      last_index = @points.size - 1
      unless ex_index.between?(0, last_index)
        raise(RangeError, "Index #{ex_index} is out of range")
      end
      @points[ex_index]
    end

    # Rebuilds the backing string from the current points and returns it.
    #
    # @return [String]
    def get_consolidated_data
      consolidate_data
      @data
    end

    # Appends a point, giving it the next free index as id.
    #
    # @raise [ArgumentError] when ex_coord's class is not exactly RubyPager::Coord
    def push(ex_coord)
      if ex_coord.class != RubyPager::Coord
        raise(ArgumentError, "Got passed a non coord object")
      end
      ex_coord.id = @points.size
      @points << ex_coord
    end

    # Removes every point.
    def clear
      @points.clear
    end

    # Drops the current points and loads a new set from ex_coord_string.
    def reload(ex_coord_string)
      clear
      @data = ex_coord_string
      load_points
    end

    # Removes the point at ex_delete_index and renumbers the remaining ones.
    def delete(ex_delete_index)
      @points.delete_at(ex_delete_index)
      review_points_index
    end

    # Asks every point to apply vertical noise with the given deviation.
    def vertical_noise(ex_std_dev)
      @points.each do |point|
        point.vertical_noise(ex_std_dev)
      end
    end

    # @return [String] the string that represents an empty collection
    def self.blank_data
      ""
    end

    private

    # Splits the backing string on whitespace and creates one Coord per token.
    def load_points
      tokens = @data.split
      tokens.each_with_index do |token, position|
        @points.push(Coord.new(position, token))
      end
    end

    # Joins the consolidated data of every point with single spaces.
    def consolidate_data
      @data = @points.inject("") do |joined, point|
        joined += " " unless joined.empty?
        joined + point.get_consolidated_data
      end
    end

    # Re-assigns ids so that they match the positions in the list again.
    def review_points_index
      @points.each_with_index do |point, position|
        point.id = position
      end
    end
  end
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
require 'extendmatrix'
|
|
2
|
+
|
|
3
|
+
# Extensions added to the Matrix class. All methods work directly on the
# @rows array of the receiver; the ones ending in "!" modify it in place.
class Matrix
  # Subtracts avg from every element and divides by the deviation of the
  # elements around avg. A deviation of 0.0 is replaced by 1.0 so constant
  # matrices do not divide by zero.
  #
  # @param avg [Numeric] value to centre the elements on
  # @return [Matrix] self
  def normalize!(avg)
    square = 0.0
    @rows.each do |row|
      row.each { |val| square += (val - avg)**2 }
    end
    deviation = Math.sqrt(square / (@rows.size * column_size).to_f)
    deviation = 1.0 if deviation == 0.0
    @rows.each do |row|
      row.map! { |e| (e - avg) / deviation }
    end
    return self
  end

  # Counts how many times each element value occurs.
  #
  # @return [Hash] element value => Integer count (default 0)
  def to_histogram
    histogram = Hash.new(0)
    @rows.each do |row|
      row.each { |e| histogram[e] += 1 }
    end
    return histogram
  end

  # Like to_histogram, but every count is divided by the number of elements.
  #
  # @return [Hash] element value => Float frequency (default 0.0)
  def to_normalized_histogram
    histogram = Hash.new(0.0)
    @rows.each do |row|
      row.each { |e| histogram[e] += 1.0 }
    end
    num_elements = (row_size * column_size).to_f
    histogram.keys.each do |index|
      histogram[index] /= num_elements
    end

    return histogram
  end

  # Adds v element-wise to the row at row_index.
  #
  # @return [Matrix] self
  def row_vector_accumulation!(row_index, v)
    @rows[row_index].size.times { |i| @rows[row_index][i] += v[i] }
    return self
  end

  # Subtracts v element-wise from the row at row_index.
  #
  # @return [Matrix] self
  def row_vector_decrement!(row_index, v)
    @rows[row_index].size.times { |i| @rows[row_index][i] -= v[i] }
    return self
  end

  # Divides every element of the row at row_index by val.
  #
  # @return [Matrix] self
  def row_vector_scalar_division!(row_index, val)
    @rows[row_index].size.times { |i| @rows[row_index][i] /= val }
    return self
  end

  # Euclidean distance between the row at row_index and v2.
  #
  # @return [Float]
  def row_vector_distance(row_index, v2)
    dist = 0.0
    @rows[row_index].size.times { |i| dist += (@rows[row_index][i] - v2[i])**2 }
    return Math.sqrt(dist)
  end

  # All elements as one flat array, row after row.
  #
  # @return [Array]
  def to_elements
    elements = Array.new
    @rows.each { |row| elements.concat(row) }
    return elements
  end

  # Elements of a sub-section of the matrix as one flat array.
  #
  # Accepts either two ranges (row_range, col_range) or four integers
  # (from_row, size_row, from_col, size_col). Negative start/end values
  # count from the end of the matrix.
  #
  # @return [Array, nil] nil when the start lies outside the matrix or a
  #   size is negative
  # @raise [ArgumentError] when called with any other number of arguments
  def minor_to_elements(*param)
    case param.size
    when 2
      row_range, col_range = param
      from_row = row_range.first
      from_row += row_size if from_row < 0
      to_row = row_range.end
      to_row += row_size if to_row < 0
      to_row += 1 unless row_range.exclude_end?
      size_row = to_row - from_row

      from_col = col_range.first
      from_col += column_size if from_col < 0
      to_col = col_range.end
      to_col += column_size if to_col < 0
      to_col += 1 unless col_range.exclude_end?
      size_col = to_col - from_col
    when 4
      from_row, size_row, from_col, size_col = param
      from_row += row_size if from_row < 0
      from_col += column_size if from_col < 0
    else
      # Plain raise: the Matrix.Raise helper comes from e2mmap and is not
      # defined by current matrix releases, where it ends in NoMethodError.
      raise ArgumentError, param.inspect
    end

    # A negative size (also reachable through a reversed range) has no
    # elements to return; without this check Array#[] hands back nil below.
    return nil if size_row < 0 || size_col < 0
    return nil if from_row > row_size || from_col > column_size || from_row < 0 || from_col < 0

    elements = Array.new
    @rows[from_row, size_row].each do |row|
      elements.concat(row[from_col, size_col])
    end
    return elements
  end
end
|
|
115
|
+
|
|
116
|
+
# Extensions added to the Vector class. The methods ending in "!" modify
# the receiver's @elements in place and return the receiver.
class Vector
  # Euclidean distance between this vector and v2.
  #
  # @param v2 [#[]] object that answers to integer indexing
  # @return [Float]
  def calculate_distance(v2)
    dist = 0.0
    @elements.each_with_index { |element, i| dist += (element - v2[i])**2 }
    return Math.sqrt(dist)
  end

  # Divides every element by val.
  #
  # @return [Vector] self
  def scalar_division!(val)
    @elements.map! { |element| element / val }
    return self
  end

  # Subtracts v element-wise.
  #
  # @return [Vector] self
  # @raise [ErrDimensionMismatch] when v has a different size
  def minus_vector!(v)
    # Plain raise: the Vector.Raise helper comes from e2mmap and is not
    # defined by current matrix releases, where it ends in NoMethodError.
    raise ErrDimensionMismatch if size != v.size
    (0...size).each { |i| @elements[i] -= v[i] }
    return self
  end

  # Subtracts val from every element.
  #
  # @return [Vector] self
  def minus_scalar!(val)
    @elements.map! { |element| element - val }
    return self
  end
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
|
|
2
|
+
module RubyPager
  # Generator of normally distributed numbers around a mean.
  #
  # Every call to the class level transform yields two values; the instance
  # hands out the first one immediately and keeps the second for the next call.
  class GaussianNoise
    # @param ex_mean [Numeric] centre of the distribution
    # @param ex_stddev [Numeric] standard deviation
    # @param ex_rand_helper [#call] source of uniform numbers; defaults to Kernel.rand
    def initialize(ex_mean, ex_stddev, ex_rand_helper = lambda { Kernel.rand })
      @rand_helper = ex_rand_helper
      @mean = ex_mean
      @stddev = ex_stddev
      @valid = false
      @next = 0
    end

    # Returns the next value of the sequence.
    #
    # @return [Numeric]
    def rand
      if @valid
        # A value from the previous pair is still waiting: hand it out.
        @valid = false
        return @next
      end

      @valid = true
      first, @next = self.class.gaussian(@mean, @stddev, @rand_helper)
      first
    end

    # Builds a pair of values from two uniform draws taken from rand: the
    # first draw picks an angle, the second one a radius.
    #
    # Stays public because #rand reaches it through self.class.
    #
    # @param rand [#call] source of uniform numbers
    # @return [Array<Float>] the two generated values
    def self.gaussian(mean, stddev, rand)
      angle = 2 * Math::PI * rand.call
      radius = stddev * Math.sqrt(-2 * Math.log(1 - rand.call))
      [mean + radius * Math.cos(angle), mean + radius * Math.sin(angle)]
    end
  end
end
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
require 'ap'
|
|
2
|
+
#require 'ruby-debug'
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
module Utils
  # Histogram data read from a text file.
  #
  # The file is processed line by line; the first token of each line decides
  # how it is handled ("NumVect", "NumParam", "Limit", "Data", or a token that
  # contains a digit, which marks a row of values).
  class Histogram
    # @param file_path [String] file to load
    # @param ex_type [#to_sym] :basic stores every column as histogram data;
    #   any other value alternates columns between histogram and derivate data
    def initialize(file_path, ex_type)
      @logger = Utils::ApplicationLogger.instance
      @logger.level = Logger::INFO
      @limits = []
      @type = ex_type.to_sym
      @histogram = Hash.new { |hash, key| hash[key] = [] }
      @derivate = Hash.new { |hash, key| hash[key] = [] }
      @logger.info("Loading histogram")
      load_file(file_path)
    end

    # Feeds every line of the file to process_line and prints a short summary.
    def load_file(file_path)
      File.open(file_path, "r") do |file|
        file.each_line { |line| process_line(line) }
      end
      puts "BUCKETS ARE #{@buckets}"
      puts "HISTOGRAMS ARE #{@histogram.size}"
      puts "HISTOGRAMS ARE #{@histogram[0].size}"
    end

    # Handles one line of the file according to its first token.
    def process_line(line)
      values = line.split
      keyword = values[0]

      case keyword
      when "NumVect"
        @buckets = values[1].to_i
      when "NumParam"
        @num_histograms = values[1].to_i
        # In derivate mode the columns come in pairs, one pair per histogram.
        @num_histograms /= 2 if @type == :derivate
      when "Limit"
        process_limit(values[1..2].map(&:to_i))
      when "Data"
        @logger.info("Processing data")
      end

      process_data_line(values.map(&:to_f)) if keyword =~ /\d/
    end

    # Stores one start/end pair; values is [start, end].
    def process_limit(values)
      first, last = values[0], values[1]
      @limits.push({ :start => first, :end => last })
    end

    # Distributes one row of values over the histogram (and derivate) columns.
    def process_data_line(values)
      if @type == :basic
        values.each_with_index { |value, column| @histogram[column].push(value) }
      else
        values.each_with_index do |value, column|
          # Even columns are histogram values, odd columns their derivates.
          target = column.even? ? @histogram : @derivate
          target[column / 2].push(value)
        end
      end
    end

    # Start and scaled end point for one histogram value. The scaling depends
    # on the histogram index: divided by 100 below 3, times 200 for 3 and
    # times 2000 above 3.
    #
    # @return [Array<Integer>] [start, scaled value + start]
    def to_line(hist_index, row_index)
      if hist_index < 3
        start, span = limit_start_and_span(hist_index)
        [start, (span * (@histogram[hist_index][row_index]) / 100).to_i + start]
      elsif hist_index == 3
        start, span = limit_start_and_span(hist_index)
        [start, (span * @histogram[hist_index][row_index] * 200).to_i + start]
      elsif hist_index > 3
        start, span = limit_start_and_span(hist_index)
        [start, (span * @histogram[hist_index][row_index] * 2000).to_i + start]
      end
    end

    # Same as to_line for the derivate values, always scaled by 2000.
    #
    # @return [Array<Integer>] [start, scaled value + start]
    def derivate_to_line(hist_index, row_index)
      start, span = limit_start_and_span(hist_index)
      [start, (span * @derivate[hist_index][row_index] * 2000).to_i + start]
    end

    # Yields the row index and the to_line result of every histogram for
    # each of the @buckets rows.
    def each_line
      @buckets.times do |row|
        yield row, @num_histograms.times.map { |hist| to_line(hist, row) }
      end
    end

    # Yields the row index and the derivate_to_line result of every
    # histogram for each of the @buckets rows.
    def each_derivate
      @buckets.times do |row|
        yield row, @num_histograms.times.map { |hist| derivate_to_line(hist, row) }
      end
    end

    private

    # Start of the limit at hist_index and the distance up to its end.
    def limit_start_and_span(hist_index)
      start = @limits[hist_index][:start]
      [start, @limits[hist_index][:end] - start]
    end
  end
end
|
|
102
|
+
|