ruby_pager 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,72 @@
1
+
2
module RubyPager

  # One TextLine element of a PAGE document.
  #
  # Wraps the parsed hash of the element and exposes its id, its position
  # inside the parent region, its transcription and its two point lists
  # (contour and baseline) as Coords objects. Edits are kept on the object
  # and written back into the hash by #get_consolidated_data.
  class Text_Line
    attr_reader :id, :index, :text, :contour, :baseline

    # @param ex_index [Integer] position of the line inside its region
    # @param ex_data [Hash] parsed element; reads "@id", "TextEquiv"/"Unicode",
    #   "Coords"/"@points" and "Baseline"/"@points"
    def initialize(ex_index, ex_data)
      @data = ex_data
      @index = ex_index
      @id = @data["@id"]
      # A line without a transcription has no TextEquiv child; treat the
      # text as nil instead of failing on nil["Unicode"].
      text_equiv = @data["TextEquiv"]
      @text = text_equiv.is_a?(Hash) ? text_equiv["Unicode"] : nil
      load_coords
      load_baseline
    end

    # @return [Text_Line] a line at index 0 built from ::blank_data
    def self.blank
      new(0, blank_data)
    end

    # @param ex_id [String] new identifier
    # @raise [ArgumentError] when ex_id is not a String
    def id=(ex_id)
      raise(ArgumentError, "Got passed a non string object") unless ex_id.is_a?(String)

      @id = ex_id
    end

    # @param ex_text [String] new transcription
    # @raise [ArgumentError] when ex_text is not a String
    def text=(ex_text)
      raise(ArgumentError, "Got passed a non string object") unless ex_text.is_a?(String)

      @text = ex_text
    end

    # @param ex_index [#to_i] new position; stored as an Integer
    # @raise [ArgumentError] when the converted value is negative
    def index=(ex_index)
      new_index = ex_index.to_i
      raise(ArgumentError, "Got passed a negative value to update the index") if new_index < 0

      @index = new_index
    end

    # Writes the current state back into the wrapped hash.
    #
    # @return [Hash] the same hash object that was passed to the constructor
    def get_consolidated_data
      consolidate_data
      @data
    end

    # Forwards to the baseline's vertical_noise with the given argument.
    def baseline_vertical_noise(ex_std_dev)
      @baseline.vertical_noise(ex_std_dev)
    end

    # @return [Hash] element hash with empty id and text and blank point lists
    def self.blank_data
      {
        "@id" => "",
        "TextEquiv" => { "Unicode" => "" },
        "Baseline" => { "@points" => Coords.blank_data },
        "Coords" => { "@points" => Coords.blank_data }
      }
    end

    private

    def load_coords
      @contour = Coords.new(@data["Coords"]["@points"])
    end

    def load_baseline
      @baseline = Coords.new(@data["Baseline"]["@points"])
    end

    def consolidate_data
      @data["@id"] = @id
      if @data["TextEquiv"].is_a?(Hash)
        @data["TextEquiv"]["Unicode"] = @text
      elsif !@text.nil?
        # The element had no usable TextEquiv; create it only once there is
        # a transcription to store so untouched lines stay unchanged.
        @data["TextEquiv"] = { "Unicode" => @text }
      end
      @data["Baseline"]["@points"] = @baseline.get_consolidated_data
      @data["Coords"]["@points"] = @contour.get_consolidated_data
    end
  end

end
@@ -0,0 +1,130 @@
1
+
2
module RubyPager

  # One TextRegion element of a PAGE document.
  #
  # Wraps the parsed hash of the element and keeps its text lines in a hash
  # keyed by line id (insertion order is the line order). Edits are kept on
  # the object and written back into the hash by #get_consolidated_data.
  class Text_Region
    attr_reader :id, :index, :custom, :contour

    # @param ex_index [Integer] position of the region
    # @param ex_data [Hash] parsed element; reads "@id", "@custom",
    #   "Coords"/"@points" and, when present, "TextLine" (Hash or Array)
    def initialize(ex_index, ex_data)
      @logger = Utils::ApplicationLogger.instance
      @data = ex_data
      @index = ex_index
      @id = @data["@id"]
      @custom = @data["@custom"]
      @text_lines = {}
      load_text_lines
      load_contour
    end

    # @return [Text_Region] a region at index 0 built from ::blank_data
    def self.blank
      new(0, blank_data)
    end

    # @return [Integer] number of text lines currently held
    def size
      @text_lines.size
    end

    # @param ex_id [String] new identifier
    # @raise [ArgumentError] when ex_id is not a String
    def id=(ex_id)
      raise(ArgumentError, "Got passed a non string object") unless ex_id.is_a?(String)

      @id = ex_id
    end

    # @param ex_index [#to_i] new position; stored as an Integer
    # @raise [ArgumentError] when the converted value is negative
    def index=(ex_index)
      new_index = ex_index.to_i
      raise(ArgumentError, "Got passed a negative value to update the index") if new_index < 0

      @index = new_index
    end

    # @param ex_key [String] line id
    # @return [Text_Line] the line stored under that id
    # @raise [RangeError] when no line has that id
    def [](ex_key)
      raise(RangeError, "Index #{ex_key} is out of range") unless @text_lines.key?(ex_key)

      @text_lines[ex_key]
    end

    # @return [Boolean] whether a line with the given id is held
    def has_line?(line_id)
      @text_lines.key?(line_id)
    end

    # Removes a line and renumbers the remaining ones from 0.
    #
    # @raise [ArgumentError] when no line has that id
    def delete(ex_line_id)
      unless has_line?(ex_line_id)
        raise(ArgumentError, "Line id #{ex_line_id} does not exist so it can not be deleted")
      end

      @logger.info("Deleting text line #{ex_line_id}")
      @text_lines.delete(ex_line_id)
      review_lines_index
    end

    # Drops every line held by the region.
    def clear_text_lines
      @text_lines.clear
    end

    # Appends a line; its index is set to the current number of lines.
    #
    # @param ex_line [Text_Line]
    # @raise [ArgumentError] when ex_line is not a Text_Line or its id is taken
    def push(ex_line)
      raise(ArgumentError, "Got passed a non text line object") unless ex_line.is_a?(RubyPager::Text_Line)
      raise(ArgumentError, "Text line id already in use") if @text_lines.key?(ex_line.id)

      ex_line.index = @text_lines.size
      @text_lines[ex_line.id] = ex_line
    end

    # Writes the current state back into the wrapped hash.
    #
    # @return [Hash] the same hash object that was passed to the constructor
    def get_consolidated_data
      consolidate_data
      @data
    end

    # @return [Hash] element hash with empty id/custom, no lines, blank points
    def self.blank_data
      {
        "@id" => "",
        "@custom" => "",
        "TextLine" => [],
        "Coords" => { "@points" => Coords.blank_data }
      }
    end

    # Forwards the argument to every line's baseline_vertical_noise.
    def baseline_vertical_noise(ex_std_dev)
      @text_lines.each_value { |text_line| text_line.baseline_vertical_noise(ex_std_dev) }
    end

    private

    # "TextLine" is an Array for several lines and a bare Hash for one line.
    def load_text_lines
      case @data["TextLine"]
      when Array
        @data["TextLine"].each_with_index do |line_data, position|
          @text_lines[line_data["@id"]] = Text_Line.new(position, line_data)
        end
      when Hash
        line_data = @data["TextLine"]
        @text_lines[line_data["@id"]] = Text_Line.new(0, line_data)
      end
    end

    def load_contour
      @contour = Coords.new(@data["Coords"]["@points"])
    end

    def consolidate_data
      @data["@custom"] = @custom
      @data["@id"] = @id
      @data["Coords"]["@points"] = @contour.get_consolidated_data
      # Rebuild "TextLine" from scratch so the result never depends on the
      # shape it had when loaded (missing, Hash or Array): a bare Hash for a
      # single line, an Array otherwise (empty when there are no lines).
      line_data = @text_lines.values.map(&:get_consolidated_data)
      @data["TextLine"] = line_data.size == 1 ? line_data.first : line_data
    end

    # Renumbers the lines 0..n-1 in their stored order.
    def review_lines_index
      @text_lines.values.each_with_index { |line, position| line.index = position }
    end

  end

end
@@ -0,0 +1,3 @@
1
module RubyPager
  # Release number of the gem; frozen so the constant cannot be mutated in place.
  VERSION = "0.1.1".freeze
end
@@ -0,0 +1,90 @@
1
+ require 'nokogiri'
2
+ require 'nori'
3
+ require 'ap'
4
module RubyPager

  # File-level helpers that turn XML files into nested hashes and back.
  #
  # In the hash form, keys starting with "@" are XML attributes of the
  # enclosing element, a Hash value is a child element, an Array value is a
  # repeated child element and any other value is the element's content.
  class XML
    # @param file_name [String]
    # @return [Boolean] true when the name ends in ".xml" and is a regular file
    def self.exists?(file_name)
      file_name.end_with?(".xml") && File.file?(file_name)
    end

    # Parses an XML file into a hash.
    #
    # @param file_name [String]
    # @return [Hash, nil] parsed document, or nil when ::exists? is false
    def self.load(file_name)
      return nil unless exists?(file_name)

      # Block form of File.open so the handle is closed once parsing is done.
      xml = File.open(file_name) do |file|
        Nokogiri::XML(file) { |config| config.strict.noblanks }
      end
      Nori.new(:parser => :nokogiri, :advanced_typecasting => false).parse(xml.to_s)
    end

    # Serialises the hash with ::generate_xml and writes it to file_name.
    def self.save(file_name, hash)
      File.open(file_name, 'w') { |file| file.write(generate_xml(hash)) }
    end

    # Builds XML from a nested hash without modifying the hash.
    #
    # Called with only `data`, it creates the document: a single-key hash
    # supplies the root element, otherwise the root is named "root". The
    # method then calls itself with `parent` set to the builder to emit the
    # children.
    #
    # @param data [Hash] element description (see class comment)
    # @param parent [false, Nokogiri::XML::Builder] builder to emit into;
    #   false on the outermost call
    # @param opt [Hash] options handed to Nokogiri::XML::Builder.new
    # @return [String, nil] the XML text on the outermost call; nil when data
    #   is not a Hash or its string form is empty
    def self.generate_xml(data, parent = false, opt = {})
      return if data.to_s.empty?
      return unless data.is_a?(Hash)

      unless parent
        # assume that if the hash has a single key that it should be the root
        root, body = data.length == 1 ? data.first : ["root", data]
        attributes, children = split_attributes(body)
        builder = Nokogiri::XML::Builder.new(opt) do |xml|
          xml.send(root, attributes) { generate_xml(children, xml) }
        end
        return builder.to_xml
      end

      data.each do |label, value|
        case value
        when Hash
          attributes, children = split_attributes(value)
          parent.send(label, attributes) { generate_xml(children, parent) }
        when Array
          # Re-wrap each entry under the same label so it goes through the
          # Hash / scalar handling above and below.
          value.each { |entry| generate_xml({ label => entry }, parent) }
        else
          parent.send(label, value)
        end
      end
    end

    # Separates "@"-prefixed keys (returned without the prefix) from the
    # remaining keys, leaving the given hash untouched.
    #
    # @return [Array(Hash, Hash)] attributes, children
    def self.split_attributes(element)
      attributes = {}
      children = {}
      element.each do |label, value|
        if label.start_with?("@")
          attributes[label[1..-1]] = value
        else
          children[label] = value
        end
      end
      [attributes, children]
    end
    private_class_method :split_attributes
  end

end
data/lib/ruby_pager.rb ADDED
@@ -0,0 +1,18 @@
1
+ require "ruby_pager/version"
2
+ require "ruby_pager/xml"
3
+ require "ruby_pager/page"
4
+ require "ruby_pager/metadata"
5
+ require "ruby_pager/reading_order"
6
+ require "ruby_pager/text_region"
7
+ require "ruby_pager/text_line"
8
+ require "ruby_pager/coords"
9
+ require "ruby_pager/coord"
10
+ require "ruby_pager/image_data"
11
+ require "ruby_pager/image"
12
+ require "ruby_pager/extendmatrix2"
13
+ require "ruby_pager/histogram"
14
+ require "ruby_pager/gaussian_noise"
15
+ require "ruby_pager/application_logger"
16
+ module RubyPager
17
+ # Namespace of the ruby_pager library. Nothing is defined here; this file only loads the "ruby_pager/*" files required above.
18
+ end
data/no_lines.xml ADDED
@@ -0,0 +1,14 @@
1
+ <?xml version="1.0"?>
2
+ <PcGts xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd" pcGtsId="">
3
+ <Metadata>
4
+ <Creator>Ruby Page</Creator>
5
+ <Created>2018-02-08T16:46:58</Created>
6
+ <LastChange>2018-02-08T16:46:58</LastChange>
7
+ </Metadata>
8
+ <Page imageFilename="test.jpg" imageWidth="4715" imageHeight="3483">
9
+ <TextRegion id="empty_region" custom="">
10
+ <Coords points="0,0 0,4715 3483,4715 3483,0"/>
11
+ </TextRegion>
12
+ <ReadingOrder/>
13
+ </Page>
14
+ </PcGts>
data/no_regions.xml ADDED
@@ -0,0 +1,11 @@
1
+ <?xml version="1.0"?>
2
+ <PcGts xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd" pcGtsId="">
3
+ <Metadata>
4
+ <Creator>Ruby Page</Creator>
5
+ <Created>2018-02-08T16:46:58</Created>
6
+ <LastChange>2018-02-08T16:46:58</LastChange>
7
+ </Metadata>
8
+ <Page imageFilename="test.jpg" imageWidth="4715" imageHeight="3483">
9
+ <ReadingOrder/>
10
+ </Page>
11
+ </PcGts>
data/one_line.xml ADDED
@@ -0,0 +1,21 @@
1
+ <?xml version="1.0"?>
2
+ <PcGts xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15" xsi:schemaLocation="http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15 http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15/pagecontent.xsd" pcGtsId="">
3
+ <Metadata>
4
+ <Creator>Ruby Page</Creator>
5
+ <Created>2018-02-08T16:46:58</Created>
6
+ <LastChange>2018-02-08T16:46:58</LastChange>
7
+ </Metadata>
8
+ <Page imageFilename="test.jpg" imageWidth="4715" imageHeight="3483">
9
+ <TextRegion id="line_region" custom="">
10
+ <Coords points="0,0 0,4715 3483,4715 3483,0"/>
11
+ <TextLine id="line_0">
12
+ <Coords points="16,0 305,0 305,112 16,112" />
13
+ <Baseline points="" />
14
+ <TextEquiv>
15
+ <Unicode>QUE 0.0260531 SE 0.03597 CE 0.042705 A 0.00188892 LES 0.214666 TE 0.0261389 LE 0.0465788 DE 0.201388 PER 0.0331285 DES 0.0896861 ME 0.0215401 EN 0.0273062 ES 0.0502558 ET 0.0750531 PRE 0.0306363 NOB 2.82646e-05 NOI 9.50498e-05 RE 0.0464671 NE 0.026413 NOM 0.000249426 NON 0.000673057 NOS 0.00177215 NOT 5.88758e-05 NOU 0.00122318 NOV 1.05446e-05 NOZ 1.33179e-05</Unicode>
16
+ </TextEquiv>
17
+ </TextLine>
18
+ </TextRegion>
19
+ <ReadingOrder/>
20
+ </Page>
21
+ </PcGts>
@@ -0,0 +1,45 @@
1
# Make the gem's own lib directory loadable so the version file can be required.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "ruby_pager/version"

Gem::Specification.new do |spec|
  spec.name          = "ruby_pager"
  spec.version       = RubyPager::VERSION
  spec.authors       = ["Vicente Bosch"]
  spec.email         = ["vbosch@gmail.com"]

  spec.summary       = "command line tools to read and modify PAGE xmls"
  spec.description   = "command line tools to read and modify PAGE xmls"
  spec.homepage      = "https://github.com/vbosch/ruby_pager"
  spec.license       = "MIT"

  # Restrict `gem push` to a single host (rubygems.org). Specifications
  # without metadata support cannot express this restriction, so building
  # with them is refused.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  end
  spec.metadata["allowed_push_host"] = "https://rubygems.org"

  # Ship every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Dependencies are declared in the same order as before, without version constraints.
  %w[bundler rake rspec aruba simplecov].each do |gem_name|
    spec.add_development_dependency gem_name
  end

  %w[pry rgeo gosu rmagick trollop nori awesome_print nokogiri extendmatrix].each do |gem_name|
    spec.add_dependency gem_name
  end
end
data/test.jpg ADDED
Binary file