biblicit 2.0.3 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,232 +0,0 @@
1
package Omni::Omnipara;

###
# A para object in Omnipage xml: a paragraph contains zero or many lines
#
# Do Hoang Nhat Huy, 09 Jan 2011
###

# Configuration
use strict;
use warnings;

# Local libraries
use Omni::Config;
use Omni::Omniword;
use Omni::Omnirun;
use Omni::Omniline;

# Extern libraries
use XML::Twig;
use XML::Parser;

# Global variables: lookup tables published by Omni::Config
my $tag_list = $Omni::Config::tag_list;
my $att_list = $Omni::Config::att_list;
my $obj_list = $Omni::Config::obj_list;

# Each pair is [ key into $att_list, name of the object member it fills ].
# Keeping the mapping in one place lets set_raw and parse agree on the
# list of attribute-backed members.
my @attr_members = (
    [ 'BOTTOM',   '_bottom'    ],
    [ 'TOP',      '_top'       ],
    [ 'LEFT',     '_left'      ],
    [ 'RIGHT',    '_right'     ],
    [ 'LANGUAGE', '_language'  ],
    [ 'ALIGN',    '_alignment' ],
    [ 'SPACEB',   '_spaceb'    ],
);

# Initialization
#
# Creates an empty paragraph object. All members except '_self' (the
# object name taken from $obj_list) and '_lines' (an empty array
# reference) start out undefined until set_raw is called.
sub new
{
    my ($class) = @_;

    # Class members
    my $self = { '_self'      => $obj_list->{ 'OMNIPARA' },
                 '_raw'       => undef,
                 '_content'   => undef,
                 '_bottom'    => undef,
                 '_top'       => undef,
                 '_left'      => undef,
                 '_right'     => undef,
                 '_language'  => undef,
                 '_alignment' => undef,
                 '_spaceb'    => undef,
                 # Lines: a paragraph can have multiple lines
                 '_lines'     => [] };

    bless $self, $class;
    return $self;
}

# Stores the raw xml <para> ... </para> string and parses it to fill in
# the attribute members, the list of line objects and the text content.
#
# Parameters: $raw - the xml string to parse.
# Returns:    the parsed content (the original code returned this value
#             implicitly, so it is kept for backward compatibility).
# Any error raised by XML::Twig while parsing is propagated to the caller.
sub set_raw
{
    my ($self, $raw) = @_;

    # Save the raw xml <para> ... </para>
    $self->{ '_raw' } = $raw;

    # Forget everything learnt from a previous call. Without this, an
    # input with no <para> element would leave the object holding data
    # that belongs to whatever was parsed before.
    foreach my $pair (@attr_members)
    {
        $self->{ $pair->[1] } = undef;
    }
    $self->{ '_content' } = undef;
    # Empty the existing array in place so that references handed out by
    # get_objs_ref keep pointing at this object's line list.
    @{ $self->{ '_lines' } } = ();

    # The handler is a closure over $self, so the parsed data goes straight
    # into this object instead of travelling through shared variables.
    my $para_tag = $tag_list->{ 'PARA' };
    my $on_para  = sub
    {
        my ($twig, $node) = @_;

        my $parsed = parse($twig, $node);

        # Copy all lines, then every scalar member
        my $lines = delete $parsed->{ '_lines' };
        @{ $self->{ '_lines' } } = @{ $lines };
        foreach my $member (keys %{ $parsed })
        {
            $self->{ $member } = $parsed->{ $member };
        }

        return 1;
    };

    # XML::Twig
    my $twig = XML::Twig->new( twig_roots    => { $para_tag => 1 },
                               twig_handlers => { $para_tag => $on_para },
                               pretty_print  => 'indented' );

    # Start the XML parsing
    $twig->parse($raw);
    $twig->purge;

    return $self->{ '_content' };
}

# Returns the raw xml string last given to set_raw.
sub get_raw
{
    my ($self) = @_;
    return $self->{ '_raw' };
}

# Extracts the paragraph data from one <para> node.
#
# Parameters: $twig - the XML::Twig object (unused, present because this
#                     sub has the twig handler signature),
#             $node - the <para> element.
# Returns:    a hash reference keyed by object member name: one entry per
#             attribute-backed member, '_content' (the content of every
#             line, each followed by "\n") and '_lines' (array reference
#             of Omni::Omniline objects, in the order returned by
#             descendants).
sub parse
{
    my ($twig, $node) = @_;

    # At first, content is blank because there's no line
    my %para = ( '_content' => "",
                 '_lines'   => [] );

    # Get <para> node attributes
    foreach my $pair (@attr_members)
    {
        my ($att_key, $member) = @{ $pair };
        $para{ $member } = GetNodeAttr($node, $att_list->{ $att_key });
    }

    # Check if there's any bullet
    my $bullet     = $node->first_child( $tag_list->{ 'BULLET' } );
    my $has_bullet = (defined $bullet) ? 1 : 0;

    # Check if there's any line
    foreach my $ln ($node->descendants( $tag_list->{ 'LINE' } ))
    {
        my $line = Omni::Omniline->new();

        # Set raw content
        $line->set_raw($ln->sprint());

        # Set bullet if needed: every line of a bulleted paragraph is flagged
        if ($has_bullet == 1) { $line->set_bullet('true'); }

        # Update line list
        push @{ $para{ '_lines' } }, $line;

        # Update content; an undefined line content counts as empty
        my $text = $line->get_content();
        $para{ '_content' } .= ((defined $text) ? $text : "") . "\n";
    }

    return \%para;
}

# Returns the object name taken from $obj_list at construction time.
sub get_name
{
    my ($self) = @_;
    return $self->{ '_self' };
}

# Returns a reference to the array of line objects of this paragraph.
sub get_objs_ref
{
    my ($self) = @_;
    return $self->{ '_lines' };
}

# Returns the text content built by the last set_raw call.
sub get_content
{
    my ($self) = @_;
    return $self->{ '_content' };
}

# The accessors below return the value of the corresponding <para>
# attribute as read by the last set_raw call ("" when the attribute was
# absent, undef when nothing has been parsed).
sub get_bottom_pos
{
    my ($self) = @_;
    return $self->{ '_bottom' };
}

sub get_top_pos
{
    my ($self) = @_;
    return $self->{ '_top' };
}

sub get_left_pos
{
    my ($self) = @_;
    return $self->{ '_left' };
}

sub get_right_pos
{
    my ($self) = @_;
    return $self->{ '_right' };
}

sub get_language
{
    my ($self) = @_;
    return $self->{ '_language' };
}

sub get_alignment
{
    my ($self) = @_;
    return $self->{ '_alignment' };
}

sub get_space_before
{
    my ($self) = @_;
    return $self->{ '_spaceb' };
}

# Support functions

# Returns the value of attribute $attr on $node, or "" when the attribute
# is not set. A defined check is used rather than a truth test so that a
# legitimate value of "0" is returned as is.
sub GetNodeAttr
{
    my ($node, $attr) = @_;
    my $value = $node->att($attr);
    return ((defined $value) ? $value : "");
}

# Sets attribute $attr on $node to $value.
sub SetNodeAttr
{
    my ($node, $attr, $value) = @_;
    $node->set_att($attr, $value);
}

# Returns the text of $node.
sub GetNodeText
{
    my ($node) = @_;
    return $node->text;
}

# Replaces the text of $node with $value.
sub SetNodeText
{
    my ($node, $value) = @_;
    $node->set_text($value);
}

1;
@@ -1,303 +0,0 @@
1
package Omni::Omnirun;

###
# A run object in Omnipage xml: a run contains zero or many words
#
# Do Hoang Nhat Huy, 07 Jan 2011
###

# Configuration
use strict;
use warnings;

# Local libraries
use Omni::Config;
use Omni::Omniword;

# Extern libraries
use XML::Twig;
use XML::Parser;

# Global variables: lookup tables published by Omni::Config
my $tag_list = $Omni::Config::tag_list;
my $att_list = $Omni::Config::att_list;
my $obj_list = $Omni::Config::obj_list;

# Each pair is [ key into $att_list, name of the object member it fills ].
# Keeping the mapping in one place lets set_raw and parse agree on the
# list of attribute-backed members.
my @attr_members = (
    [ 'FONTFACE',   '_font_face'   ],
    [ 'FONTFAMILY', '_font_family' ],
    [ 'FONTPITCH',  '_font_pitch'  ],
    [ 'FONTSIZE',   '_font_size'   ],
    [ 'SPACING',    '_spacing'     ],
    [ 'SUSCRIPT',   '_su_script'   ],  # sub-script or super-script
    [ 'UNDERLINE',  '_underline'   ],
    [ 'BOLD',       '_bold'        ],
    [ 'ITALIC',     '_italic'      ],
);

# Initialization
#
# Creates an empty run object. All members except '_self' (the object
# name taken from $obj_list) and '_words' (an empty array reference)
# start out undefined until set_raw is called.
sub new
{
    my ($class) = @_;

    # Class members
    my $self = { '_self'        => $obj_list->{ 'OMNIRUN' },
                 '_raw'         => undef,
                 '_content'     => undef,
                 '_font_face'   => undef,
                 '_font_family' => undef,
                 '_font_pitch'  => undef,
                 '_font_size'   => undef,
                 '_spacing'     => undef,
                 '_su_script'   => undef,  # sub-script or super-script
                 '_underline'   => undef,
                 '_bold'        => undef,
                 '_italic'      => undef,
                 # Words: a run can have multiple words
                 '_words'       => [] };

    bless $self, $class;
    return $self;
}

# Stores the raw xml <run> ... </run> string and parses it to fill in the
# attribute members, the list of word objects and the text content.
#
# Parameters: $raw - the xml string to parse.
# Returns:    the parsed content (the original code returned this value
#             implicitly, so it is kept for backward compatibility).
# Any error raised by XML::Twig while parsing is propagated to the caller.
sub set_raw
{
    my ($self, $raw) = @_;

    # Save the raw xml <run> ... </run>
    $self->{ '_raw' } = $raw;

    # Forget everything learnt from a previous call. Without this, an
    # input with no <run> element would leave the object holding data
    # that belongs to whatever was parsed before.
    foreach my $pair (@attr_members)
    {
        $self->{ $pair->[1] } = undef;
    }
    $self->{ '_content' } = undef;
    # Empty the existing array in place so that references handed out by
    # get_objs_ref keep pointing at this object's word list.
    @{ $self->{ '_words' } } = ();

    # The handler is a closure over $self, so the parsed data goes straight
    # into this object instead of travelling through shared variables.
    my $run_tag = $tag_list->{ 'RUN' };
    my $on_run  = sub
    {
        my ($twig, $node) = @_;

        my $parsed = parse($twig, $node);

        # Copy all words, then every scalar member
        my $words = delete $parsed->{ '_words' };
        @{ $self->{ '_words' } } = @{ $words };
        foreach my $member (keys %{ $parsed })
        {
            $self->{ $member } = $parsed->{ $member };
        }

        return 1;
    };

    # XML::Twig
    my $twig = XML::Twig->new( twig_roots    => { $run_tag => 1 },
                               twig_handlers => { $run_tag => $on_run },
                               pretty_print  => 'indented' );

    # Start the XML parsing
    $twig->parse($raw);
    $twig->purge;

    return $self->{ '_content' };
}

# Returns the raw xml string last given to set_raw.
sub get_raw
{
    my ($self) = @_;
    return $self->{ '_raw' };
}

# Extracts the run data from one <run> node.
#
# Parameters: $twig - the XML::Twig object (unused, present because this
#                     sub has the twig handler signature),
#             $node - the <run> element.
# Returns:    a hash reference keyed by object member name: one entry per
#             attribute-backed member, '_content' (the text of the run)
#             and '_words' (array reference of Omni::Omniword objects in
#             document order).
sub parse
{
    my ($twig, $node) = @_;

    # At first, content is blank because there's no word
    my %run = ( '_content' => "",
                '_words'   => [] );

    # Get <run> node attributes
    foreach my $pair (@attr_members)
    {
        my ($att_key, $member) = @{ $pair };
        $run{ $member } = GetNodeAttr($node, $att_list->{ $att_key });
    }

    # Check if there's any child
    my $child = $node->first_child();

    # #PCDATA$ is the returned path from XML::Twig if $child is data content
    if ((defined $child) && ($child->path() =~ m/#PCDATA$/))
    {
        # The run starts with plain text: take the text of the whole node,
        # stripped of leading and trailing white space
        my $content = GetNodeText($node);
        $content =~ s/^\s+|\s+$//g;

        # Save the content
        $run{ '_content' } .= $content;
    }
    else
    {
        # Some type of separator. The tag names are quoted with \Q...\E
        # below so they are matched literally, not as regex syntax.
        my $space_tag = $tag_list->{ 'SPACE' };
        my $tab_tag   = $tag_list->{ 'TAB' };
        my $word_tag  = $tag_list->{ 'WORD' };

        # Get every word in the <run> together with <space> and <tab> ...
        foreach my $obj ($node->children())
        {
            my $xpath = $obj->path();

            # if this child is <wd>
            if ($xpath =~ m/\/\Q$word_tag\E$/)
            {
                my $word = Omni::Omniword->new();

                # Set raw content
                $word->set_raw($obj->sprint);

                # Update word list
                push @{ $run{ '_words' } }, $word;

                # Update content; an undefined word content counts as empty
                my $text = $word->get_content;
                $run{ '_content' } .= (defined $text) ? $text : "";
            }
            # if this child is <space>
            elsif ($xpath =~ m/\/\Q$space_tag\E$/)
            {
                $run{ '_content' } .= " ";
            }
            # if this child is <tab>
            elsif ($xpath =~ m/\/\Q$tab_tag\E$/)
            {
                $run{ '_content' } .= "\t";
            }
            # A <nl> child is deliberately ignored: the branch that
            # appended "\n" for it was already disabled in the original code.
        }
    }

    return \%run;
}

# Appends $word to the list of words of this run. The content is not
# updated.
sub add_word
{
    my ($self, $word) = @_;
    push @{ $self->{ '_words' } }, $word;
}

# Returns the object name taken from $obj_list at construction time.
sub get_name
{
    my ($self) = @_;
    return $self->{ '_self' };
}

# Returns a reference to the array of word objects of this run.
sub get_objs_ref
{
    my ($self) = @_;
    return $self->{ '_words' };
}

# Returns the text content built by the last set_raw call.
sub get_content
{
    my ($self) = @_;
    return $self->{ '_content' };
}

# The accessors below return the value of the corresponding <run>
# attribute as read by the last set_raw call ("" when the attribute was
# absent, undef when nothing has been parsed).
sub get_font_face
{
    my ($self) = @_;
    return $self->{ '_font_face' };
}

sub get_font_family
{
    my ($self) = @_;
    return $self->{ '_font_family' };
}

sub get_font_pitch
{
    my ($self) = @_;
    return $self->{ '_font_pitch' };
}

sub get_font_size
{
    my ($self) = @_;
    return $self->{ '_font_size' };
}

sub get_spacing
{
    my ($self) = @_;
    return $self->{ '_spacing' };
}

sub get_suscript
{
    my ($self) = @_;
    return $self->{ '_su_script' };
}

sub get_underline
{
    my ($self) = @_;
    return $self->{ '_underline' };
}

sub get_bold
{
    my ($self) = @_;
    return $self->{ '_bold' };
}

sub get_italic
{
    my ($self) = @_;
    return $self->{ '_italic' };
}

# Support functions

# Returns the value of attribute $attr on $node, or "" when the attribute
# is not set. A defined check is used rather than a truth test so that a
# legitimate value of "0" is returned as is.
sub GetNodeAttr
{
    my ($node, $attr) = @_;
    my $value = $node->att($attr);
    return ((defined $value) ? $value : "");
}

# Sets attribute $attr on $node to $value.
sub SetNodeAttr
{
    my ($node, $attr, $value) = @_;
    $node->set_att($attr, $value);
}

# Returns the text of $node.
sub GetNodeText
{
    my ($node) = @_;
    return $node->text;
}

# Replaces the text of $node with $value.
sub SetNodeText
{
    my ($node, $value) = @_;
    $node->set_text($value);
}

1;