biblicit 2.0.3 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,263 +0,0 @@
1
package Omni::Omnicell;

# Configuration
use strict;
use warnings;

# Local libraries
use Omni::Config;
use Omni::Omnipara;

# Extern libraries
use XML::Twig;
use XML::Parser;

# Global variables: lookup tables shared with the other Omni::* modules
my $tag_list = $Omni::Config::tag_list;
my $att_list = $Omni::Config::att_list;
my $obj_list = $Omni::Config::obj_list;

###
# A cell object in Omnipage xml: a cell is an essential member of <table> object
#
# Observation from the original author: a <table> contains <gridTable> and
# <cell>. <gridTable> holds the base grid's coordinates, and each <cell> holds
# its position expressed in those coordinates plus various objects: <picture>,
# <para>, and possibly <dd> (the original author was not sure about <dd>).
#
# Do Hoang Nhat Huy, 14 Feb 2011
###

# Initialization
#
# Creates an empty cell. Every member stays undef until set_raw() is called,
# except '_self' (the object name taken from the configuration) and '_objs'
# (an empty list of the objects found inside the cell).
sub new
{
    my ($class) = @_;

    # Class members
    my $self = {
        '_self'        => $obj_list->{ 'OMNICELL' },
        '_raw'         => undef,
        '_content'     => undef,
        '_alignment'   => undef,
        '_row_from'    => undef,
        '_row_to'      => undef,
        '_col_from'    => undef,
        '_col_to'      => undef,
        '_v_alignment' => undef,
        # Objs: a cell can hold many objects
        '_objs'        => [],
    };

    return bless $self, $class;
}

# Stores the raw xml <cell> ... </cell> and parses it into this object.
#
#   $raw - XML string containing the <cell> element
#
# The parse results are written straight into $self by the twig handler, so
# two cells never share intermediate state. Returns nothing.
sub set_raw
{
    my ($self, $raw) = @_;

    # Save the raw xml <cell> ... </cell>
    $self->{ '_raw' } = $raw;

    # Only <cell> elements are built and handled
    my $cell_tag = $tag_list->{ 'CELL' };

    # XML::Twig
    my $twig = XML::Twig->new(
        twig_roots    => { $cell_tag => 1 },
        twig_handlers => { $cell_tag => sub { parse(@_, $self); } },
        pretty_print  => 'indented',
    );

    # Start the XML parsing
    $twig->parse($raw);
    $twig->purge;

    return;
}

# Returns the raw xml string given to set_raw(), or undef if none was given.
sub get_raw
{
    my ($self) = @_;
    return $self->{ '_raw' };
}

# Twig handler for a <cell> element.
#
#   $twig - the XML::Twig instance driving the parse
#   $node - the <cell> element
#   $self - the Omni::Omnicell object to fill in
#
# Reads the cell attributes, builds an Omni::Omnipara for every <para> child
# and concatenates their contents, one per line. Returns a true value so that
# any other handler registered on the twig still runs.
sub parse
{
    my ($twig, $node, $self) = @_;

    # At first, content is blank because there's no object
    my $content = "";
    my @objs    = ();

    # Get <cell> node attributes
    $self->{ '_alignment' }   = GetNodeAttr($node, $att_list->{ 'ALIGN' });
    $self->{ '_v_alignment' } = GetNodeAttr($node, $att_list->{ 'VALIGN' });

    # Grid coordinates: a missing attribute defaults to 0
    my %grid_key_for = (
        '_row_from' => 'GROWFROM',
        '_row_to'   => 'GROWTO',
        '_col_from' => 'GCOLFROM',
        '_col_to'   => 'GCOLTO',
    );
    for my $member (keys %grid_key_for)
    {
        my $value = GetNodeAttr($node, $att_list->{ $grid_key_for{ $member } });
        $self->{ $member } = ($value ne "") ? $value : 0;
    }

    # Check if there's any object: <para> and <picture>
    my $img_tag  = $tag_list->{ 'PICTURE' };
    my $para_tag = $tag_list->{ 'PARA' };

    for my $child ($node->children())
    {
        my $xpath = $child->path();

        # if this child is a <para> tag
        if ($xpath =~ m/\/$para_tag$/)
        {
            my $para = Omni::Omnipara->new();

            # Set raw content
            $para->set_raw($child->sprint());

            # Update object list
            push @objs, $para;

            # Update content
            $content .= _or_empty($para->get_content()) . "\n";
        }
        # if this child is a <picture> tag
        elsif ($xpath =~ m/\/$img_tag$/)
        {
            # TODO: <picture> children are recognised but not handled yet
        }
    }

    # Copy all objects into the existing list so that references handed out
    # by get_objs_ref() stay valid
    @{ $self->{ '_objs' } } = @objs;

    # Copy content
    $self->{ '_content' } = $content;

    return 1;
}

# Returns the object name of this cell (from the configuration).
sub get_name
{
    my ($self) = @_;
    return $self->{ '_self' };
}

# Returns a reference to the list of objects found inside the cell.
sub get_objs_ref
{
    my ($self) = @_;
    return $self->{ '_objs' };
}

# Returns the text content of the cell, one contained object per line.
sub get_content
{
    my ($self) = @_;
    return $self->{ '_content' };
}

# Returns the horizontal alignment attribute ("" when absent).
sub get_alignment
{
    my ($self) = @_;
    return $self->{ '_alignment' };
}

# Returns the first grid row covered by the cell (0 when absent).
sub get_grid_row_from
{
    my ($self) = @_;
    return $self->{ '_row_from' };
}

# Returns the last grid row covered by the cell (0 when absent).
sub get_grid_row_to
{
    my ($self) = @_;
    return $self->{ '_row_to' };
}

# Returns the first grid column covered by the cell (0 when absent).
sub get_grid_col_from
{
    my ($self) = @_;
    return $self->{ '_col_from' };
}

# Returns the last grid column covered by the cell (0 when absent).
sub get_grid_col_to
{
    my ($self) = @_;
    return $self->{ '_col_to' };
}

# Returns the vertical alignment attribute ("" when absent).
sub get_vertical_alignment
{
    my ($self) = @_;
    return $self->{ '_v_alignment' };
}

# Support functions

# Returns the value of attribute $attr on $node, or "" when it is absent.
# The test is on definedness, not truth, so a value of "0" is returned as "0"
# (a truth test used to turn it into "").
sub GetNodeAttr
{
    my ($node, $attr) = @_;
    my $value = $node->att($attr);
    return defined $value ? $value : "";
}

# Sets attribute $attr on $node to $value.
sub SetNodeAttr
{
    my ($node, $attr, $value) = @_;
    $node->set_att($attr, $value);
    return;
}

# Returns the text of $node.
sub GetNodeText
{
    my ($node) = @_;
    return $node->text;
}

# Replaces the text of $node with $value.
sub SetNodeText
{
    my ($node, $value) = @_;
    $node->set_text($value);
    return;
}

# Returns $text, or "" when $text is undef, so that concatenation never
# raises an "uninitialized value" warning.
sub _or_empty
{
    my ($text) = @_;
    return defined $text ? $text : "";
}

1;
@@ -1,292 +0,0 @@
1
package Omni::Omnicol;

# Configuration
use strict;
use warnings;

# Local libraries
use Omni::Config;
use Omni::Omnidd;
use Omni::Omnipara;
use Omni::Omniframe;
use Omni::Omnitable;

# Extern libraries
use XML::Twig;
use XML::Parser;

# Global variables: lookup tables shared with the other Omni::* modules
my $tag_list = $Omni::Config::tag_list;
my $att_list = $Omni::Config::att_list;
my $obj_list = $Omni::Config::obj_list;

###
# A column object in Omnipage xml: a column contains zero or many paragraphs
#
# Do Hoang Nhat Huy, 11 Jan 2011
###

# Initialization
#
# Creates an empty column. Every member stays undef until set_raw() is called,
# except '_self' (the object name taken from the configuration) and '_objs'
# (an empty list of the objects found inside the column).
sub new
{
    my ($class) = @_;

    # Class members
    my $self = {
        '_self'    => $obj_list->{ 'OMNICOL' },
        '_raw'     => undef,
        '_content' => undef,
        '_bottom'  => undef,
        '_top'     => undef,
        '_left'    => undef,
        '_right'   => undef,
        # Column: a column can have many paragraphs, dd, tables, or pictures
        '_objs'    => [],
    };

    return bless $self, $class;
}

# Stores the raw xml <column> ... </column> and parses it into this object.
#
#   $raw - XML string containing the <column> element
#
# Returns nothing.
sub set_raw
{
    my ($self, $raw) = @_;

    # Save the raw xml <column> ... </column>
    $self->{ '_raw' } = $raw;

    # Only <column> elements are built and handled
    my $column_tag = $tag_list->{ 'COLUMN' };

    # XML::Twig
    my $twig = XML::Twig->new(
        twig_roots    => { $column_tag => 1 },
        twig_handlers => { $column_tag => sub { parse(@_, $self); } },
        pretty_print  => 'indented',
    );

    # Start the XML parsing. Only the source is passed: the parser reads any
    # further arguments as option pairs.
    $twig->parse($raw);
    $twig->purge;

    return;
}

# Returns the raw xml string given to set_raw(), or undef if none was given.
sub get_raw
{
    my ($self) = @_;
    return $self->{ '_raw' };
}

# Twig handler for a <column> element.
#
#   $twig - the XML::Twig instance driving the parse
#   $node - the <column> element
#   $self - the Omni::Omnicol object to fill in; a reference to that object
#           reference is accepted as well, as the handler used to receive it
#
# Reads the column position attributes and collects the objects built from
# the <para>, <table>, <frame> and nested <column> children, concatenating
# their contents one per line. Returns a true value so that any other handler
# registered on the twig still runs.
sub parse
{
    my ($twig, $node, $self) = @_;

    # Backward compatibility: unwrap a reference to the object reference
    $self = ${ $self } if ref($self) eq 'REF';

    # At first, content is blank because there's no object
    my $content = "";
    my @objs    = ();

    # Get <column> node attributes
    my $bottom = GetNodeAttr($node, $att_list->{ 'BOTTOM' });
    my $top    = GetNodeAttr($node, $att_list->{ 'TOP' });
    my $left   = GetNodeAttr($node, $att_list->{ 'LEFT' });
    my $right  = GetNodeAttr($node, $att_list->{ 'RIGHT' });

    # Check if there's any paragraph, dd, table, or picture
    # The large number of possible children is due to the
    # ambiguous structure of the Omnipage XML
    my $dd_tag     = $tag_list->{ 'DD' };
    my $img_tag    = $tag_list->{ 'PICTURE' };
    my $para_tag   = $tag_list->{ 'PARA' };
    my $table_tag  = $tag_list->{ 'TABLE' };
    my $column_tag = $tag_list->{ 'COLUMN' };
    my $frame_tag  = $tag_list->{ 'FRAME' };

    # Parses $child into $obj, then records the object and its content
    my $add_object = sub {
        my ($obj, $child) = @_;

        # Set raw content
        $obj->set_raw($child->sprint());

        # Update object list
        push @objs, $obj;

        # Update content
        $content .= _or_empty($obj->get_content()) . "\n";
    };

    for my $child ($node->children())
    {
        my $xpath = $child->path();

        # if this child is a <para> tag
        if ($xpath =~ m/\/$para_tag$/)
        {
            $add_object->(Omni::Omnipara->new(), $child);
        }
        # if this child is a <dd> tag
        elsif ($xpath =~ m/\/$dd_tag$/)
        {
            # TODO: <dd> children are recognised but not handled yet
        }
        # if this child is a <table> tag
        elsif ($xpath =~ m/\/$table_tag$/)
        {
            $add_object->(Omni::Omnitable->new(), $child);
        }
        # if this child is a <picture> tag
        elsif ($xpath =~ m/\/$img_tag$/)
        {
            # TODO: <picture> children are recognised but not handled yet
        }
        # if this child is a <column> tag
        elsif ($xpath =~ m/\/$column_tag$/)
        {
            my $col = Omni::Omnicol->new();

            # Set raw content
            $col->set_raw($child->sprint());

            # Nested <column> is not allowed so we copy its objects into
            # this column instead of storing the column itself
            push @objs, @{ $col->get_objs_ref() };

            # Update content
            $content .= _or_empty($col->get_content()) . "\n";
        }
        # if this child is <frame>
        elsif ($xpath =~ m/\/$frame_tag$/)
        {
            $add_object->(Omni::Omniframe->new(), $child);
        }
    }

    # Copy information from temporary variables to class members
    $self->{ '_bottom' } = $bottom;
    $self->{ '_top' }    = $top;
    $self->{ '_left' }   = $left;
    $self->{ '_right' }  = $right;

    # Copy all objects into the existing list so that references handed out
    # by get_objs_ref() stay valid
    @{ $self->{ '_objs' } } = @objs;

    # Copy content
    $self->{ '_content' } = $content;

    return 1;
}

# Returns the object name of this column (from the configuration).
sub get_name
{
    my ($self) = @_;
    return $self->{ '_self' };
}

# Returns a reference to the list of objects found inside the column.
sub get_objs_ref
{
    my ($self) = @_;
    return $self->{ '_objs' };
}

# Returns the text content of the column, one contained object per line.
sub get_content
{
    my ($self) = @_;
    return $self->{ '_content' };
}

# Returns the bottom position attribute ("" when absent).
sub get_bottom_pos
{
    my ($self) = @_;
    return $self->{ '_bottom' };
}

# Returns the top position attribute ("" when absent).
sub get_top_pos
{
    my ($self) = @_;
    return $self->{ '_top' };
}

# Returns the left position attribute ("" when absent).
sub get_left_pos
{
    my ($self) = @_;
    return $self->{ '_left' };
}

# Returns the right position attribute ("" when absent).
sub get_right_pos
{
    my ($self) = @_;
    return $self->{ '_right' };
}

# Support functions

# Returns the value of attribute $attr on $node, or "" when it is absent.
# The test is on definedness, not truth, so a position of "0" is returned as
# "0" (a truth test used to turn it into "").
sub GetNodeAttr
{
    my ($node, $attr) = @_;
    my $value = $node->att($attr);
    return defined $value ? $value : "";
}

# Sets attribute $attr on $node to $value.
sub SetNodeAttr
{
    my ($node, $attr, $value) = @_;
    $node->set_att($attr, $value);
    return;
}

# Returns the text of $node.
sub GetNodeText
{
    my ($node) = @_;
    return $node->text;
}

# Replaces the text of $node with $value.
sub SetNodeText
{
    my ($node, $value) = @_;
    $node->set_text($value);
    return;
}

# Returns $text, or "" when $text is undef, so that concatenation never
# raises an "uninitialized value" warning.
sub _or_empty
{
    my ($text) = @_;
    return defined $text ? $text : "";
}

1;