biblicit 2.0.3 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,328 +0,0 @@
1
- package Omni::Omnidd;
2
-
3
- # Configuration
4
- use strict;
5
-
6
- # Local libraries
7
- use Omni::Config;
8
- use Omni::Omnicol;
9
- use Omni::Omnipara;
10
- use Omni::Omniframe;
11
- use Omni::Omnitable;
12
-
13
- # Extern libraries
14
- use XML::Twig;
15
- use XML::Parser;
16
-
17
- # Global variables
18
- my $tag_list = $Omni::Config::tag_list;
19
- my $att_list = $Omni::Config::att_list;
20
- my $obj_list = $Omni::Config::obj_list;
21
-
22
- # Temporary variables
23
-
24
- ###
25
- # A dd object in Omnipage xml: a dd, don't know what it is, but its structure
26
- # is quite similar to a column
27
- #
28
- # Do Hoang Nhat Huy, 11 Jan 2011
29
- ###
30
- # Initialization
31
# Constructor: builds an empty Omni::Omnidd object blessed into $class.
#
# '_self' is the 'OMNIDD' entry of $obj_list. The raw XML, the text content
# and all eight geometry fields start out undefined, and '_objs' starts as a
# reference to a fresh empty array (a dd can hold tables, pictures, paragraphs
# and columns).
sub new
{
    my $class = shift;

    my %fields = (
        '_self'        => $obj_list->{ 'OMNIDD' },
        '_raw'         => undef,
        '_content'     => undef,
        '_bottom'      => undef,
        '_top'         => undef,
        '_left'        => undef,
        '_right'       => undef,
        '_bottom_dist' => undef,
        '_top_dist'    => undef,
        '_left_dist'   => undef,
        '_right_dist'  => undef,
        '_objs'        => [],
    );

    return bless \%fields, $class;
}
55
-
56
- #
57
# Store the raw XML of a <dd> element and parse it to populate this object.
#
# Parameters:
#   $self - the Omni::Omnidd object to fill in
#   $raw  - the XML source handed to XML::Twig's parse()
#
# The handler registered for the DD tag calls parse() with a reference to
# $self appended to the handler arguments; parse() writes the extracted
# fields back through that reference.
sub set_raw
{
    my ($self, $raw) = @_;

    # Keep the raw xml <dd> ... </dd>
    $self->{ '_raw' } = $raw;

    my $dd_tag = $tag_list->{ 'DD' };

    # Class->new is used rather than "new Class": this package defines its
    # own new(), which makes the indirect-object form ambiguous to perl.
    my $twig = XML::Twig->new( twig_roots    => { $dd_tag => 1 },
                               twig_handlers => { $dd_tag => sub { parse(@_, \$self); } },
                               pretty_print  => 'indented' );

    # Only the source is passed. The handler closure already carries $self;
    # extra arguments to parse() are treated as parser options, not user data.
    $twig->parse($raw);
    $twig->purge;
}
77
-
78
# Accessor: returns the value stored under '_raw' (set by set_raw, undef on a
# freshly constructed object).
sub get_raw
{
    my $self = shift;
    return $self->{ '_raw' };
}
83
-
84
# Twig handler for a <dd> element (registered by set_raw).
#
# Parameters:
#   $twig - the XML::Twig object (unused here)
#   $node - the <dd> element being handled
#   $self - a REFERENCE to the scalar holding the Omni::Omnidd object, hence
#           the $$self dereferences below
#
# Reads the eight geometry attributes of the node, walks its direct children
# and builds one object per recognised child:
#   <para>  -> Omni::Omnipara, <table> -> Omni::Omnitable,
#   <frame> -> Omni::Omniframe,
#   <dd>    -> parsed as a nested Omni::Omnidd whose objects are flattened
#              into this dd's list,
#   <picture> and <column> are recognised but currently ignored.
# Each object's content, followed by a newline, is appended to the dd content.
sub parse
{
    my ($twig, $node, $self) = @_;

    # Text and objects are accumulated locally and stored at the very end
    my $content = "";
    my @objs    = ();

    # Geometry attributes of the <dd> node: [ object field, att_list key ]
    my @geometry = ( [ '_bottom',      'BOTTOM'     ],
                     [ '_top',         'TOP'        ],
                     [ '_left',        'LEFT'       ],
                     [ '_right',       'RIGHT'      ],
                     [ '_bottom_dist', 'BOTTOMDIST' ],
                     [ '_top_dist',    'TOPDIST'    ],
                     [ '_left_dist',   'LEFTDIST'   ],
                     [ '_right_dist',  'RIGHTDIST'  ] );

    my %value_of = ();
    foreach my $pair (@geometry)
    {
        my ($field, $att_key) = @{ $pair };
        $value_of{ $field } = GetNodeAttr($node, $att_list->{ $att_key });
    }

    # The large number of possible children is due to the ambiguous
    # structure of the Omnipage XML
    my $dd_tag     = $tag_list->{ 'DD' };
    my $img_tag    = $tag_list->{ 'PICTURE' };
    my $para_tag   = $tag_list->{ 'PARA' };
    my $table_tag  = $tag_list->{ 'TABLE' };
    my $column_tag = $tag_list->{ 'COLUMN' };
    my $frame_tag  = $tag_list->{ 'FRAME' };

    # Build an object of the given class from a child element's XML.
    # Class->new is used rather than "new Class": this package defines its
    # own new(), which makes the indirect-object form ambiguous to perl.
    my $build = sub
    {
        my ($class, $element) = @_;
        my $obj = $class->new();
        $obj->set_raw($element->sprint());
        return $obj;
    };

    # Visit the direct children in document order
    foreach my $child ($node->children())
    {
        my $xpath = $child->path();

        # <para>
        if ($xpath =~ m/\/$para_tag$/)
        {
            my $para = $build->('Omni::Omnipara', $child);
            push @objs, $para;
            $content .= $para->get_content() . "\n";
        }
        # <dd>: nested <dd> is not allowed, so its objects are copied up
        elsif ($xpath =~ m/\/$dd_tag$/)
        {
            my $nested = $build->('Omni::Omnidd', $child);
            push @objs, @{ $nested->get_objs_ref() };
            $content .= $nested->get_content() . "\n";
        }
        # <table>
        elsif ($xpath =~ m/\/$table_tag$/)
        {
            my $table = $build->('Omni::Omnitable', $child);
            push @objs, $table;
            $content .= $table->get_content() . "\n";
        }
        # <picture>: recognised, but no object is built for it yet
        elsif ($xpath =~ m/\/$img_tag$/)
        {
        }
        # <column>: recognised, but no object is built for it yet
        elsif ($xpath =~ m/\/$column_tag$/)
        {
        }
        # <frame>
        elsif ($xpath =~ m/\/$frame_tag$/)
        {
            my $frame = $build->('Omni::Omniframe', $child);
            push @objs, $frame;
            $content .= $frame->get_content() . "\n";
        }
    }

    # Copy information from the temporary variables to the class members
    foreach my $field (keys %value_of)
    {
        $$self->{ $field } = $value_of{ $field };
    }

    # Copy content
    $$self->{ '_content' } = $content;

    # Copy all objects into the existing array so the '_objs' reference
    # keeps its identity. This stays the last statement so that the value
    # the handler returns is unchanged.
    @{ $$self->{ '_objs' } } = @objs;
}
236
-
237
# Accessor: returns the object-type name stored under '_self' by new().
sub get_name
{
    my $self = shift;
    return $self->{ '_self' };
}
242
-
243
# Accessor: returns the array reference stored under '_objs' (the objects
# collected by parse). The reference itself is returned, not a copy.
sub get_objs_ref
{
    my $self = shift;
    return $self->{ '_objs' };
}
248
-
249
# Accessor: returns the text stored under '_content' (assembled by parse,
# undef until then).
sub get_content
{
    my $self = shift;
    return $self->{ '_content' };
}
254
-
255
# Accessor: returns '_bottom', which parse fills from the node's BOTTOM attribute.
sub get_bottom_pos
{
    my $self = shift;
    return $self->{ '_bottom' };
}
260
-
261
# Accessor: returns '_top', which parse fills from the node's TOP attribute.
sub get_top_pos
{
    my $self = shift;
    return $self->{ '_top' };
}
266
-
267
# Accessor: returns '_left', which parse fills from the node's LEFT attribute.
sub get_left_pos
{
    my $self = shift;
    return $self->{ '_left' };
}
272
-
273
# Accessor: returns '_right', which parse fills from the node's RIGHT attribute.
sub get_right_pos
{
    my $self = shift;
    return $self->{ '_right' };
}
278
-
279
# Accessor: returns '_bottom_dist', which parse fills from the node's
# BOTTOMDIST attribute.
sub get_bottom_distance
{
    my $self = shift;
    return $self->{ '_bottom_dist' };
}
284
-
285
# Accessor: returns '_top_dist', which parse fills from the node's TOPDIST
# attribute.
sub get_top_distance
{
    my $self = shift;
    return $self->{ '_top_dist' };
}
290
-
291
# Accessor: returns '_left_dist', which parse fills from the node's LEFTDIST
# attribute.
sub get_left_distance
{
    my $self = shift;
    return $self->{ '_left_dist' };
}
296
-
297
# Accessor: returns '_right_dist', which parse fills from the node's
# RIGHTDIST attribute.
sub get_right_distance
{
    my $self = shift;
    return $self->{ '_right_dist' };
}
302
-
303
- # Support functions
304
# Return the value of attribute $attr on $node, or "" when the node has no
# such attribute.
#
# Parameters:
#   $node - an object providing an att($name) method
#   $attr - the attribute name
#
# The test is on definedness rather than truth, so a legitimate value of "0"
# (for instance a coordinate at the page edge) is returned as "0" instead of
# being replaced by "". att() is also called only once.
sub GetNodeAttr
{
    my ($node, $attr) = @_;

    my $value = $node->att($attr);
    return defined $value ? $value : "";
}
309
-
310
# Set attribute $attr of $node to $value by delegating to the node's
# set_att method; whatever set_att returns is passed back to the caller.
sub SetNodeAttr
{
    my ($element, $name, $new_value) = @_;
    return $element->set_att($name, $new_value);
}
315
-
316
# Return the text of $node, as reported by the node's text method.
sub GetNodeText
{
    my $element = shift;
    return $element->text();
}
321
-
322
# Replace the text of $node with $value by delegating to the node's set_text
# method; whatever set_text returns is passed back to the caller.
sub SetNodeText
{
    my ($element, $new_text) = @_;
    return $element->set_text($new_text);
}
327
-
328
- 1;
@@ -1,153 +0,0 @@
1
- package Omni::Omnidoc;
2
-
3
- # Configuration
4
- use strict;
5
-
6
- # Local libraries
7
- use Omni::Config;
8
- use Omni::Omnipage;
9
-
10
- # Extern libraries
11
- use XML::Twig;
12
- use XML::Parser;
13
-
14
- # Global variables
15
- my $tag_list = $Omni::Config::tag_list;
16
- my $att_list = $Omni::Config::att_list;
17
- my $obj_list = $Omni::Config::obj_list;
18
-
19
- # Temporary variables
20
- my $tmp_content = undef;
21
- my @tmp_pages = ();
22
-
23
- ###
24
- # A whole document object in Omnipage xml: a document contains many pages
25
- #
26
- # Do Hoang Nhat Huy, 09 Jan 2011
27
- ###
28
- # Initialization
29
# Constructor: builds an empty Omni::Omnidoc object blessed into $class.
#
# '_self' is the 'OMNIDOC' entry of $obj_list. The raw XML and the text
# content start out undefined, and '_pages' starts as a reference to a fresh
# empty array (a document contains many pages).
sub new
{
    my $class = shift;

    my %fields = (
        '_self'    => $obj_list->{ 'OMNIDOC' },
        '_raw'     => undef,
        '_content' => undef,
        '_pages'   => [],
    );

    return bless \%fields, $class;
}
45
-
46
- #
47
# Store the raw XML of a <document> element and parse it to populate this
# object.
#
# Parameters:
#   $self - the Omni::Omnidoc object to fill in
#   $raw  - the XML source handed to XML::Twig's parse()
#
# parse() (the handler registered for the DOCUMENT tag) accumulates its
# results in the file-level $tmp_content and @tmp_pages, which are reset here
# before parsing and copied into the object afterwards.
sub set_raw
{
    my ($self, $raw) = @_;

    # Keep the raw xml <document> ... </document>
    $self->{ '_raw' } = $raw;

    # Start from a blank content and an empty page list
    $tmp_content = "";
    @tmp_pages   = ();

    my $document_tag = $tag_list->{ 'DOCUMENT' };

    # Class->new is used rather than "new Class": this package defines its
    # own new(), which makes the indirect-object form ambiguous to perl.
    my $twig = XML::Twig->new( twig_roots    => { $document_tag => 1 },
                               twig_handlers => { $document_tag => \&parse },
                               pretty_print  => 'indented' );

    # Run the XML parsing; the handler fills the temporary variables
    $twig->parse($raw);
    $twig->purge;

    # Copy the pages into the existing array so the '_pages' reference keeps
    # its identity
    @{ $self->{ '_pages' } } = @tmp_pages;

    # Copy content
    $self->{ '_content' } = $tmp_content;
}
80
-
81
# Accessor: returns the value stored under '_raw' (set by set_raw, undef on a
# freshly constructed object).
sub get_raw
{
    my $self = shift;
    return $self->{ '_raw' };
}
86
-
87
# Twig handler for a <document> element (registered by set_raw).
#
# Parameters:
#   $twig - the XML::Twig object (unused here)
#   $node - the <document> element being handled
#
# For every descendant of $node matching the PAGE tag, builds an
# Omni::Omnipage from the element's XML, appends it to the file-level
# @tmp_pages and appends its content plus a newline to the file-level
# $tmp_content. set_raw copies both into the object once parsing is done.
sub parse
{
    my ($twig, $node) = @_;

    foreach my $page_node ($node->descendants( $tag_list->{ 'PAGE' } ))
    {
        # Class->new is used rather than "new Class": this package defines
        # its own new(), which makes the indirect-object form ambiguous.
        my $page = Omni::Omnipage->new();

        # Set raw content
        $page->set_raw($page_node->sprint());

        # Update page list
        push @tmp_pages, $page;

        # Update content
        $tmp_content .= $page->get_content() . "\n";
    }
}
109
-
110
# Accessor: returns the object-type name stored under '_self' by new().
sub get_name
{
    my $self = shift;
    return $self->{ '_self' };
}
115
-
116
# Accessor: returns the array reference stored under '_pages' (the page
# objects collected while parsing). The reference itself is returned, not a
# copy.
sub get_objs_ref
{
    my $self = shift;
    return $self->{ '_pages' };
}
121
-
122
# Accessor: returns the text stored under '_content' (filled in by set_raw,
# undef until then).
sub get_content
{
    my $self = shift;
    return $self->{ '_content' };
}
127
-
128
- # Support functions
129
# Return the value of attribute $attr on $node, or "" when the node has no
# such attribute.
#
# Parameters:
#   $node - an object providing an att($name) method
#   $attr - the attribute name
#
# The test is on definedness rather than truth, so a legitimate value of "0"
# is returned as "0" instead of being replaced by "". att() is also called
# only once.
sub GetNodeAttr
{
    my ($node, $attr) = @_;

    my $value = $node->att($attr);
    return defined $value ? $value : "";
}
134
-
135
# Set attribute $attr of $node to $value by delegating to the node's
# set_att method; whatever set_att returns is passed back to the caller.
sub SetNodeAttr
{
    my ($element, $name, $new_value) = @_;
    return $element->set_att($name, $new_value);
}
140
-
141
# Return the text of $node, as reported by the node's text method.
sub GetNodeText
{
    my $element = shift;
    return $element->text();
}
146
-
147
# Replace the text of $node with $value by delegating to the node's set_text
# method; whatever set_text returns is passed back to the caller.
sub SetNodeText
{
    my ($element, $new_text) = @_;
    return $element->set_text($new_text);
}
152
-
153
- 1;
@@ -1,223 +0,0 @@
1
- package Omni::Omniframe;
2
-
3
- # Configuration
4
- use strict;
5
-
6
- # Local libraries
7
- use Omni::Config;
8
- use Omni::Omnipara;
9
- use Omni::Omnitable;
10
-
11
- # Extern libraries
12
- use XML::Twig;
13
- use XML::Parser;
14
-
15
- # Global variables
16
- my $tag_list = $Omni::Config::tag_list;
17
- my $att_list = $Omni::Config::att_list;
18
- my $obj_list = $Omni::Config::obj_list;
19
-
20
- ###
21
- # A frame object in Omnipage xml: a frame contains paragraphs
22
- # (this is my observation and can be invalid under close scrutiny
23
- # of new "evidence")
24
- #
25
- # Do Hoang Nhat Huy, 23 Feb 2011
26
- ###
27
- # Initialization
28
# Constructor: builds an empty Omni::Omniframe object blessed into $class.
#
# '_self' is the 'OMNIFRAME' entry of $obj_list. The raw XML, the text
# content and the four position fields start out undefined, and '_objs'
# starts as a reference to a fresh empty array.
sub new
{
    my $class = shift;

    my %fields = (
        '_self'    => $obj_list->{ 'OMNIFRAME' },
        '_raw'     => undef,
        '_content' => undef,
        '_bottom'  => undef,
        '_top'     => undef,
        '_left'    => undef,
        '_right'   => undef,
        '_objs'    => [],
    );

    return bless \%fields, $class;
}
48
-
49
# Store the raw XML of a <frame> element and parse it to populate this
# object.
#
# Parameters:
#   $self - the Omni::Omniframe object to fill in
#   $raw  - the XML source handed to XML::Twig's parse()
#
# The handler registered for the FRAME tag calls parse() with a reference to
# $self appended to the handler arguments; parse() writes the extracted
# fields back through that reference.
sub set_raw
{
    my ($self, $raw) = @_;

    # Keep the raw xml <frame> ... </frame>
    $self->{ '_raw' } = $raw;

    my $frame_tag = $tag_list->{ 'FRAME' };

    # Class->new is used rather than "new Class": this package defines its
    # own new(), which makes the indirect-object form ambiguous to perl.
    my $twig = XML::Twig->new( twig_roots    => { $frame_tag => 1 },
                               twig_handlers => { $frame_tag => sub { parse(@_, \$self); } },
                               pretty_print  => 'indented' );

    # Only the source is passed. The handler closure already carries $self;
    # extra arguments to parse() are treated as parser options, not user data.
    $twig->parse($raw);
    $twig->purge;
}
69
-
70
# Accessor: returns the value stored under '_raw' (set by set_raw, undef on a
# freshly constructed object).
sub get_raw
{
    my $self = shift;
    return $self->{ '_raw' };
}
75
-
76
# Twig handler for a <frame> element (registered by set_raw).
#
# Parameters:
#   $twig - the XML::Twig object (unused here)
#   $node - the <frame> element being handled
#   $self - a REFERENCE to the scalar holding the Omni::Omniframe object,
#           hence the $$self dereferences below
#
# Reads the four position attributes of the node, walks its direct children
# and builds an Omni::Omnipara for each <para> and an Omni::Omnitable for
# each <table>; other children are skipped. Each object's content, followed
# by a newline, is appended to the frame content.
sub parse
{
    my ($twig, $node, $self) = @_;

    # Text and objects are accumulated locally and stored at the very end
    my $content = "";
    my @objs    = ();

    # Position attributes of the <frame> node
    my $bottom = GetNodeAttr($node, $att_list->{ 'BOTTOM' });
    my $top    = GetNodeAttr($node, $att_list->{ 'TOP' });
    my $left   = GetNodeAttr($node, $att_list->{ 'LEFT' });
    my $right  = GetNodeAttr($node, $att_list->{ 'RIGHT' });

    my $para_tag  = $tag_list->{ 'PARA' };
    my $table_tag = $tag_list->{ 'TABLE' };

    # Visit the direct children in document order
    foreach my $child ($node->children())
    {
        my $xpath = $child->path();

        # Pick the class matching the child's tag; <para> is tested first.
        # Class names are used with ->new rather than "new Class": this
        # package defines its own new(), which makes the indirect-object
        # form ambiguous to perl.
        my $class = undef;
        if ($xpath =~ m/\/$para_tag$/)
        {
            $class = 'Omni::Omnipara';
        }
        elsif ($xpath =~ m/\/$table_tag$/)
        {
            $class = 'Omni::Omnitable';
        }

        # Anything else is not part of the frame's object list
        next if (! defined $class);

        my $obj = $class->new();

        # Set raw content
        $obj->set_raw($child->sprint());

        # Update object list
        push @objs, $obj;

        # Update content
        $content .= $obj->get_content() . "\n";
    }

    # Copy information from the temporary variables to the class members
    $$self->{ '_bottom' } = $bottom;
    $$self->{ '_top' }    = $top;
    $$self->{ '_left' }   = $left;
    $$self->{ '_right' }  = $right;

    # Copy all objects into the existing array so the '_objs' reference
    # keeps its identity
    @{ $$self->{ '_objs' } } = @objs;

    # Copy content. This stays the last statement so that the value the
    # handler returns is unchanged.
    $$self->{ '_content' } = $content;
}
155
-
156
# Accessor: returns the object-type name stored under '_self' by new().
sub get_name
{
    my $self = shift;
    return $self->{ '_self' };
}
161
-
162
# Accessor: returns the array reference stored under '_objs' (the objects
# collected by parse). The reference itself is returned, not a copy.
sub get_objs_ref
{
    my $self = shift;
    return $self->{ '_objs' };
}
167
-
168
# Accessor: returns the text stored under '_content' (assembled by parse,
# undef until then).
sub get_content
{
    my $self = shift;
    return $self->{ '_content' };
}
173
-
174
# Accessor: returns '_bottom', which parse fills from the node's BOTTOM attribute.
sub get_bottom_pos
{
    my $self = shift;
    return $self->{ '_bottom' };
}
179
-
180
# Accessor: returns '_top', which parse fills from the node's TOP attribute.
sub get_top_pos
{
    my $self = shift;
    return $self->{ '_top' };
}
185
-
186
# Accessor: returns '_left', which parse fills from the node's LEFT attribute.
sub get_left_pos
{
    my $self = shift;
    return $self->{ '_left' };
}
191
-
192
# Accessor: returns '_right', which parse fills from the node's RIGHT attribute.
sub get_right_pos
{
    my $self = shift;
    return $self->{ '_right' };
}
197
-
198
- # Support functions
199
# Return the value of attribute $attr on $node, or "" when the node has no
# such attribute.
#
# Parameters:
#   $node - an object providing an att($name) method
#   $attr - the attribute name
#
# The test is on definedness rather than truth, so a legitimate value of "0"
# (for instance a coordinate at the page edge) is returned as "0" instead of
# being replaced by "". att() is also called only once.
sub GetNodeAttr
{
    my ($node, $attr) = @_;

    my $value = $node->att($attr);
    return defined $value ? $value : "";
}
204
-
205
# Set attribute $attr of $node to $value by delegating to the node's
# set_att method; whatever set_att returns is passed back to the caller.
sub SetNodeAttr
{
    my ($element, $name, $new_value) = @_;
    return $element->set_att($name, $new_value);
}
210
-
211
# Return the text of $node, as reported by the node's text method.
sub GetNodeText
{
    my $element = shift;
    return $element->text();
}
216
-
217
# Replace the text of $node with $value by delegating to the node's set_text
# method; whatever set_text returns is passed back to the caller.
sub SetNodeText
{
    my ($element, $new_text) = @_;
    return $element->set_text($new_text);
}
222
-
223
- 1;