biblicit 2.0.3 → 2.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +0 -2
- data/biblicit.gemspec +1 -1
- data/parscit/bin/citeExtract.pl +9 -161
- data/parscit/bin/sectExtract.pl +0 -14
- data/parscit/lib/ParsCit/Controller.pm +0 -59
- data/parscit/lib/ParsCit/PreProcess.pm +0 -4
- data/parscit/lib/ParsCit/Tr2crfpp.pm +1 -7
- metadata +4 -24
- data/parscit/bin/sectLabel/processOmniXML.pl +0 -1427
- data/parscit/bin/sectLabel/processOmniXML_new.pl +0 -1025
- data/parscit/bin/sectLabel/processOmniXMLv2.pl +0 -1529
- data/parscit/bin/sectLabel/processOmniXMLv3.pl +0 -964
- data/parscit/bin/sectLabel/simplifyOmniXML.pl +0 -382
- data/parscit/bin/xml2train.pl +0 -193
- data/parscit/lib/Omni/Config.pm +0 -93
- data/parscit/lib/Omni/Omnicell.pm +0 -263
- data/parscit/lib/Omni/Omnicol.pm +0 -292
- data/parscit/lib/Omni/Omnidd.pm +0 -328
- data/parscit/lib/Omni/Omnidoc.pm +0 -153
- data/parscit/lib/Omni/Omniframe.pm +0 -223
- data/parscit/lib/Omni/Omniline.pm +0 -423
- data/parscit/lib/Omni/Omnipage.pm +0 -282
- data/parscit/lib/Omni/Omnipara.pm +0 -232
- data/parscit/lib/Omni/Omnirun.pm +0 -303
- data/parscit/lib/Omni/Omnitable.pm +0 -336
- data/parscit/lib/Omni/Omniword.pm +0 -162
- data/parscit/lib/Omni/Traversal.pm +0 -313
- data/parscit/lib/SectLabel/AAMatching.pm +0 -1949
data/README.md
CHANGED
@@ -119,8 +119,6 @@ More than these might be required; this is what I had to add to my default insta
|
|
119
119
|
|
120
120
|
sudo cpan install Digest::SHA1
|
121
121
|
sudo cpan install String::Approx
|
122
|
-
sudo cpan install XML::Writer::String
|
123
|
-
sudo cpan install XML::Twig
|
124
122
|
|
125
123
|
## Required to use the ParsCit algorithm
|
126
124
|
|
data/biblicit.gemspec
CHANGED
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |gem|
|
7
7
|
gem.name = "biblicit"
|
8
|
-
gem.version = "2.0.3"
|
8
|
+
gem.version = "2.0.4"
|
9
9
|
gem.authors = ["David Judd"]
|
10
10
|
gem.email = ["david@academia.edu"]
|
11
11
|
gem.summary = %q{Extract citations from PDFs.}
|
data/parscit/bin/citeExtract.pl
CHANGED
@@ -41,10 +41,7 @@ use File::Spec;
|
|
41
41
|
use File::Basename;
|
42
42
|
|
43
43
|
# Local libraries
|
44
|
-
use Omni::Omnidoc;
|
45
|
-
use Omni::Traversal;
|
46
44
|
use ParsCit::Controller;
|
47
|
-
use SectLabel::AAMatching;
|
48
45
|
|
49
46
|
# USER customizable section
|
50
47
|
my $tmpfile .= $0;
|
@@ -195,113 +192,17 @@ if (defined $opt_e && $opt_e ne "")
|
|
195
192
|
}
|
196
193
|
|
197
194
|
my $doc = undef;
|
198
|
-
my $text_file =
|
199
|
-
# Extracting text from Omnipage XML output
|
200
|
-
if ($is_xml_input)
|
201
|
-
{
|
202
|
-
$text_file = "/tmp/" . NewTmpFile();
|
203
|
-
my $cmd = $FindBin::Bin . "/sectLabel/processOmniXMLv2.pl -q -in $in -out $text_file -decode";
|
204
|
-
system($cmd);
|
205
|
-
|
206
|
-
###
|
207
|
-
# Huydhn: input is xml from Omnipage
|
208
|
-
###
|
209
|
-
if (! open(IN, "<:utf8", $in)) { return (-1, "Could not open xml file " . $in . ": " . $!); }
|
210
|
-
my $xml = do { local $/; <IN> };
|
211
|
-
close IN;
|
212
|
-
|
213
|
-
###
|
214
|
-
# Huydhn
|
215
|
-
# NOTE: the omnipage xml is not well constructed (concatenated multiple xml files).
|
216
|
-
# This merged xml need to be fixed first before pass it to xml processing libraries, e.g. xml::twig
|
217
|
-
###
|
218
|
-
# Convert to Unix format
|
219
|
-
$xml =~ s/\r//g;
|
220
|
-
# Remove <?xml version="1.0" encoding="UTF-8"?>
|
221
|
-
$xml =~ s/<\?xml.+?>\n//g;
|
222
|
-
# Remove <!--XML document generated using OCR technology from ScanSoft, Inc.-->
|
223
|
-
$xml =~ s/<\!\-\-XML.+?>\n//g;
|
224
|
-
# Declaration and root
|
225
|
-
$xml = "<?xml version=\"1.0\"?>" . "\n" . "<root>" . "\n" . $xml . "\n" . "</root>";
|
226
|
-
|
227
|
-
# New document
|
228
|
-
$doc = new Omni::Omnidoc();
|
229
|
-
$doc->set_raw($xml);
|
230
|
-
}
|
231
|
-
else
|
232
|
-
{
|
233
|
-
$text_file = $in;
|
234
|
-
}
|
195
|
+
my $text_file = $in;
|
235
196
|
|
236
197
|
# SECTLABEL
|
237
198
|
if (($mode & $SECTLABEL) == $SECTLABEL)
|
238
199
|
{
|
239
200
|
my $sect_label_input = $text_file;
|
240
201
|
|
241
|
-
|
242
|
-
if ($is_xml_input)
|
243
|
-
{
|
244
|
-
my $cmd = $FindBin::Bin . "/sectLabel/processOmniXMLv3.pl -q -in $in -out $text_file.feature -decode";
|
245
|
-
system($cmd);
|
246
|
-
|
247
|
-
my $address_file = $text_file . ".feature" . ".address";
|
248
|
-
if (! open(IN, "<:utf8", $address_file)) { return (-1, "Could not open address file " . $address_file . ": " . $!); }
|
249
|
-
|
250
|
-
my @omni_address = ();
|
251
|
-
# Read the address file provided by process OmniXML script
|
252
|
-
while (<IN>)
|
253
|
-
{
|
254
|
-
chomp;
|
255
|
-
# Save and split the line
|
256
|
-
my $line = $_;
|
257
|
-
my @element = split(/\s+/, $line);
|
258
|
-
|
259
|
-
my %addr = ();
|
260
|
-
# Address
|
261
|
-
$addr{ 'L1' } = $element[ 0 ];
|
262
|
-
$addr{ 'L2' } = $element[ 1 ];
|
263
|
-
$addr{ 'L3' } = $element[ 2 ];
|
264
|
-
$addr{ 'L4' } = $element[ 3 ];
|
265
|
-
|
266
|
-
# Save the address
|
267
|
-
push @omni_address, { %addr };
|
268
|
-
}
|
269
|
-
close IN;
|
270
|
-
unlink($address_file);
|
271
|
-
|
272
|
-
$sect_label_input .= ".feature";
|
273
|
-
my ($sl_xml, $aut_lines, $aff_lines) = SectLabel($sect_label_input, $is_xml_input, 0);
|
274
|
-
|
275
|
-
# Remove first line <?xml/>
|
276
|
-
$rxml .= RemoveTopLines($sl_xml, 1) . "\n";
|
277
|
-
|
278
|
-
# Only run author - affiliation if "something" is provided
|
279
|
-
if ($opt_a)
|
280
|
-
{
|
281
|
-
my @aut_addrs = ();
|
282
|
-
my @aff_addrs = ();
|
283
|
-
# Address of author section
|
284
|
-
for my $lindex (@{ $aut_lines }) { push @aut_addrs, $omni_address[ $lindex ]; }
|
285
|
-
# Address of affiliation section
|
286
|
-
for my $lindex (@{ $aff_lines }) { push @aff_addrs, $omni_address[ $lindex ]; }
|
287
|
-
|
288
|
-
# The tarpit
|
289
|
-
my $aa_xml = SectLabel::AAMatching::AAMatching($doc, \@aut_addrs, \@aff_addrs);
|
290
|
-
|
291
|
-
# Author-Affiliation Matching result
|
292
|
-
$rxml .= $aa_xml . "\n";
|
293
|
-
}
|
294
|
-
|
295
|
-
# Remove XML feature file
|
296
|
-
unlink($sect_label_input);
|
297
|
-
}
|
298
|
-
else
|
299
|
-
{
|
300
|
-
my ($sl_xml, $aut_lines, $aff_lines) = SectLabel($sect_label_input, $is_xml_input, 0);
|
202
|
+
my ($sl_xml, $aut_lines, $aff_lines) = SectLabel($sect_label_input, $is_xml_input, 0);
|
301
203
|
|
302
|
-
|
303
|
-
|
304
|
-
}
|
204
|
+
# Remove first line <?xml/>
|
205
|
+
$rxml .= RemoveTopLines($sl_xml, 1) . "\n";
|
305
206
|
}
|
306
207
|
|
307
208
|
# PARSHED
|
@@ -318,66 +219,13 @@ if (($mode & $PARSHED) == $PARSHED)
|
|
318
219
|
# PARSCIT
|
319
220
|
if (($mode & $PARSCIT) == $PARSCIT)
|
320
221
|
{
|
321
|
-
|
322
|
-
{
|
323
|
-
my $cmd = $FindBin::Bin . "/sectLabel/processOmniXMLv3.pl -q -in $in -out $text_file.feature -decode";
|
324
|
-
system($cmd);
|
325
|
-
|
326
|
-
my $address_file = $text_file . ".feature" . ".address";
|
327
|
-
if (! open(IN, "<:utf8", $address_file)) { return (-1, "Could not open address file " . $address_file . ": " . $!); }
|
328
|
-
|
329
|
-
my @omni_address = ();
|
330
|
-
# Read the address file provided by process OmniXML script
|
331
|
-
while (<IN>)
|
332
|
-
{
|
333
|
-
chomp;
|
334
|
-
# Save and split the line
|
335
|
-
my $line = $_;
|
336
|
-
my @element = split(/\s+/, $line);
|
337
|
-
|
338
|
-
my %addr = ();
|
339
|
-
# Address
|
340
|
-
$addr{ 'L1' } = $element[ 0 ];
|
341
|
-
$addr{ 'L2' } = $element[ 1 ];
|
342
|
-
$addr{ 'L3' } = $element[ 2 ];
|
343
|
-
$addr{ 'L4' } = $element[ 3 ];
|
344
|
-
|
345
|
-
# Save the address
|
346
|
-
push @omni_address, { %addr };
|
347
|
-
}
|
348
|
-
close IN;
|
349
|
-
unlink($address_file);
|
350
|
-
|
351
|
-
my $sect_label_input = $text_file . ".feature";
|
352
|
-
# Output of sectlabel becomes input for parscit
|
353
|
-
my ($all_text, $cit_lines) = SectLabel($sect_label_input, $is_xml_input, 1);
|
354
|
-
# Remove XML feature file
|
355
|
-
unlink($sect_label_input);
|
356
|
-
|
357
|
-
my @cit_addrs = ();
|
358
|
-
# Address of reference section
|
359
|
-
for my $lindex (@{ $cit_lines }) { push @cit_addrs, $omni_address[ $lindex ]; }
|
360
|
-
|
361
|
-
my $pc_xml = undef;
|
362
|
-
# Huydhn: add xml features to parscit in case of unmarked reference
|
363
|
-
$pc_xml = ParsCit::Controller::ExtractCitations2(\$all_text, $cit_lines, $is_xml_input, $doc, \@cit_addrs);
|
364
|
-
|
365
|
-
# Remove first line <?xml/>
|
366
|
-
$rxml .= RemoveTopLines($$pc_xml, 1) . "\n";
|
222
|
+
my $pc_xml = ParsCit::Controller::ExtractCitations($text_file, $in, $is_xml_input);
|
367
223
|
|
368
|
-
|
369
|
-
|
370
|
-
}
|
371
|
-
else
|
372
|
-
{
|
373
|
-
my $pc_xml = ParsCit::Controller::ExtractCitations($text_file, $in, $is_xml_input);
|
374
|
-
|
375
|
-
# Remove first line <?xml/>
|
376
|
-
$rxml .= RemoveTopLines($$pc_xml, 1) . "\n";
|
224
|
+
# Remove first line <?xml/>
|
225
|
+
$rxml .= RemoveTopLines($$pc_xml, 1) . "\n";
|
377
226
|
|
378
|
-
|
379
|
-
|
380
|
-
}
|
227
|
+
# Thang v100901: call to BiblioScript
|
228
|
+
if (scalar(@export_types) != 0) { BiblioScript(\@export_types, $$pc_xml, $out); }
|
381
229
|
}
|
382
230
|
|
383
231
|
$rxml .= "</algorithms>";
|
data/parscit/bin/sectExtract.pl
CHANGED
@@ -82,16 +82,6 @@ $modelFile = "$path/../$modelFile";
|
|
82
82
|
my $configFile = $isXmlInput ? $SectLabel::Config::configXmlFile : $SectLabel::Config::configFile;
|
83
83
|
$configFile = "$path/../$configFile";
|
84
84
|
|
85
|
-
if($isXmlInput){
|
86
|
-
my $xmlInFile = newTmpFile();
|
87
|
-
$xmlInFile = untaintPath($xmlInFile);
|
88
|
-
my $cmd = "$path/sectLabel/";
|
89
|
-
$cmd .= ($isNew) ? "processOmniXMLv2.pl" : "processOmniXML.pl";
|
90
|
-
$cmd .= " -in $inFile -out $xmlInFile -xmlFeature -decode";
|
91
|
-
execute($cmd);
|
92
|
-
$inFile = $xmlInFile;
|
93
|
-
}
|
94
|
-
|
95
85
|
my $dictFile = $SectLabel::Config::dictFile;
|
96
86
|
$dictFile = "$path/../$dictFile";
|
97
87
|
|
@@ -99,10 +89,6 @@ my $funcFile = $SectLabel::Config::funcFile;
|
|
99
89
|
$funcFile = "$path/../$funcFile";
|
100
90
|
my $rXML = SectLabel::Controller::extractSection($inFile, $isXmlOutput, $modelFile, $dictFile, $funcFile, $configFile, $isXmlInput, $isDebug);
|
101
91
|
|
102
|
-
if($isXmlInput){
|
103
|
-
unlink($inFile);
|
104
|
-
}
|
105
|
-
|
106
92
|
if (defined $outFile) {
|
107
93
|
$outFile = untaintPath($outFile);
|
108
94
|
|
data/parscit/lib/ParsCit/Controller.pm
CHANGED
@@ -21,8 +21,6 @@ use ParsCit::Tr2crfpp;
|
|
21
21
|
use ParsCit::PreProcess;
|
22
22
|
use ParsCit::PostProcess;
|
23
23
|
use ParsCit::CitationContext;
|
24
|
-
# Omnipage libraries
|
25
|
-
use Omni::Omnidoc;
|
26
24
|
# Dependencies
|
27
25
|
use CSXUtil::SafeText qw(cleanXML);
|
28
26
|
|
@@ -228,62 +226,6 @@ sub ExtractCitationsImpl
|
|
228
226
|
# Reference to an array of single reference
|
229
227
|
my $rraw_citations = undef;
|
230
228
|
|
231
|
-
# Find and separate reference
|
232
|
-
if ($is_xml)
|
233
|
-
{
|
234
|
-
###
|
235
|
-
# Huydhn: input is xml from Omnipage
|
236
|
-
###
|
237
|
-
if (! open(IN, "<:utf8", $orgfile)) { return (-1, "Could not open xml file " . $orgfile . ": " . $!); }
|
238
|
-
my $xml = do { local $/; <IN> };
|
239
|
-
close IN;
|
240
|
-
|
241
|
-
###
|
242
|
-
# Huydhn
|
243
|
-
# NOTE: the omnipage xml is not well constructed (concatenated multiple xml files).
|
244
|
-
# This merged xml need to be fixed first before pass it to xml processing libraries, e.g. xml::twig
|
245
|
-
###
|
246
|
-
# Convert to Unix format
|
247
|
-
$xml =~ s/\r//g;
|
248
|
-
# Remove <?xml version="1.0" encoding="UTF-8"?>
|
249
|
-
$xml =~ s/<\?xml.+?>\n//g;
|
250
|
-
# Remove <!--XML document generated using OCR technology from ScanSoft, Inc.-->
|
251
|
-
$xml =~ s/<\!\-\-XML.+?>\n//g;
|
252
|
-
# Declaration and root
|
253
|
-
$xml = "<?xml version=\"1.0\"?>" . "\n" . "<root>" . "\n" . $xml . "\n" . "</root>";
|
254
|
-
|
255
|
-
# New document
|
256
|
-
my $doc = new Omni::Omnidoc();
|
257
|
-
$doc->set_raw($xml);
|
258
|
-
|
259
|
-
# Extract the reference portion from the XML
|
260
|
-
my ($start_ref, $end_ref, $rcite_text_from_xml, $rcit_addrs) = ParsCit::PreProcess::FindCitationTextXML($doc);
|
261
|
-
|
262
|
-
# Extract the reference portion from the text.
|
263
|
-
# TODO: NEED TO BE REMOVED FROM HERE
|
264
|
-
my $content = $doc->get_content();
|
265
|
-
($rcite_text, $rnorm_body_text, $rbody_text) = ParsCit::PreProcess::FindCitationText(\$content, \@pos_array);
|
266
|
-
|
267
|
-
my @norm_body_tokens = split(/\s+/, $$rnorm_body_text);
|
268
|
-
my @body_tokens = split(/\s+/, $$rbody_text);
|
269
|
-
|
270
|
-
my $size = scalar(@norm_body_tokens);
|
271
|
-
my $size1 = scalar(@pos_array);
|
272
|
-
|
273
|
-
if($size != $size1) { die "ParsCit::Controller::extractCitationsImpl: normBodyText size $size != posArray size $size1\n"; }
|
274
|
-
# TODO: TO HERE
|
275
|
-
|
276
|
-
# Filename initialization
|
277
|
-
if ($bwrite_split > 0) { ($citefile, $bodyfile) = WriteSplit($textfile, $rcite_text_from_xml, $rbody_text); }
|
278
|
-
|
279
|
-
# Prepare to split unmarked reference portion
|
280
|
-
my $tmp_file = ParsCit::Tr2crfpp::PrepDataUnmarked($doc, $rcit_addrs);
|
281
|
-
|
282
|
-
# Extract citations from citation text
|
283
|
-
$rraw_citations = ParsCit::PreProcess::SegmentCitationsXML($rcite_text_from_xml, $tmp_file);
|
284
|
-
}
|
285
|
-
else
|
286
|
-
{
|
287
229
|
if (! open(IN, "<:utf8", $textfile)) { return (-1, "Could not open text file " . $textfile . ": " . $!); }
|
288
230
|
my $text = do { local $/; <IN> };
|
289
231
|
close IN;
|
@@ -309,7 +251,6 @@ sub ExtractCitationsImpl
|
|
309
251
|
|
310
252
|
# Extract citations from citation text
|
311
253
|
$rraw_citations = ParsCit::PreProcess::SegmentCitations($rcite_text);
|
312
|
-
}
|
313
254
|
|
314
255
|
my @citations = ();
|
315
256
|
my @valid_citations = ();
|
data/parscit/lib/ParsCit/PreProcess.pm
CHANGED
@@ -11,7 +11,6 @@ package ParsCit::PreProcess;
|
|
11
11
|
use utf8;
|
12
12
|
use strict;
|
13
13
|
|
14
|
-
use Omni::Config;
|
15
14
|
use ParsCit::Citation;
|
16
15
|
|
17
16
|
my %marker_types = ( 'SQUARE' => '\\[.+?\\]',
|
@@ -22,9 +21,6 @@ my %marker_types = ( 'SQUARE' => '\\[.+?\\]',
|
|
22
21
|
#'NAKEDNUMDOT' => '\\d{1,3}\\.' # Modified by Artemy Kolchinsky (v090625)
|
23
22
|
);
|
24
23
|
|
25
|
-
# Omnilib configuration: object name
|
26
|
-
my $obj_list = $Omni::Config::obj_list;
|
27
|
-
|
28
24
|
###
|
29
25
|
# Huydhn: similar to findCitationText, find the citation portion using regular expression.
|
30
26
|
# However the input is an omnipage xml document object, not the raw text
|
data/parscit/lib/ParsCit/Tr2crfpp.pm
CHANGED
@@ -15,7 +15,6 @@ use strict 'vars';
|
|
15
15
|
use FindBin;
|
16
16
|
use Encode ();
|
17
17
|
|
18
|
-
use Omni::Config;
|
19
18
|
use ParsCit::Config;
|
20
19
|
|
21
20
|
### USER customizable section
|
@@ -38,8 +37,6 @@ $split_model_file = "$FindBin::Bin/../$split_model_file";
|
|
38
37
|
# Huydhn: don't know its function
|
39
38
|
###
|
40
39
|
my %dict = ();
|
41
|
-
# Omnilib configuration: object name
|
42
|
-
my $obj_list = $Omni::Config::obj_list;
|
43
40
|
|
44
41
|
###
|
45
42
|
# Huydhn: prepare data for trfpp, segmenting unmarked reference
|
@@ -124,7 +121,7 @@ sub PrepDataUnmarked
|
|
124
121
|
# Trim line
|
125
122
|
$ln =~ s/^\s+|\s+$//g;
|
126
123
|
# Skip blank lines
|
127
|
-
if (($ln =~ m/^\s*$/)
|
124
|
+
if (($ln =~ m/^\s*$/))
|
128
125
|
{
|
129
126
|
$addr_index++;
|
130
127
|
next;
|
@@ -355,7 +352,6 @@ sub PrepDataUnmarked
|
|
355
352
|
# XML features
|
356
353
|
# Bullet
|
357
354
|
my $bullet = undef;
|
358
|
-
if ($lines->[ $t ]->get_name() eq $obj_list->{ 'OMNILINE' }) { $bullet = $lines->[ $t ]->get_bullet(); }
|
359
355
|
if ((defined $bullet) && ($bullet eq 'true'))
|
360
356
|
{
|
361
357
|
push @feats, 'xmlBullet_yes';
|
@@ -368,7 +364,6 @@ sub PrepDataUnmarked
|
|
368
364
|
|
369
365
|
# First word format: bold, italic, font size
|
370
366
|
my $xml_runs = undef;
|
371
|
-
if (($lines->[ $t ]->get_name() eq $obj_list->{ 'OMNILINE' })) { $xml_runs = $lines->[ $t ]->get_objs_ref(); }
|
372
367
|
|
373
368
|
# First word format: bold
|
374
369
|
my $bold = undef;
|
@@ -415,7 +410,6 @@ sub PrepDataUnmarked
|
|
415
410
|
|
416
411
|
# First word format: starting point, left alignment
|
417
412
|
my $start_point = undef;
|
418
|
-
if (($lines->[ $t ]->get_name() eq $obj_list->{ 'OMNILINE' })) { $start_point = $lines->[ $t ]->get_left_pos(); }
|
419
413
|
if ((defined $start_point) && ($start_point > $avg_start_point * $start_upper_ratio))
|
420
414
|
{
|
421
415
|
push @feats, 'xmlBeginLine_right';
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biblicit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.3
|
4
|
+
version: 2.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-03-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activesupport
|
@@ -402,30 +402,11 @@ files:
|
|
402
402
|
- parscit/bin/sectLabel/genericSect/extractFeature.rb
|
403
403
|
- parscit/bin/sectLabel/genericSectExtract.rb
|
404
404
|
- parscit/bin/sectLabel/getStructureInfo.pl
|
405
|
-
- parscit/bin/sectLabel/processOmniXML.pl
|
406
|
-
- parscit/bin/sectLabel/processOmniXML_new.pl
|
407
|
-
- parscit/bin/sectLabel/processOmniXMLv2.pl
|
408
|
-
- parscit/bin/sectLabel/processOmniXMLv3.pl
|
409
405
|
- parscit/bin/sectLabel/redo.sectLabel.pl
|
410
|
-
- parscit/bin/sectLabel/simplifyOmniXML.pl
|
411
406
|
- parscit/bin/sectLabel/single2multi.pl
|
412
407
|
- parscit/bin/sectLabel/tr2crfpp.pl
|
413
408
|
- parscit/bin/tr2crfpp.pl
|
414
|
-
- parscit/bin/xml2train.pl
|
415
409
|
- parscit/lib/CSXUtil/SafeText.pm
|
416
|
-
- parscit/lib/Omni/Config.pm
|
417
|
-
- parscit/lib/Omni/Omnicell.pm
|
418
|
-
- parscit/lib/Omni/Omnicol.pm
|
419
|
-
- parscit/lib/Omni/Omnidd.pm
|
420
|
-
- parscit/lib/Omni/Omnidoc.pm
|
421
|
-
- parscit/lib/Omni/Omniframe.pm
|
422
|
-
- parscit/lib/Omni/Omniline.pm
|
423
|
-
- parscit/lib/Omni/Omnipage.pm
|
424
|
-
- parscit/lib/Omni/Omnipara.pm
|
425
|
-
- parscit/lib/Omni/Omnirun.pm
|
426
|
-
- parscit/lib/Omni/Omnitable.pm
|
427
|
-
- parscit/lib/Omni/Omniword.pm
|
428
|
-
- parscit/lib/Omni/Traversal.pm
|
429
410
|
- parscit/lib/ParsCit/.PostProcess.pm.swp
|
430
411
|
- parscit/lib/ParsCit/Citation.pm
|
431
412
|
- parscit/lib/ParsCit/CitationContext.pm
|
@@ -439,7 +420,6 @@ files:
|
|
439
420
|
- parscit/lib/ParsHed/PostProcess.pm
|
440
421
|
- parscit/lib/ParsHed/Tr2crfpp.pm
|
441
422
|
- parscit/lib/ParsHed/Tr2crfpp_token.pm
|
442
|
-
- parscit/lib/SectLabel/AAMatching.pm
|
443
423
|
- parscit/lib/SectLabel/Config.pm
|
444
424
|
- parscit/lib/SectLabel/Controller.pm
|
445
425
|
- parscit/lib/SectLabel/PostProcess.pm
|
@@ -473,7 +453,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
473
453
|
version: '0'
|
474
454
|
segments:
|
475
455
|
- 0
|
476
|
-
hash:
|
456
|
+
hash: -2794280872000100021
|
477
457
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
478
458
|
none: false
|
479
459
|
requirements:
|
@@ -482,7 +462,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
482
462
|
version: '0'
|
483
463
|
segments:
|
484
464
|
- 0
|
485
|
-
hash:
|
465
|
+
hash: -2794280872000100021
|
486
466
|
requirements:
|
487
467
|
- For PDFs, Poppler or XPDF (try "which pdftotext")
|
488
468
|
- For Postscript files, Ghostscript (try "which ps2ascii")
|