biblicit 2.2.0 → 2.2.1
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- data/lib/biblicit/version.rb +1 -1
- data/parscit/lib/HeaderParse/API/ParserMethods.pm +0 -1
- data/parscit/lib/HeaderParse/Config/API_Config.pm +5 -8
- data/parscit/lib/ParsCit/Config.pm +1 -1
- data/parscit/lib/ParsCit/Tr2crfpp.pm +9 -14
- data/parscit/lib/ParsHed/Config.pm +1 -1
- data/parscit/lib/ParsHed/Tr2crfpp.pm +10 -7
- data/parscit/lib/ParsHed/Tr2crfpp_token.pm +7 -10
- data/parscit/lib/SectLabel/AAMatching.pm +6 -2
- data/parscit/lib/SectLabel/Config.pm +1 -1
- data/parscit/lib/SectLabel/Controller.pm +4 -2
- data/parscit/lib/SectLabel/Tr2crfpp.pm +4 -2
- metadata +4 -4
data/lib/biblicit/version.rb
CHANGED
|
@@ -17,7 +17,6 @@ package HeaderParse::API::ParserMethods;
|
|
|
17
17
|
|
|
18
18
|
use utf8;
|
|
19
19
|
use Data::Dumper;
|
|
20
|
-
use FindBin;
|
|
21
20
|
use HeaderParse::API::NamePatternMatch;
|
|
22
21
|
use HeaderParse::API::MultiClassChunking; #default to use all export by this module
|
|
23
22
|
use HeaderParse::API::LoadInformation;
|
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
#
|
|
13
13
|
package HeaderParse::Config::API_Config;
|
|
14
14
|
|
|
15
|
-
use FindBin qw($Bin);
|
|
16
15
|
require Exporter;
|
|
17
16
|
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
|
|
18
17
|
|
|
@@ -26,17 +25,15 @@ else {
|
|
|
26
25
|
$Classifier = "svm_classify5"; # assume on path
|
|
27
26
|
}
|
|
28
27
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
if ($FindBin::Bin =~ /(.*)/) { $path = $1; }
|
|
33
|
-
}
|
|
28
|
+
use File::Basename;
|
|
29
|
+
my $dir = dirname(__FILE__);
|
|
30
|
+
my $parscitHome = "$dir/../../../";
|
|
34
31
|
|
|
35
|
-
$Resource_Dir = "$
|
|
32
|
+
$Resource_Dir = "$parscitHome/resources/headerParse";
|
|
36
33
|
$Database_Dir = "$Resource_Dir/database/";
|
|
37
34
|
$Data_Dir = "$Resource_Dir/data/";
|
|
38
35
|
$offlineD = "$Resource_Dir/models/";
|
|
39
|
-
$Tmp_Dir = "$
|
|
36
|
+
$Tmp_Dir = "$parscitHome/tmp";
|
|
40
37
|
|
|
41
38
|
$nMinHeaderLength = 50;
|
|
42
39
|
$nMaxHeaderLength = 2500;
|
|
@@ -15,7 +15,7 @@ $serverPort = 10555;
|
|
|
15
15
|
$URI = 'http://citeseerx.org/algorithms/parscit/wsdl';
|
|
16
16
|
|
|
17
17
|
## Tr2crfpp
|
|
18
|
-
## Paths relative to ParsCit root dir
|
|
18
|
+
## Paths relative to ParsCit root dir
|
|
19
19
|
$tmpDir = "tmp";
|
|
20
20
|
$dictFile = "resources/parsCitDict.txt";
|
|
21
21
|
$crf_test = $ENV{'CRFPP_HOME'} ? "$ENV{'CRFPP_HOME'}/bin/crf_test" : "crfpp/crf_test";
|
|
@@ -12,25 +12,20 @@ package ParsCit::Tr2crfpp;
|
|
|
12
12
|
use utf8;
|
|
13
13
|
use strict 'vars';
|
|
14
14
|
|
|
15
|
-
use FindBin;
|
|
16
15
|
use Encode ();
|
|
17
16
|
|
|
18
17
|
use ParsCit::Config;
|
|
19
18
|
|
|
20
|
-
|
|
21
|
-
my $
|
|
22
|
-
$
|
|
23
|
-
|
|
24
|
-
my $dict_file = $ParsCit::Config::dictFile;
|
|
25
|
-
$dict_file = "$FindBin::Bin/../$dict_file";
|
|
26
|
-
|
|
27
|
-
my $crf_test = $ENV{'CRFPP_HOME'} ? "$ENV{'CRFPP_HOME'}/bin/crf_test" : "$FindBin::Bin/../$ParsCit::Config::crf_test";
|
|
19
|
+
use File::Basename;
|
|
20
|
+
my $dir = dirname(__FILE__);
|
|
21
|
+
my $parscitHome = "$dir/../../";
|
|
28
22
|
|
|
29
|
-
|
|
30
|
-
$
|
|
31
|
-
|
|
32
|
-
my $
|
|
33
|
-
$
|
|
23
|
+
### USER customizable section
|
|
24
|
+
my $tmp_dir = "$parscitHome/$ParsCit::Config::tmpDir";
|
|
25
|
+
my $dict_file = "$parscitHome/$ParsCit::Config::dictFile";
|
|
26
|
+
my $crf_test = $ENV{'CRFPP_HOME'} ? "$ENV{'CRFPP_HOME'}/bin/crf_test" : "$parscitHome/$ParsCit::Config::crf_test";
|
|
27
|
+
my $model_file = "$parscitHome/$ParsCit::Config::modelFile";
|
|
28
|
+
my $split_model_file = "$parscitHome/$ParsCit::Config::splitModelFile";
|
|
34
29
|
### END user customizable section
|
|
35
30
|
|
|
36
31
|
###
|
|
@@ -4,7 +4,7 @@ $algorithmName = "ParsHed";
|
|
|
4
4
|
$algorithmVersion = "110505";
|
|
5
5
|
|
|
6
6
|
## Tr2crfpp
|
|
7
|
-
## Paths relative to ParsCit root dir
|
|
7
|
+
## Paths relative to ParsCit root dir
|
|
8
8
|
$tmpDir = "tmp";
|
|
9
9
|
$dictFile = "resources/parsCitDict.txt";
|
|
10
10
|
$keywordFile = "resources/parsHed/keywords";
|
|
@@ -10,28 +10,31 @@ package ParsHed::Tr2crfpp;
|
|
|
10
10
|
|
|
11
11
|
use strict 'vars';
|
|
12
12
|
use utf8;
|
|
13
|
-
use FindBin;
|
|
14
13
|
use ParsHed::Config;
|
|
15
14
|
use Encode ();
|
|
16
15
|
|
|
16
|
+
use File::Basename;
|
|
17
|
+
my $dir = dirname(__FILE__);
|
|
18
|
+
my $parscitHome = "$dir/../../";
|
|
19
|
+
|
|
17
20
|
### USER customizable section
|
|
18
21
|
|
|
19
22
|
my $tmpDir = $ParsHed::Config::tmpDir;
|
|
20
|
-
$tmpDir = "$
|
|
23
|
+
$tmpDir = "$parscitHome/$tmpDir";
|
|
21
24
|
|
|
22
25
|
my $dictFile = $ParsHed::Config::dictFile;
|
|
23
|
-
$dictFile = "$
|
|
26
|
+
$dictFile = "$parscitHome/$dictFile";
|
|
24
27
|
|
|
25
28
|
my $keywordFile = $ParsHed::Config::keywordFile;
|
|
26
|
-
$keywordFile = "$
|
|
29
|
+
$keywordFile = "$parscitHome/$keywordFile";
|
|
27
30
|
|
|
28
31
|
my $bigramFile = $ParsHed::Config::bigramFile;
|
|
29
|
-
$bigramFile = "$
|
|
32
|
+
$bigramFile = "$parscitHome/$bigramFile";
|
|
30
33
|
|
|
31
|
-
my $crf_test = $ENV{'CRFPP_HOME'} ? "$ENV{'CRFPP_HOME'}/bin/crf_test" : "$
|
|
34
|
+
my $crf_test = $ENV{'CRFPP_HOME'} ? "$ENV{'CRFPP_HOME'}/bin/crf_test" : "$parscitHome/$ParsHed::Config::crf_test";
|
|
32
35
|
|
|
33
36
|
my $modelFile = $ParsHed::Config::modelFile;
|
|
34
|
-
$modelFile = "$
|
|
37
|
+
$modelFile = "$parscitHome/$modelFile";
|
|
35
38
|
|
|
36
39
|
### END user customizable section
|
|
37
40
|
|
|
@@ -10,22 +10,19 @@ package ParsHed::Tr2crfpp_token;
|
|
|
10
10
|
|
|
11
11
|
use strict 'vars';
|
|
12
12
|
use utf8;
|
|
13
|
-
use FindBin;
|
|
14
13
|
use ParsHed::Config;
|
|
15
14
|
use Encode ();
|
|
16
15
|
|
|
17
16
|
### USER customizable section
|
|
18
17
|
|
|
19
|
-
|
|
20
|
-
$
|
|
18
|
+
use File::Basename;
|
|
19
|
+
my $dir = dirname(__FILE__);
|
|
20
|
+
my $parscitHome = "$dir/../../";
|
|
21
21
|
|
|
22
|
-
my $
|
|
23
|
-
$dictFile = "$
|
|
24
|
-
|
|
25
|
-
my $
|
|
26
|
-
|
|
27
|
-
my $modelFile = $ParsHed::Config::oldModelFile;
|
|
28
|
-
$modelFile = "$FindBin::Bin/../$modelFile";
|
|
22
|
+
my $tmpDir = "$parscitHome/$ParsHed::Config::tmpDir";
|
|
23
|
+
my $dictFile = "$parscitHome/$ParsHed::Config::dictFile";
|
|
24
|
+
my $crf_test = $ENV{'CRFPP_HOME'} ? "$ENV{'CRFPP_HOME'}/bin/crf_test" : "$parscitHome/$ParsHed::Config::crf_test";
|
|
25
|
+
my $modelFile = "$parscitHome/$ParsHed::Config::oldModelFile";
|
|
29
26
|
|
|
30
27
|
### END user customizable section
|
|
31
28
|
|
|
@@ -21,10 +21,14 @@ use Class::Struct;
|
|
|
21
21
|
use SectLabel::Config;
|
|
22
22
|
use ParsCit::PostProcess;
|
|
23
23
|
|
|
24
|
+
use File::Basename;
|
|
25
|
+
my $dir = dirname(__FILE__);
|
|
26
|
+
my $parscitHome = "$dir/../../";
|
|
27
|
+
|
|
24
28
|
# Dictionary
|
|
25
29
|
my %dict = ();
|
|
26
30
|
# CRF++
|
|
27
|
-
my $crft = $ENV{'CRFPP_HOME'} ? "$ENV{'CRFPP_HOME'}/bin/crf_test" : "$
|
|
31
|
+
my $crft = $ENV{'CRFPP_HOME'} ? "$ENV{'CRFPP_HOME'}/bin/crf_test" : "$parscitHome/$SectLabel::Config::crf_test";
|
|
28
32
|
|
|
29
33
|
# Matching features of each author, including
|
|
30
34
|
# Signals
|
|
@@ -77,7 +81,7 @@ sub AAMatching
|
|
|
77
81
|
my $aff_lines = Omni::Traversal::OmniCollector($doc, $aff_addrs, $need_object);
|
|
78
82
|
|
|
79
83
|
# Dictionary
|
|
80
|
-
ReadDict($
|
|
84
|
+
ReadDict($parscitHome . $SectLabel::Config::dictFile);
|
|
81
85
|
|
|
82
86
|
# Authors
|
|
83
87
|
my ($aut_features, $aut_rc_features) = AuthorFeatureExtraction($aut_lines, $aut_addrs);
|
|
@@ -17,7 +17,9 @@ require 'dumpvar.pl';
|
|
|
17
17
|
use strict;
|
|
18
18
|
|
|
19
19
|
# Dependencies
|
|
20
|
-
use
|
|
20
|
+
use File::Basename;
|
|
21
|
+
my $dir = dirname(__FILE__);
|
|
22
|
+
my $parscitHome = "$dir/../../";
|
|
21
23
|
|
|
22
24
|
# Local libraries
|
|
23
25
|
use SectLabel::Config;
|
|
@@ -25,7 +27,7 @@ use SectLabel::Tr2crfpp;
|
|
|
25
27
|
use SectLabel::PostProcess;
|
|
26
28
|
use CSXUtil::SafeText qw(cleanXML);
|
|
27
29
|
|
|
28
|
-
my $generic_sect_path = $
|
|
30
|
+
my $generic_sect_path = $parscitHome . "bin/sectLabel/genericSectExtract.rb";
|
|
29
31
|
|
|
30
32
|
###
|
|
31
33
|
# Main API method for generating an XML document including all
|
|
@@ -13,14 +13,16 @@ use utf8;
|
|
|
13
13
|
use strict 'vars';
|
|
14
14
|
|
|
15
15
|
# Dependencies
|
|
16
|
-
use FindBin;
|
|
17
16
|
use Encode ();
|
|
17
|
+
use File::Basename;
|
|
18
|
+
my $dir = dirname(__FILE__);
|
|
19
|
+
my $parscitHome = "$dir/../../";
|
|
18
20
|
|
|
19
21
|
# Local libraries
|
|
20
22
|
use SectLabel::Config;
|
|
21
23
|
|
|
22
24
|
### USER customizable section
|
|
23
|
-
my $crf_test = $ENV{'CRFPP_HOME'} ? "$ENV{'CRFPP_HOME'}/bin/crf_test" : "$
|
|
25
|
+
my $crf_test = $ENV{'CRFPP_HOME'} ? "$ENV{'CRFPP_HOME'}/bin/crf_test" : "$parscitHome/$SectLabel::Config::crf_test";
|
|
24
26
|
### END user customizable section
|
|
25
27
|
|
|
26
28
|
my %dict = ();
|
metadata
CHANGED
|
@@ -2,14 +2,14 @@
|
|
|
2
2
|
name: biblicit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease:
|
|
5
|
-
version: 2.2.
|
|
5
|
+
version: 2.2.1
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
8
8
|
- David Judd
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2013-05-
|
|
12
|
+
date: 2013-05-08 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
prerelease: false
|
|
@@ -435,7 +435,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
435
435
|
- !ruby/object:Gem::Version
|
|
436
436
|
segments:
|
|
437
437
|
- 0
|
|
438
|
-
hash:
|
|
438
|
+
hash: 782848681955337634
|
|
439
439
|
version: '0'
|
|
440
440
|
none: false
|
|
441
441
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
@@ -444,7 +444,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
444
444
|
- !ruby/object:Gem::Version
|
|
445
445
|
segments:
|
|
446
446
|
- 0
|
|
447
|
-
hash:
|
|
447
|
+
hash: 782848681955337634
|
|
448
448
|
version: '0'
|
|
449
449
|
none: false
|
|
450
450
|
requirements:
|