biblicit 2.2.3 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -17,14 +17,31 @@ module ParsCit
17
17
  attr_reader :result
18
18
 
19
19
  def initialize(in_txt, opts={})
20
- mode = (opts.fetch :include_citations, false) ? 'extract_all' : 'extract_header'
21
-
20
+ parse_modes = if (opts.fetch :include_citations, false)
21
+ { extract_all: [:citeseer, :parshed, :citations] }
22
+ else
23
+ {
24
+ extract_header_svm_only: [:citeseer],
25
+ extract_header_crf_only: [:parshed]
26
+ }
27
+ end
28
+
29
+ ENV['SVM_LIGHT_HOME'] ||= "#{File.dirname(`which svm_classify`)}"
22
30
  ENV['CRFPP_HOME'] ||= "#{File.dirname(`which crf_test`)}/../"
23
31
  ENV['PARSCIT_TMPDIR'] ||= "/tmp/"
24
32
 
25
- output = `#{PERL_DIR}/bin/citeExtract.pl -q -m #{mode} #{in_txt.path}`
26
-
27
- @result = parse(Nokogiri::XML output)
33
+ @result = {}
34
+
35
+ parse_modes.map do |mode, keys|
36
+ outf = Tempfile.new mode.to_s
37
+ pid = spawn("#{PERL_DIR}/bin/citeExtract.pl -q -m #{mode} #{in_txt.path}", out: outf.path)
38
+ [pid, outf, keys]
39
+ end.each do |pid, outf, keys|
40
+ Process.wait pid
41
+ output = File.read outf
42
+ outf.unlink
43
+ @result.merge! parse(Nokogiri::XML output).slice(*keys)
44
+ end
28
45
  end
29
46
 
30
47
  private
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Biblicit
4
4
 
5
- VERSION = '2.2.3'
5
+ VERSION = '2.3.0'
6
6
 
7
7
  end
data/lib/biblicit.rb CHANGED
@@ -2,5 +2,6 @@
2
2
 
3
3
  require 'active_support/core_ext/object'
4
4
  require 'active_support/core_ext/string'
5
+ require 'active_support/core_ext/hash'
5
6
 
6
7
  require 'biblicit/extractor'
@@ -86,7 +86,7 @@ sub Help
86
86
  print STDERR "\t-q\tQuiet Mode (don't echo license)\n";
87
87
 
88
88
  # Thang v100401: add new mode (extract_section), and -i <inputType>
89
- print STDERR "\t-m <mode> \tMode (extract_citations, extract_header, extract_section, extract_meta, extract_all, default: extract_citations)\n";
89
+ print STDERR "\t-m <mode> \tMode (extract_citations, extract_header, extract_header_svm_only, extract_header_crf_only, extract_section, extract_meta, extract_all, default: extract_citations)\n";
90
90
  print STDERR "\t-i <inputType> \tType (raw, xml, default: raw)\n";
91
91
  print STDERR "\t-e <exportType>\tExport citations into multiple types (ads|bib|end|isi|ris|wordbib). Multiple types could be specified by contatenating with \"-\" e.g., bib-end-ris. Output files will be named as outfile.exportFormat, with outfile being the input argument, and exportFormat being each individual format supplied by -e option.\n";
92
92
  print STDERR "\t-t\tUse token level model instead\n";
@@ -281,6 +281,14 @@ sub ParseMode
281
281
  {
282
282
  return ($PARSHED | $SVM);
283
283
  }
284
+ elsif ($arg eq "extract_header_svm_only")
285
+ {
286
+ return $SVM;
287
+ }
288
+ elsif ($arg eq "extract_header_crf_only")
289
+ {
290
+ return $PARSHED;
291
+ }
284
292
  elsif ($arg eq "extract_citations")
285
293
  {
286
294
  return $PARSCIT;
metadata CHANGED
@@ -2,25 +2,25 @@
2
2
  name: biblicit
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.2.3
5
+ version: 2.3.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - David Judd
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-13 00:00:00.000000000 Z
12
+ date: 2013-06-13 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- prerelease: false
16
- name: activesupport
17
- type: :runtime
18
15
  version_requirements: !ruby/object:Gem::Requirement
19
16
  requirements:
20
17
  - - ! '>='
21
18
  - !ruby/object:Gem::Version
22
19
  version: '0'
23
20
  none: false
21
+ name: activesupport
22
+ type: :runtime
23
+ prerelease: false
24
24
  requirement: !ruby/object:Gem::Requirement
25
25
  requirements:
26
26
  - - ! '>='
@@ -28,15 +28,15 @@ dependencies:
28
28
  version: '0'
29
29
  none: false
30
30
  - !ruby/object:Gem::Dependency
31
- prerelease: false
32
- name: nokogiri
33
- type: :runtime
34
31
  version_requirements: !ruby/object:Gem::Requirement
35
32
  requirements:
36
33
  - - ! '>='
37
34
  - !ruby/object:Gem::Version
38
35
  version: '0'
39
36
  none: false
37
+ name: nokogiri
38
+ type: :runtime
39
+ prerelease: false
40
40
  requirement: !ruby/object:Gem::Requirement
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,15 +44,15 @@ dependencies:
44
44
  version: '0'
45
45
  none: false
46
46
  - !ruby/object:Gem::Dependency
47
- prerelease: false
48
- name: rake
49
- type: :development
50
47
  version_requirements: !ruby/object:Gem::Requirement
51
48
  requirements:
52
49
  - - ! '>='
53
50
  - !ruby/object:Gem::Version
54
51
  version: '0'
55
52
  none: false
53
+ name: rake
54
+ type: :development
55
+ prerelease: false
56
56
  requirement: !ruby/object:Gem::Requirement
57
57
  requirements:
58
58
  - - ! '>='
@@ -60,15 +60,15 @@ dependencies:
60
60
  version: '0'
61
61
  none: false
62
62
  - !ruby/object:Gem::Dependency
63
- prerelease: false
64
- name: rspec
65
- type: :development
66
63
  version_requirements: !ruby/object:Gem::Requirement
67
64
  requirements:
68
65
  - - ! '>='
69
66
  - !ruby/object:Gem::Version
70
67
  version: '0'
71
68
  none: false
69
+ name: rspec
70
+ type: :development
71
+ prerelease: false
72
72
  requirement: !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,15 +76,15 @@ dependencies:
76
76
  version: '0'
77
77
  none: false
78
78
  - !ruby/object:Gem::Dependency
79
- prerelease: false
80
- name: pry
81
- type: :development
82
79
  version_requirements: !ruby/object:Gem::Requirement
83
80
  requirements:
84
81
  - - ! '>='
85
82
  - !ruby/object:Gem::Version
86
83
  version: '0'
87
84
  none: false
85
+ name: pry
86
+ type: :development
87
+ prerelease: false
88
88
  requirement: !ruby/object:Gem::Requirement
89
89
  requirements:
90
90
  - - ! '>='
@@ -432,18 +432,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
432
432
  requirements:
433
433
  - - ! '>='
434
434
  - !ruby/object:Gem::Version
435
- segments:
436
- - 0
437
- hash: 3256160240819012702
438
435
  version: '0'
439
436
  none: false
440
437
  required_rubygems_version: !ruby/object:Gem::Requirement
441
438
  requirements:
442
439
  - - ! '>='
443
440
  - !ruby/object:Gem::Version
444
- segments:
445
- - 0
446
- hash: 3256160240819012702
447
441
  version: '0'
448
442
  none: false
449
443
  requirements:
@@ -469,3 +463,4 @@ test_files:
469
463
  - spec/fixtures/txt/sample1.txt
470
464
  - spec/fixtures/txt/sample2.txt
471
465
  - spec/spec_helper.rb
466
+ has_rdoc: