biblicit 2.2.3 → 2.3.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -17,14 +17,31 @@ module ParsCit
17
17
  attr_reader :result
18
18
 
19
19
  def initialize(in_txt, opts={})
20
- mode = (opts.fetch :include_citations, false) ? 'extract_all' : 'extract_header'
21
-
20
+ parse_modes = if (opts.fetch :include_citations, false)
21
+ { extract_all: [:citeseer, :parshed, :citations] }
22
+ else
23
+ {
24
+ extract_header_svm_only: [:citeseer],
25
+ extract_header_crf_only: [:parshed]
26
+ }
27
+ end
28
+
29
+ ENV['SVM_LIGHT_HOME'] ||= "#{File.dirname(`which svm_classify`)}"
22
30
  ENV['CRFPP_HOME'] ||= "#{File.dirname(`which crf_test`)}/../"
23
31
  ENV['PARSCIT_TMPDIR'] ||= "/tmp/"
24
32
 
25
- output = `#{PERL_DIR}/bin/citeExtract.pl -q -m #{mode} #{in_txt.path}`
26
-
27
- @result = parse(Nokogiri::XML output)
33
+ @result = {}
34
+
35
+ parse_modes.map do |mode, keys|
36
+ outf = Tempfile.new mode.to_s
37
+ pid = spawn("#{PERL_DIR}/bin/citeExtract.pl -q -m #{mode} #{in_txt.path}", out: outf.path)
38
+ [pid, outf, keys]
39
+ end.each do |pid, outf, keys|
40
+ Process.wait pid
41
+ output = File.read outf
42
+ outf.unlink
43
+ @result.merge! parse(Nokogiri::XML output).slice(*keys)
44
+ end
28
45
  end
29
46
 
30
47
  private
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Biblicit
4
4
 
5
- VERSION = '2.2.3'
5
+ VERSION = '2.3.0'
6
6
 
7
7
  end
data/lib/biblicit.rb CHANGED
@@ -2,5 +2,6 @@
2
2
 
3
3
  require 'active_support/core_ext/object'
4
4
  require 'active_support/core_ext/string'
5
+ require 'active_support/core_ext/hash'
5
6
 
6
7
  require 'biblicit/extractor'
@@ -86,7 +86,7 @@ sub Help
86
86
  print STDERR "\t-q\tQuiet Mode (don't echo license)\n";
87
87
 
88
88
  # Thang v100401: add new mode (extract_section), and -i <inputType>
89
- print STDERR "\t-m <mode> \tMode (extract_citations, extract_header, extract_section, extract_meta, extract_all, default: extract_citations)\n";
89
+ print STDERR "\t-m <mode> \tMode (extract_citations, extract_header, extract_header_svm_only, extract_header_crf_only, extract_section, extract_meta, extract_all, default: extract_citations)\n";
90
90
  print STDERR "\t-i <inputType> \tType (raw, xml, default: raw)\n";
91
91
  print STDERR "\t-e <exportType>\tExport citations into multiple types (ads|bib|end|isi|ris|wordbib). Multiple types could be specified by contatenating with \"-\" e.g., bib-end-ris. Output files will be named as outfile.exportFormat, with outfile being the input argument, and exportFormat being each individual format supplied by -e option.\n";
92
92
  print STDERR "\t-t\tUse token level model instead\n";
@@ -281,6 +281,14 @@ sub ParseMode
281
281
  {
282
282
  return ($PARSHED | $SVM);
283
283
  }
284
+ elsif ($arg eq "extract_header_svm_only")
285
+ {
286
+ return $SVM;
287
+ }
288
+ elsif ($arg eq "extract_header_crf_only")
289
+ {
290
+ return $PARSHED;
291
+ }
284
292
  elsif ($arg eq "extract_citations")
285
293
  {
286
294
  return $PARSCIT;
metadata CHANGED
@@ -2,25 +2,25 @@
2
2
  name: biblicit
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.2.3
5
+ version: 2.3.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - David Judd
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-13 00:00:00.000000000 Z
12
+ date: 2013-06-13 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- prerelease: false
16
- name: activesupport
17
- type: :runtime
18
15
  version_requirements: !ruby/object:Gem::Requirement
19
16
  requirements:
20
17
  - - ! '>='
21
18
  - !ruby/object:Gem::Version
22
19
  version: '0'
23
20
  none: false
21
+ name: activesupport
22
+ type: :runtime
23
+ prerelease: false
24
24
  requirement: !ruby/object:Gem::Requirement
25
25
  requirements:
26
26
  - - ! '>='
@@ -28,15 +28,15 @@ dependencies:
28
28
  version: '0'
29
29
  none: false
30
30
  - !ruby/object:Gem::Dependency
31
- prerelease: false
32
- name: nokogiri
33
- type: :runtime
34
31
  version_requirements: !ruby/object:Gem::Requirement
35
32
  requirements:
36
33
  - - ! '>='
37
34
  - !ruby/object:Gem::Version
38
35
  version: '0'
39
36
  none: false
37
+ name: nokogiri
38
+ type: :runtime
39
+ prerelease: false
40
40
  requirement: !ruby/object:Gem::Requirement
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,15 +44,15 @@ dependencies:
44
44
  version: '0'
45
45
  none: false
46
46
  - !ruby/object:Gem::Dependency
47
- prerelease: false
48
- name: rake
49
- type: :development
50
47
  version_requirements: !ruby/object:Gem::Requirement
51
48
  requirements:
52
49
  - - ! '>='
53
50
  - !ruby/object:Gem::Version
54
51
  version: '0'
55
52
  none: false
53
+ name: rake
54
+ type: :development
55
+ prerelease: false
56
56
  requirement: !ruby/object:Gem::Requirement
57
57
  requirements:
58
58
  - - ! '>='
@@ -60,15 +60,15 @@ dependencies:
60
60
  version: '0'
61
61
  none: false
62
62
  - !ruby/object:Gem::Dependency
63
- prerelease: false
64
- name: rspec
65
- type: :development
66
63
  version_requirements: !ruby/object:Gem::Requirement
67
64
  requirements:
68
65
  - - ! '>='
69
66
  - !ruby/object:Gem::Version
70
67
  version: '0'
71
68
  none: false
69
+ name: rspec
70
+ type: :development
71
+ prerelease: false
72
72
  requirement: !ruby/object:Gem::Requirement
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,15 +76,15 @@ dependencies:
76
76
  version: '0'
77
77
  none: false
78
78
  - !ruby/object:Gem::Dependency
79
- prerelease: false
80
- name: pry
81
- type: :development
82
79
  version_requirements: !ruby/object:Gem::Requirement
83
80
  requirements:
84
81
  - - ! '>='
85
82
  - !ruby/object:Gem::Version
86
83
  version: '0'
87
84
  none: false
85
+ name: pry
86
+ type: :development
87
+ prerelease: false
88
88
  requirement: !ruby/object:Gem::Requirement
89
89
  requirements:
90
90
  - - ! '>='
@@ -432,18 +432,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
432
432
  requirements:
433
433
  - - ! '>='
434
434
  - !ruby/object:Gem::Version
435
- segments:
436
- - 0
437
- hash: 3256160240819012702
438
435
  version: '0'
439
436
  none: false
440
437
  required_rubygems_version: !ruby/object:Gem::Requirement
441
438
  requirements:
442
439
  - - ! '>='
443
440
  - !ruby/object:Gem::Version
444
- segments:
445
- - 0
446
- hash: 3256160240819012702
447
441
  version: '0'
448
442
  none: false
449
443
  requirements:
@@ -469,3 +463,4 @@ test_files:
469
463
  - spec/fixtures/txt/sample1.txt
470
464
  - spec/fixtures/txt/sample2.txt
471
465
  - spec/spec_helper.rb
466
+ has_rdoc: