biblicit 2.0.4 → 2.0.5
Sign up to get free protection for your applications and to get access to all the features.
data/biblicit.gemspec
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
lib = File.expand_path('../lib', __FILE__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
|
6
|
+
require 'biblicit/version'
|
7
|
+
|
6
8
|
Gem::Specification.new do |gem|
|
7
9
|
gem.name = "biblicit"
|
8
|
-
gem.version =
|
10
|
+
gem.version = Biblicit::VERSION
|
9
11
|
gem.authors = ["David Judd"]
|
10
12
|
gem.email = ["david@academia.edu"]
|
11
13
|
gem.summary = %q{Extract citations from PDFs.}
|
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
2
3
|
#Author Nguyen Thuy Dung
|
3
4
|
require 'find'
|
4
5
|
#get relative pos in ingeter, values range from 0-10
|
@@ -8,7 +9,7 @@ def getPos (val)
|
|
8
9
|
end
|
9
10
|
i = 1
|
10
11
|
while i <= 10 do
|
11
|
-
if val <= (i/10.0)
|
12
|
+
if val <= (i/10.0)
|
12
13
|
return i
|
13
14
|
end
|
14
15
|
i = i + 1
|
@@ -34,10 +35,10 @@ while !f.eof do
|
|
34
35
|
if l != ""
|
35
36
|
tmp_array = l.split("|||")
|
36
37
|
if tmp_array.length == 1
|
37
|
-
hea_array << tmp_array[0].strip
|
38
|
+
hea_array << tmp_array[0].strip
|
38
39
|
ahea_array << "?"
|
39
40
|
else
|
40
|
-
hea_array << tmp_array[1].strip
|
41
|
+
hea_array << tmp_array[1].strip
|
41
42
|
ahea_array << tmp_array[0].strip
|
42
43
|
end
|
43
44
|
else
|
@@ -46,7 +47,7 @@ while !f.eof do
|
|
46
47
|
if hea_array.length == 1
|
47
48
|
pos = 0
|
48
49
|
else
|
49
|
-
pos = getPos(index*1.0/(hea_array.length - 1))
|
50
|
+
pos = getPos(index*1.0/(hea_array.length - 1))
|
50
51
|
end
|
51
52
|
currHeader = getHeader(hea_array.at(index))
|
52
53
|
assignedHeader = getHeader(ahea_array.at(index))
|
@@ -54,7 +55,7 @@ while !f.eof do
|
|
54
55
|
len = tmp.length
|
55
56
|
if len > 3
|
56
57
|
len = 3
|
57
|
-
end
|
58
|
+
end
|
58
59
|
firstWord = tmp.at(0)
|
59
60
|
secondWord = "null"
|
60
61
|
if len >= 2
|
@@ -75,7 +76,7 @@ while index < hea_array.length do
|
|
75
76
|
if hea_array.length == 1
|
76
77
|
pos = 0
|
77
78
|
else
|
78
|
-
pos = getPos(index*1.0/(hea_array.length - 1))
|
79
|
+
pos = getPos(index*1.0/(hea_array.length - 1))
|
79
80
|
end
|
80
81
|
currHeader = getHeader(hea_array.at(index))
|
81
82
|
assignedHeader = getHeader(ahea_array.at(index))
|
@@ -83,8 +84,8 @@ while index < hea_array.length do
|
|
83
84
|
len = tmp.length
|
84
85
|
if len > 3
|
85
86
|
len = 3
|
86
|
-
end
|
87
|
-
|
87
|
+
end
|
88
|
+
|
88
89
|
firstWord = tmp.at(0).strip
|
89
90
|
secondWord = "null"
|
90
91
|
if /[0-9]+.?/.match(firstWord) and len > 1
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biblicit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.4
|
4
|
+
version: 2.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -127,6 +127,7 @@ files:
|
|
127
127
|
- lib/biblicit/citeseer.rb
|
128
128
|
- lib/biblicit/extractor.rb
|
129
129
|
- lib/biblicit/parscit.rb
|
130
|
+
- lib/biblicit/version.rb
|
130
131
|
- sh/convert_to_text.sh
|
131
132
|
- spec/biblicit/extractor_spec.rb
|
132
133
|
- spec/fixtures/Review_of_Michael_Tyes_Consciousness_Revisited.docx
|
@@ -453,7 +454,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
453
454
|
version: '0'
|
454
455
|
segments:
|
455
456
|
- 0
|
456
|
-
hash: -
|
457
|
+
hash: -1430171946966686142
|
457
458
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
458
459
|
none: false
|
459
460
|
requirements:
|
@@ -462,7 +463,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
462
463
|
version: '0'
|
463
464
|
segments:
|
464
465
|
- 0
|
465
|
-
hash: -
|
466
|
+
hash: -1430171946966686142
|
466
467
|
requirements:
|
467
468
|
- For PDFs, Poppler or XPDF (try "which pdftotext")
|
468
469
|
- For Postscript files, Ghostscript (try "which ps2ascii")
|