biblicit 2.0.4 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/biblicit.gemspec
CHANGED
|
@@ -3,9 +3,11 @@
|
|
|
3
3
|
lib = File.expand_path('../lib', __FILE__)
|
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
5
5
|
|
|
6
|
+
require 'biblicit/version'
|
|
7
|
+
|
|
6
8
|
Gem::Specification.new do |gem|
|
|
7
9
|
gem.name = "biblicit"
|
|
8
|
-
gem.version =
|
|
10
|
+
gem.version = Biblicit::VERSION
|
|
9
11
|
gem.authors = ["David Judd"]
|
|
10
12
|
gem.email = ["david@academia.edu"]
|
|
11
13
|
gem.summary = %q{Extract citations from PDFs.}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
# encoding: UTF-8
|
|
2
3
|
#Author Nguyen Thuy Dung
|
|
3
4
|
require 'find'
|
|
4
5
|
#get relative pos in ingeter, values range from 0-10
|
|
@@ -8,7 +9,7 @@ def getPos (val)
|
|
|
8
9
|
end
|
|
9
10
|
i = 1
|
|
10
11
|
while i <= 10 do
|
|
11
|
-
if val <= (i/10.0)
|
|
12
|
+
if val <= (i/10.0)
|
|
12
13
|
return i
|
|
13
14
|
end
|
|
14
15
|
i = i + 1
|
|
@@ -34,10 +35,10 @@ while !f.eof do
|
|
|
34
35
|
if l != ""
|
|
35
36
|
tmp_array = l.split("|||")
|
|
36
37
|
if tmp_array.length == 1
|
|
37
|
-
hea_array << tmp_array[0].strip
|
|
38
|
+
hea_array << tmp_array[0].strip
|
|
38
39
|
ahea_array << "?"
|
|
39
40
|
else
|
|
40
|
-
hea_array << tmp_array[1].strip
|
|
41
|
+
hea_array << tmp_array[1].strip
|
|
41
42
|
ahea_array << tmp_array[0].strip
|
|
42
43
|
end
|
|
43
44
|
else
|
|
@@ -46,7 +47,7 @@ while !f.eof do
|
|
|
46
47
|
if hea_array.length == 1
|
|
47
48
|
pos = 0
|
|
48
49
|
else
|
|
49
|
-
pos = getPos(index*1.0/(hea_array.length - 1))
|
|
50
|
+
pos = getPos(index*1.0/(hea_array.length - 1))
|
|
50
51
|
end
|
|
51
52
|
currHeader = getHeader(hea_array.at(index))
|
|
52
53
|
assignedHeader = getHeader(ahea_array.at(index))
|
|
@@ -54,7 +55,7 @@ while !f.eof do
|
|
|
54
55
|
len = tmp.length
|
|
55
56
|
if len > 3
|
|
56
57
|
len = 3
|
|
57
|
-
end
|
|
58
|
+
end
|
|
58
59
|
firstWord = tmp.at(0)
|
|
59
60
|
secondWord = "null"
|
|
60
61
|
if len >= 2
|
|
@@ -75,7 +76,7 @@ while index < hea_array.length do
|
|
|
75
76
|
if hea_array.length == 1
|
|
76
77
|
pos = 0
|
|
77
78
|
else
|
|
78
|
-
pos = getPos(index*1.0/(hea_array.length - 1))
|
|
79
|
+
pos = getPos(index*1.0/(hea_array.length - 1))
|
|
79
80
|
end
|
|
80
81
|
currHeader = getHeader(hea_array.at(index))
|
|
81
82
|
assignedHeader = getHeader(ahea_array.at(index))
|
|
@@ -83,8 +84,8 @@ while index < hea_array.length do
|
|
|
83
84
|
len = tmp.length
|
|
84
85
|
if len > 3
|
|
85
86
|
len = 3
|
|
86
|
-
end
|
|
87
|
-
|
|
87
|
+
end
|
|
88
|
+
|
|
88
89
|
firstWord = tmp.at(0).strip
|
|
89
90
|
secondWord = "null"
|
|
90
91
|
if /[0-9]+.?/.match(firstWord) and len > 1
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: biblicit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.4
|
|
4
|
+
version: 2.0.5
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -127,6 +127,7 @@ files:
|
|
|
127
127
|
- lib/biblicit/citeseer.rb
|
|
128
128
|
- lib/biblicit/extractor.rb
|
|
129
129
|
- lib/biblicit/parscit.rb
|
|
130
|
+
- lib/biblicit/version.rb
|
|
130
131
|
- sh/convert_to_text.sh
|
|
131
132
|
- spec/biblicit/extractor_spec.rb
|
|
132
133
|
- spec/fixtures/Review_of_Michael_Tyes_Consciousness_Revisited.docx
|
|
@@ -453,7 +454,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
453
454
|
version: '0'
|
|
454
455
|
segments:
|
|
455
456
|
- 0
|
|
456
|
-
hash: -
|
|
457
|
+
hash: -1430171946966686142
|
|
457
458
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
458
459
|
none: false
|
|
459
460
|
requirements:
|
|
@@ -462,7 +463,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
462
463
|
version: '0'
|
|
463
464
|
segments:
|
|
464
465
|
- 0
|
|
465
|
-
hash: -
|
|
466
|
+
hash: -1430171946966686142
|
|
466
467
|
requirements:
|
|
467
468
|
- For PDFs, Poppler or XPDF (try "which pdftotext")
|
|
468
469
|
- For Postscript files, Ghostscript (try "which ps2ascii")
|