arxiv-references 0.1.7.4 → 0.1.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/arxiv-references.gemspec +2 -2
- data/lib/arxiv/references/P3.rb +34 -32
- data/lib/arxiv/references/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed3c2af4712d721c218dac464f0892942710707e
|
4
|
+
data.tar.gz: 57ac87263191f21e977cddac6243d42cc989872e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 03b50cf6076c5aa43c8193d2ce099db7bcab38efc476500f4ee80e348f1e6c18d16c307494db9f4416fa8e60926d5a6335d0d5917bd89f5e9dfe1a5730eeb33e
|
7
|
+
data.tar.gz: 3dafd54af66bfb13b71a39c430293d669e7b6812458d689806c83f6fcce3cc43db6ede5eaf092f4f19bf9539c1edb85f988a91419c4a804e963b1ac0c71490b9
|
data/arxiv-references.gemspec
CHANGED
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["Takahiro Nishimura"]
|
10
10
|
spec.email = ["tkhr.nishimura@gmail.com"]
|
11
11
|
|
12
|
-
spec.summary = %q{This library is to fetch
|
13
|
-
spec.description = %q{
|
12
|
+
spec.summary = %q{This library is to fetch title, author, abstruct and etc. of paper in arXiv. Moreover, analyze pdf file to fetch citation list in paper}
|
13
|
+
spec.description = %q{Fetch title, author, abstruct, and citations list of paper in arXiv from pdf file}
|
14
14
|
spec.homepage = "https://github.com/nishimuuu/Arxiv-references"
|
15
15
|
spec.license = "GPL"
|
16
16
|
|
data/lib/arxiv/references/P3.rb
CHANGED
@@ -6,15 +6,16 @@ require 'expect'
|
|
6
6
|
require 'pdf-reader'
|
7
7
|
|
8
8
|
class P3
|
9
|
-
BASE_URL =
|
9
|
+
BASE_URL = 'https://arxiv.org'
|
10
10
|
REFERENCE_START_REGEXP = Regexp.new('\n*[rR][eE][fF][eE][rR][eE][nN][cC][eE][sS]?( +|\n+)?$')
|
11
11
|
REFERENCE_REGEXP = Regexp.new('(\[[0-9]?[0-9]\]|\[.+?\])')
|
12
|
+
|
12
13
|
def self.makeId
|
13
|
-
return Digest::SHA256.hexdigest Time.now.strftime(
|
14
|
+
return Digest::SHA256.hexdigest Time.now.strftime('%F %H:%M:%S')
|
14
15
|
end
|
15
16
|
|
16
17
|
def self.makeDir(id, work_dir)
|
17
|
-
Dir.mkdir("#{work_dir}/#{id}")
|
18
|
+
Dir.mkdir("#{work_dir}/#{id}")
|
18
19
|
end
|
19
20
|
|
20
21
|
def self.removeDir(id, work_dir)
|
@@ -29,7 +30,7 @@ class P3
|
|
29
30
|
end
|
30
31
|
end
|
31
32
|
|
32
|
-
def self.
|
33
|
+
def self.getK2Pdf(id, work_dir, use_dir)
|
33
34
|
if use_dir
|
34
35
|
return "#{work_dir}/#{id}/output_k2opt.pdf"
|
35
36
|
else
|
@@ -41,7 +42,8 @@ class P3
|
|
41
42
|
File.delete("#{work_dir}/#{id}-output.pdf")
|
42
43
|
File.delete("#{work_dir}/#{id}-output_k2opt.pdf")
|
43
44
|
end
|
44
|
-
|
45
|
+
|
46
|
+
def self.fetchPdfFile(pdfUrl, file_name)
|
45
47
|
open(file_name, 'wb') do |o|
|
46
48
|
open(pdfUrl) do |data|
|
47
49
|
o.write(data.read)
|
@@ -49,15 +51,15 @@ class P3
|
|
49
51
|
end
|
50
52
|
end
|
51
53
|
|
52
|
-
def self.convertSingleColPdf(job_id, work_dir,file_name, use_dir)
|
54
|
+
def self.convertSingleColPdf(job_id, work_dir, file_name, use_dir)
|
53
55
|
cmd = "k2pdfopt -dev kpw #{file_name}"
|
54
|
-
PTY.spawn(cmd) do |i,o|
|
56
|
+
PTY.spawn(cmd) do |i, o|
|
55
57
|
o.sync = true
|
56
|
-
i.expect(/\S.*Enter option above \(h=help, q=quit\):/,10){
|
58
|
+
i.expect(/\S.*Enter option above \(h=help, q=quit\):/, 10) {
|
57
59
|
o.puts "\n"
|
58
60
|
o.flush
|
59
61
|
}
|
60
|
-
while(
|
62
|
+
while (i.eof? == false)
|
61
63
|
res = i.gets
|
62
64
|
print res
|
63
65
|
break unless res.index('written').nil?
|
@@ -69,34 +71,34 @@ class P3
|
|
69
71
|
def self.fetchReference(file_name)
|
70
72
|
reader = PDF::Reader.new(file_name)
|
71
73
|
page_no = reader.
|
72
|
-
pages.
|
73
|
-
reject{|i|
|
74
|
-
i.text.index(REFERENCE_START_REGEXP).nil?
|
75
|
-
}.
|
76
|
-
map(&:number).
|
77
|
-
sort.
|
78
|
-
shift
|
79
|
-
|
80
|
-
ref_page = reader.
|
81
74
|
pages.
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
map
|
86
|
-
|
87
|
-
|
75
|
+
reject { |i|
|
76
|
+
i.text.index(REFERENCE_START_REGEXP).nil?
|
77
|
+
}.
|
78
|
+
map(&:number).
|
79
|
+
sort.
|
80
|
+
shift
|
81
|
+
|
82
|
+
ref_page = reader.
|
83
|
+
pages.
|
84
|
+
select { |i|
|
85
|
+
i.number >= page_no
|
86
|
+
}.
|
87
|
+
map { |i|
|
88
|
+
i.text.gsub(/\n\n+/, "\n").gsub(/ +/, ' ').gsub(/-\n +/, '')
|
89
|
+
}
|
88
90
|
|
89
|
-
|
91
|
+
ref_page.shift
|
90
92
|
|
91
|
-
|
93
|
+
ref_page = ref_page.
|
92
94
|
join(' ').
|
93
|
-
gsub(REFERENCE_REGEXP,"\n\\1")
|
95
|
+
gsub(REFERENCE_REGEXP, "\n\\1")
|
94
96
|
|
95
|
-
|
97
|
+
ref_page = ref_page.
|
96
98
|
split(/\n *\n/).
|
97
|
-
map{|i| i.gsub("\n",'')}.
|
98
|
-
select{|i| i.length > 15}
|
99
|
-
|
99
|
+
map { |i| i.gsub("\n", '') }.
|
100
|
+
select { |i| i.length > 15 }
|
101
|
+
return ref_page
|
100
102
|
end
|
101
103
|
|
102
104
|
def self.fetchFromPdfUrl(pdfUrl, work_dir=true, use_dir=true)
|
@@ -108,7 +110,7 @@ class P3
|
|
108
110
|
executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir)
|
109
111
|
references = fetchReference(executed_pdf)
|
110
112
|
if use_dir
|
111
|
-
removeDir(job_id, work_dir)
|
113
|
+
removeDir(job_id, work_dir)
|
112
114
|
else
|
113
115
|
removeFile(job_id, work_dir)
|
114
116
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arxiv-references
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7.
|
4
|
+
version: 0.1.7.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Takahiro Nishimura
|
@@ -94,7 +94,8 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '1.6'
|
97
|
-
description:
|
97
|
+
description: Fetch title, author, abstruct, and citations list of paper in arXiv from
|
98
|
+
pdf file
|
98
99
|
email:
|
99
100
|
- tkhr.nishimura@gmail.com
|
100
101
|
executables:
|
@@ -144,6 +145,6 @@ rubyforge_project:
|
|
144
145
|
rubygems_version: 2.4.5.1
|
145
146
|
signing_key:
|
146
147
|
specification_version: 4
|
147
|
-
summary: This library is to fetch
|
148
|
-
|
148
|
+
summary: This library is to fetch title, author, abstruct and etc. of paper in arXiv.
|
149
|
+
Moreover, analyze pdf file to fetch citation list in paper
|
149
150
|
test_files: []
|