arxiv-references 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/arxiv/references/ArxivReferences.rb +8 -3
- data/lib/arxiv/references/myUtil.rb +17 -17
- data/lib/arxiv/references/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 225161a6b0503ca774a846ad804eb205144d6c5a
|
4
|
+
data.tar.gz: c48bee9f466545bc878471e022bce09e936a0b05
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee917aeaf67117fa8a9041574262e4c939908dd95a362c5530ca86c97400d4776c329b12e8f0c7ca1c1c62ed9ce56feaccfea2e95d032d8af0f4e1b2f3e739d8
|
7
|
+
data.tar.gz: 7e20169f3ed005131be57a33d3e4563a430121f553974b3e2dc4e5241b071200c6da8cfb4ecdfba7108b943b03e9a1ab926ed51a53105f2dca29c19b046ea0bc
|
@@ -11,18 +11,23 @@ module ArxivReferences
|
|
11
11
|
class CLI < Thor
|
12
12
|
include ArxivUtil
|
13
13
|
desc 'url', 'Extract references from arxiv URL'
|
14
|
+
option 'work_dir', type: :string, aliases: '-work', desc: 'Set working dir(default: /tmp)'
|
14
15
|
def url(urlName)
|
15
|
-
puts ArxivUtil.fetchFromUrl(urlName)
|
16
|
+
puts ArxivUtil.fetchFromUrl(urlName, work_dir)
|
16
17
|
end
|
17
18
|
|
18
19
|
desc 'id', 'Extract references from Arxiv id'
|
20
|
+
option 'work_dir', type: :string, aliases: '-work', desc: 'Set working dir(default: /tmp)'
|
19
21
|
def arxivid(idName)
|
20
|
-
|
22
|
+
work_dir = options['work_dir'].nil? ? '/tmp' : options['work_dir']
|
23
|
+
puts ArxivUtil.fetchFromArxivId(idName, work_dir)
|
21
24
|
end
|
22
25
|
|
23
26
|
desc 'pdfurl', 'Extract references from pdf URL'
|
27
|
+
option 'work_dir', type: :string, aliases: '-work', desc: 'Set working dir(default: /tmp)'
|
24
28
|
def pdfurl(pdfUrlName)
|
25
|
-
|
29
|
+
work_dir = options['work_dir'].nil? ? '/tmp' : options['work_dir']
|
30
|
+
puts ArxivUtil.fetchFromPdfUrl(pdfUrlName, work_dir)
|
26
31
|
end
|
27
32
|
end
|
28
33
|
end
|
@@ -15,24 +15,24 @@ module ArxivUtil
|
|
15
15
|
return Digest::SHA256.hexdigest Time.now.strftime("%F %H:%M:%S")
|
16
16
|
end
|
17
17
|
|
18
|
-
def self.makeDir(id)
|
19
|
-
Dir.mkdir("
|
18
|
+
def self.makeDir(id, work_dir)
|
19
|
+
Dir.mkdir("#{work_dir}/#{id}")
|
20
20
|
end
|
21
21
|
|
22
|
-
def self.removeDir(id)
|
23
|
-
FileUtils.rm_rf("
|
22
|
+
def self.removeDir(id, work_dir)
|
23
|
+
FileUtils.rm_rf("#{work_dir}/#{id}")
|
24
24
|
end
|
25
25
|
|
26
|
-
def self.makeFile(id)
|
27
|
-
return "
|
26
|
+
def self.makeFile(id, work_dir)
|
27
|
+
return "#{work_dir}/#{id}/output.pdf"
|
28
28
|
end
|
29
29
|
|
30
|
-
def self. getK2Pdf(id)
|
31
|
-
return "
|
30
|
+
def self. getK2Pdf(id, work_dir)
|
31
|
+
return "#{work_dir}/#{id}/output_k2opt.pdf"
|
32
32
|
end
|
33
33
|
|
34
34
|
|
35
|
-
def self.fetchFromUrl(urlName)
|
35
|
+
def self.fetchFromUrl(urlName, work_dir)
|
36
36
|
puts "fetch => #{urlName}"
|
37
37
|
charset = nil
|
38
38
|
html = open(urlName) do |f|
|
@@ -46,19 +46,19 @@ module ArxivUtil
|
|
46
46
|
result[:authors] = page.xpath('//*[@id="abs"]/div[2]/div[2]/a').text
|
47
47
|
result[:abstruct] = page.xpath('//*[@id="abs"]/div[2]/blockquote').text
|
48
48
|
result[:pdfurl] = "#{BASE_URL}#{page.xpath('//*[@id="abs"]/div[1]/div[1]/ul/li[1]/a').attr('href').value}"
|
49
|
-
result[:references] = fetchFromPdfUrl(result[:pdfurl])
|
49
|
+
result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir)
|
50
50
|
return result.to_json
|
51
51
|
end
|
52
52
|
|
53
|
-
def self.fetchFromArxivId(id)
|
53
|
+
def self.fetchFromArxivId(id, work_dir)
|
54
54
|
target_url = "#{BASE_URL}/abs/#{id}"
|
55
|
-
fetchFromUrl(target_url)
|
55
|
+
fetchFromUrl(target_url, work_dir)
|
56
56
|
end
|
57
57
|
|
58
|
-
def self.fetchFromPdfUrl(pdfUrl)
|
58
|
+
def self.fetchFromPdfUrl(pdfUrl, work_dir)
|
59
59
|
job_id = makeId
|
60
|
-
makeDir(job_id)
|
61
|
-
file_name = makeFile(job_id)
|
60
|
+
makeDir(job_id, work_dir)
|
61
|
+
file_name = makeFile(job_id, work_dir)
|
62
62
|
|
63
63
|
open(file_name, 'wb') do |o|
|
64
64
|
open(pdfUrl) do |data|
|
@@ -78,7 +78,7 @@ module ArxivUtil
|
|
78
78
|
break unless res.index('written').nil?
|
79
79
|
end
|
80
80
|
end
|
81
|
-
executed_pdf = getK2Pdf(job_id)
|
81
|
+
executed_pdf = getK2Pdf(job_id, work_dir)
|
82
82
|
reader = PDF::Reader.new(executed_pdf)
|
83
83
|
page_no = reader.
|
84
84
|
pages.
|
@@ -106,7 +106,7 @@ module ArxivUtil
|
|
106
106
|
select{|i|
|
107
107
|
i.length > 5
|
108
108
|
}
|
109
|
-
removeDir(job_id)
|
109
|
+
removeDir(job_id, work_dir)
|
110
110
|
return references
|
111
111
|
end
|
112
112
|
end
|