arxiv-references 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/arxiv/references/ArxivReferences.rb +8 -3
- data/lib/arxiv/references/myUtil.rb +17 -17
- data/lib/arxiv/references/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 225161a6b0503ca774a846ad804eb205144d6c5a
|
4
|
+
data.tar.gz: c48bee9f466545bc878471e022bce09e936a0b05
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee917aeaf67117fa8a9041574262e4c939908dd95a362c5530ca86c97400d4776c329b12e8f0c7ca1c1c62ed9ce56feaccfea2e95d032d8af0f4e1b2f3e739d8
|
7
|
+
data.tar.gz: 7e20169f3ed005131be57a33d3e4563a430121f553974b3e2dc4e5241b071200c6da8cfb4ecdfba7108b943b03e9a1ab926ed51a53105f2dca29c19b046ea0bc
|
@@ -11,18 +11,23 @@ module ArxivReferences
|
|
11
11
|
class CLI < Thor
|
12
12
|
include ArxivUtil
|
13
13
|
desc 'url', 'Extract references from arxiv URL'
|
14
|
+
option 'work_dir', type: :string, aliases: '-work', desc: 'Set working dir(default: /tmp)'
|
14
15
|
def url(urlName)
|
15
|
-
puts ArxivUtil.fetchFromUrl(urlName)
|
16
|
+
puts ArxivUtil.fetchFromUrl(urlName, work_dir)
|
16
17
|
end
|
17
18
|
|
18
19
|
desc 'id', 'Extract references from Arxiv id'
|
20
|
+
option 'work_dir', type: :string, aliases: '-work', desc: 'Set working dir(default: /tmp)'
|
19
21
|
def arxivid(idName)
|
20
|
-
|
22
|
+
work_dir = options['work_dir'].nil? ? '/tmp' : options['work_dir']
|
23
|
+
puts ArxivUtil.fetchFromArxivId(idName, work_dir)
|
21
24
|
end
|
22
25
|
|
23
26
|
desc 'pdfurl', 'Extract references from pdf URL'
|
27
|
+
option 'work_dir', type: :string, aliases: '-work', desc: 'Set working dir(default: /tmp)'
|
24
28
|
def pdfurl(pdfUrlName)
|
25
|
-
|
29
|
+
work_dir = options['work_dir'].nil? ? '/tmp' : options['work_dir']
|
30
|
+
puts ArxivUtil.fetchFromPdfUrl(pdfUrlName, work_dir)
|
26
31
|
end
|
27
32
|
end
|
28
33
|
end
|
@@ -15,24 +15,24 @@ module ArxivUtil
|
|
15
15
|
return Digest::SHA256.hexdigest Time.now.strftime("%F %H:%M:%S")
|
16
16
|
end
|
17
17
|
|
18
|
-
def self.makeDir(id)
|
19
|
-
Dir.mkdir("/tmp/#{id}")
|
18
|
+
def self.makeDir(id, work_dir)
|
19
|
+
Dir.mkdir("#{work_dir}/#{id}")
|
20
20
|
end
|
21
21
|
|
22
|
-
def self.removeDir(id)
|
23
|
-
FileUtils.rm_rf("/tmp/#{id}")
|
22
|
+
def self.removeDir(id, work_dir)
|
23
|
+
FileUtils.rm_rf("#{work_dir}/#{id}")
|
24
24
|
end
|
25
25
|
|
26
|
-
def self.makeFile(id)
|
27
|
-
return "/tmp/#{id}/output.pdf"
|
26
|
+
def self.makeFile(id, work_dir)
|
27
|
+
return "#{work_dir}/#{id}/output.pdf"
|
28
28
|
end
|
29
29
|
|
30
|
-
def self. getK2Pdf(id)
|
31
|
-
return "/tmp/#{id}/output_k2opt.pdf"
|
30
|
+
def self. getK2Pdf(id, work_dir)
|
31
|
+
return "#{work_dir}/#{id}/output_k2opt.pdf"
|
32
32
|
end
|
33
33
|
|
34
34
|
|
35
|
-
def self.fetchFromUrl(urlName)
|
35
|
+
def self.fetchFromUrl(urlName, work_dir)
|
36
36
|
puts "fetch => #{urlName}"
|
37
37
|
charset = nil
|
38
38
|
html = open(urlName) do |f|
|
@@ -46,19 +46,19 @@ module ArxivUtil
|
|
46
46
|
result[:authors] = page.xpath('//*[@id="abs"]/div[2]/div[2]/a').text
|
47
47
|
result[:abstruct] = page.xpath('//*[@id="abs"]/div[2]/blockquote').text
|
48
48
|
result[:pdfurl] = "#{BASE_URL}#{page.xpath('//*[@id="abs"]/div[1]/div[1]/ul/li[1]/a').attr('href').value}"
|
49
|
-
result[:references] = fetchFromPdfUrl(result[:pdfurl])
|
49
|
+
result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir)
|
50
50
|
return result.to_json
|
51
51
|
end
|
52
52
|
|
53
|
-
def self.fetchFromArxivId(id)
|
53
|
+
def self.fetchFromArxivId(id, work_dir)
|
54
54
|
target_url = "#{BASE_URL}/abs/#{id}"
|
55
|
-
fetchFromUrl(target_url)
|
55
|
+
fetchFromUrl(target_url, work_dir)
|
56
56
|
end
|
57
57
|
|
58
|
-
def self.fetchFromPdfUrl(pdfUrl)
|
58
|
+
def self.fetchFromPdfUrl(pdfUrl, work_dir)
|
59
59
|
job_id = makeId
|
60
|
-
makeDir(job_id)
|
61
|
-
file_name = makeFile(job_id)
|
60
|
+
makeDir(job_id, work_dir)
|
61
|
+
file_name = makeFile(job_id, work_dir)
|
62
62
|
|
63
63
|
open(file_name, 'wb') do |o|
|
64
64
|
open(pdfUrl) do |data|
|
@@ -78,7 +78,7 @@ module ArxivUtil
|
|
78
78
|
break unless res.index('written').nil?
|
79
79
|
end
|
80
80
|
end
|
81
|
-
executed_pdf = getK2Pdf(job_id)
|
81
|
+
executed_pdf = getK2Pdf(job_id, work_dir)
|
82
82
|
reader = PDF::Reader.new(executed_pdf)
|
83
83
|
page_no = reader.
|
84
84
|
pages.
|
@@ -106,7 +106,7 @@ module ArxivUtil
|
|
106
106
|
select{|i|
|
107
107
|
i.length > 5
|
108
108
|
}
|
109
|
-
removeDir(job_id)
|
109
|
+
removeDir(job_id, work_dir)
|
110
110
|
return references
|
111
111
|
end
|
112
112
|
end
|