arxiv-references 0.1.5.1 → 0.1.6.0
This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/arxiv/references/myUtil.rb +39 -31
- data/lib/arxiv/references/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f0a36d9c1f438e17231af3e1f12584893253e287
|
4
|
+
data.tar.gz: 68d7700f4a942e532c0639584a3c391feb2c0122
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a56c94bce408510688871319d74f7524e5e17c3a51fe7053ae6256fb2b0ac1c4e94890cc571ca153240e049b9f3764d3388ea78dcdb523a30af390b87942edab
|
7
|
+
data.tar.gz: fd90127fea1ca2368134aaedc5bce4ad80d50934c594afe9a6e34b1dc861cfd8bd34173b14a9fa8b19d7f331e0e2a5dbdb4557e2dfb86dfbed8794ffb5038ac5
|
data/lib/arxiv/references/myUtil.rb
CHANGED
@@ -23,16 +23,24 @@ module ArxivUtil
|
|
23
23
|
FileUtils.rm_rf("#{work_dir}/#{id}")
|
24
24
|
end
|
25
25
|
|
26
|
-
def self.makeFile(id, work_dir)
|
27
|
-
|
26
|
+
def self.makeFile(id, work_dir, use_dir)
|
27
|
+
if use_dir
|
28
|
+
return "#{work_dir}/#{id}/output.pdf"
|
29
|
+
else
|
30
|
+
return "#{work_dir}-#{id}-output.pdf"
|
31
|
+
end
|
28
32
|
end
|
29
33
|
|
30
|
-
def self. getK2Pdf(id, work_dir)
|
31
|
-
|
34
|
+
def self. getK2Pdf(id, work_dir, use_dir)
|
35
|
+
if use_dir
|
36
|
+
return "#{work_dir}/#{id}/output_k2opt.pdf"
|
37
|
+
else
|
38
|
+
return "#{work_dir}-#{id}-output_k2opt.pdf"
|
39
|
+
end
|
32
40
|
end
|
33
41
|
|
34
42
|
|
35
|
-
def self.fetchFromUrl(urlName, work_dir)
|
43
|
+
def self.fetchFromUrl(urlName, work_dir, use_dir)
|
36
44
|
puts "fetch => #{urlName}"
|
37
45
|
charset = nil
|
38
46
|
html = open(urlName) do |f|
|
@@ -46,7 +54,7 @@ module ArxivUtil
|
|
46
54
|
result[:authors] = page.xpath('//*[@id="abs"]/div[2]/div[2]/a').text
|
47
55
|
result[:abstruct] = page.xpath('//*[@id="abs"]/div[2]/blockquote').text
|
48
56
|
result[:pdfurl] = "#{BASE_URL}#{page.xpath('//*[@id="abs"]/div[1]/div[1]/ul/li[1]/a').attr('href').value}"
|
49
|
-
result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir)
|
57
|
+
result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir, use_dir)
|
50
58
|
return result.to_json
|
51
59
|
end
|
52
60
|
|
@@ -63,7 +71,7 @@ module ArxivUtil
|
|
63
71
|
end
|
64
72
|
end
|
65
73
|
|
66
|
-
def self.convertSingleColPdf(job_id, work_dir,file_name)
|
74
|
+
def self.convertSingleColPdf(job_id, work_dir,file_name, use_dir)
|
67
75
|
cmd = "k2pdfopt -dev kpw #{file_name}"
|
68
76
|
PTY.spawn(cmd) do |i,o|
|
69
77
|
o.sync = true
|
@@ -77,7 +85,7 @@ module ArxivUtil
|
|
77
85
|
break unless res.index('written').nil?
|
78
86
|
end
|
79
87
|
end
|
80
|
-
return getK2Pdf(job_id, work_dir)
|
88
|
+
return getK2Pdf(job_id, work_dir, use_dir)
|
81
89
|
end
|
82
90
|
|
83
91
|
def self.fetchReference(file_name)
|
@@ -90,35 +98,35 @@ module ArxivUtil
|
|
90
98
|
map(&:number).
|
91
99
|
sort.
|
92
100
|
shift
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
101
|
+
puts "Detect References page=> #{page_no} "
|
102
|
+
ref_page = reader.
|
103
|
+
pages.
|
104
|
+
select{|i|
|
105
|
+
i.number >= page_no
|
106
|
+
}.
|
107
|
+
map{|i|
|
108
|
+
i.text.gsub(/\n+/,"\n").gsub(/ +/,' ')
|
109
|
+
}.
|
110
|
+
join(' ').
|
111
|
+
gsub(REFERENCE_REGEXP,"\n\\1").
|
112
|
+
gsub('- ','').
|
113
|
+
split("\n")
|
106
114
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
115
|
+
return ref_page[(ref_page.index{|i| i =~ REFERENCE_START_REGEXP}+1)..ref_page.length].
|
116
|
+
select{|i|
|
117
|
+
i.length > 5
|
118
|
+
}
|
111
119
|
end
|
112
120
|
|
113
|
-
def self.fetchFromPdfUrl(pdfUrl, work_dir)
|
121
|
+
def self.fetchFromPdfUrl(pdfUrl, work_dir, use_dir)
|
114
122
|
job_id = makeId
|
115
|
-
makeDir(job_id, work_dir)
|
116
|
-
file_name = makeFile(job_id, work_dir)
|
117
|
-
|
123
|
+
makeDir(job_id, work_dir) unless use_dir
|
124
|
+
file_name = makeFile(job_id, work_dir, use_dir)
|
125
|
+
|
118
126
|
fetchPdfFile(pdfUrl, file_name)
|
119
|
-
executed_pdf = convertSingleColPdf(job_id, work_dir, file_name)
|
127
|
+
executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir)
|
120
128
|
references = fetchReference(executed_pdf)
|
121
|
-
removeDir(job_id, work_dir)
|
129
|
+
removeDir(job_id, work_dir) unless use_dir
|
122
130
|
return references
|
123
131
|
end
|
124
132
|
end
|