arxiv-references 0.1.5.1 → 0.1.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: daa78fd2737bd135435527f1534ced7910016f66
4
- data.tar.gz: 9d5bc8e4f8046160ff32135b450191a11e44c833
3
+ metadata.gz: f0a36d9c1f438e17231af3e1f12584893253e287
4
+ data.tar.gz: 68d7700f4a942e532c0639584a3c391feb2c0122
5
5
  SHA512:
6
- metadata.gz: 30c204729334a420e27e2f0b5111bb00c46ccf5a3da26cc12736b8a65f109290e3cd6ca855f54c170d41578ab11a2373a03d1e7139f426b24d249a9f00ba99ed
7
- data.tar.gz: 25e3705840d15c2e78efd358aa1680c50807f5af3ed09402d0f915acd4286631adadd820cdec89e932fa69602b3d4fbad65ce81603754575acfaf68f54f5a09e
6
+ metadata.gz: a56c94bce408510688871319d74f7524e5e17c3a51fe7053ae6256fb2b0ac1c4e94890cc571ca153240e049b9f3764d3388ea78dcdb523a30af390b87942edab
7
+ data.tar.gz: fd90127fea1ca2368134aaedc5bce4ad80d50934c594afe9a6e34b1dc861cfd8bd34173b14a9fa8b19d7f331e0e2a5dbdb4557e2dfb86dfbed8794ffb5038ac5
@@ -23,16 +23,24 @@ module ArxivUtil
23
23
  FileUtils.rm_rf("#{work_dir}/#{id}")
24
24
  end
25
25
 
26
- def self.makeFile(id, work_dir)
27
- return "#{work_dir}/#{id}/output.pdf"
26
+ def self.makeFile(id, work_dir, use_dir)
27
+ if use_dir
28
+ return "#{work_dir}/#{id}/output.pdf"
29
+ else
30
+ return "#{work_dir}-#{id}-output.pdf"
31
+ end
28
32
  end
29
33
 
30
- def self. getK2Pdf(id, work_dir)
31
- return "#{work_dir}/#{id}/output_k2opt.pdf"
34
+ def self. getK2Pdf(id, work_dir, use_dir)
35
+ if use_dir
36
+ return "#{work_dir}/#{id}/output_k2opt.pdf"
37
+ else
38
+ return "#{work_dir}-#{id}-output_k2opt.pdf"
39
+ end
32
40
  end
33
41
 
34
42
 
35
- def self.fetchFromUrl(urlName, work_dir)
43
+ def self.fetchFromUrl(urlName, work_dir, use_dir)
36
44
  puts "fetch => #{urlName}"
37
45
  charset = nil
38
46
  html = open(urlName) do |f|
@@ -46,7 +54,7 @@ module ArxivUtil
46
54
  result[:authors] = page.xpath('//*[@id="abs"]/div[2]/div[2]/a').text
47
55
  result[:abstruct] = page.xpath('//*[@id="abs"]/div[2]/blockquote').text
48
56
  result[:pdfurl] = "#{BASE_URL}#{page.xpath('//*[@id="abs"]/div[1]/div[1]/ul/li[1]/a').attr('href').value}"
49
- result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir)
57
+ result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir, use_dir)
50
58
  return result.to_json
51
59
  end
52
60
 
@@ -63,7 +71,7 @@ module ArxivUtil
63
71
  end
64
72
  end
65
73
 
66
- def self.convertSingleColPdf(job_id, work_dir,file_name)
74
+ def self.convertSingleColPdf(job_id, work_dir,file_name, use_dir)
67
75
  cmd = "k2pdfopt -dev kpw #{file_name}"
68
76
  PTY.spawn(cmd) do |i,o|
69
77
  o.sync = true
@@ -77,7 +85,7 @@ module ArxivUtil
77
85
  break unless res.index('written').nil?
78
86
  end
79
87
  end
80
- return getK2Pdf(job_id, work_dir)
88
+ return getK2Pdf(job_id, work_dir, use_dir)
81
89
  end
82
90
 
83
91
  def self.fetchReference(file_name)
@@ -90,35 +98,35 @@ module ArxivUtil
90
98
  map(&:number).
91
99
  sort.
92
100
  shift
93
- puts "Detect References page=> #{page_no} "
94
- ref_page = reader.
95
- pages.
96
- select{|i|
97
- i.number >= page_no
98
- }.
99
- map{|i|
100
- i.text.gsub(/\n+/,"\n").gsub(/ +/,' ')
101
- }.
102
- join(' ').
103
- gsub(REFERENCE_REGEXP,"\n\\1").
104
- gsub('- ','').
105
- split("\n")
101
+ puts "Detect References page=> #{page_no} "
102
+ ref_page = reader.
103
+ pages.
104
+ select{|i|
105
+ i.number >= page_no
106
+ }.
107
+ map{|i|
108
+ i.text.gsub(/\n+/,"\n").gsub(/ +/,' ')
109
+ }.
110
+ join(' ').
111
+ gsub(REFERENCE_REGEXP,"\n\\1").
112
+ gsub('- ','').
113
+ split("\n")
106
114
 
107
- return ref_page[(ref_page.index{|i| i =~ REFERENCE_START_REGEXP}+1)..ref_page.length].
108
- select{|i|
109
- i.length > 5
110
- }
115
+ return ref_page[(ref_page.index{|i| i =~ REFERENCE_START_REGEXP}+1)..ref_page.length].
116
+ select{|i|
117
+ i.length > 5
118
+ }
111
119
  end
112
120
 
113
- def self.fetchFromPdfUrl(pdfUrl, work_dir)
121
+ def self.fetchFromPdfUrl(pdfUrl, work_dir, use_dir)
114
122
  job_id = makeId
115
- makeDir(job_id, work_dir)
116
- file_name = makeFile(job_id, work_dir)
117
-
123
+ makeDir(job_id, work_dir) unless use_dir
124
+ file_name = makeFile(job_id, work_dir, use_dir)
125
+
118
126
  fetchPdfFile(pdfUrl, file_name)
119
- executed_pdf = convertSingleColPdf(job_id, work_dir, file_name)
127
+ executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir)
120
128
  references = fetchReference(executed_pdf)
121
- removeDir(job_id, work_dir)
129
+ removeDir(job_id, work_dir) unless use_dir
122
130
  return references
123
131
  end
124
132
  end
@@ -1,5 +1,5 @@
1
1
  module Arxiv
2
2
  module References
3
- VERSION = "0.1.5.1"
3
+ VERSION = "0.1.6.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arxiv-references
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5.1
4
+ version: 0.1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Takahiro Nishimura