arxiv-references 0.1.5.1 → 0.1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: daa78fd2737bd135435527f1534ced7910016f66
4
- data.tar.gz: 9d5bc8e4f8046160ff32135b450191a11e44c833
3
+ metadata.gz: f0a36d9c1f438e17231af3e1f12584893253e287
4
+ data.tar.gz: 68d7700f4a942e532c0639584a3c391feb2c0122
5
5
  SHA512:
6
- metadata.gz: 30c204729334a420e27e2f0b5111bb00c46ccf5a3da26cc12736b8a65f109290e3cd6ca855f54c170d41578ab11a2373a03d1e7139f426b24d249a9f00ba99ed
7
- data.tar.gz: 25e3705840d15c2e78efd358aa1680c50807f5af3ed09402d0f915acd4286631adadd820cdec89e932fa69602b3d4fbad65ce81603754575acfaf68f54f5a09e
6
+ metadata.gz: a56c94bce408510688871319d74f7524e5e17c3a51fe7053ae6256fb2b0ac1c4e94890cc571ca153240e049b9f3764d3388ea78dcdb523a30af390b87942edab
7
+ data.tar.gz: fd90127fea1ca2368134aaedc5bce4ad80d50934c594afe9a6e34b1dc861cfd8bd34173b14a9fa8b19d7f331e0e2a5dbdb4557e2dfb86dfbed8794ffb5038ac5
@@ -23,16 +23,24 @@ module ArxivUtil
23
23
  FileUtils.rm_rf("#{work_dir}/#{id}")
24
24
  end
25
25
 
26
- def self.makeFile(id, work_dir)
27
- return "#{work_dir}/#{id}/output.pdf"
26
+ def self.makeFile(id, work_dir, use_dir)
27
+ if use_dir
28
+ return "#{work_dir}/#{id}/output.pdf"
29
+ else
30
+ return "#{work_dir}-#{id}-output.pdf"
31
+ end
28
32
  end
29
33
 
30
- def self. getK2Pdf(id, work_dir)
31
- return "#{work_dir}/#{id}/output_k2opt.pdf"
34
+ def self. getK2Pdf(id, work_dir, use_dir)
35
+ if use_dir
36
+ return "#{work_dir}/#{id}/output_k2opt.pdf"
37
+ else
38
+ return "#{work_dir}-#{id}-output_k2opt.pdf"
39
+ end
32
40
  end
33
41
 
34
42
 
35
- def self.fetchFromUrl(urlName, work_dir)
43
+ def self.fetchFromUrl(urlName, work_dir, use_dir)
36
44
  puts "fetch => #{urlName}"
37
45
  charset = nil
38
46
  html = open(urlName) do |f|
@@ -46,7 +54,7 @@ module ArxivUtil
46
54
  result[:authors] = page.xpath('//*[@id="abs"]/div[2]/div[2]/a').text
47
55
  result[:abstruct] = page.xpath('//*[@id="abs"]/div[2]/blockquote').text
48
56
  result[:pdfurl] = "#{BASE_URL}#{page.xpath('//*[@id="abs"]/div[1]/div[1]/ul/li[1]/a').attr('href').value}"
49
- result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir)
57
+ result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir, use_dir)
50
58
  return result.to_json
51
59
  end
52
60
 
@@ -63,7 +71,7 @@ module ArxivUtil
63
71
  end
64
72
  end
65
73
 
66
- def self.convertSingleColPdf(job_id, work_dir,file_name)
74
+ def self.convertSingleColPdf(job_id, work_dir,file_name, use_dir)
67
75
  cmd = "k2pdfopt -dev kpw #{file_name}"
68
76
  PTY.spawn(cmd) do |i,o|
69
77
  o.sync = true
@@ -77,7 +85,7 @@ module ArxivUtil
77
85
  break unless res.index('written').nil?
78
86
  end
79
87
  end
80
- return getK2Pdf(job_id, work_dir)
88
+ return getK2Pdf(job_id, work_dir, use_dir)
81
89
  end
82
90
 
83
91
  def self.fetchReference(file_name)
@@ -90,35 +98,35 @@ module ArxivUtil
90
98
  map(&:number).
91
99
  sort.
92
100
  shift
93
- puts "Detect References page=> #{page_no} "
94
- ref_page = reader.
95
- pages.
96
- select{|i|
97
- i.number >= page_no
98
- }.
99
- map{|i|
100
- i.text.gsub(/\n+/,"\n").gsub(/ +/,' ')
101
- }.
102
- join(' ').
103
- gsub(REFERENCE_REGEXP,"\n\\1").
104
- gsub('- ','').
105
- split("\n")
101
+ puts "Detect References page=> #{page_no} "
102
+ ref_page = reader.
103
+ pages.
104
+ select{|i|
105
+ i.number >= page_no
106
+ }.
107
+ map{|i|
108
+ i.text.gsub(/\n+/,"\n").gsub(/ +/,' ')
109
+ }.
110
+ join(' ').
111
+ gsub(REFERENCE_REGEXP,"\n\\1").
112
+ gsub('- ','').
113
+ split("\n")
106
114
 
107
- return ref_page[(ref_page.index{|i| i =~ REFERENCE_START_REGEXP}+1)..ref_page.length].
108
- select{|i|
109
- i.length > 5
110
- }
115
+ return ref_page[(ref_page.index{|i| i =~ REFERENCE_START_REGEXP}+1)..ref_page.length].
116
+ select{|i|
117
+ i.length > 5
118
+ }
111
119
  end
112
120
 
113
- def self.fetchFromPdfUrl(pdfUrl, work_dir)
121
+ def self.fetchFromPdfUrl(pdfUrl, work_dir, use_dir)
114
122
  job_id = makeId
115
- makeDir(job_id, work_dir)
116
- file_name = makeFile(job_id, work_dir)
117
-
123
+ makeDir(job_id, work_dir) unless use_dir
124
+ file_name = makeFile(job_id, work_dir, use_dir)
125
+
118
126
  fetchPdfFile(pdfUrl, file_name)
119
- executed_pdf = convertSingleColPdf(job_id, work_dir, file_name)
127
+ executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir)
120
128
  references = fetchReference(executed_pdf)
121
- removeDir(job_id, work_dir)
129
+ removeDir(job_id, work_dir) unless use_dir
122
130
  return references
123
131
  end
124
132
  end
@@ -1,5 +1,5 @@
1
1
  module Arxiv
2
2
  module References
3
- VERSION = "0.1.5.1"
3
+ VERSION = "0.1.6.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arxiv-references
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5.1
4
+ version: 0.1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Takahiro Nishimura