arxiv-references 0.1.7.4 → 0.1.7.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1565bf69f1cd06eb52a452b3e4f518b8c80962e3
4
- data.tar.gz: a7ee2b0cc1ce970bc3837f7a637baa64aa381bc1
3
+ metadata.gz: ed3c2af4712d721c218dac464f0892942710707e
4
+ data.tar.gz: 57ac87263191f21e977cddac6243d42cc989872e
5
5
  SHA512:
6
- metadata.gz: 0d3edf4f92b6ce080a4c2b2240765c965e6e2f2344f2864469934ef5663799e42750a2c5d75117f58a6e8a7d025378064bb2a19ed715ff4195e880b6baf4f059
7
- data.tar.gz: 98600b69e73d4e6a850b5cdcd1b9ea2804d3a6c4fd9e5df5cfffd12c2cb9f5ec3b07404b72a1f08d4ec67ab4079548ab97fba31cbd334c09cbfa88979e19419e
6
+ metadata.gz: 03b50cf6076c5aa43c8193d2ce099db7bcab38efc476500f4ee80e348f1e6c18d16c307494db9f4416fa8e60926d5a6335d0d5917bd89f5e9dfe1a5730eeb33e
7
+ data.tar.gz: 3dafd54af66bfb13b71a39c430293d669e7b6812458d689806c83f6fcce3cc43db6ede5eaf092f4f19bf9539c1edb85f988a91419c4a804e963b1ac0c71490b9
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Takahiro Nishimura"]
10
10
  spec.email = ["tkhr.nishimura@gmail.com"]
11
11
 
12
- spec.summary = %q{This library is to fetch information of Title, Author, Abstruct and Citations from Arxiv Paper}
13
- spec.description = %q{This library is for suvey.}
12
+ spec.summary = %q{This library is to fetch title, author, abstruct and etc. of paper in arXiv. Moreover, analyze pdf file to fetch citation list in paper}
13
+ spec.description = %q{Fetch title, author, abstruct, and citations list of paper in arXiv from pdf file}
14
14
  spec.homepage = "https://github.com/nishimuuu/Arxiv-references"
15
15
  spec.license = "GPL"
16
16
 
@@ -6,15 +6,16 @@ require 'expect'
6
6
  require 'pdf-reader'
7
7
 
8
8
  class P3
9
- BASE_URL = "https://arxiv.org"
9
+ BASE_URL = 'https://arxiv.org'
10
10
  REFERENCE_START_REGEXP = Regexp.new('\n*[rR][eE][fF][eE][rR][eE][nN][cC][eE][sS]?( +|\n+)?$')
11
11
  REFERENCE_REGEXP = Regexp.new('(\[[0-9]?[0-9]\]|\[.+?\])')
12
+
12
13
  def self.makeId
13
- return Digest::SHA256.hexdigest Time.now.strftime("%F %H:%M:%S")
14
+ return Digest::SHA256.hexdigest Time.now.strftime('%F %H:%M:%S')
14
15
  end
15
16
 
16
17
  def self.makeDir(id, work_dir)
17
- Dir.mkdir("#{work_dir}/#{id}")
18
+ Dir.mkdir("#{work_dir}/#{id}")
18
19
  end
19
20
 
20
21
  def self.removeDir(id, work_dir)
@@ -29,7 +30,7 @@ class P3
29
30
  end
30
31
  end
31
32
 
32
- def self. getK2Pdf(id, work_dir, use_dir)
33
+ def self.getK2Pdf(id, work_dir, use_dir)
33
34
  if use_dir
34
35
  return "#{work_dir}/#{id}/output_k2opt.pdf"
35
36
  else
@@ -41,7 +42,8 @@ class P3
41
42
  File.delete("#{work_dir}/#{id}-output.pdf")
42
43
  File.delete("#{work_dir}/#{id}-output_k2opt.pdf")
43
44
  end
44
- def self.fetchPdfFile(pdfUrl,file_name)
45
+
46
+ def self.fetchPdfFile(pdfUrl, file_name)
45
47
  open(file_name, 'wb') do |o|
46
48
  open(pdfUrl) do |data|
47
49
  o.write(data.read)
@@ -49,15 +51,15 @@ class P3
49
51
  end
50
52
  end
51
53
 
52
- def self.convertSingleColPdf(job_id, work_dir,file_name, use_dir)
54
+ def self.convertSingleColPdf(job_id, work_dir, file_name, use_dir)
53
55
  cmd = "k2pdfopt -dev kpw #{file_name}"
54
- PTY.spawn(cmd) do |i,o|
56
+ PTY.spawn(cmd) do |i, o|
55
57
  o.sync = true
56
- i.expect(/\S.*Enter option above \(h=help, q=quit\):/,10){
58
+ i.expect(/\S.*Enter option above \(h=help, q=quit\):/, 10) {
57
59
  o.puts "\n"
58
60
  o.flush
59
61
  }
60
- while( i.eof? == false )
62
+ while (i.eof? == false)
61
63
  res = i.gets
62
64
  print res
63
65
  break unless res.index('written').nil?
@@ -69,34 +71,34 @@ class P3
69
71
  def self.fetchReference(file_name)
70
72
  reader = PDF::Reader.new(file_name)
71
73
  page_no = reader.
72
- pages.
73
- reject{|i|
74
- i.text.index(REFERENCE_START_REGEXP).nil?
75
- }.
76
- map(&:number).
77
- sort.
78
- shift
79
-
80
- ref_page = reader.
81
74
  pages.
82
- select{|i|
83
- i.number >= page_no
84
- }.
85
- map{|i|
86
- i.text.gsub(/\n\n+/,"\n").gsub(/ +/,' ').gsub(/-\n +/,'')
87
- }
75
+ reject { |i|
76
+ i.text.index(REFERENCE_START_REGEXP).nil?
77
+ }.
78
+ map(&:number).
79
+ sort.
80
+ shift
81
+
82
+ ref_page = reader.
83
+ pages.
84
+ select { |i|
85
+ i.number >= page_no
86
+ }.
87
+ map { |i|
88
+ i.text.gsub(/\n\n+/, "\n").gsub(/ +/, ' ').gsub(/-\n +/, '')
89
+ }
88
90
 
89
- ref_page.shift
91
+ ref_page.shift
90
92
 
91
- ref_page = ref_page.
93
+ ref_page = ref_page.
92
94
  join(' ').
93
- gsub(REFERENCE_REGEXP,"\n\\1")
95
+ gsub(REFERENCE_REGEXP, "\n\\1")
94
96
 
95
- ref_page = ref_page.
97
+ ref_page = ref_page.
96
98
  split(/\n *\n/).
97
- map{|i| i.gsub("\n",'')}.
98
- select{|i| i.length > 15}
99
- return ref_page
99
+ map { |i| i.gsub("\n", '') }.
100
+ select { |i| i.length > 15 }
101
+ return ref_page
100
102
  end
101
103
 
102
104
  def self.fetchFromPdfUrl(pdfUrl, work_dir=true, use_dir=true)
@@ -108,7 +110,7 @@ class P3
108
110
  executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir)
109
111
  references = fetchReference(executed_pdf)
110
112
  if use_dir
111
- removeDir(job_id, work_dir)
113
+ removeDir(job_id, work_dir)
112
114
  else
113
115
  removeFile(job_id, work_dir)
114
116
  end
@@ -1,5 +1,5 @@
1
1
  module Arxiv
2
2
  module References
3
- VERSION = "0.1.7.4"
3
+ VERSION = "0.1.7.5"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arxiv-references
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7.4
4
+ version: 0.1.7.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Takahiro Nishimura
@@ -94,7 +94,8 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '1.6'
97
- description: This library is for suvey.
97
+ description: Fetch title, author, abstruct, and citations list of paper in arXiv from
98
+ pdf file
98
99
  email:
99
100
  - tkhr.nishimura@gmail.com
100
101
  executables:
@@ -144,6 +145,6 @@ rubyforge_project:
144
145
  rubygems_version: 2.4.5.1
145
146
  signing_key:
146
147
  specification_version: 4
147
- summary: This library is to fetch information of Title, Author, Abstruct and Citations
148
- from Arxiv Paper
148
+ summary: This library is to fetch title, author, abstruct and etc. of paper in arXiv.
149
+ Moreover, analyze pdf file to fetch citation list in paper
149
150
  test_files: []