arxiv-references 0.1.7.4 → 0.1.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1565bf69f1cd06eb52a452b3e4f518b8c80962e3
4
- data.tar.gz: a7ee2b0cc1ce970bc3837f7a637baa64aa381bc1
3
+ metadata.gz: ed3c2af4712d721c218dac464f0892942710707e
4
+ data.tar.gz: 57ac87263191f21e977cddac6243d42cc989872e
5
5
  SHA512:
6
- metadata.gz: 0d3edf4f92b6ce080a4c2b2240765c965e6e2f2344f2864469934ef5663799e42750a2c5d75117f58a6e8a7d025378064bb2a19ed715ff4195e880b6baf4f059
7
- data.tar.gz: 98600b69e73d4e6a850b5cdcd1b9ea2804d3a6c4fd9e5df5cfffd12c2cb9f5ec3b07404b72a1f08d4ec67ab4079548ab97fba31cbd334c09cbfa88979e19419e
6
+ metadata.gz: 03b50cf6076c5aa43c8193d2ce099db7bcab38efc476500f4ee80e348f1e6c18d16c307494db9f4416fa8e60926d5a6335d0d5917bd89f5e9dfe1a5730eeb33e
7
+ data.tar.gz: 3dafd54af66bfb13b71a39c430293d669e7b6812458d689806c83f6fcce3cc43db6ede5eaf092f4f19bf9539c1edb85f988a91419c4a804e963b1ac0c71490b9
@@ -9,8 +9,8 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Takahiro Nishimura"]
10
10
  spec.email = ["tkhr.nishimura@gmail.com"]
11
11
 
12
- spec.summary = %q{This library is to fetch information of Title, Author, Abstruct and Citations from Arxiv Paper}
13
- spec.description = %q{This library is for suvey.}
12
+ spec.summary = %q{This library is to fetch title, author, abstruct and etc. of paper in arXiv. Moreover, analyze pdf file to fetch citation list in paper}
13
+ spec.description = %q{Fetch title, author, abstruct, and citations list of paper in arXiv from pdf file}
14
14
  spec.homepage = "https://github.com/nishimuuu/Arxiv-references"
15
15
  spec.license = "GPL"
16
16
 
@@ -6,15 +6,16 @@ require 'expect'
6
6
  require 'pdf-reader'
7
7
 
8
8
  class P3
9
- BASE_URL = "https://arxiv.org"
9
+ BASE_URL = 'https://arxiv.org'
10
10
  REFERENCE_START_REGEXP = Regexp.new('\n*[rR][eE][fF][eE][rR][eE][nN][cC][eE][sS]?( +|\n+)?$')
11
11
  REFERENCE_REGEXP = Regexp.new('(\[[0-9]?[0-9]\]|\[.+?\])')
12
+
12
13
  def self.makeId
13
- return Digest::SHA256.hexdigest Time.now.strftime("%F %H:%M:%S")
14
+ return Digest::SHA256.hexdigest Time.now.strftime('%F %H:%M:%S')
14
15
  end
15
16
 
16
17
  def self.makeDir(id, work_dir)
17
- Dir.mkdir("#{work_dir}/#{id}")
18
+ Dir.mkdir("#{work_dir}/#{id}")
18
19
  end
19
20
 
20
21
  def self.removeDir(id, work_dir)
@@ -29,7 +30,7 @@ class P3
29
30
  end
30
31
  end
31
32
 
32
- def self. getK2Pdf(id, work_dir, use_dir)
33
+ def self.getK2Pdf(id, work_dir, use_dir)
33
34
  if use_dir
34
35
  return "#{work_dir}/#{id}/output_k2opt.pdf"
35
36
  else
@@ -41,7 +42,8 @@ class P3
41
42
  File.delete("#{work_dir}/#{id}-output.pdf")
42
43
  File.delete("#{work_dir}/#{id}-output_k2opt.pdf")
43
44
  end
44
- def self.fetchPdfFile(pdfUrl,file_name)
45
+
46
+ def self.fetchPdfFile(pdfUrl, file_name)
45
47
  open(file_name, 'wb') do |o|
46
48
  open(pdfUrl) do |data|
47
49
  o.write(data.read)
@@ -49,15 +51,15 @@ class P3
49
51
  end
50
52
  end
51
53
 
52
- def self.convertSingleColPdf(job_id, work_dir,file_name, use_dir)
54
+ def self.convertSingleColPdf(job_id, work_dir, file_name, use_dir)
53
55
  cmd = "k2pdfopt -dev kpw #{file_name}"
54
- PTY.spawn(cmd) do |i,o|
56
+ PTY.spawn(cmd) do |i, o|
55
57
  o.sync = true
56
- i.expect(/\S.*Enter option above \(h=help, q=quit\):/,10){
58
+ i.expect(/\S.*Enter option above \(h=help, q=quit\):/, 10) {
57
59
  o.puts "\n"
58
60
  o.flush
59
61
  }
60
- while( i.eof? == false )
62
+ while (i.eof? == false)
61
63
  res = i.gets
62
64
  print res
63
65
  break unless res.index('written').nil?
@@ -69,34 +71,34 @@ class P3
69
71
  def self.fetchReference(file_name)
70
72
  reader = PDF::Reader.new(file_name)
71
73
  page_no = reader.
72
- pages.
73
- reject{|i|
74
- i.text.index(REFERENCE_START_REGEXP).nil?
75
- }.
76
- map(&:number).
77
- sort.
78
- shift
79
-
80
- ref_page = reader.
81
74
  pages.
82
- select{|i|
83
- i.number >= page_no
84
- }.
85
- map{|i|
86
- i.text.gsub(/\n\n+/,"\n").gsub(/ +/,' ').gsub(/-\n +/,'')
87
- }
75
+ reject { |i|
76
+ i.text.index(REFERENCE_START_REGEXP).nil?
77
+ }.
78
+ map(&:number).
79
+ sort.
80
+ shift
81
+
82
+ ref_page = reader.
83
+ pages.
84
+ select { |i|
85
+ i.number >= page_no
86
+ }.
87
+ map { |i|
88
+ i.text.gsub(/\n\n+/, "\n").gsub(/ +/, ' ').gsub(/-\n +/, '')
89
+ }
88
90
 
89
- ref_page.shift
91
+ ref_page.shift
90
92
 
91
- ref_page = ref_page.
93
+ ref_page = ref_page.
92
94
  join(' ').
93
- gsub(REFERENCE_REGEXP,"\n\\1")
95
+ gsub(REFERENCE_REGEXP, "\n\\1")
94
96
 
95
- ref_page = ref_page.
97
+ ref_page = ref_page.
96
98
  split(/\n *\n/).
97
- map{|i| i.gsub("\n",'')}.
98
- select{|i| i.length > 15}
99
- return ref_page
99
+ map { |i| i.gsub("\n", '') }.
100
+ select { |i| i.length > 15 }
101
+ return ref_page
100
102
  end
101
103
 
102
104
  def self.fetchFromPdfUrl(pdfUrl, work_dir=true, use_dir=true)
@@ -108,7 +110,7 @@ class P3
108
110
  executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir)
109
111
  references = fetchReference(executed_pdf)
110
112
  if use_dir
111
- removeDir(job_id, work_dir)
113
+ removeDir(job_id, work_dir)
112
114
  else
113
115
  removeFile(job_id, work_dir)
114
116
  end
@@ -1,5 +1,5 @@
1
1
  module Arxiv
2
2
  module References
3
- VERSION = "0.1.7.4"
3
+ VERSION = "0.1.7.5"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arxiv-references
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7.4
4
+ version: 0.1.7.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Takahiro Nishimura
@@ -94,7 +94,8 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '1.6'
97
- description: This library is for suvey.
97
+ description: Fetch title, author, abstruct, and citations list of paper in arXiv from
98
+ pdf file
98
99
  email:
99
100
  - tkhr.nishimura@gmail.com
100
101
  executables:
@@ -144,6 +145,6 @@ rubyforge_project:
144
145
  rubygems_version: 2.4.5.1
145
146
  signing_key:
146
147
  specification_version: 4
147
- summary: This library is to fetch information of Title, Author, Abstruct and Citations
148
- from Arxiv Paper
148
+ summary: This library is to fetch title, author, abstruct and etc. of paper in arXiv.
149
+ Moreover, analyze pdf file to fetch citation list in paper
149
150
  test_files: []