arxiv-references 0.1.7.5 → 0.1.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ed3c2af4712d721c218dac464f0892942710707e
4
- data.tar.gz: 57ac87263191f21e977cddac6243d42cc989872e
3
+ metadata.gz: 1c07650299da297e7abd5f714470efbe8c1aa500
4
+ data.tar.gz: c1a1a70fffd28a828df681589d931006c70b88e3
5
5
  SHA512:
6
- metadata.gz: 03b50cf6076c5aa43c8193d2ce099db7bcab38efc476500f4ee80e348f1e6c18d16c307494db9f4416fa8e60926d5a6335d0d5917bd89f5e9dfe1a5730eeb33e
7
- data.tar.gz: 3dafd54af66bfb13b71a39c430293d669e7b6812458d689806c83f6fcce3cc43db6ede5eaf092f4f19bf9539c1edb85f988a91419c4a804e963b1ac0c71490b9
6
+ metadata.gz: d1add731422ded82ad458ce2f0b8755c847f27b6d417d9f7d584e8f4123ab66d6a3e0b2ef180adac79b685e1aa3e3e34c82d02544afada68a0ba7301c79065c7
7
+ data.tar.gz: 618bb9c0161613c0d8974f27e931fe50931b4a34008d6114386cc092c4d9d98c9544b520306662dc73caac2bc92b7037195dd631a1cd6c92ec855da55c12eb0d
@@ -33,4 +33,5 @@ Gem::Specification.new do |spec|
33
33
  spec.add_dependency 'thor', "~> 0.19"
34
34
  spec.add_dependency 'pdf-reader', "~> 1.4"
35
35
  spec.add_dependency 'nokogiri', "~> 1.6"
36
+ spec.add_dependency 'paper-pdf-parser', "~> 0.1"
36
37
  end
@@ -1,7 +1,7 @@
1
1
 
2
2
  $:.unshift Pathname.new(__FILE__).dirname.join().expand_path.to_s
3
3
  require 'ArxivApi'
4
- require 'P3'
4
+ require 'paper/pdf/parser/p3'
5
5
 
6
6
 
7
7
  module ArxivUtil
@@ -1,5 +1,5 @@
1
1
  module Arxiv
2
2
  module References
3
- VERSION = "0.1.7.5"
3
+ VERSION = "0.1.8.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arxiv-references
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7.5
4
+ version: 0.1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Takahiro Nishimura
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-04 00:00:00.000000000 Z
11
+ date: 2016-05-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: '1.6'
97
+ - !ruby/object:Gem::Dependency
98
+ name: paper-pdf-parser
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.1'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.1'
97
111
  description: Fetch title, author, abstruct, and citations list of paper in arXiv from
98
112
  pdf file
99
113
  email:
@@ -119,7 +133,6 @@ files:
119
133
  - lib/arxiv/references/ArxivApi.rb
120
134
  - lib/arxiv/references/ArxivReferences.rb
121
135
  - lib/arxiv/references/ArxivUtil.rb
122
- - lib/arxiv/references/P3.rb
123
136
  - lib/arxiv/references/version.rb
124
137
  homepage: https://github.com/nishimuuu/Arxiv-references
125
138
  licenses:
@@ -1,119 +0,0 @@
1
- require 'digest/sha2'
2
- require 'time'
3
- require 'fileutils'
4
- require 'pty'
5
- require 'expect'
6
- require 'pdf-reader'
7
-
8
- class P3
9
- BASE_URL = 'https://arxiv.org'
10
- REFERENCE_START_REGEXP = Regexp.new('\n*[rR][eE][fF][eE][rR][eE][nN][cC][eE][sS]?( +|\n+)?$')
11
- REFERENCE_REGEXP = Regexp.new('(\[[0-9]?[0-9]\]|\[.+?\])')
12
-
13
- def self.makeId
14
- return Digest::SHA256.hexdigest Time.now.strftime('%F %H:%M:%S')
15
- end
16
-
17
- def self.makeDir(id, work_dir)
18
- Dir.mkdir("#{work_dir}/#{id}")
19
- end
20
-
21
- def self.removeDir(id, work_dir)
22
- FileUtils.rm_rf("#{work_dir}/#{id}")
23
- end
24
-
25
- def self.makeFile(id, work_dir, use_dir)
26
- if use_dir
27
- return "#{work_dir}/#{id}/output.pdf"
28
- else
29
- return "#{work_dir}/#{id}-output.pdf"
30
- end
31
- end
32
-
33
- def self.getK2Pdf(id, work_dir, use_dir)
34
- if use_dir
35
- return "#{work_dir}/#{id}/output_k2opt.pdf"
36
- else
37
- return "#{work_dir}/#{id}-output_k2opt.pdf"
38
- end
39
- end
40
-
41
- def self.removeFile(id, work_dir)
42
- File.delete("#{work_dir}/#{id}-output.pdf")
43
- File.delete("#{work_dir}/#{id}-output_k2opt.pdf")
44
- end
45
-
46
- def self.fetchPdfFile(pdfUrl, file_name)
47
- open(file_name, 'wb') do |o|
48
- open(pdfUrl) do |data|
49
- o.write(data.read)
50
- end
51
- end
52
- end
53
-
54
- def self.convertSingleColPdf(job_id, work_dir, file_name, use_dir)
55
- cmd = "k2pdfopt -dev kpw #{file_name}"
56
- PTY.spawn(cmd) do |i, o|
57
- o.sync = true
58
- i.expect(/\S.*Enter option above \(h=help, q=quit\):/, 10) {
59
- o.puts "\n"
60
- o.flush
61
- }
62
- while (i.eof? == false)
63
- res = i.gets
64
- print res
65
- break unless res.index('written').nil?
66
- end
67
- end
68
- return getK2Pdf(job_id, work_dir, use_dir)
69
- end
70
-
71
- def self.fetchReference(file_name)
72
- reader = PDF::Reader.new(file_name)
73
- page_no = reader.
74
- pages.
75
- reject { |i|
76
- i.text.index(REFERENCE_START_REGEXP).nil?
77
- }.
78
- map(&:number).
79
- sort.
80
- shift
81
-
82
- ref_page = reader.
83
- pages.
84
- select { |i|
85
- i.number >= page_no
86
- }.
87
- map { |i|
88
- i.text.gsub(/\n\n+/, "\n").gsub(/ +/, ' ').gsub(/-\n +/, '')
89
- }
90
-
91
- ref_page.shift
92
-
93
- ref_page = ref_page.
94
- join(' ').
95
- gsub(REFERENCE_REGEXP, "\n\\1")
96
-
97
- ref_page = ref_page.
98
- split(/\n *\n/).
99
- map { |i| i.gsub("\n", '') }.
100
- select { |i| i.length > 15 }
101
- return ref_page
102
- end
103
-
104
- def self.fetchFromPdfUrl(pdfUrl, work_dir=true, use_dir=true)
105
- job_id = makeId
106
- makeDir(job_id, work_dir) if use_dir
107
- file_name = makeFile(job_id, work_dir, use_dir)
108
-
109
- fetchPdfFile(pdfUrl, file_name)
110
- executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir)
111
- references = fetchReference(executed_pdf)
112
- if use_dir
113
- removeDir(job_id, work_dir)
114
- else
115
- removeFile(job_id, work_dir)
116
- end
117
- return references
118
- end
119
- end