extcite 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ require 'extcite/version'
6
6
  Gem::Specification.new do |s|
7
7
  s.name = 'extcite'
8
8
  s.version = Extcite::VERSION
9
- s.date = '2020-04-07'
9
+ s.date = '2020-04-15'
10
10
  s.summary = "Citations from PDFs"
11
11
  s.description = "Gets DOIS and generates citations for your papers"
12
12
  s.authors = "Scott Chamberlain"
@@ -26,12 +26,22 @@ Gem::Specification.new do |s|
26
26
  s.add_development_dependency 'simplecov', '~> 0.18.0'
27
27
  s.add_development_dependency 'codecov', '~> 0.1.16'
28
28
 
29
+ s.add_runtime_dependency 'timeout', '~> 0.1.0'
29
30
  s.add_runtime_dependency 'faraday', '>= 0.15', '< 1.1'
30
- s.add_runtime_dependency 'faraday_middleware', '>= 0.12.2', '< 0.14.0'
31
- s.add_runtime_dependency 'thor', '~> 0.20.3'
31
+ s.add_runtime_dependency 'faraday_middleware', '>= 0.12.2', '< 1.1.0'
32
+ s.add_runtime_dependency 'thor', '>= 0.20.3', '< 1.1.0'
32
33
  s.add_runtime_dependency 'parallel', '~> 1.19'
33
34
  s.add_runtime_dependency 'oga', '>= 2.15', '< 4.0'
34
- s.add_runtime_dependency 'serrano', '~> 0.5.2'
35
+ s.add_runtime_dependency 'serrano', '>= 0.5.2', '< 0.7.0'
35
36
  s.add_runtime_dependency 'bibtex-ruby', '~> 5.0', '>= 5.0.1'
36
37
  s.add_runtime_dependency 'pdf-reader', '~> 2.4'
38
+
39
+ s.metadata = {
40
+ 'homepage_uri' => 'https://github.com/sckott/extcite',
41
+ 'changelog_uri' =>
42
+ "https://github.com/sckott/extcite/releases/tag/v#{s.version}",
43
+ 'source_code_uri' => 'https://github.com/sckott/extcite',
44
+ 'bug_tracker_uri' => 'https://github.com/sckott/extcite/issues',
45
+ 'documentation_uri' => 'https://xenodochial-stonebraker-ad2732.netlify.app'
46
+ }
37
47
  end
@@ -7,7 +7,7 @@ require "extcite/version"
7
7
  require 'serrano'
8
8
  require 'pdf-reader'
9
9
  require 'faraday'
10
-
10
+ require 'timeout'
11
11
 
12
12
 
13
13
  module Extcite
@@ -142,36 +142,43 @@ module Extcite
142
142
  rr = PDF::Reader.new(x)
143
143
  pdfmeta = rr.metadata
144
144
  if !pdfmeta.nil?
145
- xml = Oga.parse_xml(pdfmeta);
146
145
  begin
147
- tt = xml.xpath('//rdf:Description')
148
- # try dc:identifier attribute
149
- ss = tt.attr('dc:identifier')[0]
150
- if !ss.nil?
151
- ids = ss.text.sub(/doi:/, '')
152
- else
153
- # try prism:doi node
154
- pdoi = xml.xpath('//rdf:Description//prism:doi')
155
- if pdoi.length == 1
156
- ids = pdoi.text
146
+ xml = Oga.parse_xml(pdfmeta);
147
+ rescue Exception => e
148
+ xml = nil
149
+ end
150
+
151
+ if !xml.nil?
152
+ begin
153
+ tt = xml.xpath('//rdf:Description')
154
+ # try dc:identifier attribute
155
+ ss = tt.attr('dc:identifier')[0]
156
+ if !ss.nil?
157
+ ids = ss.text.sub(/doi:/, '')
157
158
  else
158
- # try pdf:WPS-ARTICLEDOI node
159
- wpsdoi = xml.xpath('//rdf:Description//pdf:WPS-ARTICLEDOI')
160
- if wpsdoi.length == 1
161
- ids = wpsdoi.text
159
+ # try prism:doi node
160
+ pdoi = xml.xpath('//rdf:Description//prism:doi')
161
+ if pdoi.length == 1
162
+ ids = pdoi.text
162
163
  else
163
- # try pdfx:WPS-ARTICLEDOI node
164
- pdfxwpsdoi = xml.xpath('//rdf:Description//pdfx:WPS-ARTICLEDOI')
165
- if pdfxwpsdoi.length == 1
166
- ids = pdfxwpsdoi.text
164
+ # try pdf:WPS-ARTICLEDOI node
165
+ wpsdoi = xml.xpath('//rdf:Description//pdf:WPS-ARTICLEDOI')
166
+ if wpsdoi.length == 1
167
+ ids = wpsdoi.text
167
168
  else
168
- ids = nil
169
+ # try pdfx:WPS-ARTICLEDOI node
170
+ pdfxwpsdoi = xml.xpath('//rdf:Description//pdfx:WPS-ARTICLEDOI')
171
+ if pdfxwpsdoi.length == 1
172
+ ids = pdfxwpsdoi.text
173
+ else
174
+ ids = nil
175
+ end
169
176
  end
170
177
  end
171
178
  end
179
+ rescue
180
+ ids = nil
172
181
  end
173
- rescue
174
- ids = nil
175
182
  end
176
183
  end
177
184
 
@@ -295,7 +302,15 @@ module Extcite
295
302
 
296
303
  def self.extract_text_one(x)
297
304
  rr = PDF::Reader.new(x)
298
- return rr.pages.map { |page| page.text }.join("\n")
305
+ return rr.pages.map { |page|
306
+ begin
307
+ Timeout.timeout(1) do
308
+ page.text
309
+ end
310
+ rescue Timeout::Error
311
+ next
312
+ end
313
+ }.join("\n")
299
314
  end
300
315
 
301
316
  end
@@ -1,3 +1,3 @@
1
1
  module Extcite
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extcite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Scott Chamberlain
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-07 00:00:00.000000000 Z
11
+ date: 2020-04-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -98,6 +98,20 @@ dependencies:
98
98
  - - "~>"
99
99
  - !ruby/object:Gem::Version
100
100
  version: 0.1.16
101
+ - !ruby/object:Gem::Dependency
102
+ name: timeout
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: 0.1.0
108
+ type: :runtime
109
+ prerelease: false
110
+ version_requirements: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - "~>"
113
+ - !ruby/object:Gem::Version
114
+ version: 0.1.0
101
115
  - !ruby/object:Gem::Dependency
102
116
  name: faraday
103
117
  requirement: !ruby/object:Gem::Requirement
@@ -127,7 +141,7 @@ dependencies:
127
141
  version: 0.12.2
128
142
  - - "<"
129
143
  - !ruby/object:Gem::Version
130
- version: 0.14.0
144
+ version: 1.1.0
131
145
  type: :runtime
132
146
  prerelease: false
133
147
  version_requirements: !ruby/object:Gem::Requirement
@@ -137,21 +151,27 @@ dependencies:
137
151
  version: 0.12.2
138
152
  - - "<"
139
153
  - !ruby/object:Gem::Version
140
- version: 0.14.0
154
+ version: 1.1.0
141
155
  - !ruby/object:Gem::Dependency
142
156
  name: thor
143
157
  requirement: !ruby/object:Gem::Requirement
144
158
  requirements:
145
- - - "~>"
159
+ - - ">="
146
160
  - !ruby/object:Gem::Version
147
161
  version: 0.20.3
162
+ - - "<"
163
+ - !ruby/object:Gem::Version
164
+ version: 1.1.0
148
165
  type: :runtime
149
166
  prerelease: false
150
167
  version_requirements: !ruby/object:Gem::Requirement
151
168
  requirements:
152
- - - "~>"
169
+ - - ">="
153
170
  - !ruby/object:Gem::Version
154
171
  version: 0.20.3
172
+ - - "<"
173
+ - !ruby/object:Gem::Version
174
+ version: 1.1.0
155
175
  - !ruby/object:Gem::Dependency
156
176
  name: parallel
157
177
  requirement: !ruby/object:Gem::Requirement
@@ -190,16 +210,22 @@ dependencies:
190
210
  name: serrano
191
211
  requirement: !ruby/object:Gem::Requirement
192
212
  requirements:
193
- - - "~>"
213
+ - - ">="
194
214
  - !ruby/object:Gem::Version
195
215
  version: 0.5.2
216
+ - - "<"
217
+ - !ruby/object:Gem::Version
218
+ version: 0.7.0
196
219
  type: :runtime
197
220
  prerelease: false
198
221
  version_requirements: !ruby/object:Gem::Requirement
199
222
  requirements:
200
- - - "~>"
223
+ - - ">="
201
224
  - !ruby/object:Gem::Version
202
225
  version: 0.5.2
226
+ - - "<"
227
+ - !ruby/object:Gem::Version
228
+ version: 0.7.0
203
229
  - !ruby/object:Gem::Dependency
204
230
  name: bibtex-ruby
205
231
  requirement: !ruby/object:Gem::Requirement
@@ -250,6 +276,31 @@ files:
250
276
  - README.md
251
277
  - Rakefile
252
278
  - bin/extcite
279
+ - doc/Array.html
280
+ - doc/Configuration.html
281
+ - doc/Extcite.html
282
+ - doc/Hash.html
283
+ - doc/String.html
284
+ - doc/Textminer.html
285
+ - doc/Textminer/Fetch.html
286
+ - doc/Textminer/Mined.html
287
+ - doc/Textminer/Miner.html
288
+ - doc/Textminer/Request.html
289
+ - doc/Textminer/Response.html
290
+ - doc/_index.html
291
+ - doc/class_list.html
292
+ - doc/css/common.css
293
+ - doc/css/full_list.css
294
+ - doc/css/style.css
295
+ - doc/file.README.html
296
+ - doc/file_list.html
297
+ - doc/frames.html
298
+ - doc/index.html
299
+ - doc/js/app.js
300
+ - doc/js/full_list.js
301
+ - doc/js/jquery.js
302
+ - doc/method_list.html
303
+ - doc/top-level-namespace.html
253
304
  - extcite.gemspec
254
305
  - extra/fetch.rb
255
306
  - extra/fetch_method.rb
@@ -261,7 +312,12 @@ files:
261
312
  homepage: https://github.com/sckott/extcite
262
313
  licenses:
263
314
  - MIT
264
- metadata: {}
315
+ metadata:
316
+ homepage_uri: https://github.com/sckott/extcite
317
+ changelog_uri: https://github.com/sckott/extcite/releases/tag/v0.4.0
318
+ source_code_uri: https://github.com/sckott/extcite
319
+ bug_tracker_uri: https://github.com/sckott/extcite/issues
320
+ documentation_uri: https://xenodochial-stonebraker-ad2732.netlify.app
265
321
  post_install_message:
266
322
  rdoc_options: []
267
323
  require_paths: