extcite 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +13 -11
- data/README.md +1 -1
- data/doc/Array.html +205 -0
- data/doc/Configuration.html +296 -0
- data/doc/Extcite.html +1122 -0
- data/doc/Hash.html +380 -0
- data/doc/String.html +289 -0
- data/doc/Textminer.html +601 -0
- data/doc/Textminer/Fetch.html +447 -0
- data/doc/Textminer/Mined.html +509 -0
- data/doc/Textminer/Miner.html +385 -0
- data/doc/Textminer/Request.html +669 -0
- data/doc/Textminer/Response.html +923 -0
- data/doc/_index.html +135 -0
- data/doc/class_list.html +51 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +58 -0
- data/doc/css/style.css +496 -0
- data/doc/file.README.html +139 -0
- data/doc/file_list.html +56 -0
- data/doc/frames.html +17 -0
- data/doc/index.html +139 -0
- data/doc/js/app.js +314 -0
- data/doc/js/full_list.js +216 -0
- data/doc/js/jquery.js +4 -0
- data/doc/method_list.html +155 -0
- data/doc/top-level-namespace.html +397 -0
- data/extcite.gemspec +14 -4
- data/lib/extcite.rb +39 -24
- data/lib/extcite/version.rb +1 -1
- metadata +65 -9
data/extcite.gemspec
CHANGED
@@ -6,7 +6,7 @@ require 'extcite/version'
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = 'extcite'
|
8
8
|
s.version = Extcite::VERSION
|
9
|
-
s.date = '2020-04-
|
9
|
+
s.date = '2020-04-15'
|
10
10
|
s.summary = "Citations from PDFs"
|
11
11
|
s.description = "Gets DOIS and generates citations for your papers"
|
12
12
|
s.authors = "Scott Chamberlain"
|
@@ -26,12 +26,22 @@ Gem::Specification.new do |s|
|
|
26
26
|
s.add_development_dependency 'simplecov', '~> 0.18.0'
|
27
27
|
s.add_development_dependency 'codecov', '~> 0.1.16'
|
28
28
|
|
29
|
+
s.add_runtime_dependency 'timeout', '~> 0.1.0'
|
29
30
|
s.add_runtime_dependency 'faraday', '>= 0.15', '< 1.1'
|
30
|
-
s.add_runtime_dependency 'faraday_middleware', '>= 0.12.2', '<
|
31
|
-
s.add_runtime_dependency 'thor', '
|
31
|
+
s.add_runtime_dependency 'faraday_middleware', '>= 0.12.2', '< 1.1.0'
|
32
|
+
s.add_runtime_dependency 'thor', '>= 0.20.3', '< 1.1.0'
|
32
33
|
s.add_runtime_dependency 'parallel', '~> 1.19'
|
33
34
|
s.add_runtime_dependency 'oga', '>= 2.15', '< 4.0'
|
34
|
-
s.add_runtime_dependency 'serrano', '
|
35
|
+
s.add_runtime_dependency 'serrano', '>= 0.5.2', '< 0.7.0'
|
35
36
|
s.add_runtime_dependency 'bibtex-ruby', '~> 5.0', '>= 5.0.1'
|
36
37
|
s.add_runtime_dependency 'pdf-reader', '~> 2.4'
|
38
|
+
|
39
|
+
s.metadata = {
|
40
|
+
'homepage_uri' => 'https://github.com/sckott/extcite',
|
41
|
+
'changelog_uri' =>
|
42
|
+
"https://github.com/sckott/extcite/releases/tag/v#{s.version}",
|
43
|
+
'source_code_uri' => 'https://github.com/sckott/extcite',
|
44
|
+
'bug_tracker_uri' => 'https://github.com/sckott/extcite/issues',
|
45
|
+
'documentation_uri' => 'https://xenodochial-stonebraker-ad2732.netlify.app'
|
46
|
+
}
|
37
47
|
end
|
data/lib/extcite.rb
CHANGED
@@ -7,7 +7,7 @@ require "extcite/version"
|
|
7
7
|
require 'serrano'
|
8
8
|
require 'pdf-reader'
|
9
9
|
require 'faraday'
|
10
|
-
|
10
|
+
require 'timeout'
|
11
11
|
|
12
12
|
|
13
13
|
module Extcite
|
@@ -142,36 +142,43 @@ module Extcite
|
|
142
142
|
rr = PDF::Reader.new(x)
|
143
143
|
pdfmeta = rr.metadata
|
144
144
|
if !pdfmeta.nil?
|
145
|
-
xml = Oga.parse_xml(pdfmeta);
|
146
145
|
begin
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
146
|
+
xml = Oga.parse_xml(pdfmeta);
|
147
|
+
rescue Exception => e
|
148
|
+
xml = nil
|
149
|
+
end
|
150
|
+
|
151
|
+
if !xml.nil?
|
152
|
+
begin
|
153
|
+
tt = xml.xpath('//rdf:Description')
|
154
|
+
# try dc:identifier attribute
|
155
|
+
ss = tt.attr('dc:identifier')[0]
|
156
|
+
if !ss.nil?
|
157
|
+
ids = ss.text.sub(/doi:/, '')
|
157
158
|
else
|
158
|
-
# try
|
159
|
-
|
160
|
-
if
|
161
|
-
ids =
|
159
|
+
# try prism:doi node
|
160
|
+
pdoi = xml.xpath('//rdf:Description//prism:doi')
|
161
|
+
if pdoi.length == 1
|
162
|
+
ids = pdoi.text
|
162
163
|
else
|
163
|
-
# try
|
164
|
-
|
165
|
-
if
|
166
|
-
ids =
|
164
|
+
# try pdf:WPS-ARTICLEDOI node
|
165
|
+
wpsdoi = xml.xpath('//rdf:Description//pdf:WPS-ARTICLEDOI')
|
166
|
+
if wpsdoi.length == 1
|
167
|
+
ids = wpsdoi.text
|
167
168
|
else
|
168
|
-
|
169
|
+
# try pdfx:WPS-ARTICLEDOI node
|
170
|
+
pdfxwpsdoi = xml.xpath('//rdf:Description//pdfx:WPS-ARTICLEDOI')
|
171
|
+
if pdfxwpsdoi.length == 1
|
172
|
+
ids = pdfxwpsdoi.text
|
173
|
+
else
|
174
|
+
ids = nil
|
175
|
+
end
|
169
176
|
end
|
170
177
|
end
|
171
178
|
end
|
179
|
+
rescue
|
180
|
+
ids = nil
|
172
181
|
end
|
173
|
-
rescue
|
174
|
-
ids = nil
|
175
182
|
end
|
176
183
|
end
|
177
184
|
|
@@ -295,7 +302,15 @@ module Extcite
|
|
295
302
|
|
296
303
|
def self.extract_text_one(x)
|
297
304
|
rr = PDF::Reader.new(x)
|
298
|
-
return rr.pages.map { |page|
|
305
|
+
return rr.pages.map { |page|
|
306
|
+
begin
|
307
|
+
Timeout.timeout(1) do
|
308
|
+
page.text
|
309
|
+
end
|
310
|
+
rescue Timeout::Error
|
311
|
+
next
|
312
|
+
end
|
313
|
+
}.join("\n")
|
299
314
|
end
|
300
315
|
|
301
316
|
end
|
data/lib/extcite/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: extcite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Scott Chamberlain
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -98,6 +98,20 @@ dependencies:
|
|
98
98
|
- - "~>"
|
99
99
|
- !ruby/object:Gem::Version
|
100
100
|
version: 0.1.16
|
101
|
+
- !ruby/object:Gem::Dependency
|
102
|
+
name: timeout
|
103
|
+
requirement: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - "~>"
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: 0.1.0
|
108
|
+
type: :runtime
|
109
|
+
prerelease: false
|
110
|
+
version_requirements: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - "~>"
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: 0.1.0
|
101
115
|
- !ruby/object:Gem::Dependency
|
102
116
|
name: faraday
|
103
117
|
requirement: !ruby/object:Gem::Requirement
|
@@ -127,7 +141,7 @@ dependencies:
|
|
127
141
|
version: 0.12.2
|
128
142
|
- - "<"
|
129
143
|
- !ruby/object:Gem::Version
|
130
|
-
version:
|
144
|
+
version: 1.1.0
|
131
145
|
type: :runtime
|
132
146
|
prerelease: false
|
133
147
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -137,21 +151,27 @@ dependencies:
|
|
137
151
|
version: 0.12.2
|
138
152
|
- - "<"
|
139
153
|
- !ruby/object:Gem::Version
|
140
|
-
version:
|
154
|
+
version: 1.1.0
|
141
155
|
- !ruby/object:Gem::Dependency
|
142
156
|
name: thor
|
143
157
|
requirement: !ruby/object:Gem::Requirement
|
144
158
|
requirements:
|
145
|
-
- - "
|
159
|
+
- - ">="
|
146
160
|
- !ruby/object:Gem::Version
|
147
161
|
version: 0.20.3
|
162
|
+
- - "<"
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
version: 1.1.0
|
148
165
|
type: :runtime
|
149
166
|
prerelease: false
|
150
167
|
version_requirements: !ruby/object:Gem::Requirement
|
151
168
|
requirements:
|
152
|
-
- - "
|
169
|
+
- - ">="
|
153
170
|
- !ruby/object:Gem::Version
|
154
171
|
version: 0.20.3
|
172
|
+
- - "<"
|
173
|
+
- !ruby/object:Gem::Version
|
174
|
+
version: 1.1.0
|
155
175
|
- !ruby/object:Gem::Dependency
|
156
176
|
name: parallel
|
157
177
|
requirement: !ruby/object:Gem::Requirement
|
@@ -190,16 +210,22 @@ dependencies:
|
|
190
210
|
name: serrano
|
191
211
|
requirement: !ruby/object:Gem::Requirement
|
192
212
|
requirements:
|
193
|
-
- - "
|
213
|
+
- - ">="
|
194
214
|
- !ruby/object:Gem::Version
|
195
215
|
version: 0.5.2
|
216
|
+
- - "<"
|
217
|
+
- !ruby/object:Gem::Version
|
218
|
+
version: 0.7.0
|
196
219
|
type: :runtime
|
197
220
|
prerelease: false
|
198
221
|
version_requirements: !ruby/object:Gem::Requirement
|
199
222
|
requirements:
|
200
|
-
- - "
|
223
|
+
- - ">="
|
201
224
|
- !ruby/object:Gem::Version
|
202
225
|
version: 0.5.2
|
226
|
+
- - "<"
|
227
|
+
- !ruby/object:Gem::Version
|
228
|
+
version: 0.7.0
|
203
229
|
- !ruby/object:Gem::Dependency
|
204
230
|
name: bibtex-ruby
|
205
231
|
requirement: !ruby/object:Gem::Requirement
|
@@ -250,6 +276,31 @@ files:
|
|
250
276
|
- README.md
|
251
277
|
- Rakefile
|
252
278
|
- bin/extcite
|
279
|
+
- doc/Array.html
|
280
|
+
- doc/Configuration.html
|
281
|
+
- doc/Extcite.html
|
282
|
+
- doc/Hash.html
|
283
|
+
- doc/String.html
|
284
|
+
- doc/Textminer.html
|
285
|
+
- doc/Textminer/Fetch.html
|
286
|
+
- doc/Textminer/Mined.html
|
287
|
+
- doc/Textminer/Miner.html
|
288
|
+
- doc/Textminer/Request.html
|
289
|
+
- doc/Textminer/Response.html
|
290
|
+
- doc/_index.html
|
291
|
+
- doc/class_list.html
|
292
|
+
- doc/css/common.css
|
293
|
+
- doc/css/full_list.css
|
294
|
+
- doc/css/style.css
|
295
|
+
- doc/file.README.html
|
296
|
+
- doc/file_list.html
|
297
|
+
- doc/frames.html
|
298
|
+
- doc/index.html
|
299
|
+
- doc/js/app.js
|
300
|
+
- doc/js/full_list.js
|
301
|
+
- doc/js/jquery.js
|
302
|
+
- doc/method_list.html
|
303
|
+
- doc/top-level-namespace.html
|
253
304
|
- extcite.gemspec
|
254
305
|
- extra/fetch.rb
|
255
306
|
- extra/fetch_method.rb
|
@@ -261,7 +312,12 @@ files:
|
|
261
312
|
homepage: https://github.com/sckott/extcite
|
262
313
|
licenses:
|
263
314
|
- MIT
|
264
|
-
metadata:
|
315
|
+
metadata:
|
316
|
+
homepage_uri: https://github.com/sckott/extcite
|
317
|
+
changelog_uri: https://github.com/sckott/extcite/releases/tag/v0.4.0
|
318
|
+
source_code_uri: https://github.com/sckott/extcite
|
319
|
+
bug_tracker_uri: https://github.com/sckott/extcite/issues
|
320
|
+
documentation_uri: https://xenodochial-stonebraker-ad2732.netlify.app
|
265
321
|
post_install_message:
|
266
322
|
rdoc_options: []
|
267
323
|
require_paths:
|