extcite 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +13 -11
- data/README.md +1 -1
- data/doc/Array.html +205 -0
- data/doc/Configuration.html +296 -0
- data/doc/Extcite.html +1122 -0
- data/doc/Hash.html +380 -0
- data/doc/String.html +289 -0
- data/doc/Textminer.html +601 -0
- data/doc/Textminer/Fetch.html +447 -0
- data/doc/Textminer/Mined.html +509 -0
- data/doc/Textminer/Miner.html +385 -0
- data/doc/Textminer/Request.html +669 -0
- data/doc/Textminer/Response.html +923 -0
- data/doc/_index.html +135 -0
- data/doc/class_list.html +51 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +58 -0
- data/doc/css/style.css +496 -0
- data/doc/file.README.html +139 -0
- data/doc/file_list.html +56 -0
- data/doc/frames.html +17 -0
- data/doc/index.html +139 -0
- data/doc/js/app.js +314 -0
- data/doc/js/full_list.js +216 -0
- data/doc/js/jquery.js +4 -0
- data/doc/method_list.html +155 -0
- data/doc/top-level-namespace.html +397 -0
- data/extcite.gemspec +14 -4
- data/lib/extcite.rb +39 -24
- data/lib/extcite/version.rb +1 -1
- metadata +65 -9
data/extcite.gemspec
CHANGED
@@ -6,7 +6,7 @@ require 'extcite/version'
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = 'extcite'
|
8
8
|
s.version = Extcite::VERSION
|
9
|
-
s.date = '2020-04-
|
9
|
+
s.date = '2020-04-15'
|
10
10
|
s.summary = "Citations from PDFs"
|
11
11
|
s.description = "Gets DOIS and generates citations for your papers"
|
12
12
|
s.authors = "Scott Chamberlain"
|
@@ -26,12 +26,22 @@ Gem::Specification.new do |s|
|
|
26
26
|
s.add_development_dependency 'simplecov', '~> 0.18.0'
|
27
27
|
s.add_development_dependency 'codecov', '~> 0.1.16'
|
28
28
|
|
29
|
+
s.add_runtime_dependency 'timeout', '~> 0.1.0'
|
29
30
|
s.add_runtime_dependency 'faraday', '>= 0.15', '< 1.1'
|
30
|
-
s.add_runtime_dependency 'faraday_middleware', '>= 0.12.2', '<
|
31
|
-
s.add_runtime_dependency 'thor', '
|
31
|
+
s.add_runtime_dependency 'faraday_middleware', '>= 0.12.2', '< 1.1.0'
|
32
|
+
s.add_runtime_dependency 'thor', '>= 0.20.3', '< 1.1.0'
|
32
33
|
s.add_runtime_dependency 'parallel', '~> 1.19'
|
33
34
|
s.add_runtime_dependency 'oga', '>= 2.15', '< 4.0'
|
34
|
-
s.add_runtime_dependency 'serrano', '
|
35
|
+
s.add_runtime_dependency 'serrano', '>= 0.5.2', '< 0.7.0'
|
35
36
|
s.add_runtime_dependency 'bibtex-ruby', '~> 5.0', '>= 5.0.1'
|
36
37
|
s.add_runtime_dependency 'pdf-reader', '~> 2.4'
|
38
|
+
|
39
|
+
s.metadata = {
|
40
|
+
'homepage_uri' => 'https://github.com/sckott/extcite',
|
41
|
+
'changelog_uri' =>
|
42
|
+
"https://github.com/sckott/extcite/releases/tag/v#{s.version}",
|
43
|
+
'source_code_uri' => 'https://github.com/sckott/extcite',
|
44
|
+
'bug_tracker_uri' => 'https://github.com/sckott/extcite/issues',
|
45
|
+
'documentation_uri' => 'https://xenodochial-stonebraker-ad2732.netlify.app'
|
46
|
+
}
|
37
47
|
end
|
data/lib/extcite.rb
CHANGED
@@ -7,7 +7,7 @@ require "extcite/version"
|
|
7
7
|
require 'serrano'
|
8
8
|
require 'pdf-reader'
|
9
9
|
require 'faraday'
|
10
|
-
|
10
|
+
require 'timeout'
|
11
11
|
|
12
12
|
|
13
13
|
module Extcite
|
@@ -142,36 +142,43 @@ module Extcite
|
|
142
142
|
rr = PDF::Reader.new(x)
|
143
143
|
pdfmeta = rr.metadata
|
144
144
|
if !pdfmeta.nil?
|
145
|
-
xml = Oga.parse_xml(pdfmeta);
|
146
145
|
begin
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
146
|
+
xml = Oga.parse_xml(pdfmeta);
|
147
|
+
rescue Exception => e
|
148
|
+
xml = nil
|
149
|
+
end
|
150
|
+
|
151
|
+
if !xml.nil?
|
152
|
+
begin
|
153
|
+
tt = xml.xpath('//rdf:Description')
|
154
|
+
# try dc:identifier attribute
|
155
|
+
ss = tt.attr('dc:identifier')[0]
|
156
|
+
if !ss.nil?
|
157
|
+
ids = ss.text.sub(/doi:/, '')
|
157
158
|
else
|
158
|
-
# try
|
159
|
-
|
160
|
-
if
|
161
|
-
ids =
|
159
|
+
# try prism:doi node
|
160
|
+
pdoi = xml.xpath('//rdf:Description//prism:doi')
|
161
|
+
if pdoi.length == 1
|
162
|
+
ids = pdoi.text
|
162
163
|
else
|
163
|
-
# try
|
164
|
-
|
165
|
-
if
|
166
|
-
ids =
|
164
|
+
# try pdf:WPS-ARTICLEDOI node
|
165
|
+
wpsdoi = xml.xpath('//rdf:Description//pdf:WPS-ARTICLEDOI')
|
166
|
+
if wpsdoi.length == 1
|
167
|
+
ids = wpsdoi.text
|
167
168
|
else
|
168
|
-
|
169
|
+
# try pdfx:WPS-ARTICLEDOI node
|
170
|
+
pdfxwpsdoi = xml.xpath('//rdf:Description//pdfx:WPS-ARTICLEDOI')
|
171
|
+
if pdfxwpsdoi.length == 1
|
172
|
+
ids = pdfxwpsdoi.text
|
173
|
+
else
|
174
|
+
ids = nil
|
175
|
+
end
|
169
176
|
end
|
170
177
|
end
|
171
178
|
end
|
179
|
+
rescue
|
180
|
+
ids = nil
|
172
181
|
end
|
173
|
-
rescue
|
174
|
-
ids = nil
|
175
182
|
end
|
176
183
|
end
|
177
184
|
|
@@ -295,7 +302,15 @@ module Extcite
|
|
295
302
|
|
296
303
|
def self.extract_text_one(x)
|
297
304
|
rr = PDF::Reader.new(x)
|
298
|
-
return rr.pages.map { |page|
|
305
|
+
return rr.pages.map { |page|
|
306
|
+
begin
|
307
|
+
Timeout.timeout(1) do
|
308
|
+
page.text
|
309
|
+
end
|
310
|
+
rescue Timeout::Error
|
311
|
+
next
|
312
|
+
end
|
313
|
+
}.join("\n")
|
299
314
|
end
|
300
315
|
|
301
316
|
end
|
data/lib/extcite/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: extcite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Scott Chamberlain
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-04-
|
11
|
+
date: 2020-04-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -98,6 +98,20 @@ dependencies:
|
|
98
98
|
- - "~>"
|
99
99
|
- !ruby/object:Gem::Version
|
100
100
|
version: 0.1.16
|
101
|
+
- !ruby/object:Gem::Dependency
|
102
|
+
name: timeout
|
103
|
+
requirement: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - "~>"
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: 0.1.0
|
108
|
+
type: :runtime
|
109
|
+
prerelease: false
|
110
|
+
version_requirements: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - "~>"
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: 0.1.0
|
101
115
|
- !ruby/object:Gem::Dependency
|
102
116
|
name: faraday
|
103
117
|
requirement: !ruby/object:Gem::Requirement
|
@@ -127,7 +141,7 @@ dependencies:
|
|
127
141
|
version: 0.12.2
|
128
142
|
- - "<"
|
129
143
|
- !ruby/object:Gem::Version
|
130
|
-
version:
|
144
|
+
version: 1.1.0
|
131
145
|
type: :runtime
|
132
146
|
prerelease: false
|
133
147
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -137,21 +151,27 @@ dependencies:
|
|
137
151
|
version: 0.12.2
|
138
152
|
- - "<"
|
139
153
|
- !ruby/object:Gem::Version
|
140
|
-
version:
|
154
|
+
version: 1.1.0
|
141
155
|
- !ruby/object:Gem::Dependency
|
142
156
|
name: thor
|
143
157
|
requirement: !ruby/object:Gem::Requirement
|
144
158
|
requirements:
|
145
|
-
- - "
|
159
|
+
- - ">="
|
146
160
|
- !ruby/object:Gem::Version
|
147
161
|
version: 0.20.3
|
162
|
+
- - "<"
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
version: 1.1.0
|
148
165
|
type: :runtime
|
149
166
|
prerelease: false
|
150
167
|
version_requirements: !ruby/object:Gem::Requirement
|
151
168
|
requirements:
|
152
|
-
- - "
|
169
|
+
- - ">="
|
153
170
|
- !ruby/object:Gem::Version
|
154
171
|
version: 0.20.3
|
172
|
+
- - "<"
|
173
|
+
- !ruby/object:Gem::Version
|
174
|
+
version: 1.1.0
|
155
175
|
- !ruby/object:Gem::Dependency
|
156
176
|
name: parallel
|
157
177
|
requirement: !ruby/object:Gem::Requirement
|
@@ -190,16 +210,22 @@ dependencies:
|
|
190
210
|
name: serrano
|
191
211
|
requirement: !ruby/object:Gem::Requirement
|
192
212
|
requirements:
|
193
|
-
- - "
|
213
|
+
- - ">="
|
194
214
|
- !ruby/object:Gem::Version
|
195
215
|
version: 0.5.2
|
216
|
+
- - "<"
|
217
|
+
- !ruby/object:Gem::Version
|
218
|
+
version: 0.7.0
|
196
219
|
type: :runtime
|
197
220
|
prerelease: false
|
198
221
|
version_requirements: !ruby/object:Gem::Requirement
|
199
222
|
requirements:
|
200
|
-
- - "
|
223
|
+
- - ">="
|
201
224
|
- !ruby/object:Gem::Version
|
202
225
|
version: 0.5.2
|
226
|
+
- - "<"
|
227
|
+
- !ruby/object:Gem::Version
|
228
|
+
version: 0.7.0
|
203
229
|
- !ruby/object:Gem::Dependency
|
204
230
|
name: bibtex-ruby
|
205
231
|
requirement: !ruby/object:Gem::Requirement
|
@@ -250,6 +276,31 @@ files:
|
|
250
276
|
- README.md
|
251
277
|
- Rakefile
|
252
278
|
- bin/extcite
|
279
|
+
- doc/Array.html
|
280
|
+
- doc/Configuration.html
|
281
|
+
- doc/Extcite.html
|
282
|
+
- doc/Hash.html
|
283
|
+
- doc/String.html
|
284
|
+
- doc/Textminer.html
|
285
|
+
- doc/Textminer/Fetch.html
|
286
|
+
- doc/Textminer/Mined.html
|
287
|
+
- doc/Textminer/Miner.html
|
288
|
+
- doc/Textminer/Request.html
|
289
|
+
- doc/Textminer/Response.html
|
290
|
+
- doc/_index.html
|
291
|
+
- doc/class_list.html
|
292
|
+
- doc/css/common.css
|
293
|
+
- doc/css/full_list.css
|
294
|
+
- doc/css/style.css
|
295
|
+
- doc/file.README.html
|
296
|
+
- doc/file_list.html
|
297
|
+
- doc/frames.html
|
298
|
+
- doc/index.html
|
299
|
+
- doc/js/app.js
|
300
|
+
- doc/js/full_list.js
|
301
|
+
- doc/js/jquery.js
|
302
|
+
- doc/method_list.html
|
303
|
+
- doc/top-level-namespace.html
|
253
304
|
- extcite.gemspec
|
254
305
|
- extra/fetch.rb
|
255
306
|
- extra/fetch_method.rb
|
@@ -261,7 +312,12 @@ files:
|
|
261
312
|
homepage: https://github.com/sckott/extcite
|
262
313
|
licenses:
|
263
314
|
- MIT
|
264
|
-
metadata:
|
315
|
+
metadata:
|
316
|
+
homepage_uri: https://github.com/sckott/extcite
|
317
|
+
changelog_uri: https://github.com/sckott/extcite/releases/tag/v0.4.0
|
318
|
+
source_code_uri: https://github.com/sckott/extcite
|
319
|
+
bug_tracker_uri: https://github.com/sckott/extcite/issues
|
320
|
+
documentation_uri: https://xenodochial-stonebraker-ad2732.netlify.app
|
265
321
|
post_install_message:
|
266
322
|
rdoc_options: []
|
267
323
|
require_paths:
|