serrano 0.1.1 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +1 -7
- data/README.md +48 -8
- data/Rakefile +2 -2
- data/bin/serrano +37 -3
- data/lib/serrano/link_methods_array.rb +51 -0
- data/lib/serrano/link_methods_hash.rb +46 -0
- data/lib/serrano/styles.rb +23 -0
- data/lib/serrano/version.rb +1 -1
- data/lib/serrano.rb +10 -57
- data/serrano.gemspec +1 -3
- metadata +5 -45
- data/lib/serrano/mine_utils.rb +0 -65
- data/lib/serrano/mined.rb +0 -31
- data/lib/serrano/miner.rb +0 -42
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ecc2b22a17fb2e658070f687b18232a534a33f7a
|
4
|
+
data.tar.gz: c24446bd652f48cd806319914a4ce15dd3b20565
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2542e1fe55dcaac00e62afb198f59ada4691cd44f347d2364528d282cdda1768cb303495cc5d2e1e82b22e7a281a846fef2b3185129e14c391f1f43de75e6c6a
|
7
|
+
data.tar.gz: 0ed3050c3ab2c7ef709902893bc4c334996f248b72d43d50a6228323664f1c92a3083a1edba6b2bca87417e00e8dba034e2acff20290627b7c441b97c53b614a
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.1.4 (2015-12-04)
|
2
|
+
|
3
|
+
* Added `csl_styles()` method to get CSL styles info (#23)
|
4
|
+
* note to docs that `sample` parameter is ignored unless `works` route used (#22)
|
5
|
+
* note to docs that `funders` without IDs don't show up in the `funders` route (#21)
|
6
|
+
* Added hash and array method to extract links from output of any methods with works (#18)
|
7
|
+
* Method `Serrano.text` for text mining removed. use the `textminer` gem (#13)
|
8
|
+
|
1
9
|
## 0.1.0 (2015-11-17)
|
2
10
|
|
3
11
|
* Improved documentation
|
data/Gemfile.lock
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
serrano (0.1.
|
4
|
+
serrano (0.1.4)
|
5
5
|
faraday (~> 0.9.1)
|
6
6
|
faraday_middleware (~> 0.10.0)
|
7
7
|
multi_json (~> 1.0)
|
8
|
-
nokogiri (~> 1.6, >= 1.6.6.2)
|
9
8
|
thor (~> 0.19)
|
10
|
-
uuidtools (~> 2.1, >= 2.1.5)
|
11
9
|
|
12
10
|
GEM
|
13
11
|
remote: https://rubygems.org/
|
@@ -22,11 +20,8 @@ GEM
|
|
22
20
|
faraday_middleware (0.10.0)
|
23
21
|
faraday (>= 0.7.4, < 0.10)
|
24
22
|
json (1.8.3)
|
25
|
-
mini_portile (0.6.2)
|
26
23
|
multi_json (1.11.2)
|
27
24
|
multipart-post (2.0.0)
|
28
|
-
nokogiri (1.6.6.2)
|
29
|
-
mini_portile (~> 0.6.0)
|
30
25
|
power_assert (0.2.4)
|
31
26
|
rake (10.4.2)
|
32
27
|
simplecov (0.10.0)
|
@@ -38,7 +33,6 @@ GEM
|
|
38
33
|
power_assert
|
39
34
|
thor (0.19.1)
|
40
35
|
url (0.3.2)
|
41
|
-
uuidtools (2.1.5)
|
42
36
|
|
43
37
|
PLATFORMS
|
44
38
|
ruby
|
data/README.md
CHANGED
@@ -16,7 +16,7 @@ Other Crossref API clients:
|
|
16
16
|
|
17
17
|
## Changes
|
18
18
|
|
19
|
-
For changes see the [Changelog]
|
19
|
+
For changes see the [Changelog][changelog]
|
20
20
|
|
21
21
|
## API
|
22
22
|
|
@@ -38,7 +38,6 @@ Additional methods built on top of the Crossref search API:
|
|
38
38
|
Other methods:
|
39
39
|
|
40
40
|
* [Content negotiation][cn] - `Serrano.content_negotiation()`
|
41
|
-
* [Text and data mining][tdm] - `Serrano.text()`
|
42
41
|
* [Citation count][ccount] - `Serrano.citation_count()`
|
43
42
|
* [get CSL styles][csl] - `Serrano.csl_styles()`
|
44
43
|
|
@@ -60,7 +59,7 @@ rake install
|
|
60
59
|
|
61
60
|
## Setup
|
62
61
|
|
63
|
-
Crossref's API will likely be used by others in the future, allowing the base URL to be swapped out. You can swap out the base URL by passing named options in a block to `Serrano.configuration`.
|
62
|
+
Crossref's API will likely be used by others in the future, allowing the base URL to be swapped out. You can swap out the base URL by passing named options in a block to `Serrano.configuration`.
|
64
63
|
|
65
64
|
This will also be the way to set up other user options, as needed down the road.
|
66
65
|
|
@@ -72,11 +71,13 @@ end
|
|
72
71
|
|
73
72
|
## Examples
|
74
73
|
|
74
|
+
### Use in a Ruby repl
|
75
|
+
|
75
76
|
Search works by DOI
|
76
77
|
|
77
78
|
```ruby
|
78
79
|
require 'serrano'
|
79
|
-
Serrano.works(
|
80
|
+
Serrano.works(ids: '10.1371/journal.pone.0033693')
|
80
81
|
```
|
81
82
|
|
82
83
|
Search works by query string
|
@@ -85,6 +86,20 @@ Search works by query string
|
|
85
86
|
Serrano.works(query: "ecology")
|
86
87
|
```
|
87
88
|
|
89
|
+
Get links
|
90
|
+
|
91
|
+
```ruby
|
92
|
+
res = Serrano.works(filter: {has_full_text: true})
|
93
|
+
# entire links metadata
|
94
|
+
res.links
|
95
|
+
# just links URLs
|
96
|
+
res.links(true)
|
97
|
+
# just xml links, if present
|
98
|
+
res.links_xml(true)
|
99
|
+
# just pdf links, if present
|
100
|
+
res.links_pdf
|
101
|
+
```
|
102
|
+
|
88
103
|
Search journals by publisher name
|
89
104
|
|
90
105
|
```ruby
|
@@ -112,13 +127,37 @@ Serrano.random_dois(sample: 100)
|
|
112
127
|
Content negotiation
|
113
128
|
|
114
129
|
```ruby
|
115
|
-
Serrano.
|
130
|
+
Serrano.content_negotiation(ids: '10.1126/science.169.3946.635', format: "citeproc-json")
|
116
131
|
```
|
117
132
|
|
118
|
-
|
133
|
+
### Use on the CLI
|
119
134
|
|
120
|
-
|
121
|
-
|
135
|
+
The command line tool `serrano` should be available after you install
|
136
|
+
|
137
|
+
```
|
138
|
+
~$ serrano
|
139
|
+
Commands:
|
140
|
+
serrano contneg # Content negotiation
|
141
|
+
serrano funders [funder IDs] # Search for funders by DOI prefix
|
142
|
+
serrano help [COMMAND] # Describe available commands or one spec...
|
143
|
+
serrano journals [journal ISSNs] # Search for journals by ISSNs
|
144
|
+
serrano licenses # Search for licenses by name
|
145
|
+
serrano members [member IDs] # Get members by id
|
146
|
+
serrano prefixes [DOI prefixes] # Search for prefixes by DOI prefix
|
147
|
+
serrano types [type name] # Search for types by name
|
148
|
+
serrano version # Get serrano version
|
149
|
+
serrano works [DOIs] # Get works by DOIs
|
150
|
+
```
|
151
|
+
|
152
|
+
```
|
153
|
+
# A single DOI
|
154
|
+
~$ serrano works 10.1371/journal.pone.0033693
|
155
|
+
|
156
|
+
# Many DOIs
|
157
|
+
~$ serrano works "10.1007/12080.1874-1746,10.1007/10452.1573-5125"
|
158
|
+
|
159
|
+
# output JSON, then parse with e.g., jq
|
160
|
+
~$ serrano works --filter=has_orcid:true --json --limit=2 | jq '.message.items[].author[].ORCID | select(. != null)'
|
122
161
|
```
|
123
162
|
|
124
163
|
## Meta
|
@@ -131,3 +170,4 @@ res = Serrano.text(url: 'http://...');
|
|
131
170
|
[tdm]: http://www.crossref.org/tdm/
|
132
171
|
[ccount]: http://labs.crossref.org/openurl/
|
133
172
|
[csl]: https://github.com/citation-style-language/styles
|
173
|
+
[changelog]: https://github.com/sckott/serrano/blob/master/CHANGELOG.md
|
data/Rakefile
CHANGED
@@ -16,7 +16,7 @@ task :docs do
|
|
16
16
|
end
|
17
17
|
|
18
18
|
desc "bundle install"
|
19
|
-
task :
|
19
|
+
task :bundle do
|
20
20
|
system "bundle install"
|
21
21
|
end
|
22
22
|
|
@@ -31,7 +31,7 @@ task :build do
|
|
31
31
|
end
|
32
32
|
|
33
33
|
desc "Install serrano"
|
34
|
-
task :install => :build do
|
34
|
+
task :install => [:bundle, :build] do
|
35
35
|
system "gem install serrano-#{Serrano::VERSION}.gem"
|
36
36
|
end
|
37
37
|
|
data/bin/serrano
CHANGED
@@ -38,10 +38,24 @@ class Sr < Thor
|
|
38
38
|
"http://id.crossref.org/member/340"
|
39
39
|
\x5"http://id.crossref.org/member/297"
|
40
40
|
\x5"http://id.crossref.org/member/297"
|
41
|
+
|
42
|
+
# Filter usage
|
43
|
+
\x5$ serrano works --filter=has_orcid:true --json --limit=2 | jq '.message.items[].author[].ORCID | select(. != null)'
|
44
|
+
|
45
|
+
"http://orcid.org/0000-0003-4087-8021"
|
46
|
+
\x5"http://orcid.org/0000-0002-2076-5452"
|
47
|
+
\x5"http://orcid.org/0000-0003-4087-8021"
|
48
|
+
\x5"http://orcid.org/0000-0002-2076-5452"
|
41
49
|
LONGDESC
|
42
50
|
option :json, :type => :boolean, :default => false
|
43
|
-
|
44
|
-
|
51
|
+
option :filter, :type => :hash, :default => nil
|
52
|
+
option :limit, :type => :numeric, :default => nil
|
53
|
+
def works(ids=nil)
|
54
|
+
if ids.nil?
|
55
|
+
out = Serrano.works(filter: options[:filter], limit: options[:limit])
|
56
|
+
else
|
57
|
+
out = Serrano.works(ids: ids.split(","), filter: options[:filter])
|
58
|
+
end
|
45
59
|
if !options[:json]
|
46
60
|
out = out.collect { |x| x['message'].select { |k,v| k[/DOI|type|title/] } }
|
47
61
|
out.each do |x|
|
@@ -397,7 +411,7 @@ class Sr < Thor
|
|
397
411
|
\x5name: U.S. Department of Energy
|
398
412
|
\x5location: United States
|
399
413
|
|
400
|
-
$ serrano licenses --json=true | jq .
|
414
|
+
$ serrano licenses --json=true | jq .message.items[]
|
401
415
|
|
402
416
|
"National Science Foundation"
|
403
417
|
\x5"U.S. Department of Energy"
|
@@ -418,6 +432,26 @@ class Sr < Thor
|
|
418
432
|
end
|
419
433
|
end
|
420
434
|
|
435
|
+
desc "contneg", "Content negotiation"
|
436
|
+
long_desc <<-LONGDESC
|
437
|
+
`serrano contneg` accepts a DOI
|
438
|
+
|
439
|
+
$ serrano contneg 10.1890/13-0590.1
|
440
|
+
|
441
|
+
Murtaugh, P. A. (2014). In defense of P values . Ecology, 95(3), 611–617. doi:10.1890/13-0590.1
|
442
|
+
|
443
|
+
$ serrano contneg 10.1890/13-0590.1 --style=heredity
|
444
|
+
|
445
|
+
Murtaugh PA (2014). In defense of P values . Ecology 95: 611–617.
|
446
|
+
LONGDESC
|
447
|
+
option :format, :type => :string, :default => "text"
|
448
|
+
option :style, :type => :string, :default => "apa"
|
449
|
+
option :locale, :type => :string, :default => "en-US"
|
450
|
+
def contneg(ids)
|
451
|
+
puts Serrano.content_negotiation(ids: ids, format: options[:format], style:
|
452
|
+
options[:style], locale: options[:locale])
|
453
|
+
end
|
454
|
+
|
421
455
|
desc "version", "Get serrano version"
|
422
456
|
def version
|
423
457
|
puts Serrano::VERSION
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Array methods
|
2
|
+
class Array
|
3
|
+
def links(just_urls = false)
|
4
|
+
return self.collect{ |x| x.links(just_urls) }.flatten
|
5
|
+
# if temp.length == 1
|
6
|
+
# return tmp[0]
|
7
|
+
# else
|
8
|
+
# return tmp
|
9
|
+
# end
|
10
|
+
# tmp = self.collect{ |x| x['message']['link'] }
|
11
|
+
# return parse_link(tmp, just_urls)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
class Array
|
16
|
+
def links_xml(just_urls = false)
|
17
|
+
return parse_link(self.collect { |z| z.links_xml }[0], just_urls)
|
18
|
+
# return parse_link(pull_link(self, '^application\/xml$|^text\/xml$'), just_urls)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class Array
|
23
|
+
def links_pdf(just_urls = false)
|
24
|
+
return parse_link(self.collect { |z| z.links_pdf }[0], just_urls)
|
25
|
+
# return parse_link(pull_link(self, '^application\/pdf$'), just_urls)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class Array
|
30
|
+
def links_plain(just_urls = false)
|
31
|
+
return parse_link(self.collect { |z| z.links_plain }[0], just_urls)
|
32
|
+
# return parse_link(pull_link(self, '^application\/plain$|^text\/plain$'), just_urls)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def pull_link(x, y)
|
37
|
+
return x.collect { |z| z.links_xml }[0]
|
38
|
+
# return x.collect { |z| z['message']['link'] }.compact.collect { |z| z.compact.select { |w| w['content-type'].match(/#{y}/) } }
|
39
|
+
end
|
40
|
+
|
41
|
+
def parse_link(x, just_urls)
|
42
|
+
if x.nil?
|
43
|
+
return x
|
44
|
+
else
|
45
|
+
if just_urls
|
46
|
+
return x.compact.collect { |z| z.collect{ |y| y['URL'] }}.flatten
|
47
|
+
else
|
48
|
+
return x
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Hash methods
|
2
|
+
class Hash
|
3
|
+
def links(just_urls = false)
|
4
|
+
if self['message']['items'].nil?
|
5
|
+
tmp = self['message']['link'].reject { |c| c.empty? }
|
6
|
+
else
|
7
|
+
tmp = self['message']['items'].collect { |x| x['link'] }.reject { |c| c.empty? }
|
8
|
+
end
|
9
|
+
|
10
|
+
return parse_links(tmp, just_urls)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
class Hash
|
15
|
+
def links_xml(just_urls = false)
|
16
|
+
return parse_links(pull_links(self, '^application\/xml$|^text\/xml$'), just_urls)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
class Hash
|
21
|
+
def links_pdf(just_urls = false)
|
22
|
+
return parse_links(pull_links(self, '^application\/pdf$'), just_urls)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Hash
|
27
|
+
def links_plain(just_urls = false)
|
28
|
+
return parse_links(pull_links(self, '^application\/plain$|^text\/plain$'), just_urls)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def pull_links(x, y)
|
33
|
+
return x['message']['items'].collect { |x| x['link'].select { |z| z['content-type'].match(/#{y}/) } }.reject { |c| c.empty? }
|
34
|
+
end
|
35
|
+
|
36
|
+
def parse_links(x, just_urls)
|
37
|
+
if x.empty?
|
38
|
+
return x
|
39
|
+
else
|
40
|
+
if just_urls
|
41
|
+
return x.collect { |x| x.collect { |z| z['URL'] }}.flatten
|
42
|
+
else
|
43
|
+
return x
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require "faraday"
|
2
|
+
require "multi_json"
|
3
|
+
|
4
|
+
def get_styles
|
5
|
+
base = "https://api.github.com/repos/citation-style-language/styles"
|
6
|
+
conn = Faraday.new(:url => base)
|
7
|
+
args = { per_page: 1 }
|
8
|
+
tt = conn.get 'commits', args
|
9
|
+
commres = MultiJson.load(tt.body)
|
10
|
+
sha = commres[0]['sha']
|
11
|
+
sty = conn.get "git/trees/" + sha
|
12
|
+
res = MultiJson.load(sty.body)
|
13
|
+
files = res['tree'].collect { |x| x['path'] }
|
14
|
+
matches = files.collect { |x|
|
15
|
+
if x.match('csl').nil?
|
16
|
+
nil
|
17
|
+
else
|
18
|
+
x.match('csl').string
|
19
|
+
end
|
20
|
+
}
|
21
|
+
csls = matches.compact
|
22
|
+
return csls.collect { |z| z.gsub('.csl', '') }
|
23
|
+
end
|
data/lib/serrano/version.rb
CHANGED
data/lib/serrano.rb
CHANGED
@@ -2,8 +2,10 @@ require "serrano/version"
|
|
2
2
|
require "serrano/request"
|
3
3
|
require "serrano/filterhandler"
|
4
4
|
require "serrano/cnrequest"
|
5
|
-
require "serrano/miner"
|
6
5
|
require "serrano/filters"
|
6
|
+
require "serrano/styles"
|
7
|
+
require "serrano/link_methods_hash"
|
8
|
+
require "serrano/link_methods_array"
|
7
9
|
|
8
10
|
require 'rexml/document'
|
9
11
|
require 'rexml/xpath'
|
@@ -12,7 +14,7 @@ require 'rexml/xpath'
|
|
12
14
|
# @param offset [Fixnum] Number of record to start at, from 1 to infinity.
|
13
15
|
# @param limit [Fixnum] Number of results to return. Not relevant when searching with specific dois. Default: 20. Max: 1000
|
14
16
|
# @param sample [Fixnum] Number of random results to return. when you use the sample parameter,
|
15
|
-
# the limit and offset parameters are ignored.
|
17
|
+
# the limit and offset parameters are ignored. This parameter is only used when works are requested.
|
16
18
|
# @param sort [String] Field to sort on, one of score, relevance,
|
17
19
|
# updated (date of most recent change to metadata. Currently the same as deposited),
|
18
20
|
# deposited (time of most recent deposit), indexed (time of most recent index), or
|
@@ -52,7 +54,6 @@ require 'rexml/xpath'
|
|
52
54
|
# Additional methods
|
53
55
|
# * `Serrano.agency` - test the registration agency for a DOI
|
54
56
|
# * `Serrano.content_negotiation` - Content negotiation
|
55
|
-
# * `Serrano.text` - Text and data mining
|
56
57
|
# * `Serrano.citation_count` - Citation count
|
57
58
|
# * `Serrano.csl_styles` - get CSL styles
|
58
59
|
#
|
@@ -67,7 +68,6 @@ module Serrano
|
|
67
68
|
|
68
69
|
define_setting :access_token
|
69
70
|
define_setting :access_secret
|
70
|
-
define_setting :elsevier_key
|
71
71
|
define_setting :base_url, "http://api.crossref.org/"
|
72
72
|
|
73
73
|
##
|
@@ -86,6 +86,7 @@ module Serrano
|
|
86
86
|
# Serrano.works(ids: '10.5555/515151')
|
87
87
|
# Serrano.works(ids: '10.1371/journal.pone.0033693')
|
88
88
|
# Serrano.works(ids: ['10.1007/12080.1874-1746','10.1007/10452.1573-5125', '10.1111/(issn)1442-9993'])
|
89
|
+
# Serrano.works(ids: ["10.1016/0304-4009(81)90025-5", "10.1016/0304-4009(83)90036-0"])
|
89
90
|
# # query
|
90
91
|
# Serrano.works(query: "ecology")
|
91
92
|
# Serrano.works(query: "renear+-ontologies")
|
@@ -93,6 +94,9 @@ module Serrano
|
|
93
94
|
# Serrano.works(query: "ecology", sort: 'relevance', order: "asc")
|
94
95
|
# # Filters
|
95
96
|
# Serrano.works(filter: {has_full_text: true})
|
97
|
+
# res = Serrano.works(filter: {has_full_text: true})
|
98
|
+
# res.links # entire links metadata
|
99
|
+
# res.links(true) # just links URLs
|
96
100
|
# Serrano.works(filter: {has_funder: true, has_full_text: true})
|
97
101
|
# Serrano.works(filter: {award_number: 'CBET-0756451', award_funder: '10.13039/100000001'})
|
98
102
|
#
|
@@ -181,6 +185,8 @@ module Serrano
|
|
181
185
|
# @param works [Boolean] If true, works returned as well. Default: false
|
182
186
|
# @return [Array] An array of hashes
|
183
187
|
#
|
188
|
+
# @note Funders without IDs don't show up on the /funders route
|
189
|
+
#
|
184
190
|
# @example
|
185
191
|
# require 'serrano'
|
186
192
|
# # Search by DOI, one or more
|
@@ -382,59 +388,6 @@ module Serrano
|
|
382
388
|
CNRequest.new(ids, format, style, locale).perform
|
383
389
|
end
|
384
390
|
|
385
|
-
##
|
386
|
-
# Get full text
|
387
|
-
#
|
388
|
-
# Should work for open access papers, but for closed, requires authentication and
|
389
|
-
# likely pre-authorized IP address.
|
390
|
-
#
|
391
|
-
# @param url [String] A url for full text
|
392
|
-
# @param type [Hash] Ignored for now. One of xml, plain, or pdf. Right now, type auto-detected from the URL
|
393
|
-
# @return [Mined] An object of class Mined, with methods for extracting
|
394
|
-
# the url requested, the file path, and parsing the plain text, XML, or extracting
|
395
|
-
# text from the pdf.
|
396
|
-
#
|
397
|
-
# @example
|
398
|
-
# require 'serrano'
|
399
|
-
# # Set authorization
|
400
|
-
# Serrano.configuration do |config|
|
401
|
-
# config.elsevier_key = "<your key>"
|
402
|
-
# end
|
403
|
-
# # Get some elsevier works
|
404
|
-
# res = Serrano.members(ids: 78, works: true);
|
405
|
-
# # get full text links, here doing xml
|
406
|
-
# links = res[0]['message']['items'].collect { |x| x['link'].keep_if { |z| z['content-type'] == 'text/xml' } };
|
407
|
-
# links = links.collect { |z| z[0].select { |k,v| k[/URL/] }.values[0] };
|
408
|
-
# # Get full text for an article
|
409
|
-
# res = Serrano.text(url: links[0]);
|
410
|
-
# res.url
|
411
|
-
# res.path
|
412
|
-
# res.type
|
413
|
-
# xml = res.parse()
|
414
|
-
# puts xml
|
415
|
-
# xml.xpath('//xocs:cover-date-text', xml.root.namespaces).text
|
416
|
-
#
|
417
|
-
# ## plain text
|
418
|
-
# # get full text links, here doing xml
|
419
|
-
# links = res[0]['message']['items'].collect { |x| x['link'].keep_if { |z| z['content-type'] == 'text/plain' } };
|
420
|
-
# links = links.collect { |z| z[0].select { |k,v| k[/URL/] }.values[0] };
|
421
|
-
# # Get full text for an article
|
422
|
-
# res = Serrano.text(url: links[0]);
|
423
|
-
# res.url
|
424
|
-
# res.parse
|
425
|
-
#
|
426
|
-
# # With open access content - using Pensoft
|
427
|
-
# res = Serrano.members(ids: 2258, works: true, filter: {has_full_text: true});
|
428
|
-
# links = res[0]['message']['items'].collect { |x| x['link'].keep_if { |z| z['content-type'] == 'application/xml' } };
|
429
|
-
# links = links.collect { |z| z[0].select { |k,v| k[/URL/] }.values[0] };
|
430
|
-
# # Get full text for an article
|
431
|
-
# res = Serrano.text(url: links[0]);
|
432
|
-
# res.url
|
433
|
-
# res.parse
|
434
|
-
def self.text(url:, type: 'xml')
|
435
|
-
Miner.new(url, type).perform
|
436
|
-
end
|
437
|
-
|
438
391
|
# Get a citation count with a DOI
|
439
392
|
#
|
440
393
|
# @!macro serrano_options
|
data/serrano.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.version = Serrano::VERSION
|
9
9
|
s.platform = Gem::Platform::RUBY
|
10
10
|
s.required_ruby_version = '>= 2.0'
|
11
|
-
s.date = '2015-
|
11
|
+
s.date = '2015-12-04'
|
12
12
|
s.summary = "Crossref Client"
|
13
13
|
s.description = "Low Level Ruby Client for the Crossref Search API"
|
14
14
|
s.authors = "Scott Chamberlain"
|
@@ -33,6 +33,4 @@ Gem::Specification.new do |s|
|
|
33
33
|
s.add_runtime_dependency 'faraday_middleware', '~> 0.10.0'
|
34
34
|
s.add_runtime_dependency 'thor', '~> 0.19'
|
35
35
|
s.add_runtime_dependency 'multi_json', '~> 1.0'
|
36
|
-
s.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.6.2'
|
37
|
-
s.add_runtime_dependency 'uuidtools', '~> 2.1', '>= 2.1.5'
|
38
36
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: serrano
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Scott Chamberlain
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -156,46 +156,6 @@ dependencies:
|
|
156
156
|
- - "~>"
|
157
157
|
- !ruby/object:Gem::Version
|
158
158
|
version: '1.0'
|
159
|
-
- !ruby/object:Gem::Dependency
|
160
|
-
name: nokogiri
|
161
|
-
requirement: !ruby/object:Gem::Requirement
|
162
|
-
requirements:
|
163
|
-
- - "~>"
|
164
|
-
- !ruby/object:Gem::Version
|
165
|
-
version: '1.6'
|
166
|
-
- - ">="
|
167
|
-
- !ruby/object:Gem::Version
|
168
|
-
version: 1.6.6.2
|
169
|
-
type: :runtime
|
170
|
-
prerelease: false
|
171
|
-
version_requirements: !ruby/object:Gem::Requirement
|
172
|
-
requirements:
|
173
|
-
- - "~>"
|
174
|
-
- !ruby/object:Gem::Version
|
175
|
-
version: '1.6'
|
176
|
-
- - ">="
|
177
|
-
- !ruby/object:Gem::Version
|
178
|
-
version: 1.6.6.2
|
179
|
-
- !ruby/object:Gem::Dependency
|
180
|
-
name: uuidtools
|
181
|
-
requirement: !ruby/object:Gem::Requirement
|
182
|
-
requirements:
|
183
|
-
- - "~>"
|
184
|
-
- !ruby/object:Gem::Version
|
185
|
-
version: '2.1'
|
186
|
-
- - ">="
|
187
|
-
- !ruby/object:Gem::Version
|
188
|
-
version: 2.1.5
|
189
|
-
type: :runtime
|
190
|
-
prerelease: false
|
191
|
-
version_requirements: !ruby/object:Gem::Requirement
|
192
|
-
requirements:
|
193
|
-
- - "~>"
|
194
|
-
- !ruby/object:Gem::Version
|
195
|
-
version: '2.1'
|
196
|
-
- - ">="
|
197
|
-
- !ruby/object:Gem::Version
|
198
|
-
version: 2.1.5
|
199
159
|
description: Low Level Ruby Client for the Crossref Search API
|
200
160
|
email: myrmecocystus@gmail.com
|
201
161
|
executables:
|
@@ -221,10 +181,10 @@ files:
|
|
221
181
|
- lib/serrano/filterhandler.rb
|
222
182
|
- lib/serrano/filters.rb
|
223
183
|
- lib/serrano/helpers/configuration.rb
|
224
|
-
- lib/serrano/
|
225
|
-
- lib/serrano/
|
226
|
-
- lib/serrano/miner.rb
|
184
|
+
- lib/serrano/link_methods_array.rb
|
185
|
+
- lib/serrano/link_methods_hash.rb
|
227
186
|
- lib/serrano/request.rb
|
187
|
+
- lib/serrano/styles.rb
|
228
188
|
- lib/serrano/version.rb
|
229
189
|
- serrano.gemspec
|
230
190
|
homepage: http://github.com/sckott/serrano
|
data/lib/serrano/mine_utils.rb
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
require 'nokogiri'
|
2
|
-
require 'uuidtools'
|
3
|
-
|
4
|
-
def detect_type(x)
|
5
|
-
ctype = x.headers['content-type']
|
6
|
-
case ctype
|
7
|
-
when 'text/xml'
|
8
|
-
'xml'
|
9
|
-
when 'text/plain'
|
10
|
-
'plain'
|
11
|
-
when 'application/pdf'
|
12
|
-
'pdf'
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def make_ext(x)
|
17
|
-
case x
|
18
|
-
when 'xml'
|
19
|
-
'xml'
|
20
|
-
when 'plain'
|
21
|
-
'txt'
|
22
|
-
when 'pdf'
|
23
|
-
'pdf'
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def make_path(type)
|
28
|
-
# id = x.split('article/')[1].split('?')[0]
|
29
|
-
# path = id + '.' + type
|
30
|
-
# return path
|
31
|
-
type = make_ext(type)
|
32
|
-
uuid = UUIDTools::UUID.random_create.to_s
|
33
|
-
path = uuid + '.' + type
|
34
|
-
return path
|
35
|
-
end
|
36
|
-
|
37
|
-
def write_disk(res, path)
|
38
|
-
f = File.new(path, "wb")
|
39
|
-
f.write(res.body)
|
40
|
-
f.close()
|
41
|
-
end
|
42
|
-
|
43
|
-
def read_disk(path)
|
44
|
-
return File.read(path)
|
45
|
-
end
|
46
|
-
|
47
|
-
def parse_xml(x)
|
48
|
-
text = read_disk(x)
|
49
|
-
xml = Nokogiri.parse(text)
|
50
|
-
return xml
|
51
|
-
end
|
52
|
-
|
53
|
-
def parse_plain(x)
|
54
|
-
text = read_disk(x)
|
55
|
-
return text
|
56
|
-
end
|
57
|
-
|
58
|
-
def parse_pdf(x)
|
59
|
-
raise "not ready yet"
|
60
|
-
end
|
61
|
-
|
62
|
-
def is_elsevier(x)
|
63
|
-
tmp = x.match 'elsevier'
|
64
|
-
!tmp.nil?
|
65
|
-
end
|
data/lib/serrano/mined.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
|
3
|
-
##
|
4
|
-
# Serrano::Mined
|
5
|
-
#
|
6
|
-
# Class to give back text mining object
|
7
|
-
module Serrano
|
8
|
-
class Mined #:nodoc:
|
9
|
-
attr_accessor :url
|
10
|
-
attr_accessor :path
|
11
|
-
attr_accessor :type
|
12
|
-
|
13
|
-
def initialize(url, path, type)
|
14
|
-
self.url = url
|
15
|
-
self.path = path
|
16
|
-
self.type = type
|
17
|
-
end
|
18
|
-
|
19
|
-
def parse
|
20
|
-
case self.type
|
21
|
-
when 'xml'
|
22
|
-
parse_xml(self.path)
|
23
|
-
when 'plain'
|
24
|
-
parse_plain(self.path)
|
25
|
-
when 'pdf'
|
26
|
-
parse_pdf(self.path)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
end
|
data/lib/serrano/miner.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
require "faraday"
|
2
|
-
require "multi_json"
|
3
|
-
require "serrano/errors"
|
4
|
-
require "serrano/constants"
|
5
|
-
require 'serrano/helpers/configuration'
|
6
|
-
require 'serrano/mined'
|
7
|
-
require 'serrano/mine_utils'
|
8
|
-
|
9
|
-
##
|
10
|
-
# Serrano::Miner
|
11
|
-
#
|
12
|
-
# Class to give back text mining object
|
13
|
-
module Serrano
|
14
|
-
class Miner #:nodoc:
|
15
|
-
attr_accessor :url
|
16
|
-
attr_accessor :type
|
17
|
-
|
18
|
-
def initialize(url, type)
|
19
|
-
self.url = url
|
20
|
-
self.type = type
|
21
|
-
end
|
22
|
-
|
23
|
-
def perform
|
24
|
-
conn = Faraday.new(:url => self.url)
|
25
|
-
|
26
|
-
if is_elsevier(self.url)
|
27
|
-
res = conn.get do |req|
|
28
|
-
req.headers['X-ELS-APIKey'] = Serrano.elsevier_key
|
29
|
-
end
|
30
|
-
else
|
31
|
-
res = conn.get
|
32
|
-
end
|
33
|
-
|
34
|
-
type = detect_type(res)
|
35
|
-
path = make_path(type)
|
36
|
-
write_disk(res, path)
|
37
|
-
|
38
|
-
return Mined.new(self.url, path, type)
|
39
|
-
end
|
40
|
-
|
41
|
-
end
|
42
|
-
end
|