serrano 0.1.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +1 -7
- data/README.md +48 -8
- data/Rakefile +2 -2
- data/bin/serrano +37 -3
- data/lib/serrano/link_methods_array.rb +51 -0
- data/lib/serrano/link_methods_hash.rb +46 -0
- data/lib/serrano/styles.rb +23 -0
- data/lib/serrano/version.rb +1 -1
- data/lib/serrano.rb +10 -57
- data/serrano.gemspec +1 -3
- metadata +5 -45
- data/lib/serrano/mine_utils.rb +0 -65
- data/lib/serrano/mined.rb +0 -31
- data/lib/serrano/miner.rb +0 -42
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ecc2b22a17fb2e658070f687b18232a534a33f7a
|
4
|
+
data.tar.gz: c24446bd652f48cd806319914a4ce15dd3b20565
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2542e1fe55dcaac00e62afb198f59ada4691cd44f347d2364528d282cdda1768cb303495cc5d2e1e82b22e7a281a846fef2b3185129e14c391f1f43de75e6c6a
|
7
|
+
data.tar.gz: 0ed3050c3ab2c7ef709902893bc4c334996f248b72d43d50a6228323664f1c92a3083a1edba6b2bca87417e00e8dba034e2acff20290627b7c441b97c53b614a
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.1.4 (2015-12-04)
|
2
|
+
|
3
|
+
* Added `csl_styles()` method to get CSL styles info (#23)
|
4
|
+
* note to docs that `sample` parameter is ignored unless `works` route used (#22)
|
5
|
+
* note to docs that funders without IDs don't show up in the `funders` route (#21)
|
6
|
+
* Added hash and array method to extract links from output of any methods with works (#18)
|
7
|
+
* Method `Serrano.text` for text mining removed. Use the `textminer` gem instead (#13)
|
8
|
+
|
1
9
|
## 0.1.0 (2015-11-17)
|
2
10
|
|
3
11
|
* Improved documentation
|
data/Gemfile.lock
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
serrano (0.1.
|
4
|
+
serrano (0.1.4)
|
5
5
|
faraday (~> 0.9.1)
|
6
6
|
faraday_middleware (~> 0.10.0)
|
7
7
|
multi_json (~> 1.0)
|
8
|
-
nokogiri (~> 1.6, >= 1.6.6.2)
|
9
8
|
thor (~> 0.19)
|
10
|
-
uuidtools (~> 2.1, >= 2.1.5)
|
11
9
|
|
12
10
|
GEM
|
13
11
|
remote: https://rubygems.org/
|
@@ -22,11 +20,8 @@ GEM
|
|
22
20
|
faraday_middleware (0.10.0)
|
23
21
|
faraday (>= 0.7.4, < 0.10)
|
24
22
|
json (1.8.3)
|
25
|
-
mini_portile (0.6.2)
|
26
23
|
multi_json (1.11.2)
|
27
24
|
multipart-post (2.0.0)
|
28
|
-
nokogiri (1.6.6.2)
|
29
|
-
mini_portile (~> 0.6.0)
|
30
25
|
power_assert (0.2.4)
|
31
26
|
rake (10.4.2)
|
32
27
|
simplecov (0.10.0)
|
@@ -38,7 +33,6 @@ GEM
|
|
38
33
|
power_assert
|
39
34
|
thor (0.19.1)
|
40
35
|
url (0.3.2)
|
41
|
-
uuidtools (2.1.5)
|
42
36
|
|
43
37
|
PLATFORMS
|
44
38
|
ruby
|
data/README.md
CHANGED
@@ -16,7 +16,7 @@ Other Crossref API clients:
|
|
16
16
|
|
17
17
|
## Changes
|
18
18
|
|
19
|
-
For changes see the [Changelog]
|
19
|
+
For changes see the [Changelog][changelog]
|
20
20
|
|
21
21
|
## API
|
22
22
|
|
@@ -38,7 +38,6 @@ Additional methods built on top of the Crossref search API:
|
|
38
38
|
Other methods:
|
39
39
|
|
40
40
|
* [Content negotiation][cn] - `Serrano.content_negotiation()`
|
41
|
-
* [Text and data mining][tdm] - `Serrano.text()`
|
42
41
|
* [Citation count][ccount] - `Serrano.citation_count()`
|
43
42
|
* [get CSL styles][csl] - `Serrano.csl_styles()`
|
44
43
|
|
@@ -60,7 +59,7 @@ rake install
|
|
60
59
|
|
61
60
|
## Setup
|
62
61
|
|
63
|
-
Crossref's API will likely be used by others in the future, allowing the base URL to be swapped out. You can swap out the base URL by passing named options in a block to `Serrano.configuration`.
|
62
|
+
Crossref's API will likely be used by others in the future, allowing the base URL to be swapped out. You can swap out the base URL by passing named options in a block to `Serrano.configuration`.
|
64
63
|
|
65
64
|
This will also be the way to set up other user options, as needed down the road.
|
66
65
|
|
@@ -72,11 +71,13 @@ end
|
|
72
71
|
|
73
72
|
## Examples
|
74
73
|
|
74
|
+
### Use in a Ruby repl
|
75
|
+
|
75
76
|
Search works by DOI
|
76
77
|
|
77
78
|
```ruby
|
78
79
|
require 'serrano'
|
79
|
-
Serrano.works(
|
80
|
+
Serrano.works(ids: '10.1371/journal.pone.0033693')
|
80
81
|
```
|
81
82
|
|
82
83
|
Search works by query string
|
@@ -85,6 +86,20 @@ Search works by query string
|
|
85
86
|
Serrano.works(query: "ecology")
|
86
87
|
```
|
87
88
|
|
89
|
+
Get links
|
90
|
+
|
91
|
+
```ruby
|
92
|
+
res = Serrano.works(filter: {has_full_text: true})
|
93
|
+
# entire links metadata
|
94
|
+
res.links
|
95
|
+
# just links URLs
|
96
|
+
res.links(true)
|
97
|
+
# just xml links, if present
|
98
|
+
res.links_xml(true)
|
99
|
+
# just pdf links, if present
|
100
|
+
res.links_pdf
|
101
|
+
```
|
102
|
+
|
88
103
|
Search journals by publisher name
|
89
104
|
|
90
105
|
```ruby
|
@@ -112,13 +127,37 @@ Serrano.random_dois(sample: 100)
|
|
112
127
|
Content negotiation
|
113
128
|
|
114
129
|
```ruby
|
115
|
-
Serrano.
|
130
|
+
Serrano.content_negotiation(ids: '10.1126/science.169.3946.635', format: "citeproc-json")
|
116
131
|
```
|
117
132
|
|
118
|
-
|
133
|
+
### Use on the CLI
|
119
134
|
|
120
|
-
|
121
|
-
|
135
|
+
The command line tool `serrano` should be available after you install the gem.
|
136
|
+
|
137
|
+
```
|
138
|
+
~$ serrano
|
139
|
+
Commands:
|
140
|
+
serrano contneg # Content negotiation
|
141
|
+
serrano funders [funder IDs] # Search for funders by DOI prefix
|
142
|
+
serrano help [COMMAND] # Describe available commands or one spec...
|
143
|
+
serrano journals [journal ISSNs] # Search for journals by ISSNs
|
144
|
+
serrano licenses # Search for licenses by name
|
145
|
+
serrano members [member IDs] # Get members by id
|
146
|
+
serrano prefixes [DOI prefixes] # Search for prefixes by DOI prefix
|
147
|
+
serrano types [type name] # Search for types by name
|
148
|
+
serrano version # Get serrano version
|
149
|
+
serrano works [DOIs] # Get works by DOIs
|
150
|
+
```
|
151
|
+
|
152
|
+
```
|
153
|
+
# A single DOI
|
154
|
+
~$ serrano works 10.1371/journal.pone.0033693
|
155
|
+
|
156
|
+
# Many DOIs
|
157
|
+
~$ serrano works "10.1007/12080.1874-1746,10.1007/10452.1573-5125"
|
158
|
+
|
159
|
+
# output JSON, then parse with e.g., jq
|
160
|
+
~$ serrano works --filter=has_orcid:true --json --limit=2 | jq '.message.items[].author[].ORCID | select(. != null)'
|
122
161
|
```
|
123
162
|
|
124
163
|
## Meta
|
@@ -131,3 +170,4 @@ res = Serrano.text(url: 'http://...');
|
|
131
170
|
[tdm]: http://www.crossref.org/tdm/
|
132
171
|
[ccount]: http://labs.crossref.org/openurl/
|
133
172
|
[csl]: https://github.com/citation-style-language/styles
|
173
|
+
[changelog]: https://github.com/sckott/serrano/blob/master/CHANGELOG.md
|
data/Rakefile
CHANGED
@@ -16,7 +16,7 @@ task :docs do
|
|
16
16
|
end
|
17
17
|
|
18
18
|
desc "bundle install"
|
19
|
-
task :
|
19
|
+
task :bundle do
|
20
20
|
system "bundle install"
|
21
21
|
end
|
22
22
|
|
@@ -31,7 +31,7 @@ task :build do
|
|
31
31
|
end
|
32
32
|
|
33
33
|
desc "Install serrano"
|
34
|
-
task :install => :build do
|
34
|
+
task :install => [:bundle, :build] do
|
35
35
|
system "gem install serrano-#{Serrano::VERSION}.gem"
|
36
36
|
end
|
37
37
|
|
data/bin/serrano
CHANGED
@@ -38,10 +38,24 @@ class Sr < Thor
|
|
38
38
|
"http://id.crossref.org/member/340"
|
39
39
|
\x5"http://id.crossref.org/member/297"
|
40
40
|
\x5"http://id.crossref.org/member/297"
|
41
|
+
|
42
|
+
# Filter usage
|
43
|
+
\x5$ serrano works --filter=has_orcid:true --json --limit=2 | jq '.message.items[].author[].ORCID | select(. != null)'
|
44
|
+
|
45
|
+
"http://orcid.org/0000-0003-4087-8021"
|
46
|
+
\x5"http://orcid.org/0000-0002-2076-5452"
|
47
|
+
\x5"http://orcid.org/0000-0003-4087-8021"
|
48
|
+
\x5"http://orcid.org/0000-0002-2076-5452"
|
41
49
|
LONGDESC
|
42
50
|
option :json, :type => :boolean, :default => false
|
43
|
-
|
44
|
-
|
51
|
+
option :filter, :type => :hash, :default => nil
|
52
|
+
option :limit, :type => :numeric, :default => nil
|
53
|
+
def works(ids=nil)
|
54
|
+
if ids.nil?
|
55
|
+
out = Serrano.works(filter: options[:filter], limit: options[:limit])
|
56
|
+
else
|
57
|
+
out = Serrano.works(ids: ids.split(","), filter: options[:filter])
|
58
|
+
end
|
45
59
|
if !options[:json]
|
46
60
|
out = out.collect { |x| x['message'].select { |k,v| k[/DOI|type|title/] } }
|
47
61
|
out.each do |x|
|
@@ -397,7 +411,7 @@ class Sr < Thor
|
|
397
411
|
\x5name: U.S. Department of Energy
|
398
412
|
\x5location: United States
|
399
413
|
|
400
|
-
$ serrano licenses --json=true | jq .
|
414
|
+
$ serrano licenses --json=true | jq .message.items[]
|
401
415
|
|
402
416
|
"National Science Foundation"
|
403
417
|
\x5"U.S. Department of Energy"
|
@@ -418,6 +432,26 @@ class Sr < Thor
|
|
418
432
|
end
|
419
433
|
end
|
420
434
|
|
435
|
+
desc "contneg", "Content negotiation"
|
436
|
+
long_desc <<-LONGDESC
|
437
|
+
`serrano contneg` accepts a DOI
|
438
|
+
|
439
|
+
$ serrano contneg 10.1890/13-0590.1
|
440
|
+
|
441
|
+
Murtaugh, P. A. (2014). In defense of P values . Ecology, 95(3), 611–617. doi:10.1890/13-0590.1
|
442
|
+
|
443
|
+
$ serrano contneg 10.1890/13-0590.1 --style=heredity
|
444
|
+
|
445
|
+
Murtaugh PA (2014). In defense of P values . Ecology 95: 611–617.
|
446
|
+
LONGDESC
|
447
|
+
option :format, :type => :string, :default => "text"
|
448
|
+
option :style, :type => :string, :default => "apa"
|
449
|
+
option :locale, :type => :string, :default => "en-US"
|
450
|
+
def contneg(ids)
|
451
|
+
puts Serrano.content_negotiation(ids: ids, format: options[:format], style:
|
452
|
+
options[:style], locale: options[:locale])
|
453
|
+
end
|
454
|
+
|
421
455
|
desc "version", "Get serrano version"
|
422
456
|
def version
|
423
457
|
puts Serrano::VERSION
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Array methods
|
2
|
+
class Array
|
3
|
+
def links(just_urls = false)
|
4
|
+
return self.collect{ |x| x.links(just_urls) }.flatten
|
5
|
+
# if temp.length == 1
|
6
|
+
# return tmp[0]
|
7
|
+
# else
|
8
|
+
# return tmp
|
9
|
+
# end
|
10
|
+
# tmp = self.collect{ |x| x['message']['link'] }
|
11
|
+
# return parse_link(tmp, just_urls)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
class Array
|
16
|
+
def links_xml(just_urls = false)
|
17
|
+
return parse_link(self.collect { |z| z.links_xml }[0], just_urls)
|
18
|
+
# return parse_link(pull_link(self, '^application\/xml$|^text\/xml$'), just_urls)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
class Array
|
23
|
+
def links_pdf(just_urls = false)
|
24
|
+
return parse_link(self.collect { |z| z.links_pdf }[0], just_urls)
|
25
|
+
# return parse_link(pull_link(self, '^application\/pdf$'), just_urls)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class Array
|
30
|
+
def links_plain(just_urls = false)
|
31
|
+
return parse_link(self.collect { |z| z.links_plain }[0], just_urls)
|
32
|
+
# return parse_link(pull_link(self, '^application\/plain$|^text\/plain$'), just_urls)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def pull_link(x, y)
|
37
|
+
return x.collect { |z| z.links_xml }[0]
|
38
|
+
# return x.collect { |z| z['message']['link'] }.compact.collect { |z| z.compact.select { |w| w['content-type'].match(/#{y}/) } }
|
39
|
+
end
|
40
|
+
|
41
|
+
def parse_link(x, just_urls)
|
42
|
+
if x.nil?
|
43
|
+
return x
|
44
|
+
else
|
45
|
+
if just_urls
|
46
|
+
return x.compact.collect { |z| z.collect{ |y| y['URL'] }}.flatten
|
47
|
+
else
|
48
|
+
return x
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Hash methods
|
2
|
+
class Hash
|
3
|
+
def links(just_urls = false)
|
4
|
+
if self['message']['items'].nil?
|
5
|
+
tmp = self['message']['link'].reject { |c| c.empty? }
|
6
|
+
else
|
7
|
+
tmp = self['message']['items'].collect { |x| x['link'] }.reject { |c| c.empty? }
|
8
|
+
end
|
9
|
+
|
10
|
+
return parse_links(tmp, just_urls)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
class Hash
|
15
|
+
def links_xml(just_urls = false)
|
16
|
+
return parse_links(pull_links(self, '^application\/xml$|^text\/xml$'), just_urls)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
class Hash
|
21
|
+
def links_pdf(just_urls = false)
|
22
|
+
return parse_links(pull_links(self, '^application\/pdf$'), just_urls)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Hash
|
27
|
+
def links_plain(just_urls = false)
|
28
|
+
return parse_links(pull_links(self, '^application\/plain$|^text\/plain$'), just_urls)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def pull_links(x, y)
|
33
|
+
return x['message']['items'].collect { |x| x['link'].select { |z| z['content-type'].match(/#{y}/) } }.reject { |c| c.empty? }
|
34
|
+
end
|
35
|
+
|
36
|
+
def parse_links(x, just_urls)
|
37
|
+
if x.empty?
|
38
|
+
return x
|
39
|
+
else
|
40
|
+
if just_urls
|
41
|
+
return x.collect { |x| x.collect { |z| z['URL'] }}.flatten
|
42
|
+
else
|
43
|
+
return x
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require "faraday"
|
2
|
+
require "multi_json"
|
3
|
+
|
4
|
+
def get_styles
|
5
|
+
base = "https://api.github.com/repos/citation-style-language/styles"
|
6
|
+
conn = Faraday.new(:url => base)
|
7
|
+
args = { per_page: 1 }
|
8
|
+
tt = conn.get 'commits', args
|
9
|
+
commres = MultiJson.load(tt.body)
|
10
|
+
sha = commres[0]['sha']
|
11
|
+
sty = conn.get "git/trees/" + sha
|
12
|
+
res = MultiJson.load(sty.body)
|
13
|
+
files = res['tree'].collect { |x| x['path'] }
|
14
|
+
matches = files.collect { |x|
|
15
|
+
if x.match('csl').nil?
|
16
|
+
nil
|
17
|
+
else
|
18
|
+
x.match('csl').string
|
19
|
+
end
|
20
|
+
}
|
21
|
+
csls = matches.compact
|
22
|
+
return csls.collect { |z| z.gsub('.csl', '') }
|
23
|
+
end
|
data/lib/serrano/version.rb
CHANGED
data/lib/serrano.rb
CHANGED
@@ -2,8 +2,10 @@ require "serrano/version"
|
|
2
2
|
require "serrano/request"
|
3
3
|
require "serrano/filterhandler"
|
4
4
|
require "serrano/cnrequest"
|
5
|
-
require "serrano/miner"
|
6
5
|
require "serrano/filters"
|
6
|
+
require "serrano/styles"
|
7
|
+
require "serrano/link_methods_hash"
|
8
|
+
require "serrano/link_methods_array"
|
7
9
|
|
8
10
|
require 'rexml/document'
|
9
11
|
require 'rexml/xpath'
|
@@ -12,7 +14,7 @@ require 'rexml/xpath'
|
|
12
14
|
# @param offset [Fixnum] Number of record to start at, from 1 to infinity.
|
13
15
|
# @param limit [Fixnum] Number of results to return. Not relevant when searching with specific dois. Default: 20. Max: 1000
|
14
16
|
# @param sample [Fixnum] Number of random results to return. when you use the sample parameter,
|
15
|
-
# the limit and offset parameters are ignored.
|
17
|
+
# the limit and offset parameters are ignored. This parameter is only used when works are requested.
|
16
18
|
# @param sort [String] Field to sort on, one of score, relevance,
|
17
19
|
# updated (date of most recent change to metadata. Currently the same as deposited),
|
18
20
|
# deposited (time of most recent deposit), indexed (time of most recent index), or
|
@@ -52,7 +54,6 @@ require 'rexml/xpath'
|
|
52
54
|
# Additional methods
|
53
55
|
# * `Serrano.agency` - test the registration agency for a DOI
|
54
56
|
# * `Serrano.content_negotiation` - Content negotiation
|
55
|
-
# * `Serrano.text` - Text and data mining
|
56
57
|
# * `Serrano.citation_count` - Citation count
|
57
58
|
# * `Serrano.csl_styles` - get CSL styles
|
58
59
|
#
|
@@ -67,7 +68,6 @@ module Serrano
|
|
67
68
|
|
68
69
|
define_setting :access_token
|
69
70
|
define_setting :access_secret
|
70
|
-
define_setting :elsevier_key
|
71
71
|
define_setting :base_url, "http://api.crossref.org/"
|
72
72
|
|
73
73
|
##
|
@@ -86,6 +86,7 @@ module Serrano
|
|
86
86
|
# Serrano.works(ids: '10.5555/515151')
|
87
87
|
# Serrano.works(ids: '10.1371/journal.pone.0033693')
|
88
88
|
# Serrano.works(ids: ['10.1007/12080.1874-1746','10.1007/10452.1573-5125', '10.1111/(issn)1442-9993'])
|
89
|
+
# Serrano.works(ids: ["10.1016/0304-4009(81)90025-5", "10.1016/0304-4009(83)90036-0"])
|
89
90
|
# # query
|
90
91
|
# Serrano.works(query: "ecology")
|
91
92
|
# Serrano.works(query: "renear+-ontologies")
|
@@ -93,6 +94,9 @@ module Serrano
|
|
93
94
|
# Serrano.works(query: "ecology", sort: 'relevance', order: "asc")
|
94
95
|
# # Filters
|
95
96
|
# Serrano.works(filter: {has_full_text: true})
|
97
|
+
# res = Serrano.works(filter: {has_full_text: true})
|
98
|
+
# res.links # entire links metadata
|
99
|
+
# res.links(true) # just links URLs
|
96
100
|
# Serrano.works(filter: {has_funder: true, has_full_text: true})
|
97
101
|
# Serrano.works(filter: {award_number: 'CBET-0756451', award_funder: '10.13039/100000001'})
|
98
102
|
#
|
@@ -181,6 +185,8 @@ module Serrano
|
|
181
185
|
# @param works [Boolean] If true, works returned as well. Default: false
|
182
186
|
# @return [Array] An array of hashes
|
183
187
|
#
|
188
|
+
# @note Funders without IDs don't show up on the /funders route
|
189
|
+
#
|
184
190
|
# @example
|
185
191
|
# require 'serrano'
|
186
192
|
# # Search by DOI, one or more
|
@@ -382,59 +388,6 @@ module Serrano
|
|
382
388
|
CNRequest.new(ids, format, style, locale).perform
|
383
389
|
end
|
384
390
|
|
385
|
-
##
|
386
|
-
# Get full text
|
387
|
-
#
|
388
|
-
# Should work for open access papers, but for closed, requires authentication and
|
389
|
-
# likely pre-authorized IP address.
|
390
|
-
#
|
391
|
-
# @param url [String] A url for full text
|
392
|
-
# @param type [Hash] Ignored for now. One of xml, plain, or pdf. Right now, type auto-detected from the URL
|
393
|
-
# @return [Mined] An object of class Mined, with methods for extracting
|
394
|
-
# the url requested, the file path, and parsing the plain text, XML, or extracting
|
395
|
-
# text from the pdf.
|
396
|
-
#
|
397
|
-
# @example
|
398
|
-
# require 'serrano'
|
399
|
-
# # Set authorization
|
400
|
-
# Serrano.configuration do |config|
|
401
|
-
# config.elsevier_key = "<your key>"
|
402
|
-
# end
|
403
|
-
# # Get some elsevier works
|
404
|
-
# res = Serrano.members(ids: 78, works: true);
|
405
|
-
# # get full text links, here doing xml
|
406
|
-
# links = res[0]['message']['items'].collect { |x| x['link'].keep_if { |z| z['content-type'] == 'text/xml' } };
|
407
|
-
# links = links.collect { |z| z[0].select { |k,v| k[/URL/] }.values[0] };
|
408
|
-
# # Get full text for an article
|
409
|
-
# res = Serrano.text(url: links[0]);
|
410
|
-
# res.url
|
411
|
-
# res.path
|
412
|
-
# res.type
|
413
|
-
# xml = res.parse()
|
414
|
-
# puts xml
|
415
|
-
# xml.xpath('//xocs:cover-date-text', xml.root.namespaces).text
|
416
|
-
#
|
417
|
-
# ## plain text
|
418
|
-
# # get full text links, here doing xml
|
419
|
-
# links = res[0]['message']['items'].collect { |x| x['link'].keep_if { |z| z['content-type'] == 'text/plain' } };
|
420
|
-
# links = links.collect { |z| z[0].select { |k,v| k[/URL/] }.values[0] };
|
421
|
-
# # Get full text for an article
|
422
|
-
# res = Serrano.text(url: links[0]);
|
423
|
-
# res.url
|
424
|
-
# res.parse
|
425
|
-
#
|
426
|
-
# # With open access content - using Pensoft
|
427
|
-
# res = Serrano.members(ids: 2258, works: true, filter: {has_full_text: true});
|
428
|
-
# links = res[0]['message']['items'].collect { |x| x['link'].keep_if { |z| z['content-type'] == 'application/xml' } };
|
429
|
-
# links = links.collect { |z| z[0].select { |k,v| k[/URL/] }.values[0] };
|
430
|
-
# # Get full text for an article
|
431
|
-
# res = Serrano.text(url: links[0]);
|
432
|
-
# res.url
|
433
|
-
# res.parse
|
434
|
-
def self.text(url:, type: 'xml')
|
435
|
-
Miner.new(url, type).perform
|
436
|
-
end
|
437
|
-
|
438
391
|
# Get a citation count with a DOI
|
439
392
|
#
|
440
393
|
# @!macro serrano_options
|
data/serrano.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.version = Serrano::VERSION
|
9
9
|
s.platform = Gem::Platform::RUBY
|
10
10
|
s.required_ruby_version = '>= 2.0'
|
11
|
-
s.date = '2015-
|
11
|
+
s.date = '2015-12-04'
|
12
12
|
s.summary = "Crossref Client"
|
13
13
|
s.description = "Low Level Ruby Client for the Crossref Search API"
|
14
14
|
s.authors = "Scott Chamberlain"
|
@@ -33,6 +33,4 @@ Gem::Specification.new do |s|
|
|
33
33
|
s.add_runtime_dependency 'faraday_middleware', '~> 0.10.0'
|
34
34
|
s.add_runtime_dependency 'thor', '~> 0.19'
|
35
35
|
s.add_runtime_dependency 'multi_json', '~> 1.0'
|
36
|
-
s.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.6.2'
|
37
|
-
s.add_runtime_dependency 'uuidtools', '~> 2.1', '>= 2.1.5'
|
38
36
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: serrano
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Scott Chamberlain
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -156,46 +156,6 @@ dependencies:
|
|
156
156
|
- - "~>"
|
157
157
|
- !ruby/object:Gem::Version
|
158
158
|
version: '1.0'
|
159
|
-
- !ruby/object:Gem::Dependency
|
160
|
-
name: nokogiri
|
161
|
-
requirement: !ruby/object:Gem::Requirement
|
162
|
-
requirements:
|
163
|
-
- - "~>"
|
164
|
-
- !ruby/object:Gem::Version
|
165
|
-
version: '1.6'
|
166
|
-
- - ">="
|
167
|
-
- !ruby/object:Gem::Version
|
168
|
-
version: 1.6.6.2
|
169
|
-
type: :runtime
|
170
|
-
prerelease: false
|
171
|
-
version_requirements: !ruby/object:Gem::Requirement
|
172
|
-
requirements:
|
173
|
-
- - "~>"
|
174
|
-
- !ruby/object:Gem::Version
|
175
|
-
version: '1.6'
|
176
|
-
- - ">="
|
177
|
-
- !ruby/object:Gem::Version
|
178
|
-
version: 1.6.6.2
|
179
|
-
- !ruby/object:Gem::Dependency
|
180
|
-
name: uuidtools
|
181
|
-
requirement: !ruby/object:Gem::Requirement
|
182
|
-
requirements:
|
183
|
-
- - "~>"
|
184
|
-
- !ruby/object:Gem::Version
|
185
|
-
version: '2.1'
|
186
|
-
- - ">="
|
187
|
-
- !ruby/object:Gem::Version
|
188
|
-
version: 2.1.5
|
189
|
-
type: :runtime
|
190
|
-
prerelease: false
|
191
|
-
version_requirements: !ruby/object:Gem::Requirement
|
192
|
-
requirements:
|
193
|
-
- - "~>"
|
194
|
-
- !ruby/object:Gem::Version
|
195
|
-
version: '2.1'
|
196
|
-
- - ">="
|
197
|
-
- !ruby/object:Gem::Version
|
198
|
-
version: 2.1.5
|
199
159
|
description: Low Level Ruby Client for the Crossref Search API
|
200
160
|
email: myrmecocystus@gmail.com
|
201
161
|
executables:
|
@@ -221,10 +181,10 @@ files:
|
|
221
181
|
- lib/serrano/filterhandler.rb
|
222
182
|
- lib/serrano/filters.rb
|
223
183
|
- lib/serrano/helpers/configuration.rb
|
224
|
-
- lib/serrano/
|
225
|
-
- lib/serrano/
|
226
|
-
- lib/serrano/miner.rb
|
184
|
+
- lib/serrano/link_methods_array.rb
|
185
|
+
- lib/serrano/link_methods_hash.rb
|
227
186
|
- lib/serrano/request.rb
|
187
|
+
- lib/serrano/styles.rb
|
228
188
|
- lib/serrano/version.rb
|
229
189
|
- serrano.gemspec
|
230
190
|
homepage: http://github.com/sckott/serrano
|
data/lib/serrano/mine_utils.rb
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
require 'nokogiri'
|
2
|
-
require 'uuidtools'
|
3
|
-
|
4
|
-
def detect_type(x)
|
5
|
-
ctype = x.headers['content-type']
|
6
|
-
case ctype
|
7
|
-
when 'text/xml'
|
8
|
-
'xml'
|
9
|
-
when 'text/plain'
|
10
|
-
'plain'
|
11
|
-
when 'application/pdf'
|
12
|
-
'pdf'
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def make_ext(x)
|
17
|
-
case x
|
18
|
-
when 'xml'
|
19
|
-
'xml'
|
20
|
-
when 'plain'
|
21
|
-
'txt'
|
22
|
-
when 'pdf'
|
23
|
-
'pdf'
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def make_path(type)
|
28
|
-
# id = x.split('article/')[1].split('?')[0]
|
29
|
-
# path = id + '.' + type
|
30
|
-
# return path
|
31
|
-
type = make_ext(type)
|
32
|
-
uuid = UUIDTools::UUID.random_create.to_s
|
33
|
-
path = uuid + '.' + type
|
34
|
-
return path
|
35
|
-
end
|
36
|
-
|
37
|
-
def write_disk(res, path)
|
38
|
-
f = File.new(path, "wb")
|
39
|
-
f.write(res.body)
|
40
|
-
f.close()
|
41
|
-
end
|
42
|
-
|
43
|
-
def read_disk(path)
|
44
|
-
return File.read(path)
|
45
|
-
end
|
46
|
-
|
47
|
-
def parse_xml(x)
|
48
|
-
text = read_disk(x)
|
49
|
-
xml = Nokogiri.parse(text)
|
50
|
-
return xml
|
51
|
-
end
|
52
|
-
|
53
|
-
def parse_plain(x)
|
54
|
-
text = read_disk(x)
|
55
|
-
return text
|
56
|
-
end
|
57
|
-
|
58
|
-
def parse_pdf(x)
|
59
|
-
raise "not ready yet"
|
60
|
-
end
|
61
|
-
|
62
|
-
def is_elsevier(x)
|
63
|
-
tmp = x.match 'elsevier'
|
64
|
-
!tmp.nil?
|
65
|
-
end
|
data/lib/serrano/mined.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
require "nokogiri"
|
2
|
-
|
3
|
-
##
|
4
|
-
# Serrano::Mined
|
5
|
-
#
|
6
|
-
# Class to give back text mining object
|
7
|
-
module Serrano
|
8
|
-
class Mined #:nodoc:
|
9
|
-
attr_accessor :url
|
10
|
-
attr_accessor :path
|
11
|
-
attr_accessor :type
|
12
|
-
|
13
|
-
def initialize(url, path, type)
|
14
|
-
self.url = url
|
15
|
-
self.path = path
|
16
|
-
self.type = type
|
17
|
-
end
|
18
|
-
|
19
|
-
def parse
|
20
|
-
case self.type
|
21
|
-
when 'xml'
|
22
|
-
parse_xml(self.path)
|
23
|
-
when 'plain'
|
24
|
-
parse_plain(self.path)
|
25
|
-
when 'pdf'
|
26
|
-
parse_pdf(self.path)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
end
|
data/lib/serrano/miner.rb
DELETED
@@ -1,42 +0,0 @@
|
|
1
|
-
require "faraday"
|
2
|
-
require "multi_json"
|
3
|
-
require "serrano/errors"
|
4
|
-
require "serrano/constants"
|
5
|
-
require 'serrano/helpers/configuration'
|
6
|
-
require 'serrano/mined'
|
7
|
-
require 'serrano/mine_utils'
|
8
|
-
|
9
|
-
##
|
10
|
-
# Serrano::Miner
|
11
|
-
#
|
12
|
-
# Class to give back text mining object
|
13
|
-
module Serrano
|
14
|
-
class Miner #:nodoc:
|
15
|
-
attr_accessor :url
|
16
|
-
attr_accessor :type
|
17
|
-
|
18
|
-
def initialize(url, type)
|
19
|
-
self.url = url
|
20
|
-
self.type = type
|
21
|
-
end
|
22
|
-
|
23
|
-
def perform
|
24
|
-
conn = Faraday.new(:url => self.url)
|
25
|
-
|
26
|
-
if is_elsevier(self.url)
|
27
|
-
res = conn.get do |req|
|
28
|
-
req.headers['X-ELS-APIKey'] = Serrano.elsevier_key
|
29
|
-
end
|
30
|
-
else
|
31
|
-
res = conn.get
|
32
|
-
end
|
33
|
-
|
34
|
-
type = detect_type(res)
|
35
|
-
path = make_path(type)
|
36
|
-
write_disk(res, path)
|
37
|
-
|
38
|
-
return Mined.new(self.url, path, type)
|
39
|
-
end
|
40
|
-
|
41
|
-
end
|
42
|
-
end
|