terrier 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +3 -0
- data/.travis.yml +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/Rakefile +8 -0
- data/lib/terrier.rb +58 -0
- data/lib/terrier/doi_data.rb +55 -0
- data/lib/terrier/html_data.rb +78 -0
- data/lib/terrier/version.rb +3 -0
- data/spec/doi_data_spec.rb +45 -0
- data/spec/html_data_spec.rb +58 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/terrier_spec.rb +86 -0
- data/terrier.gemspec +29 -0
- metadata +166 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8d5db9a930b0e7613577840fd0baab3304d685e7
|
4
|
+
data.tar.gz: d5f21a1ca92b7320f801cffd8b88adb6dcd2acc8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f5920676d492c32a8b9dd2c4d2d836513fe6a6a0c6497296c00942f13e55e318f713430731d364a1d9b2646315d0145675113b9f92d6b46b1837af53bcd95f65
|
7
|
+
data.tar.gz: 0c7b2f7a445fa2f6eee95017f3ec92a7eba983fe7c9573fb2ecaeff0a84e5419f14f6bd97e856d9286fb58dd443aba8981c3c24e246f43a744e6245c1c4845b5
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 The Winnower
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Terrier
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/terrier.svg)](http://badge.fury.io/rb/terrier) [![Build Status](https://travis-ci.org/thewinnower/Terrier.svg?branch=master)](https://travis-ci.org/thewinnower/Terrier)
|
4
|
+
|
5
|
+
Terrier is used to retrieve metadata of scholarly works from a variety of sources.
|
6
|
+
|
7
|
+
Terrier can be used to pull metadata on any article that has been issued a digital object identifier (DOI) or that is hosted on the Zenodo Repository, maintained by CERN.
|
8
|
+
|
9
|
+
With Terrier you can enter any scholarly article URL or DOI to retrieve scholarly information about that article. Terrier will pull full PDFs of scholarly content if it is hosted on the Zenodo repository. The production of Terrier was funded by [OpenAire](https://www.openaire.eu/openaire-open-peer-review-tenders) and developed by [The Winnower](https://thewinnower.com).
|
10
|
+
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'terrier'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install terrier
|
27
|
+
|
28
|
+
## Usage
|
29
|
+
|
30
|
+
To use Terrier, simply call `Terrier.new` with a DOI or a Zenodo URL.
|
31
|
+
|
32
|
+
### Examples
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
Terrier.new('https://zenodo.org/record/32475')
|
36
|
+
Terrier.new('doi:10.1186/1479-5868-10-79')
|
37
|
+
```
|
38
|
+
|
39
|
+
`Terrier.new` fetches the metadata immediately; call `citation_data` on the returned object to get a hash of information about the document. Its keys are:
|
40
|
+
|
41
|
+
* url
|
42
|
+
* journal
|
43
|
+
* title
|
44
|
+
* authors
|
45
|
+
* publication_date
|
46
|
+
* publication_year #Note only returned if document has a DOI
|
47
|
+
* doi
|
48
|
+
* issn
|
49
|
+
* zenodo_pdf # Note: the PDF URL if the work is published on Zenodo and has a PDF; otherwise nil.
|
50
|
+
* bibliography
|
51
|
+
|
52
|
+
## Contributing
|
53
|
+
|
54
|
+
1. Fork it ( https://github.com/thewinnower/terrier/fork )
|
55
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
56
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
57
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
58
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/lib/terrier.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'httparty'
|
2
|
+
require 'nokogiri'
|
3
|
+
require "terrier/version"
|
4
|
+
require "terrier/html_data"
|
5
|
+
require "terrier/doi_data"
|
6
|
+
|
7
|
+
# Resolves a scholarly-work identifier -- an http(s) URL or a DOI string --
# into a hash of citation metadata exposed through #citation_data.
#
# The lookup is delegated to Terrier::HtmlData (for URLs) and
# Terrier::DoiData (for DOIs) and runs eagerly from the constructor.
class Terrier
  # #zenodo_pdf is defined explicitly below, so it is not listed here
  # (listing it only produced a "method redefined" warning).
  attr_reader :identifier, :citation_data

  # identifier - String; an http(s) URL or a DOI such as
  #              "doi:10.1186/1479-5868-10-79".
  def initialize(identifier)
    @identifier = identifier
    @citation_data = {}
    data
  end

  # Populates #citation_data. Identifiers with an http/https scheme are
  # scraped as HTML pages; everything else is handed to the DOI lookup.
  def data
    if uri?(identifier)
      html_data
    else
      doi_data(identifier)
    end
  end

  # Returns the URL of the work's PDF on Zenodo, or nil when none is known.
  #
  # If the article was pulled by DOI and its resolved URL points at Zenodo,
  # the PDF link is scraped from that page. The link is read through the
  # public HtmlData#data hash rather than by calling a helper on HtmlData
  # directly, so this works regardless of that helper's visibility.
  def zenodo_pdf
    known_pdf = citation_data[:zenodo_pdf]
    return known_pdf unless missing?(known_pdf)

    # :url may be absent (e.g. the DOI lookup returned nothing).
    url = citation_data[:url].to_s
    return nil unless url.include?("zenodo")

    scraped_pdf = Terrier::HtmlData.new(url).data[:zenodo_pdf]
    missing?(scraped_pdf) ? nil : scraped_pdf
  end

  private

  # True for nil and for empty strings (the HTML scraper may report "no PDF"
  # as an empty string, which Ruby would otherwise treat as truthy).
  def missing?(value)
    value.nil? || value.to_s.empty?
  end

  # True only when +string+ parses as a URI whose scheme is http or https.
  def uri?(string)
    %w( http https ).include?(URI.parse(string).scheme)
  rescue URI::BadURIError, URI::InvalidURIError
    false
  end

  # Scrapes the page at #identifier, then enriches the result with DOI
  # metadata when the page advertises a DOI.
  def html_data
    @citation_data = Terrier::HtmlData.new(identifier).data
    doi = citation_data[:doi]
    doi_data(doi) if doi
  end

  # Merges DOI metadata over whatever is already in @citation_data.
  # A nil value from the DOI lookup never overwrites an existing value.
  def doi_data(doi)
    fetched_data = Terrier::DoiData.new(doi).data || {}
    @citation_data = @citation_data.merge(fetched_data) do |_key, current, fetched|
      fetched.nil? ? current : fetched
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Looks up citation metadata for a DOI by requesting http://dx.doi.org/<doi>
# with content-negotiation headers: once for CSL JSON and once for a
# formatted APA bibliography entry.
class Terrier::DoiData
  include HTTParty
  attr_reader :doi, :citation_info

  # doi - String DOI, interpolated verbatim into the lookup URL.
  def initialize(doi)
    @doi = doi
  end

  # Performs both HTTP requests and returns a Hash with the keys :url,
  # :journal, :title, :authors, :publication_year, :issn and :bibliography.
  # Fields missing from the CSL JSON come back as nil (or [] for :authors).
  def data
    @citation_info = doi_citation_info
    {
      url: citation_info["URL"],
      journal: citation_info["publisher"],
      title: citation_info["title"],
      authors: authors,
      publication_year: publication_year,
      issn: citation_info["ISSN"],
      bibliography: bibliography
    }
  end

  private

  # The address both requests are sent to.
  def doi_url
    "http://dx.doi.org/#{doi}"
  end

  # Fetches the APA-formatted reference and turns bare http(s) URLs and
  # "doi:..." tokens inside it into HTML links (DOI links point at doi.org).
  def bibliography
    self.class.get(doi_url, headers: bibliography_header)
      .strip
      .force_encoding("utf-8")
      .gsub(/(https?:\/\/[\S]+)/, '<a href="\0">\0</a>')
      .gsub(/(doi:[^\s|<|>]+)/, '<a href="\0">\0</a>')
      .gsub('="doi:', '="https://doi.org/')
  end

  # Fetches the CSL JSON record; the response is parsed as JSON.
  def doi_citation_info
    self.class.get(doi_url, headers: citation_header, format: :json)
  end

  def bibliography_header
    { "Accept" => "text/x-bibliography; style=apa" }
  end

  def citation_header
    { "Accept" => "application/vnd.citationstyles.csl+json;q=1.0" }
  end

  # Returns an Array of "Given Family" strings; [] when the record lists no
  # authors. Entries that carry only one name part (or only a CSL "literal"
  # name) are rendered without stray spaces.
  def authors
    Array(citation_info["author"]).map do |author|
      full_name = [author["given"], author["family"]].compact.join(" ")
      full_name.empty? ? author["literal"].to_s : full_name
    end
  end

  # Returns the raw "issued" value when present, otherwise the first element
  # of the first "date-parts" entry; nil when the record has no issue date.
  def publication_year
    issued = citation_info["issued"] || {}
    return issued["raw"] if issued["raw"]

    Array(Array(issued["date-parts"]).first).first
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# Downloads an article page and extracts citation metadata from its
# <meta name="..." content="..."> tags.
class Terrier::HtmlData
  include HTTParty
  attr_reader :url

  # Candidate meta-tag names per field, in priority order: the first name
  # that yields any content wins (see #collect_meta_data).
  PUBLICATION_META_TAGS = ["citation_journal_title", "dc.publisher", "prism.publicationName"]
  TITLE_META_TAGS = ["citation_title", "dc.title", "prism.title"]
  AUTHOR_META_TAGS = ["citation_author", "dc.creator", "citation_authors", "Authors", "AUTHOR", "creator"]
  # Date-bearing tags only. This list previously fell back to the publisher
  # tags, which made the publisher's name show up as the publication date.
  PUBLICATION_DATE_META_TAGS = ["citation_publication_date", "citation_date", "dc.date", "prism.publicationDate"]
  DOI_META_TAGS = ["citation_doi", "dc.identifier"]
  LICENSING_TAGS = ["dc.rights"]
  ISSN_TAGS = ["prism.issn"]

  # An https://zenodo.org/... link ending in "pdf". The match stops at
  # whitespace, quotes and angle brackets so it cannot run past the end of
  # an HTML attribute value.
  ZENODO_PDF_PATTERN = %r{\bhttps://zenodo\.org[^\s"'<>]*pdf\b}

  # url - String; must parse as an http or https URI.
  #
  # Fetches the page immediately. Raises Terrier::UrlError for any other
  # kind of string.
  def initialize(url)
    raise Terrier::UrlError, "bad url given" unless uri?(url)
    @url = url
    @raw = self.class.get(url)
    @parsed_html = Nokogiri::HTML(@raw)
  end

  # Returns a Hash with the keys :url, :journal, :title, :authors,
  # :publication_date, :doi, :issn, :zenodo_pdf and :bibliography.
  # The hash is built once and the same object is returned on every call.
  def data
    return @_data if @_data
    @_data = {
      url: url,
      journal: collect_meta_data(@parsed_html, PUBLICATION_META_TAGS).first,
      title: collect_meta_data(@parsed_html, TITLE_META_TAGS).first,
      authors: collect_meta_data(@parsed_html, AUTHOR_META_TAGS).uniq,
      publication_date: collect_meta_data(@parsed_html, PUBLICATION_DATE_META_TAGS).first,
      doi: collect_meta_data(@parsed_html, DOI_META_TAGS).first,
      # NOTE(review): ISSN_TAGS is defined above but not consulted here.
      issn: nil,
      zenodo_pdf: zenodo_pdf
    }

    # Stored in the memoized hash so later calls also include :bibliography.
    @_data[:bibliography] = bibliography(@_data)
    @_data
  end

  # Returns the first Zenodo PDF link found in the raw page, or nil when the
  # page contains none. Public because Terrier#zenodo_pdf calls it.
  def zenodo_pdf
    return @zenodo_pdf if defined?(@zenodo_pdf)

    found = ZENODO_PDF_PATTERN.match(@raw)
    @zenodo_pdf = found && found[0]
  end

  private

  # Builds the "Authors. (date). Title. Journal. reference" string.
  def bibliography(parsed_data)
    "#{parsed_data[:authors].join(', ')}. (#{parsed_data[:publication_date]}). #{parsed_data[:title]}. #{parsed_data[:journal]}. #{bibliography_reference(parsed_data)}"
  end

  # True only when +string+ parses as a URI whose scheme is http or https.
  def uri?(string)
    %w( http https ).include?(URI.parse(string).scheme)
  rescue URI::BadURIError, URI::InvalidURIError
    false
  end

  # Returns the content values of the first meta name in +meta_names_array+
  # that occurs in +parsed_html+, or [] when none of them occurs.
  def collect_meta_data(parsed_html, meta_names_array)
    meta_names_array.each do |meta_name|
      values = parsed_html.xpath("//meta[@name='#{meta_name}']/@content").map(&:value)
      return values unless values.empty?
    end
    []
  end

  # Trailing part of the bibliography: a doi.org link when a DOI is known,
  # otherwise the page URL.
  #
  # NOTE(review): the DOI comes from the remote page and is interpolated
  # into HTML unescaped -- escape it before rendering in a browser.
  def bibliography_reference(parsed_data)
    if parsed_data[:doi]
      "<a href='https://doi.org/#{parsed_data[:doi]}'>DOI: #{parsed_data[:doi]}</a>"
    else
      parsed_data[:url]
    end
  end
end
|
76
|
+
|
77
|
+
# Raised by Terrier::HtmlData.new when the given string is not an
# http(s) URL.
class Terrier::UrlError < StandardError; end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
describe Terrier::DoiData do
  let(:doi) { 'doi:10.1186/1479-5868-10-79' }

  # NOTE: `use_vcr_cassette` is called without a name in both groups below,
  # so VCR derives the cassette name from the describe strings. The
  # misspelling 'initalizer' is therefore kept; renaming the group would
  # orphan the recorded cassette.
  describe 'initalizer' do
    use_vcr_cassette

    it "exposes the doi it was given" do
      expect(Terrier::DoiData.new(doi).doi).to eq(doi)
    end
  end

  describe 'data' do
    use_vcr_cassette

    # Built fresh for every example, exactly as each example did inline before.
    subject(:data) { Terrier::DoiData.new(doi).data }

    it "returns a collection containing the url" do
      expect(data[:url]).to eq('http://dx.doi.org/10.1186/1479-5868-10-79')
    end

    it "returns a collection containing the journal publisher" do
      expect(data[:journal]).to eq('Springer Science + Business Media')
    end

    it "returns a collection containing the article title" do
      expect(data[:title]).to eq('The relationship between cell phone use, physical and sedentary activity, and cardiorespiratory fitness in a sample of U.S. college students')
    end

    it "returns a collection containing a collection of authors" do
      expect(data[:authors]).to eq(["Andrew Lepp", "Jacob E Barkley", "Gabriel J Sanders", "Michael Rebold", "Peter Gates"])
    end

    it "returns a collection containing publication year" do
      expect(data[:publication_year]).to eq(2013)
    end

    it "returns a collection containing issn" do
      expect(data[:issn]).to eq(["1479-5868"])
    end

    it "returns a collection containing a well formed bibliography" do
      expect(data[:bibliography]).to eq("Lepp, A., Barkley, J. E., Sanders, G. J., Rebold, M., & Gates, P. (2013). The relationship between cell phone use, physical and sedentary activity, and cardiorespiratory fitness in a sample of U.S. college students. International Journal of Behavioral Nutrition and Physical Activity, 10(1), 79. <a href=\"https://doi.org/10.1186/1479-5868-10-79\">doi:10.1186/1479-5868-10-79</a>")
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
describe Terrier::HtmlData do
  # Fetches and parses +url+, returning the extracted metadata hash.
  def data_for(url)
    Terrier::HtmlData.new(url).data
  end

  # NOTE: this group calls `use_vcr_cassette` without a name, so VCR derives
  # the cassette name from the describe strings. The misspelling
  # 'initalizer' is kept on purpose; renaming it would orphan the cassette.
  describe 'initalizer' do
    use_vcr_cassette

    it "sets passed in arg as url" do
      terrier = Terrier::HtmlData.new('http://www.example.com')
      expect(terrier.url).to eq('http://www.example.com')
    end

    it "raises Terrier::UrlError if a bad url is sent in" do
      expect { Terrier::HtmlData.new('htasdasd:/asdsad,sd') }.to raise_error(Terrier::UrlError)
    end
  end

  describe 'data' do
    use_vcr_cassette('htmldata_data', :record => :new_episodes)

    it "returns a collection containing the original url" do
      expect(data_for('https://zenodo.org/record/32475')[:url]).to eq('https://zenodo.org/record/32475')
    end

    it "returns nil for the journal if no journal meta tags exist" do
      expect(data_for('https://zenodo.org/record/46077?ln=en')[:journal]).to eq(nil)
    end

    it "returns a collection containing the article title" do
      expect(data_for('https://zenodo.org/record/32481?ln=en')[:title]).to eq('Big and Smart Data Analytics - Possible Advantages to Clinical Practice')
    end

    it "returns a collection containing a collection of authors" do
      expect(data_for('https://zenodo.org/record/32481?ln=en')[:authors]).to eq(["Di Meglio, Alberto", "Manca, Marco"])
    end

    it "returns a collection containing publication_date" do
      expect(data_for('https://zenodo.org/record/32481?ln=en')[:publication_date]).to eq("2015/10/20")
    end

    it "returns a collection containing the doi" do
      expect(data_for('https://zenodo.org/record/32481?ln=en')[:doi]).to eq("10.5281/zenodo.32481")
    end

    it "returns a collection containing the pdf if it exists on zenodo" do
      expect(data_for('https://zenodo.org/record/46067?ln=en')[:zenodo_pdf]).to eq('https://zenodo.org/record/46067/files/article.pdf')
    end

    it "returns nil for the issn" do
      expect(data_for('https://zenodo.org/record/46077?ln=en')[:issn]).to eq(nil)
    end

    it "returns a collection containing a well formed bibliography" do
      expect(data_for('https://zenodo.org/record/32481?ln=en')[:bibliography]).to eq("Di Meglio, Alberto, Manca, Marco. (2015/10/20). Big and Smart Data Analytics - Possible Advantages to Clinical Practice. . <a href='https://doi.org/10.5281/zenodo.32481'>DOI: 10.5281/zenodo.32481</a>")
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'bundler/setup'
|
2
|
+
Bundler.setup
|
3
|
+
|
4
|
+
require 'terrier' # and any other gems you need
|
5
|
+
require 'vcr'
|
6
|
+
|
7
|
+
# Makes VCR's `use_vcr_cassette` macro available inside example groups.
RSpec.configure { |rspec| rspec.extend VCR::RSpec::Macros }

# Recorded HTTP interactions live under spec/vcr_cassettes and are replayed
# through WebMock.
VCR.configure do |vcr|
  vcr.cassette_library_dir = "spec/vcr_cassettes"
  vcr.hook_into :webmock # or :fakeweb

  # 604800 seconds, i.e. seven days.
  vcr.default_cassette_options = { :re_record_interval => 7 * 24 * 60 * 60 }
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
describe Terrier do
  let(:zenodo_url) { 'https://zenodo.org/record/32475' }
  let(:doi) { 'doi:10.1186/1479-5868-10-79' }

  # NOTE: this group calls `use_vcr_cassette` without a name, so VCR derives
  # the cassette name from the describe strings. The misspelling
  # 'initalizer' is kept on purpose; renaming it would orphan the cassette.
  describe 'initalizer' do
    use_vcr_cassette

    it "sets passed in arg to identifier" do
      allow_any_instance_of(Terrier).to receive(:data)
      instance = Terrier.new("blah")
      expect(instance.identifier).to eq("blah")
    end

    it "sets citation_data to {}" do
      allow_any_instance_of(Terrier).to receive(:data)
      instance = Terrier.new("blah")
      expect(instance.citation_data).to eq({})
    end

    it 'calls the internal method data' do
      expect_any_instance_of(Terrier).to receive(:data)
      Terrier.new("blah")
    end
  end

  describe 'data' do
    use_vcr_cassette('data', :record => :new_episodes)

    it "calls html_data if identifier is a url" do
      expect_any_instance_of(Terrier).to receive(:html_data)
      Terrier.new(zenodo_url)
    end

    it "does not call html_data if it's a doi identifier" do
      expect_any_instance_of(Terrier).not_to receive(:html_data)
      Terrier.new(doi)
    end

    it "calls doi_data if identifier is a doi identifier" do
      expect_any_instance_of(Terrier).to receive(:doi_data).with(doi)
      Terrier.new(doi)
    end

    it "does not call doi_data if it's a url and doi is not found" do
      allow_any_instance_of(Terrier).to receive(:citation_data).and_return({})
      expect_any_instance_of(Terrier).not_to receive(:doi_data)
      Terrier.new(zenodo_url)
    end

    it "calls doi_data if it's a url and doi is found" do
      expect_any_instance_of(Terrier).to receive(:doi_data)
      Terrier.new(zenodo_url)
    end

    it 'merges doi data if found from url' do
      http_data = {
        url: "http://www.example.com/paper/path",
        journal: "The Winnower",
        publication_date: "2015/07/09",
        title: "When Publishers Aren't getting it done.",
        authors: ["Neil Christensen", "Stacy Konkiel", "Martin Paul Eve", "Joshua Nicholson", "Lenny Teytelman"],
        doi: "12345/winn.12345",
        issn: nil,
        zenodo_pdf: "https://zenodo.org/record/22796/files/14th_South_African_Congress_on_Biochemistry_and_Molecular_Biology.pdf",
        bibliography: "Neil Christensen, Stacy Konkiel, Martin Paul Eve, Joshua Nicholson, Lenny Teytelman. (2015/07/09). When Publishers Aren't Getting It Done. The Winnower. DOI: 12345/winn.12345"
      }
      allow_any_instance_of(Terrier::HtmlData).to receive(:data).and_return(http_data)

      doi_data = {
        url: "http://dx.doi.org/10.15200/winn.140832.20404",
        issn: ["2373-146X"],
        journal: "The Winnower, LLC",
        publication_year: 2014,
        title: "The R-Factor: A Measure of Scientific Veracity",
        authors: ["Joshua Nicholson", "Yuri Lazebnik"],
        bibliography: "Neil Christensen, Stacy Konkiel, Martin Paul Eve, Joshua Nicholson, Lenny Teytelman. (2015/07/09). When Publishers Aren't Getting It Done. The Winnower. DOI: 12345/winn.12345"
      }
      allow_any_instance_of(Terrier::DoiData).to receive(:data).and_return(doi_data)

      # Values from the DOI lookup take precedence over the scraped ones.
      expect(Terrier.new(zenodo_url).citation_data).to eq(http_data.merge(doi_data))
    end
  end
end
|
data/terrier.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'terrier/version'
|
5
|
+
|
6
|
+
# Gem manifest for terrier. The packaged file list is taken from the git
# index, so the gem has to be built from inside a git checkout.
Gem::Specification.new do |gem|
  # Identity
  gem.name     = "terrier"
  gem.version  = Terrier::VERSION
  gem.homepage = "https://github.com/thewinnower/terrier"
  gem.license  = "MIT"
  gem.authors  = ["Matthew Bergman", "The Winnower"]
  gem.email    = ["matthew.z.bergman@gmail.com"]

  # Descriptions
  gem.summary     = "Terrier: Import Doi and Zenodo Papers"
  gem.description = "Terrier is used to retrieve metadata of scholarly works from a variety of sources. Terrier can be used to pull metadata on any article that has been issued a digital object identifier (DOI) or that is hosted on the Zenodo Repository, maintained by CERN."

  # Contents: every tracked file, with bin/ entries as executables and
  # test/spec/features entries as test files.
  tracked_files     = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_dependency "httparty", "~> 0"
  gem.add_dependency "nokogiri", "~> 1.6.7"

  # Development dependencies
  gem.add_development_dependency "bundler", "~> 1.7"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.0"
  gem.add_development_dependency "vcr", "~> 2.9.3"
  gem.add_development_dependency "webmock", "~> 1.24.1"
end
|
metadata
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: terrier
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matthew Bergman
|
8
|
+
- The Winnower
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2016-03-02 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: httparty
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - "~>"
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: nokogiri
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: 1.6.7
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: 1.6.7
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: bundler
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '1.7'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '1.7'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: rake
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '10.0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '10.0'
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: rspec
|
72
|
+
requirement: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '3.0'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - "~>"
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '3.0'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: vcr
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - "~>"
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: 2.9.3
|
91
|
+
type: :development
|
92
|
+
prerelease: false
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - "~>"
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: 2.9.3
|
98
|
+
- !ruby/object:Gem::Dependency
|
99
|
+
name: webmock
|
100
|
+
requirement: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - "~>"
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: 1.24.1
|
105
|
+
type: :development
|
106
|
+
prerelease: false
|
107
|
+
version_requirements: !ruby/object:Gem::Requirement
|
108
|
+
requirements:
|
109
|
+
- - "~>"
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: 1.24.1
|
112
|
+
description: Terrier is used to retrieve metadata of scholarly works from a variety
|
113
|
+
of sources. Terrier can be used to pull metadata on any article that has been issued
|
114
|
+
a digital object identifier (DOI) or that is hosted on the Zenodo Repository, maintained
|
115
|
+
by CERN.
|
116
|
+
email:
|
117
|
+
- matthew.z.bergman@gmail.com
|
118
|
+
executables: []
|
119
|
+
extensions: []
|
120
|
+
extra_rdoc_files: []
|
121
|
+
files:
|
122
|
+
- ".gitignore"
|
123
|
+
- ".rspec"
|
124
|
+
- ".travis.yml"
|
125
|
+
- Gemfile
|
126
|
+
- LICENSE.txt
|
127
|
+
- README.md
|
128
|
+
- Rakefile
|
129
|
+
- lib/terrier.rb
|
130
|
+
- lib/terrier/doi_data.rb
|
131
|
+
- lib/terrier/html_data.rb
|
132
|
+
- lib/terrier/version.rb
|
133
|
+
- spec/doi_data_spec.rb
|
134
|
+
- spec/html_data_spec.rb
|
135
|
+
- spec/spec_helper.rb
|
136
|
+
- spec/terrier_spec.rb
|
137
|
+
- terrier.gemspec
|
138
|
+
homepage: https://github.com/thewinnower/terrier
|
139
|
+
licenses:
|
140
|
+
- MIT
|
141
|
+
metadata: {}
|
142
|
+
post_install_message:
|
143
|
+
rdoc_options: []
|
144
|
+
require_paths:
|
145
|
+
- lib
|
146
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
147
|
+
requirements:
|
148
|
+
- - ">="
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: '0'
|
151
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
|
+
requirements:
|
153
|
+
- - ">="
|
154
|
+
- !ruby/object:Gem::Version
|
155
|
+
version: '0'
|
156
|
+
requirements: []
|
157
|
+
rubyforge_project:
|
158
|
+
rubygems_version: 2.4.5
|
159
|
+
signing_key:
|
160
|
+
specification_version: 4
|
161
|
+
summary: 'Terrier: Import Doi and Zenodo Papers'
|
162
|
+
test_files:
|
163
|
+
- spec/doi_data_spec.rb
|
164
|
+
- spec/html_data_spec.rb
|
165
|
+
- spec/spec_helper.rb
|
166
|
+
- spec/terrier_spec.rb
|