plos 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +84 -2
- data/lib/plos.rb +9 -0
- data/lib/plos/affiliation.rb +15 -0
- data/lib/plos/article.rb +86 -0
- data/lib/plos/article_ref.rb +12 -21
- data/lib/plos/article_set.rb +9 -0
- data/lib/plos/client.rb +17 -4
- data/lib/plos/contributor.rb +25 -0
- data/lib/plos/figure.rb +55 -0
- data/lib/plos/name.rb +19 -0
- data/lib/plos/reference.rb +38 -0
- data/lib/plos/section.rb +36 -0
- data/lib/plos/version.rb +1 -1
- data/lib/plos/xml_helpers.rb +37 -0
- data/spec/article1.xml +999 -0
- data/spec/article1_spec.rb +186 -0
- data/spec/article2.xml +1110 -0
- data/spec/article2_spec.rb +51 -0
- metadata +21 -4
data/README.md
CHANGED
@@ -18,6 +18,10 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
+
### Searching
|
22
|
+
|
23
|
+
You can perform a basic search using the ```PLOS::Client.search(query, rows, start)``` method. The last two parameters are optional. That method returns a ```PLOS::ArticleSet``` object. ```ArticleSet``` inherits from Array and includes some meta-information about the search. The following example shows the information that's available:
|
24
|
+
|
21
25
|
```ruby
|
22
26
|
require 'plos'
|
23
27
|
|
@@ -27,8 +31,86 @@ hits.each do |hit|
|
|
27
31
|
puts "#{hit.score} - #{hit.title} - #{hit.article_url}"
|
28
32
|
end
|
29
33
|
|
30
|
-
|
31
|
-
|
34
|
+
hits.status # Return status of the query (0 is success)
|
35
|
+
hits.time # The amount of time the query took (in ms)
|
36
|
+
hits.num_found # Total number of results
|
37
|
+
hits.max_score # Score of the closest matching document
|
38
|
+
hits.start # Index of the first result
|
39
|
+
|
40
|
+
xml = hits[2].article_xml
|
41
|
+
puts hits[2].citation
|
42
|
+
```
|
43
|
+
|
44
|
+
Change the number of results and the starting position. The following retrieves 50 results starting at result 100:
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
require 'plos'
|
48
|
+
client = PLOS::Client.new(ENV["API_KEY"])
|
49
|
+
hits = client.search("xenograft", 50, 100)
|
50
|
+
```
|
51
|
+
|
52
|
+
Retrieve all results (paged). The following retrieves all results 200 - 300:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
require 'plos'
|
56
|
+
client = PLOS::Client.new(ENV["API_KEY"])
|
57
|
+
hits = client.all(100, 200)
|
58
|
+
```
|
59
|
+
|
60
|
+
### Getting the Article Details
|
61
|
+
|
62
|
+
You can get the full article from the ```ArticleRef``` in a number of ways.
|
63
|
+
|
64
|
+
You can get the raw xml content using ```ArticleRef.article_content```. For example, the following returns a string:
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
require 'plos'
|
68
|
+
client = PLOS::Client.new(ENV["API_KEY"])
|
69
|
+
hits = client.search("xenograft")
|
70
|
+
str = hits.first.article_content
|
71
|
+
```
|
72
|
+
|
73
|
+
You may also get the parsed xml content using ```ArticleRef.article_xml```. For example, the following returns a ```Nokogiri::XML::Document```:
|
74
|
+
|
75
|
+
```ruby
|
76
|
+
require 'plos'
|
77
|
+
client = PLOS::Client.new(ENV["API_KEY"])
|
78
|
+
hits = client.search("xenograft")
|
79
|
+
xml_doc = hits.first.article_xml
|
80
|
+
```
|
81
|
+
|
82
|
+
Finally you may get an ```Article``` object using ```ArticleRef.article```. For example, the following returns a ```PLOS::Article```:
|
83
|
+
|
84
|
+
```ruby
|
85
|
+
require 'plos'
|
86
|
+
client = PLOS::Client.new(ENV["API_KEY"])
|
87
|
+
hits = client.search("xenograft")
|
88
|
+
article = hits.first.article
|
89
|
+
```
|
90
|
+
|
91
|
+
### Working with Articles
|
92
|
+
|
93
|
+
Once you have an article, you can get a number of pieces of information from that article. The following will give you an idea of the type of information that's available:
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
require 'plos'
|
97
|
+
client = PLOS::Client.new(ENV["API_KEY"])
|
98
|
+
hits = client.search("xenograft")
|
99
|
+
article = hits.first.article
|
100
|
+
|
101
|
+
article.article_title # The title of the article
|
102
|
+
article.article_ids # Returns a Hash of ids. For instance {"doi"=>"##.###/journal.pxxx.###", "publisher-id"=>"###-ABC-###"}
|
103
|
+
article.journal_title # The title of the journal that published the article
|
104
|
+
article.journal_ids # Returns a Hash of ids. Keys could include publisher-id, publisher, allenpress-id, nlm-ta, pmc, etc.
|
105
|
+
article.issns # Returns a Hash of ISSN numbers, keys could include ppub or epub among others.
|
106
|
+
article.affiliations # Returns an Array of PLOS::Affiliation objects representing the organizations involved in this research.
|
107
|
+
article.contributors # Returns an Array of PLOS::Contributor objects representing all the people involved in this research, including authors and editors.
|
108
|
+
article.authors # Returns an Array of PLOS::Name objects, one for each author of this research
|
109
|
+
article.editors # Returns an Array of PLOS::Name objects, one for each editor of this research
|
110
|
+
article.figures # Returns an Array of PLOS::Figure objects representing the figures in this article.
|
111
|
+
article.references # Returns an Array of PLOS::Reference objects representing all the articles this article references.
|
112
|
+
article.sections # Returns an Array of PLOS::Section objects containing the actual content of the article.
|
113
|
+
article.named_content # Returns an Array of Hash objects, each representing a piece of "named-content". Named content is often used to separate genes from other text.
|
32
114
|
```
|
33
115
|
|
34
116
|
## Contributing
|
data/lib/plos.rb
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
require "plos/xml_helpers"
|
2
|
+
require "plos/affiliation"
|
1
3
|
require "plos/article_ref"
|
4
|
+
require "plos/article_set"
|
5
|
+
require "plos/article"
|
6
|
+
require "plos/contributor"
|
2
7
|
require "plos/client"
|
8
|
+
require "plos/figure"
|
9
|
+
require "plos/reference"
|
10
|
+
require "plos/name"
|
11
|
+
require "plos/section"
|
3
12
|
require "plos/version"
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module PLOS
  # One affiliation entry read from an XML node.
  class Affiliation
    include XmlHelpers

    # id      - value of the node's "id" attribute
    # label   - result of tag_value for the "label" tag
    # address - result of tag_value for the "addr-line" tag
    attr_accessor :id, :label, :address

    # Populates the affiliation from +node+.
    #
    # node - XML node that responds to #attr and is accepted by the
    #        tag_value helper mixed in from XmlHelpers.
    def initialize(node)
      @id = node.attr("id")
      @label = tag_value(node, "label")
      @address = tag_value(node, "addr-line")
    end
  end
end
|
data/lib/plos/article.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
module PLOS
  # A parsed article: titles, identifier hashes and the collections of
  # affiliations, contributors, figures, sections, references and
  # named-content entries found in the article XML.
  class Article
    include XmlHelpers

    attr_accessor :article_title, :article_ids, :journal_title, :journal_ids, :issns
    attr_writer :affiliations, :contributors, :figures, :references, :sections, :named_content

    # Builds the article from +node+, the parsed article XML.
    #
    # node - XML document/node that responds to #search.
    def initialize(node)
      @article_title = tag_value(node.search("title-group"), "article-title")
      @journal_title = tag_value(node.search("journal-title-group"), "journal-title")

      # Each hash is keyed by the value of the attribute named in the
      # second argument (see XmlHelpers#nodes_to_hash).
      @issns = nodes_to_hash(node.search("journal-meta/issn"), "pub-type")
      @journal_ids = nodes_to_hash(node.search("journal-meta/journal-id"), "journal-id-type")
      @article_ids = nodes_to_hash(node.search("article-meta/article-id"), "pub-id-type")

      node.search("aff").each { |aff| affiliations << PLOS::Affiliation.new(aff) }
      node.search("contrib").each { |contrib| contributors << PLOS::Contributor.new(contrib) }
      node.search("fig").each { |fig| figures << PLOS::Figure.new(fig) }
      node.search("sec").each { |sec| sections << PLOS::Section.new(sec) }
      node.search("ref").each { |ref| references << PLOS::Reference.new(ref) }

      node.search("named-content").each do |content|
        named_content << { :type => content.attr("content-type"), :value => content.text }
      end
    end

    # Names of all contributors whose type is "author".
    def authors
      contributor_names("author")
    end

    # Names of all contributors whose type is "editor".
    def editors
      contributor_names("editor")
    end

    # The readers below lazily create their backing Array, so callers (and
    # #initialize) can append without checking for nil first.

    def affiliations
      @affiliations ||= []
    end

    def contributors
      @contributors ||= []
    end

    def figures
      @figures ||= []
    end

    def references
      @references ||= []
    end

    def sections
      @sections ||= []
    end

    def named_content
      @named_content ||= []
    end

    private

    # Non-nil names of the contributors whose type equals +kind+.
    def contributor_names(kind)
      contributors.select { |contrib| contrib.type == kind }.map { |contrib| contrib.name }.compact
    end
  end
end
|
85
|
+
|
86
|
+
# <named-content content-type="gene" xlink:type="simple">5′- AGGACGCAAGGAGGGTTTG -3′</named-content>
|
data/lib/plos/article_ref.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
require "rest_client"
|
2
|
-
|
3
1
|
module PLOS
|
4
2
|
class ArticleRef
|
3
|
+
include PLOS::XmlHelpers
|
4
|
+
|
5
5
|
attr_accessor :client
|
6
6
|
attr_accessor :score
|
7
7
|
attr_accessor :type
|
@@ -17,32 +17,23 @@ module PLOS
|
|
17
17
|
alias :title_display= :title=
|
18
18
|
alias :publication_date= :published_at=
|
19
19
|
|
20
|
-
def self.parse_node(node, obj=nil)
|
21
|
-
value = case(node.name)
|
22
|
-
when "arr"
|
23
|
-
node.children.collect { |child| parse_node(child) }
|
24
|
-
when "date"
|
25
|
-
DateTime.parse(node.content)
|
26
|
-
when "float"
|
27
|
-
node.content.to_f
|
28
|
-
else
|
29
|
-
node.content
|
30
|
-
end
|
31
|
-
if node.attr("name") && obj
|
32
|
-
obj.send("#{node.attr("name")}=",value)
|
33
|
-
end
|
34
|
-
value
|
35
|
-
end
|
36
|
-
|
37
20
|
def initialize(client, node)
|
38
21
|
self.client = client
|
39
22
|
node.children.each do |child|
|
40
|
-
|
23
|
+
parse_node(child, self)
|
41
24
|
end
|
42
25
|
end
|
43
26
|
|
27
|
+
def article
|
28
|
+
@article ||= PLOS::Article.new(article_xml)
|
29
|
+
end
|
30
|
+
|
44
31
|
def article_xml
|
45
|
-
Nokogiri::XML(
|
32
|
+
Nokogiri::XML(article_content)
|
33
|
+
end
|
34
|
+
|
35
|
+
def article_content
|
36
|
+
RestClient.get(article_url)
|
46
37
|
end
|
47
38
|
|
48
39
|
def citation(format="RIS")
|
data/lib/plos/client.rb
CHANGED
@@ -16,13 +16,26 @@ module PLOS
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def search(query, rows=50, start=0)
|
19
|
-
result =
|
19
|
+
result = PLOS::ArticleSet.new
|
20
20
|
doc = execute( search_url, { :q => query, :rows => rows, :start => start } )
|
21
21
|
if doc && doc.root
|
22
22
|
doc.root.children.each do |child|
|
23
|
-
|
24
|
-
|
25
|
-
|
23
|
+
if child.name == "lst"
|
24
|
+
child.children.each do |int|
|
25
|
+
case int.attr("name")
|
26
|
+
when "status"
|
27
|
+
result.status = int.text
|
28
|
+
when "QTime"
|
29
|
+
result.time = int.text.to_i
|
30
|
+
end
|
31
|
+
end
|
32
|
+
elsif child.name == "result"
|
33
|
+
result.num_found = child.attr("numFound").to_i if child.attr("numFound")
|
34
|
+
result.start = child.attr("start").to_i if child.attr("start")
|
35
|
+
result.max_score = child.attr("maxScore").to_f if child.attr("maxScore")
|
36
|
+
child.children.each do |doc|
|
37
|
+
result << PLOS::ArticleRef.new(self, doc)
|
38
|
+
end
|
26
39
|
end
|
27
40
|
end
|
28
41
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module PLOS
  # A person (or other contributor) attached to an article, read from a
  # "contrib" XML node.
  class Contributor
    include XmlHelpers

    # name - PLOS::Name built from the first "name" child, or nil when the
    #        node has no "name" child
    # type - value of the "contrib-type" attribute
    # role - result of tag_value for the "role" tag
    attr_accessor :name
    attr_accessor :type
    attr_accessor :role
    attr_writer :xrefs

    # Populates the contributor from +node+.
    #
    # node - XML node that responds to #attr and #search.
    def initialize(node)
      self.type = node.attr("contrib-type")

      # A contrib without a <name> child used to pass nil into
      # PLOS::Name.new, which raised NoMethodError. Leave name as nil
      # instead; Article#authors / #editors already discard nil names.
      name_node = node.search("name").first
      self.name = PLOS::Name.new(name_node) if name_node

      self.role = tag_value(node, "role")

      node.search("xref").each do |xref|
        xrefs << { :type => xref.attr("ref-type"), :id => xref.attr("rid") }
      end
    end

    # Array of { :type => ref-type, :id => rid } hashes, one per "xref"
    # child; created lazily so it is never nil.
    def xrefs
      @xrefs ||= []
    end
  end
end
|
data/lib/plos/figure.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require "rest_client"
|
2
|
+
|
3
|
+
module PLOS
  # A figure read from a "fig" XML node: its identifying attributes plus
  # caption, graphic and object-id details stored as small hashes.
  class Figure
    include XmlHelpers

    attr_accessor :id, :position, :label
    attr_writer :caption, :graphic, :object

    # Populates the figure from +node+.
    #
    # node - XML node that responds to #attr and #search.
    def initialize(node)
      @id = node.attr("id")
      @position = node.attr("position")
      @label = tag_value(node, "label")

      caption_el = node.search("caption").first
      if caption_el
        # Both keys are always written, even when tag_value finds nothing.
        caption[:title] = tag_value(caption_el, "title")
        caption[:body] = tag_value(caption_el, "p")
      end

      graphic_el = node.search("graphic").first
      if graphic_el
        # Copy each attribute only when the graphic node actually has it.
        [[:mimetype, "mimetype"], [:position, "position"], [:link, "xlink:href"]].each do |key, attr_name|
          found = graphic_el.attr(attr_name)
          graphic[key] = found if found
        end
      end

      object_el = node.search("object-id").first
      if object_el
        id_type = object_el.attr("pub-id-type")
        id_text = object_el.text
        object[:type] = id_type if id_type
        object[:value] = id_text if id_text
      end
    end

    # Hash with :title and :body (empty when the figure has no caption).
    def caption
      @caption ||= {}
    end

    # Hash that may hold :mimetype, :position and :link.
    def graphic
      @graphic ||= {}
    end

    # Hash that may hold :type and :value from the "object-id" child.
    def object
      @object ||= {}
    end
  end
end
|
data/lib/plos/name.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module PLOS
  # A personal name read from a "name" XML node.
  class Name
    include XmlHelpers

    # style      - value of the "name-style" attribute, when present
    # given_name - result of tag_value for the "given-names" tag
    # surname    - result of tag_value for the "surname" tag
    attr_accessor :style, :given_name, :surname

    # Populates the name from +node+.
    #
    # node - XML node that responds to #attr.
    def initialize(node)
      name_style = node.attr("name-style")
      # The setter is only called when the attribute exists.
      self.style = name_style if name_style

      @given_name = tag_value(node, "given-names")
      @surname = tag_value(node, "surname")
    end

    # Given name and surname separated by a single space.
    def to_s
      "#{given_name} #{surname}"
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module PLOS
  # A bibliography entry read from a "ref" XML node. Citation details come
  # from the node's "element-citation" descendant(s), when there are any.
  class Reference
    include PLOS::XmlHelpers

    attr_accessor :id
    attr_accessor :label
    attr_accessor :year
    attr_accessor :type
    attr_accessor :title
    attr_accessor :source
    attr_accessor :volume
    attr_accessor :first_page
    attr_accessor :last_page
    attr_writer :authors

    # Populates the reference from +node+.
    #
    # node - XML node that responds to #attr and #search.
    #
    # When the node has no "element-citation" descendant, only id and label
    # are set; the citation fields stay nil and authors stays empty.
    def initialize(node)
      ref_id = node.attr("id")
      self.id = ref_id if ref_id
      self.label = tag_value(node, "label")

      citations = node.search("element-citation")
      # search returns a NodeSet, which is truthy even when empty, so the
      # previous `if citation_node` never skipped anything. Check emptiness
      # explicitly so refs without an element-citation are handled cleanly.
      return if citations.empty?

      # NodeSet#attr yields the attribute object of the first matched node
      # (or nil), hence the .value call.
      publication_type = citations.attr("publication-type")
      self.type = publication_type.value if publication_type

      self.year = tag_value(citations, "year")
      self.title = tag_value(citations, "article-title")
      self.source = tag_value(citations, "source")
      self.volume = tag_value(citations, "volume")
      self.first_page = tag_value(citations, "fpage")
      self.last_page = tag_value(citations, "lpage")

      citations.search("name").each do |name_node|
        authors << PLOS::Name.new(name_node)
      end
    end

    # Array of PLOS::Name objects, one per "name" node in the citation;
    # created lazily so it is never nil.
    def authors
      @authors ||= []
    end
  end
end
|