taxpub 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/taxpub.rb +82 -43
- data/lib/taxpub/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10f95f45417a3ad2aaa954898a158021959abbd4
|
4
|
+
data.tar.gz: 50766bdde4dd782953cde156956b5c31e79f7b72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 227032df578f11ba9f5476d0620a8a4fdf384a7f88163269dd6854cbd8d32501567b8fca8d651b8192908b14921df0e03a8f5258c94c301b130c210561055d57
|
7
|
+
data.tar.gz: 3f8a1240779210bf0f741069537f79da477c5c85dc45f7b0f0e0939dfe8d0a176802f08a72b8986e5cd22ea4d17004703a451b1166c0abfd70da8c0ece5a16b4
|
data/lib/taxpub.rb
CHANGED
@@ -6,6 +6,7 @@ require "taxpub/version"
|
|
6
6
|
require "nokogiri"
|
7
7
|
require "open-uri"
|
8
8
|
require "set"
|
9
|
+
require "byebug"
|
9
10
|
|
10
11
|
class TaxPub
|
11
12
|
|
@@ -64,6 +65,7 @@ class TaxPub
|
|
64
65
|
@doc = File.open(file_path) { |f| Nokogiri::XML(f) }
|
65
66
|
end
|
66
67
|
Validator.validate_nokogiri(@doc)
|
68
|
+
@doc
|
67
69
|
end
|
68
70
|
|
69
71
|
##
|
@@ -73,10 +75,17 @@ class TaxPub
|
|
73
75
|
@doc
|
74
76
|
end
|
75
77
|
|
78
|
+
def type
|
79
|
+
Validator.validate_nokogiri(@doc)
|
80
|
+
xpath = "/article/@article-type"
|
81
|
+
@doc.xpath(xpath).text
|
82
|
+
end
|
83
|
+
|
76
84
|
##
|
77
85
|
# Get the raw text content of the Nokogiri document
|
78
86
|
#
|
79
87
|
def content
|
88
|
+
Validator.validate_nokogiri(@doc)
|
80
89
|
Utils.clean_text(@doc.text)
|
81
90
|
end
|
82
91
|
|
@@ -85,7 +94,8 @@ class TaxPub
|
|
85
94
|
#
|
86
95
|
def doi
|
87
96
|
Validator.validate_nokogiri(@doc)
|
88
|
-
|
97
|
+
xpath = "//*/article-meta/article-id[@pub-id-type='doi']"
|
98
|
+
Utils.expand_doi(@doc.xpath(xpath).text)
|
89
99
|
end
|
90
100
|
|
91
101
|
##
|
@@ -93,7 +103,8 @@ class TaxPub
|
|
93
103
|
#
|
94
104
|
def title
|
95
105
|
Validator.validate_nokogiri(@doc)
|
96
|
-
|
106
|
+
xpath = "//*/article-meta/title-group/article-title"
|
107
|
+
t = @doc.xpath(xpath).text
|
97
108
|
Utils.clean_text(t)
|
98
109
|
end
|
99
110
|
|
@@ -102,7 +113,8 @@ class TaxPub
|
|
102
113
|
#
|
103
114
|
def abstract
|
104
115
|
Validator.validate_nokogiri(@doc)
|
105
|
-
|
116
|
+
xpath = "//*/article-meta/abstract"
|
117
|
+
a = @doc.xpath(xpath).text
|
106
118
|
Utils.clean_text(a)
|
107
119
|
end
|
108
120
|
|
@@ -111,7 +123,8 @@ class TaxPub
|
|
111
123
|
#
|
112
124
|
def keywords
|
113
125
|
Validator.validate_nokogiri(@doc)
|
114
|
-
|
126
|
+
xpath = "//*/article-meta/kwd-group/kwd"
|
127
|
+
@doc.xpath(xpath)
|
115
128
|
.map{|a| Utils.clean_text(a.text)}
|
116
129
|
end
|
117
130
|
|
@@ -121,7 +134,8 @@ class TaxPub
|
|
121
134
|
def authors
|
122
135
|
Validator.validate_nokogiri(@doc)
|
123
136
|
data = []
|
124
|
-
|
137
|
+
xpath = "//*/contrib[@contrib-type='author']"
|
138
|
+
@doc.xpath(xpath).each do |author|
|
125
139
|
affiliations = []
|
126
140
|
author.xpath("xref/@rid").each do |rid|
|
127
141
|
xpath = "//*/aff[@id='#{rid}']/addr-line"
|
@@ -142,25 +156,6 @@ class TaxPub
|
|
142
156
|
data
|
143
157
|
end
|
144
158
|
|
145
|
-
##
|
146
|
-
# Get the conference part of a proceeding
|
147
|
-
#
|
148
|
-
def conference_part
|
149
|
-
Validator.validate_nokogiri(@doc)
|
150
|
-
xpath = "//*/subj-group[@subj-group-type='conference-part']/subject"
|
151
|
-
coll = @doc.xpath(xpath).text
|
152
|
-
Utils.clean_text(coll)
|
153
|
-
end
|
154
|
-
|
155
|
-
##
|
156
|
-
# Get the presenting author of a proceeding
|
157
|
-
#
|
158
|
-
def presenting_author
|
159
|
-
Validator.validate_nokogiri(@doc)
|
160
|
-
xpath = "//*/sec[@sec-type='Presenting author']/p"
|
161
|
-
author = @doc.xpath(xpath).text
|
162
|
-
Utils.clean_text(author)
|
163
|
-
end
|
164
159
|
|
165
160
|
##
|
166
161
|
# Get the corresponding author
|
@@ -173,34 +168,52 @@ class TaxPub
|
|
173
168
|
end
|
174
169
|
|
175
170
|
##
|
176
|
-
# Get the
|
171
|
+
# Get the conference metadata
|
177
172
|
#
|
178
|
-
def
|
173
|
+
def conference
|
179
174
|
Validator.validate_nokogiri(@doc)
|
180
|
-
|
181
|
-
@doc.xpath(
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
175
|
+
xpath = "//*/conference"
|
176
|
+
conf = @doc.xpath(xpath)
|
177
|
+
return {} if conf.empty?
|
178
|
+
session_xpath = "//*/subj-group[@subj-group-type='conference-part']/subject"
|
179
|
+
session = Utils.clean_text(@doc.xpath(session_xpath).text)
|
180
|
+
presenter_xpath = "//*/sec[@sec-type='Presenting author']/p"
|
181
|
+
presenter = Utils.clean_text(@doc.xpath(presenter_xpath).text)
|
182
|
+
{
|
183
|
+
date: Utils.clean_text(conf.at_xpath("conf-date").text),
|
184
|
+
name: Utils.clean_text(conf.at_xpath("conf-name").text),
|
185
|
+
acronym: Utils.clean_text(conf.at_xpath("conf-acronym").text),
|
186
|
+
location: Utils.clean_text(conf.at_xpath("conf-loc").text),
|
187
|
+
theme: Utils.clean_text(conf.at_xpath("conf-theme").text),
|
188
|
+
session: session,
|
189
|
+
presenter: presenter
|
190
|
+
}
|
191
|
+
end
|
192
|
+
|
193
|
+
##
|
194
|
+
# Get the taxa
|
195
|
+
#
|
196
|
+
# == Attributes
|
197
|
+
#
|
198
|
+
# * +hsh+ - Hash { with_ranks: true } for scientific names returned with ranks as keys
|
199
|
+
#
|
200
|
+
def scientific_names(hsh = {})
|
201
|
+
if hsh[:with_ranks]
|
202
|
+
scientific_names_with_ranks
|
203
|
+
else
|
204
|
+
scientific_names_with_ranks.map{ |s| s.values.join(" ") }
|
193
205
|
end
|
194
|
-
names.to_a
|
195
206
|
end
|
196
207
|
|
208
|
+
|
197
209
|
##
|
198
210
|
# Get occurrences with dwc keys
|
199
211
|
#
|
200
212
|
def occurrences
|
201
213
|
Validator.validate_nokogiri(@doc)
|
202
214
|
data = []
|
203
|
-
|
215
|
+
xpath = "//*/list[@list-content='occurrences']/list-item"
|
216
|
+
@doc.xpath(xpath).each do |occ|
|
204
217
|
obj = {}
|
205
218
|
occ.xpath("*/named-content").each do |dwc|
|
206
219
|
prefix = dwc.attributes["content-type"].text.gsub(/dwc\:/, "")
|
@@ -217,7 +230,8 @@ class TaxPub
|
|
217
230
|
def figures
|
218
231
|
Validator.validate_nokogiri(@doc)
|
219
232
|
data = []
|
220
|
-
|
233
|
+
xpath = "//*/fig"
|
234
|
+
@doc.xpath(xpath).each do |fig|
|
221
235
|
data << {
|
222
236
|
label: Utils.clean_text(fig.xpath("label").text),
|
223
237
|
caption: Utils.clean_text(fig.xpath("caption").text),
|
@@ -239,4 +253,29 @@ class TaxPub
|
|
239
253
|
@doc.xpath(xpath).map{ |r| Reference.parse(r) }
|
240
254
|
end
|
241
255
|
|
242
|
-
|
256
|
+
private
|
257
|
+
|
258
|
+
##
|
259
|
+
# Get the ranked taxa
|
260
|
+
#
|
261
|
+
def scientific_names_with_ranks
|
262
|
+
Validator.validate_nokogiri(@doc)
|
263
|
+
names = Set.new
|
264
|
+
xpath = "//*//tp:taxon-name"
|
265
|
+
@doc.xpath(xpath).each do |taxon|
|
266
|
+
tp = {}
|
267
|
+
taxon.children.each do |child|
|
268
|
+
next if !child.has_attribute?("taxon-name-part-type")
|
269
|
+
rank = child.attributes["taxon-name-part-type"].value.to_sym
|
270
|
+
if child.has_attribute?("reg")
|
271
|
+
tp[rank] = child.attributes["reg"].value
|
272
|
+
else
|
273
|
+
tp[rank] = child.text
|
274
|
+
end
|
275
|
+
end
|
276
|
+
names.add(tp)
|
277
|
+
end
|
278
|
+
names.to_a
|
279
|
+
end
|
280
|
+
|
281
|
+
end
|
data/lib/taxpub/version.rb
CHANGED