taxpub 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/taxpub.rb +82 -43
- data/lib/taxpub/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 10f95f45417a3ad2aaa954898a158021959abbd4
|
4
|
+
data.tar.gz: 50766bdde4dd782953cde156956b5c31e79f7b72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 227032df578f11ba9f5476d0620a8a4fdf384a7f88163269dd6854cbd8d32501567b8fca8d651b8192908b14921df0e03a8f5258c94c301b130c210561055d57
|
7
|
+
data.tar.gz: 3f8a1240779210bf0f741069537f79da477c5c85dc45f7b0f0e0939dfe8d0a176802f08a72b8986e5cd22ea4d17004703a451b1166c0abfd70da8c0ece5a16b4
|
data/lib/taxpub.rb
CHANGED
@@ -6,6 +6,7 @@ require "taxpub/version"
|
|
6
6
|
require "nokogiri"
|
7
7
|
require "open-uri"
|
8
8
|
require "set"
|
9
|
+
require "byebug"
|
9
10
|
|
10
11
|
class TaxPub
|
11
12
|
|
@@ -64,6 +65,7 @@ class TaxPub
|
|
64
65
|
@doc = File.open(file_path) { |f| Nokogiri::XML(f) }
|
65
66
|
end
|
66
67
|
Validator.validate_nokogiri(@doc)
|
68
|
+
@doc
|
67
69
|
end
|
68
70
|
|
69
71
|
##
|
@@ -73,10 +75,17 @@ class TaxPub
|
|
73
75
|
@doc
|
74
76
|
end
|
75
77
|
|
78
|
+
def type
|
79
|
+
Validator.validate_nokogiri(@doc)
|
80
|
+
xpath = "/article/@article-type"
|
81
|
+
@doc.xpath(xpath).text
|
82
|
+
end
|
83
|
+
|
76
84
|
##
|
77
85
|
# Get the raw text content of the Nokogiri document
|
78
86
|
#
|
79
87
|
def content
|
88
|
+
Validator.validate_nokogiri(@doc)
|
80
89
|
Utils.clean_text(@doc.text)
|
81
90
|
end
|
82
91
|
|
@@ -85,7 +94,8 @@ class TaxPub
|
|
85
94
|
#
|
86
95
|
def doi
|
87
96
|
Validator.validate_nokogiri(@doc)
|
88
|
-
|
97
|
+
xpath = "//*/article-meta/article-id[@pub-id-type='doi']"
|
98
|
+
Utils.expand_doi(@doc.xpath(xpath).text)
|
89
99
|
end
|
90
100
|
|
91
101
|
##
|
@@ -93,7 +103,8 @@ class TaxPub
|
|
93
103
|
#
|
94
104
|
def title
|
95
105
|
Validator.validate_nokogiri(@doc)
|
96
|
-
|
106
|
+
xpath = "//*/article-meta/title-group/article-title"
|
107
|
+
t = @doc.xpath(xpath).text
|
97
108
|
Utils.clean_text(t)
|
98
109
|
end
|
99
110
|
|
@@ -102,7 +113,8 @@ class TaxPub
|
|
102
113
|
#
|
103
114
|
def abstract
|
104
115
|
Validator.validate_nokogiri(@doc)
|
105
|
-
|
116
|
+
xpath = "//*/article-meta/abstract"
|
117
|
+
a = @doc.xpath(xpath).text
|
106
118
|
Utils.clean_text(a)
|
107
119
|
end
|
108
120
|
|
@@ -111,7 +123,8 @@ class TaxPub
|
|
111
123
|
#
|
112
124
|
def keywords
|
113
125
|
Validator.validate_nokogiri(@doc)
|
114
|
-
|
126
|
+
xpath = "//*/article-meta/kwd-group/kwd"
|
127
|
+
@doc.xpath(xpath)
|
115
128
|
.map{|a| Utils.clean_text(a.text)}
|
116
129
|
end
|
117
130
|
|
@@ -121,7 +134,8 @@ class TaxPub
|
|
121
134
|
def authors
|
122
135
|
Validator.validate_nokogiri(@doc)
|
123
136
|
data = []
|
124
|
-
|
137
|
+
xpath = "//*/contrib[@contrib-type='author']"
|
138
|
+
@doc.xpath(xpath).each do |author|
|
125
139
|
affiliations = []
|
126
140
|
author.xpath("xref/@rid").each do |rid|
|
127
141
|
xpath = "//*/aff[@id='#{rid}']/addr-line"
|
@@ -142,25 +156,6 @@ class TaxPub
|
|
142
156
|
data
|
143
157
|
end
|
144
158
|
|
145
|
-
##
|
146
|
-
# Get the conference part of a proceeding
|
147
|
-
#
|
148
|
-
def conference_part
|
149
|
-
Validator.validate_nokogiri(@doc)
|
150
|
-
xpath = "//*/subj-group[@subj-group-type='conference-part']/subject"
|
151
|
-
coll = @doc.xpath(xpath).text
|
152
|
-
Utils.clean_text(coll)
|
153
|
-
end
|
154
|
-
|
155
|
-
##
|
156
|
-
# Get the presenting author of a proceeding
|
157
|
-
#
|
158
|
-
def presenting_author
|
159
|
-
Validator.validate_nokogiri(@doc)
|
160
|
-
xpath = "//*/sec[@sec-type='Presenting author']/p"
|
161
|
-
author = @doc.xpath(xpath).text
|
162
|
-
Utils.clean_text(author)
|
163
|
-
end
|
164
159
|
|
165
160
|
##
|
166
161
|
# Get the corresponding author
|
@@ -173,34 +168,52 @@ class TaxPub
|
|
173
168
|
end
|
174
169
|
|
175
170
|
##
|
176
|
-
# Get the
|
171
|
+
# Get the conference metadata
|
177
172
|
#
|
178
|
-
def
|
173
|
+
def conference
|
179
174
|
Validator.validate_nokogiri(@doc)
|
180
|
-
|
181
|
-
@doc.xpath(
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
175
|
+
xpath = "//*/conference"
|
176
|
+
conf = @doc.xpath(xpath)
|
177
|
+
return {} if conf.empty?
|
178
|
+
session_xpath = "//*/subj-group[@subj-group-type='conference-part']/subject"
|
179
|
+
session = Utils.clean_text(@doc.xpath(session_xpath).text)
|
180
|
+
presenter_xpath = "//*/sec[@sec-type='Presenting author']/p"
|
181
|
+
presenter = Utils.clean_text(@doc.xpath(presenter_xpath).text)
|
182
|
+
{
|
183
|
+
date: Utils.clean_text(conf.at_xpath("conf-date").text),
|
184
|
+
name: Utils.clean_text(conf.at_xpath("conf-name").text),
|
185
|
+
acronym: Utils.clean_text(conf.at_xpath("conf-acronym").text),
|
186
|
+
location: Utils.clean_text(conf.at_xpath("conf-loc").text),
|
187
|
+
theme: Utils.clean_text(conf.at_xpath("conf-theme").text),
|
188
|
+
session: session,
|
189
|
+
presenter: presenter
|
190
|
+
}
|
191
|
+
end
|
192
|
+
|
193
|
+
##
|
194
|
+
# Get the taxa
|
195
|
+
#
|
196
|
+
# == Attributes
|
197
|
+
#
|
198
|
+
# * +hsh+ - Hash { with_ranks: true } for scientific names returned with ranks as keys
|
199
|
+
#
|
200
|
+
def scientific_names(hsh = {})
|
201
|
+
if hsh[:with_ranks]
|
202
|
+
scientific_names_with_ranks
|
203
|
+
else
|
204
|
+
scientific_names_with_ranks.map{ |s| s.values.join(" ") }
|
193
205
|
end
|
194
|
-
names.to_a
|
195
206
|
end
|
196
207
|
|
208
|
+
|
197
209
|
##
|
198
210
|
# Get occurrences with dwc keys
|
199
211
|
#
|
200
212
|
def occurrences
|
201
213
|
Validator.validate_nokogiri(@doc)
|
202
214
|
data = []
|
203
|
-
|
215
|
+
xpath = "//*/list[@list-content='occurrences']/list-item"
|
216
|
+
@doc.xpath(xpath).each do |occ|
|
204
217
|
obj = {}
|
205
218
|
occ.xpath("*/named-content").each do |dwc|
|
206
219
|
prefix = dwc.attributes["content-type"].text.gsub(/dwc\:/, "")
|
@@ -217,7 +230,8 @@ class TaxPub
|
|
217
230
|
def figures
|
218
231
|
Validator.validate_nokogiri(@doc)
|
219
232
|
data = []
|
220
|
-
|
233
|
+
xpath = "//*/fig"
|
234
|
+
@doc.xpath(xpath).each do |fig|
|
221
235
|
data << {
|
222
236
|
label: Utils.clean_text(fig.xpath("label").text),
|
223
237
|
caption: Utils.clean_text(fig.xpath("caption").text),
|
@@ -239,4 +253,29 @@ class TaxPub
|
|
239
253
|
@doc.xpath(xpath).map{ |r| Reference.parse(r) }
|
240
254
|
end
|
241
255
|
|
242
|
-
|
256
|
+
private
|
257
|
+
|
258
|
+
##
|
259
|
+
# Get the ranked taxa
|
260
|
+
#
|
261
|
+
def scientific_names_with_ranks
|
262
|
+
Validator.validate_nokogiri(@doc)
|
263
|
+
names = Set.new
|
264
|
+
xpath = "//*//tp:taxon-name"
|
265
|
+
@doc.xpath(xpath).each do |taxon|
|
266
|
+
tp = {}
|
267
|
+
taxon.children.each do |child|
|
268
|
+
next if !child.has_attribute?("taxon-name-part-type")
|
269
|
+
rank = child.attributes["taxon-name-part-type"].value.to_sym
|
270
|
+
if child.has_attribute?("reg")
|
271
|
+
tp[rank] = child.attributes["reg"].value
|
272
|
+
else
|
273
|
+
tp[rank] = child.text
|
274
|
+
end
|
275
|
+
end
|
276
|
+
names.add(tp)
|
277
|
+
end
|
278
|
+
names.to_a
|
279
|
+
end
|
280
|
+
|
281
|
+
end
|
data/lib/taxpub/version.rb
CHANGED