taxpub 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/lib/taxpub.rb +82 -43
  3. data/lib/taxpub/version.rb +1 -1
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e75adf8b3be4f3dfae9e7aefccf3af753585a623
4
- data.tar.gz: 8e4405dec8b17b4a52cd7b0f14ec4632be8e1d86
3
+ metadata.gz: 10f95f45417a3ad2aaa954898a158021959abbd4
4
+ data.tar.gz: 50766bdde4dd782953cde156956b5c31e79f7b72
5
5
  SHA512:
6
- metadata.gz: 7a43ea0275cd2df38b4f4400b97c5b79a6992ad9221fe37e7400bb21ff1da8e276170172727c35f4dc56336f29057af22a1552397938b7a442459a8358703f81
7
- data.tar.gz: aa50f82daa9a4cc361bb8708d66e43aa53a34881e98bcf65999fc1e241089e4967448747d2fb18a8efa3edd7822fe3b058e256dd391691d7680ed17d9341c908
6
+ metadata.gz: 227032df578f11ba9f5476d0620a8a4fdf384a7f88163269dd6854cbd8d32501567b8fca8d651b8192908b14921df0e03a8f5258c94c301b130c210561055d57
7
+ data.tar.gz: 3f8a1240779210bf0f741069537f79da477c5c85dc45f7b0f0e0939dfe8d0a176802f08a72b8986e5cd22ea4d17004703a451b1166c0abfd70da8c0ece5a16b4
@@ -6,6 +6,7 @@ require "taxpub/version"
6
6
  require "nokogiri"
7
7
  require "open-uri"
8
8
  require "set"
9
+ require "byebug"
9
10
 
10
11
  class TaxPub
11
12
 
@@ -64,6 +65,7 @@ class TaxPub
64
65
  @doc = File.open(file_path) { |f| Nokogiri::XML(f) }
65
66
  end
66
67
  Validator.validate_nokogiri(@doc)
68
+ @doc
67
69
  end
68
70
 
69
71
  ##
@@ -73,10 +75,17 @@ class TaxPub
73
75
  @doc
74
76
  end
75
77
 
78
+ def type
79
+ Validator.validate_nokogiri(@doc)
80
+ xpath = "/article/@article-type"
81
+ @doc.xpath(xpath).text
82
+ end
83
+
76
84
  ##
77
85
  # Get the raw text content of the Nokogiri document
78
86
  #
79
87
  def content
88
+ Validator.validate_nokogiri(@doc)
80
89
  Utils.clean_text(@doc.text)
81
90
  end
82
91
 
@@ -85,7 +94,8 @@ class TaxPub
85
94
  #
86
95
  def doi
87
96
  Validator.validate_nokogiri(@doc)
88
- Utils.expand_doi(@doc.xpath("//*/article-meta/article-id[@pub-id-type='doi']").text)
97
+ xpath = "//*/article-meta/article-id[@pub-id-type='doi']"
98
+ Utils.expand_doi(@doc.xpath(xpath).text)
89
99
  end
90
100
 
91
101
  ##
@@ -93,7 +103,8 @@ class TaxPub
93
103
  #
94
104
  def title
95
105
  Validator.validate_nokogiri(@doc)
96
- t = @doc.xpath("//*/article-meta/title-group/article-title").text
106
+ xpath = "//*/article-meta/title-group/article-title"
107
+ t = @doc.xpath(xpath).text
97
108
  Utils.clean_text(t)
98
109
  end
99
110
 
@@ -102,7 +113,8 @@ class TaxPub
102
113
  #
103
114
  def abstract
104
115
  Validator.validate_nokogiri(@doc)
105
- a = @doc.xpath("//*/article-meta/abstract").text
116
+ xpath = "//*/article-meta/abstract"
117
+ a = @doc.xpath(xpath).text
106
118
  Utils.clean_text(a)
107
119
  end
108
120
 
@@ -111,7 +123,8 @@ class TaxPub
111
123
  #
112
124
  def keywords
113
125
  Validator.validate_nokogiri(@doc)
114
- @doc.xpath("//*/article-meta/kwd-group/kwd")
126
+ xpath = "//*/article-meta/kwd-group/kwd"
127
+ @doc.xpath(xpath)
115
128
  .map{|a| Utils.clean_text(a.text)}
116
129
  end
117
130
 
@@ -121,7 +134,8 @@ class TaxPub
121
134
  def authors
122
135
  Validator.validate_nokogiri(@doc)
123
136
  data = []
124
- @doc.xpath("//*/contrib[@contrib-type='author']").each do |author|
137
+ xpath = "//*/contrib[@contrib-type='author']"
138
+ @doc.xpath(xpath).each do |author|
125
139
  affiliations = []
126
140
  author.xpath("xref/@rid").each do |rid|
127
141
  xpath = "//*/aff[@id='#{rid}']/addr-line"
@@ -142,25 +156,6 @@ class TaxPub
142
156
  data
143
157
  end
144
158
 
145
- ##
146
- # Get the conference part of a proceeding
147
- #
148
- def conference_part
149
- Validator.validate_nokogiri(@doc)
150
- xpath = "//*/subj-group[@subj-group-type='conference-part']/subject"
151
- coll = @doc.xpath(xpath).text
152
- Utils.clean_text(coll)
153
- end
154
-
155
- ##
156
- # Get the presenting author of a proceeding
157
- #
158
- def presenting_author
159
- Validator.validate_nokogiri(@doc)
160
- xpath = "//*/sec[@sec-type='Presenting author']/p"
161
- author = @doc.xpath(xpath).text
162
- Utils.clean_text(author)
163
- end
164
159
 
165
160
  ##
166
161
  # Get the corresponding author
@@ -173,34 +168,52 @@ class TaxPub
173
168
  end
174
169
 
175
170
  ##
176
- # Get the ranked taxa
171
+ # Get the conference metadata
177
172
  #
178
- def ranked_taxa
173
+ def conference
179
174
  Validator.validate_nokogiri(@doc)
180
- names = Set.new
181
- @doc.xpath("//*//tp:taxon-name").each do |taxon|
182
- tp = {}
183
- taxon.children.each do |child|
184
- next if !child.has_attribute?("taxon-name-part-type")
185
- rank = child.attributes["taxon-name-part-type"].value.to_sym
186
- if child.has_attribute?("reg")
187
- tp[rank] = child.attributes["reg"].value
188
- else
189
- tp[rank] = child.text
190
- end
191
- end
192
- names.add(tp)
175
+ xpath = "//*/conference"
176
+ conf = @doc.xpath(xpath)
177
+ return {} if conf.empty?
178
+ session_xpath = "//*/subj-group[@subj-group-type='conference-part']/subject"
179
+ session = Utils.clean_text(@doc.xpath(session_xpath).text)
180
+ presenter_xpath = "//*/sec[@sec-type='Presenting author']/p"
181
+ presenter = Utils.clean_text(@doc.xpath(presenter_xpath).text)
182
+ {
183
+ date: Utils.clean_text(conf.at_xpath("conf-date").text),
184
+ name: Utils.clean_text(conf.at_xpath("conf-name").text),
185
+ acronym: Utils.clean_text(conf.at_xpath("conf-acronym").text),
186
+ location: Utils.clean_text(conf.at_xpath("conf-loc").text),
187
+ theme: Utils.clean_text(conf.at_xpath("conf-theme").text),
188
+ session: session,
189
+ presenter: presenter
190
+ }
191
+ end
192
+
193
+ ##
194
+ # Get the taxa
195
+ #
196
+ # == Attributes
197
+ #
198
+ # * +hsh+ - Hash { with_ranks: true } for scientific names returned with ranks as keys
199
+ #
200
+ def scientific_names(hsh = {})
201
+ if hsh[:with_ranks]
202
+ scientific_names_with_ranks
203
+ else
204
+ scientific_names_with_ranks.map{ |s| s.values.join(" ") }
193
205
  end
194
- names.to_a
195
206
  end
196
207
 
208
+
197
209
  ##
198
210
  # Get occurrences with dwc keys
199
211
  #
200
212
  def occurrences
201
213
  Validator.validate_nokogiri(@doc)
202
214
  data = []
203
- @doc.xpath("//*/list[@list-content='occurrences']/list-item").each do |occ|
215
+ xpath = "//*/list[@list-content='occurrences']/list-item"
216
+ @doc.xpath(xpath).each do |occ|
204
217
  obj = {}
205
218
  occ.xpath("*/named-content").each do |dwc|
206
219
  prefix = dwc.attributes["content-type"].text.gsub(/dwc\:/, "")
@@ -217,7 +230,8 @@ class TaxPub
217
230
  def figures
218
231
  Validator.validate_nokogiri(@doc)
219
232
  data = []
220
- @doc.xpath("//*/fig").each do |fig|
233
+ xpath = "//*/fig"
234
+ @doc.xpath(xpath).each do |fig|
221
235
  data << {
222
236
  label: Utils.clean_text(fig.xpath("label").text),
223
237
  caption: Utils.clean_text(fig.xpath("caption").text),
@@ -239,4 +253,29 @@ class TaxPub
239
253
  @doc.xpath(xpath).map{ |r| Reference.parse(r) }
240
254
  end
241
255
 
242
- end
256
+ private
257
+
258
+ ##
259
+ # Get the ranked taxa
260
+ #
261
+ def scientific_names_with_ranks
262
+ Validator.validate_nokogiri(@doc)
263
+ names = Set.new
264
+ xpath = "//*//tp:taxon-name"
265
+ @doc.xpath(xpath).each do |taxon|
266
+ tp = {}
267
+ taxon.children.each do |child|
268
+ next if !child.has_attribute?("taxon-name-part-type")
269
+ rank = child.attributes["taxon-name-part-type"].value.to_sym
270
+ if child.has_attribute?("reg")
271
+ tp[rank] = child.attributes["reg"].value
272
+ else
273
+ tp[rank] = child.text
274
+ end
275
+ end
276
+ names.add(tp)
277
+ end
278
+ names.to_a
279
+ end
280
+
281
+ end
@@ -1,5 +1,5 @@
1
1
  class TaxPub
2
- VERSION = "0.0.4"
2
+ VERSION = "0.1.0"
3
3
 
4
4
  def self.version
5
5
  VERSION
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: taxpub
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse