taxpub 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/taxpub.rb +82 -43
  3. data/lib/taxpub/version.rb +1 -1
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e75adf8b3be4f3dfae9e7aefccf3af753585a623
4
- data.tar.gz: 8e4405dec8b17b4a52cd7b0f14ec4632be8e1d86
3
+ metadata.gz: 10f95f45417a3ad2aaa954898a158021959abbd4
4
+ data.tar.gz: 50766bdde4dd782953cde156956b5c31e79f7b72
5
5
  SHA512:
6
- metadata.gz: 7a43ea0275cd2df38b4f4400b97c5b79a6992ad9221fe37e7400bb21ff1da8e276170172727c35f4dc56336f29057af22a1552397938b7a442459a8358703f81
7
- data.tar.gz: aa50f82daa9a4cc361bb8708d66e43aa53a34881e98bcf65999fc1e241089e4967448747d2fb18a8efa3edd7822fe3b058e256dd391691d7680ed17d9341c908
6
+ metadata.gz: 227032df578f11ba9f5476d0620a8a4fdf384a7f88163269dd6854cbd8d32501567b8fca8d651b8192908b14921df0e03a8f5258c94c301b130c210561055d57
7
+ data.tar.gz: 3f8a1240779210bf0f741069537f79da477c5c85dc45f7b0f0e0939dfe8d0a176802f08a72b8986e5cd22ea4d17004703a451b1166c0abfd70da8c0ece5a16b4
@@ -6,6 +6,7 @@ require "taxpub/version"
6
6
  require "nokogiri"
7
7
  require "open-uri"
8
8
  require "set"
9
+ require "byebug"
9
10
 
10
11
  class TaxPub
11
12
 
@@ -64,6 +65,7 @@ class TaxPub
64
65
  @doc = File.open(file_path) { |f| Nokogiri::XML(f) }
65
66
  end
66
67
  Validator.validate_nokogiri(@doc)
68
+ @doc
67
69
  end
68
70
 
69
71
  ##
@@ -73,10 +75,17 @@ class TaxPub
73
75
  @doc
74
76
  end
75
77
 
78
+ def type
79
+ Validator.validate_nokogiri(@doc)
80
+ xpath = "/article/@article-type"
81
+ @doc.xpath(xpath).text
82
+ end
83
+
76
84
  ##
77
85
  # Get the raw text content of the Nokogiri document
78
86
  #
79
87
  def content
88
+ Validator.validate_nokogiri(@doc)
80
89
  Utils.clean_text(@doc.text)
81
90
  end
82
91
 
@@ -85,7 +94,8 @@ class TaxPub
85
94
  #
86
95
  def doi
87
96
  Validator.validate_nokogiri(@doc)
88
- Utils.expand_doi(@doc.xpath("//*/article-meta/article-id[@pub-id-type='doi']").text)
97
+ xpath = "//*/article-meta/article-id[@pub-id-type='doi']"
98
+ Utils.expand_doi(@doc.xpath(xpath).text)
89
99
  end
90
100
 
91
101
  ##
@@ -93,7 +103,8 @@ class TaxPub
93
103
  #
94
104
  def title
95
105
  Validator.validate_nokogiri(@doc)
96
- t = @doc.xpath("//*/article-meta/title-group/article-title").text
106
+ xpath = "//*/article-meta/title-group/article-title"
107
+ t = @doc.xpath(xpath).text
97
108
  Utils.clean_text(t)
98
109
  end
99
110
 
@@ -102,7 +113,8 @@ class TaxPub
102
113
  #
103
114
  def abstract
104
115
  Validator.validate_nokogiri(@doc)
105
- a = @doc.xpath("//*/article-meta/abstract").text
116
+ xpath = "//*/article-meta/abstract"
117
+ a = @doc.xpath(xpath).text
106
118
  Utils.clean_text(a)
107
119
  end
108
120
 
@@ -111,7 +123,8 @@ class TaxPub
111
123
  #
112
124
  def keywords
113
125
  Validator.validate_nokogiri(@doc)
114
- @doc.xpath("//*/article-meta/kwd-group/kwd")
126
+ xpath = "//*/article-meta/kwd-group/kwd"
127
+ @doc.xpath(xpath)
115
128
  .map{|a| Utils.clean_text(a.text)}
116
129
  end
117
130
 
@@ -121,7 +134,8 @@ class TaxPub
121
134
  def authors
122
135
  Validator.validate_nokogiri(@doc)
123
136
  data = []
124
- @doc.xpath("//*/contrib[@contrib-type='author']").each do |author|
137
+ xpath = "//*/contrib[@contrib-type='author']"
138
+ @doc.xpath(xpath).each do |author|
125
139
  affiliations = []
126
140
  author.xpath("xref/@rid").each do |rid|
127
141
  xpath = "//*/aff[@id='#{rid}']/addr-line"
@@ -142,25 +156,6 @@ class TaxPub
142
156
  data
143
157
  end
144
158
 
145
- ##
146
- # Get the conference part of a proceeding
147
- #
148
- def conference_part
149
- Validator.validate_nokogiri(@doc)
150
- xpath = "//*/subj-group[@subj-group-type='conference-part']/subject"
151
- coll = @doc.xpath(xpath).text
152
- Utils.clean_text(coll)
153
- end
154
-
155
- ##
156
- # Get the presenting author of a proceeding
157
- #
158
- def presenting_author
159
- Validator.validate_nokogiri(@doc)
160
- xpath = "//*/sec[@sec-type='Presenting author']/p"
161
- author = @doc.xpath(xpath).text
162
- Utils.clean_text(author)
163
- end
164
159
 
165
160
  ##
166
161
  # Get the corresponding author
@@ -173,34 +168,52 @@ class TaxPub
173
168
  end
174
169
 
175
170
  ##
176
- # Get the ranked taxa
171
+ # Get the conference metadata
177
172
  #
178
- def ranked_taxa
173
+ def conference
179
174
  Validator.validate_nokogiri(@doc)
180
- names = Set.new
181
- @doc.xpath("//*//tp:taxon-name").each do |taxon|
182
- tp = {}
183
- taxon.children.each do |child|
184
- next if !child.has_attribute?("taxon-name-part-type")
185
- rank = child.attributes["taxon-name-part-type"].value.to_sym
186
- if child.has_attribute?("reg")
187
- tp[rank] = child.attributes["reg"].value
188
- else
189
- tp[rank] = child.text
190
- end
191
- end
192
- names.add(tp)
175
+ xpath = "//*/conference"
176
+ conf = @doc.xpath(xpath)
177
+ return {} if conf.empty?
178
+ session_xpath = "//*/subj-group[@subj-group-type='conference-part']/subject"
179
+ session = Utils.clean_text(@doc.xpath(session_xpath).text)
180
+ presenter_xpath = "//*/sec[@sec-type='Presenting author']/p"
181
+ presenter = Utils.clean_text(@doc.xpath(presenter_xpath).text)
182
+ {
183
+ date: Utils.clean_text(conf.at_xpath("conf-date").text),
184
+ name: Utils.clean_text(conf.at_xpath("conf-name").text),
185
+ acronym: Utils.clean_text(conf.at_xpath("conf-acronym").text),
186
+ location: Utils.clean_text(conf.at_xpath("conf-loc").text),
187
+ theme: Utils.clean_text(conf.at_xpath("conf-theme").text),
188
+ session: session,
189
+ presenter: presenter
190
+ }
191
+ end
192
+
193
+ ##
194
+ # Get the taxa
195
+ #
196
+ # == Attributes
197
+ #
198
+ # * +hsh+ - Hash { with_ranks: true } for scientific names returned with ranks as keys
199
+ #
200
+ def scientific_names(hsh = {})
201
+ if hsh[:with_ranks]
202
+ scientific_names_with_ranks
203
+ else
204
+ scientific_names_with_ranks.map{ |s| s.values.join(" ") }
193
205
  end
194
- names.to_a
195
206
  end
196
207
 
208
+
197
209
  ##
198
210
  # Get occurrences with dwc keys
199
211
  #
200
212
  def occurrences
201
213
  Validator.validate_nokogiri(@doc)
202
214
  data = []
203
- @doc.xpath("//*/list[@list-content='occurrences']/list-item").each do |occ|
215
+ xpath = "//*/list[@list-content='occurrences']/list-item"
216
+ @doc.xpath(xpath).each do |occ|
204
217
  obj = {}
205
218
  occ.xpath("*/named-content").each do |dwc|
206
219
  prefix = dwc.attributes["content-type"].text.gsub(/dwc\:/, "")
@@ -217,7 +230,8 @@ class TaxPub
217
230
  def figures
218
231
  Validator.validate_nokogiri(@doc)
219
232
  data = []
220
- @doc.xpath("//*/fig").each do |fig|
233
+ xpath = "//*/fig"
234
+ @doc.xpath(xpath).each do |fig|
221
235
  data << {
222
236
  label: Utils.clean_text(fig.xpath("label").text),
223
237
  caption: Utils.clean_text(fig.xpath("caption").text),
@@ -239,4 +253,29 @@ class TaxPub
239
253
  @doc.xpath(xpath).map{ |r| Reference.parse(r) }
240
254
  end
241
255
 
242
- end
256
+ private
257
+
258
+ ##
259
+ # Get the ranked taxa
260
+ #
261
+ def scientific_names_with_ranks
262
+ Validator.validate_nokogiri(@doc)
263
+ names = Set.new
264
+ xpath = "//*//tp:taxon-name"
265
+ @doc.xpath(xpath).each do |taxon|
266
+ tp = {}
267
+ taxon.children.each do |child|
268
+ next if !child.has_attribute?("taxon-name-part-type")
269
+ rank = child.attributes["taxon-name-part-type"].value.to_sym
270
+ if child.has_attribute?("reg")
271
+ tp[rank] = child.attributes["reg"].value
272
+ else
273
+ tp[rank] = child.text
274
+ end
275
+ end
276
+ names.add(tp)
277
+ end
278
+ names.to_a
279
+ end
280
+
281
+ end
@@ -1,5 +1,5 @@
1
1
  class TaxPub
2
- VERSION = "0.0.4"
2
+ VERSION = "0.1.0"
3
3
 
4
4
  def self.version
5
5
  VERSION
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: taxpub
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David P. Shorthouse