relaton-bipm 1.19.2 → 1.19.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/relaton_bipm/data_outcomes_parser.rb +22 -7
- data/lib/relaton_bipm/rawdata_bipm_metrologia/affiliations.rb +111 -0
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +59 -26
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +2 -1
- data/lib/relaton_bipm/version.rb +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c9261622d7be459730444f639a2b7f91a6606600a8aa1d0e4806036a766235c
|
4
|
+
data.tar.gz: 0b285754154788c58a7cfb812c92c5cc54331ea6f52e41dd71c129e59ffa5660
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 573c4e55c1d0cc0ecfc2f96c3dccf31f0a85790a433c9332db3d0f8478bcc2d32330fb7727187102045378cbf9555ede82e05871c54024e04267a16b40629372
|
7
|
+
data.tar.gz: 61f92667f0154f11345ca3bec967a2fee19ac2d4795ef81ffc50561f5c554000501e5f0d26a337b903a3be2a2f76cd637026cbb7daaf4206e032bd609aef960e
|
@@ -88,8 +88,7 @@ module RelatonBipm
|
|
88
88
|
if @data_fetcher.files.include?(path) && part
|
89
89
|
add_part hash, part
|
90
90
|
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
91
|
-
|
92
|
-
has_part_item = RelatonBipm::BipmBibliographicItem.from_hash(yaml)
|
91
|
+
has_part_item = parse_file path
|
93
92
|
has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item)
|
94
93
|
@data_fetcher.write_file path, has_part_item, warn_duplicate: false
|
95
94
|
path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
|
@@ -111,6 +110,17 @@ module RelatonBipm
|
|
111
110
|
fetch_resolution body: body, en: en, fr: fr, dir: dir, src: src, num: num
|
112
111
|
end
|
113
112
|
|
113
|
+
#
# Read a previously written document from disk and parse it according to
# the data fetcher's output format.
#
# @param [String] path path to the file
#
# @return [Object, nil] result of BipmBibliographicItem.from_hash ("yaml") or
#   XMLParser.from_xml ("xml"); nil for any other format
#
def parse_file(path)
  format = @data_fetcher.format
  if format == "yaml"
    content = File.read(path, encoding: "UTF-8")
    hash = RelatonBib.parse_yaml(content, [Date])
    RelatonBipm::BipmBibliographicItem.from_hash(hash)
  elsif format == "xml"
    RelatonBipm::XMLParser.from_xml File.read(path, encoding: "UTF-8")
  end
end
|
123
|
+
|
114
124
|
#
|
115
125
|
# Read English and French files
|
116
126
|
#
|
@@ -348,8 +358,12 @@ module RelatonBipm
|
|
348
358
|
#
|
349
359
|
# @return [Hash] title
|
350
360
|
#
|
351
|
-
def create_title(content, language, format = "text/plain")
  if language == "fr"
    # Render the first "<digits>e" (French ordinal, e.g. "26e") with a superscript.
    # Use the non-mutating `sub` so the caller's string is left untouched and
    # frozen strings are accepted.
    content = content.sub(/(\d+)(e)/, '\1<sup>\2</sup>')
    # The title carries markup now (or already did), so it is no longer plain text.
    format = "text/html" if content.include?("<sup>")
  end
  { content: content, language: language, script: "Latn", format: format }
end
|
354
368
|
|
355
369
|
#
|
@@ -541,11 +555,12 @@ module RelatonBipm
|
|
541
555
|
end
|
542
556
|
|
543
557
|
#
# Build the document identifiers of a meeting from its English identifier:
# an English one, a French one (ordinal suffix rendered as <sup>e</sup>)
# and a combined "English / French" one.
#
# @param [String] en_id English identifier, e.g. "26th Meeting"
#
# @return [Array] three results of make_docid
#
def create_meeting_docids(en_id)
  french = en_id.sub(/(\d+)(?:st|nd|rd|th)/, '\1e').sub("Meeting", "réunion")
  french_sup = french.sub(/(\d+)(e)/, '\1<sup>\2</sup>')
  english_docid = make_docid(id: en_id, type: "BIPM", primary: true, language: "en", script: "Latn")
  french_docid = make_docid(id: french_sup, type: "BIPM", primary: true, language: "fr", script: "Latn")
  combined_docid = make_docid(id: "#{en_id} / #{french_sup}", type: "BIPM", primary: true)
  [english_docid, french_docid, combined_docid]
end
|
551
566
|
|
@@ -0,0 +1,111 @@
|
|
1
|
+
module RelatonBipm
  module RawdataBipmMetrologia
    #
    # Collection of organization affiliations extracted from <aff> elements
    # of XML files, searchable by organization name.
    #
    class Affiliations
      attr_reader :affiliations

      #
      # Initialize collection
      #
      # @param [Array<RelatonBib::Affiliation>] affiliations parsed affiliations
      #
      def initialize(affiliations)
        @affiliations = affiliations
      end

      #
      # Parse affiliations
      #
      # @param [String] dir directory whose "*.xml" files are scanned for <aff> elements
      #
      # @return [RelatonBipm::RawdataBipmMetrologia::Affiliations] affiliations
      #
      def self.parse(dir)
        affiliations = Dir["#{dir}/*.xml"].each_with_object([]) do |path, m|
          doc = Nokogiri::XML(File.read(path, encoding: "UTF-8"))
          doc.xpath("//aff").each do |aff|
            next unless aff.at("institution")

            # parse_affiliation returns nil for entries that have to be skipped;
            # keep them out of the list so the uniq block below never sees nil
            parsed = parse_affiliation(aff)
            m << parsed if parsed
          end
        end
        new(affiliations.uniq { |a| a.organization.name.first.content })
      end

      #
      # Parse affiliation organization
      # https://github.com/relaton/relaton-data-bipm/issues/17#issuecomment-1367035444
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [RelatonBib::Affiliation, nil] Organization name, division, address;
      #   nil when the element is not a real affiliation
      #
      def self.parse_affiliation(aff)
        # safe navigation: an element without any text node must not raise
        text = aff.at("text()")&.text.to_s
        return if skip_text?(text)

        args = {}
        institution = aff.at('institution')
        if institution
          name = institution.text
          return if name == "1005 Southover Lane"

          args[:subdivision] = parse_division(aff)
          args[:contact] = parse_address(aff)
        else
          name = aff.text
        end
        args[:name] = [RelatonBib::LocalizedString.new(name)]
        org = RelatonBib::Organization.new(**args)
        RelatonBib::Affiliation.new(organization: org)
      end

      #
      # Check whether the text marks a footnote-like entry instead of an affiliation
      #
      # Every call is parenthesized on purpose: `text.include? "a" || other`
      # is parsed as `text.include?("a" || other)` and silently ignores `other`.
      #
      # @param [String] text
      #
      # @return [Boolean]
      #
      def self.skip_text?(text)
        text.include?("Permanent address:") || text.include?("1005 Southover Lane") ||
          text == "Germany" || text.start_with?("Guest", "Deceased") ||
          text.include?("Author to whom any correspondence should be addressed")
      end
      private_class_method :skip_text?

      #
      # Parse division, i.e. the text that precedes the <institution> element
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [Array<RelatonBib::LocalizedString>] empty if there is no division
      #
      def self.parse_division(aff)
        div = aff.xpath("text()[following-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
        return [] if div.empty?

        [RelatonBib::LocalizedString.new(div)]
      end

      #
      # Parse address, i.e. the text that follows the <institution> element
      # joined with the content of the <country> element
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [Array<RelatonBib::Address>] empty if there is no address
      #
      def self.parse_address(aff)
        address = []
        addr = aff.xpath("text()[preceding-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
        address << addr unless addr.empty?
        country = aff.at('country')
        address << country.text if country && !country.text.empty?
        address = address.join(", ")
        return [] if address.empty?

        [RelatonBib::Address.new(formatted_address: address)]
      end

      #
      # Split comma separated affiliation text into organization attributes
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [Hash, nil] :name and optionally :contact; nil unless the text
      #   has one, two or three ", " separated elements
      #
      def self.parse_elements(aff)
        elements = aff.xpath("text()").text.strip.split(", ")
        case elements.size
        when 1 then { name: RelatonBib::LocalizedString.new(elements[0]) }
        when 2
          # name, country
          { name: RelatonBib::LocalizedString.new(elements[0]),
            contact: [RelatonBib::Address.new(formatted_address: elements[1])] }
        when 3
          # it can be name, country, city or name, city, country
          # so use formatted_address instead of city and country
          # (contact is an Array here as well, like in the two-element case)
          { name: RelatonBib::LocalizedString.new(elements[0]),
            contact: [RelatonBib::Address.new(formatted_address: elements[1, 2].join(", "))] }
        end
      end

      #
      # Find affiliation by organization name
      #
      # @param [String] text string with organization name in it
      #
      # @return [RelatonBib::Affiliation, nil] nil if no organization name occurs in the text
      #
      def find(text)
        # NOTE(review): `sort` needs the affiliations to be comparable with each
        # other as soon as more than one matches - confirm that they are.
        @affiliations.select { |a| text.include?(a.organization.name[0].content) }.sort.last
      end
    end
  end
end
|
@@ -59,12 +59,9 @@ module RelatonBipm
|
|
59
59
|
#
|
60
60
|
# Parse volume, issue and page
|
61
61
|
#
|
62
|
-
# @return [
|
62
|
+
# @return [String] volume issue page
|
63
63
|
#
|
64
64
|
def volume_issue_article
  # parts that are not set (nil) are left out of the identifier
  parts = [@journal, @volume, @article]
  parts.compact.join(" ")
end
|
70
67
|
|
@@ -140,17 +137,53 @@ module RelatonBipm
|
|
140
137
|
#
|
141
138
|
# @return [Array<RelatonBib::Affiliation>] array of affiliations
|
142
139
|
#
|
143
|
-
def affiliation(contrib)
  xrefs = contrib.xpath("./xref[@ref-type='aff']")
  parsed = xrefs.map do |xref|
    # look up the <aff> element the reference points to
    node = @meta.at("./contrib-group/aff[@id='#{xref[:rid]}']")
    parse_affiliation node
  end
  # entries for which parse_affiliation returned nil are dropped
  parsed.compact
end
|
146
|
+
|
147
|
+
#
# Parse affiliation organization
#
# @param [Nokogiri::XML::Element, nil] aff <aff> element; nil when the
#   reference did not resolve to an element
#
# @return [RelatonBib::Affiliation, nil] nil when there is no element or its
#   text marks a note (permanent address, guest, deceased, ...) rather than
#   an affiliation
#
def parse_affiliation(aff)
  # the lookup of the element may come back empty; the caller compacts nils
  return unless aff

  # normalize the comma separated text: trim every part and drop empty ones
  text = aff.xpath("text()|sup|sub").to_xml.split(",").map(&:strip).reject(&:empty?).join(", ")
  text = CGI::unescapeHTML(text)
  return if text.include?("Permanent address:") || text == "Germany" ||
    text.start_with?("Guest") || text.start_with?("Deceased") ||
    text.include?("Author to whom any correspondence should be addressed")

  args = {}
  institution = aff.at('institution')
  if institution
    name = institution.text
    return if name == "1005 Southover Lane"

    args[:subdivision] = parse_division(aff)
    args[:contact] = parse_address(aff)
  else
    # no <institution> element: the whole normalized text is the name
    name = text
  end
  args[:name] = [RelatonBib::LocalizedString.new(name)]
  org = RelatonBib::Organization.new(**args)
  RelatonBib::Affiliation.new(organization: org)
end
|
169
|
+
|
170
|
+
#
# Parse division, i.e. the text that precedes the <institution> element,
# with leading and trailing non-word characters removed
#
# @param [Nokogiri::XML::Element] aff
#
# @return [Array<RelatonBib::LocalizedString>] empty if there is no division
#
def parse_division(aff)
  before_institution = aff.xpath("text()[following-sibling::institution]")
  division = before_institution.text.gsub(/^\W*|\W*$/, "")
  division.empty? ? [] : [RelatonBib::LocalizedString.new(division)]
end
|
176
|
+
|
177
|
+
#
# Parse address, i.e. the text that follows the <institution> element
# joined with the content of the <country> element
#
# @param [Nokogiri::XML::Element] aff
#
# @return [Array<RelatonBib::Address>] empty if there is no address
#
def parse_address(aff)
  after_institution = aff.xpath("text()[preceding-sibling::institution]")
  street = after_institution.text.gsub(/^\W*|\W*$/, "")
  parts = street.empty? ? [] : [street]
  country = aff.at('country')
  parts.push(country.text) unless country.nil? || country.text.empty?
  formatted = parts.join(", ")
  return [] if formatted.empty?

  [RelatonBib::Address.new(formatted_address: formatted)]
end
|
155
188
|
|
156
189
|
#
|
@@ -173,20 +206,20 @@ module RelatonBipm
|
|
173
206
|
#
|
174
207
|
# @return [Array<RelatonBib::Forename>] array of forenames
|
175
208
|
#
|
176
|
-
def forename(given_name) # rubocop:disable Metrics/MethodLength
|
177
|
-
|
209
|
+
# def forename(given_name) # rubocop:disable Metrics/MethodLength
|
210
|
+
# return [] unless given_name
|
178
211
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
end
|
212
|
+
# given_name.text.scan(/(\w+)(?:\s(\w)(?:\s|$))?/).map do |nm, int|
|
213
|
+
# if nm.size == 1
|
214
|
+
# name = nil
|
215
|
+
# init = nm
|
216
|
+
# else
|
217
|
+
# name = nm
|
218
|
+
# init = int
|
219
|
+
# end
|
220
|
+
# RelatonBib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
|
221
|
+
# end
|
222
|
+
# end
|
190
223
|
|
191
224
|
#
|
192
225
|
# Parse date
|
@@ -29,8 +29,9 @@ module RelatonBipm
|
|
29
29
|
# Fetch articles from rawdata-bipm-metrologia and save to files
|
30
30
|
#
|
31
31
|
def fetch_articles # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
32
|
+
# aff = Affiliations.parse DIR
|
32
33
|
Dir["#{DIR}/**/*.xml"].each do |path|
|
33
|
-
item = ArticleParser.parse path
|
34
|
+
item = ArticleParser.parse path # , aff
|
34
35
|
file = "#{item.docidentifier.first.id.downcase.tr(' ', '-')}.#{@data_fetcher.ext}"
|
35
36
|
out_path = File.join(@data_fetcher.output, file)
|
36
37
|
key = Id.new.parse(item.docidentifier.first.id).to_hash
|
data/lib/relaton_bipm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-bipm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.19.
|
4
|
+
version: 1.19.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: faraday
|
@@ -135,6 +135,7 @@ files:
|
|
135
135
|
- lib/relaton_bipm/hash_converter.rb
|
136
136
|
- lib/relaton_bipm/id_parser.rb
|
137
137
|
- lib/relaton_bipm/processor.rb
|
138
|
+
- lib/relaton_bipm/rawdata_bipm_metrologia/affiliations.rb
|
138
139
|
- lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb
|
139
140
|
- lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb
|
140
141
|
- lib/relaton_bipm/structured_identifier.rb
|
@@ -149,7 +150,7 @@ licenses:
|
|
149
150
|
metadata:
|
150
151
|
homepage_uri: https://github.com/relaton/relaton-bipm
|
151
152
|
source_code_uri: https://github.com/relaton/relaton-bipm
|
152
|
-
post_install_message:
|
153
|
+
post_install_message:
|
153
154
|
rdoc_options: []
|
154
155
|
require_paths:
|
155
156
|
- lib
|
@@ -164,8 +165,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
165
|
- !ruby/object:Gem::Version
|
165
166
|
version: '0'
|
166
167
|
requirements: []
|
167
|
-
rubygems_version: 3.
|
168
|
-
signing_key:
|
168
|
+
rubygems_version: 3.5.19
|
169
|
+
signing_key:
|
169
170
|
specification_version: 4
|
170
171
|
summary: 'RelatonBipm: retrieve BIPM Standards for bibliographic use using the BibliographicItem
|
171
172
|
model'
|