relaton-bipm 1.19.2 → 1.19.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/relaton_bipm/data_outcomes_parser.rb +22 -7
- data/lib/relaton_bipm/rawdata_bipm_metrologia/affiliations.rb +111 -0
- data/lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb +59 -26
- data/lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb +2 -1
- data/lib/relaton_bipm/version.rb +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3c9261622d7be459730444f639a2b7f91a6606600a8aa1d0e4806036a766235c
|
|
4
|
+
data.tar.gz: 0b285754154788c58a7cfb812c92c5cc54331ea6f52e41dd71c129e59ffa5660
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 573c4e55c1d0cc0ecfc2f96c3dccf31f0a85790a433c9332db3d0f8478bcc2d32330fb7727187102045378cbf9555ede82e05871c54024e04267a16b40629372
|
|
7
|
+
data.tar.gz: 61f92667f0154f11345ca3bec967a2fee19ac2d4795ef81ffc50561f5c554000501e5f0d26a337b903a3be2a2f76cd637026cbb7daaf4206e032bd609aef960e
|
|
@@ -88,8 +88,7 @@ module RelatonBipm
|
|
|
88
88
|
if @data_fetcher.files.include?(path) && part
|
|
89
89
|
add_part hash, part
|
|
90
90
|
item = RelatonBipm::BipmBibliographicItem.new(**hash)
|
|
91
|
-
|
|
92
|
-
has_part_item = RelatonBipm::BipmBibliographicItem.from_hash(yaml)
|
|
91
|
+
has_part_item = parse_file path
|
|
93
92
|
has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item)
|
|
94
93
|
@data_fetcher.write_file path, has_part_item, warn_duplicate: false
|
|
95
94
|
path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
|
|
@@ -111,6 +110,17 @@ module RelatonBipm
|
|
|
111
110
|
fetch_resolution body: body, en: en, fr: fr, dir: dir, src: src, num: num
|
|
112
111
|
end
|
|
113
112
|
|
|
113
|
+
#
# Read a stored document back from disk with the parser that matches the
# data fetcher's output format.
#
# @param [String] path path to the stored document
#
# @return [Object, nil] whatever the matching parser returns; nil when the
#   format is neither "yaml" nor "xml" (the file is not read in that case)
#
def parse_file(path)
  fmt = @data_fetcher.format
  if fmt == "yaml"
    hash = RelatonBib.parse_yaml(File.read(path, encoding: "UTF-8"), [Date])
    RelatonBipm::BipmBibliographicItem.from_hash(hash)
  elsif fmt == "xml"
    RelatonBipm::XMLParser.from_xml File.read(path, encoding: "UTF-8")
  end
end
|
|
123
|
+
|
|
114
124
|
#
|
|
115
125
|
# Read English and French files
|
|
116
126
|
#
|
|
@@ -348,8 +358,12 @@ module RelatonBipm
|
|
|
348
358
|
#
|
|
349
359
|
# @return [Hash] title
|
|
350
360
|
#
|
|
351
|
-
def create_title(content, language)
|
|
352
|
-
|
|
361
|
+
def create_title(content, language, format = "text/plain")
  if language == "fr"
    # French ordinals: the first "<digits>e" (e.g. "26e") gets its "e" wrapped
    # in <sup>. A non-destructive `sub` is used so the caller's string is left
    # untouched and frozen strings do not raise FrozenError.
    content = content.sub(/(\d+)(e)/, '\1<sup>\2</sup>')
    # Any <sup> markup (added here or already present) makes the title HTML.
    format = "text/html" if content.include?("<sup>")
  end
  { content: content, language: language, script: "Latn", format: format }
end
|
|
354
368
|
|
|
355
369
|
#
|
|
@@ -541,11 +555,12 @@ module RelatonBipm
|
|
|
541
555
|
end
|
|
542
556
|
|
|
543
557
|
def create_meeting_docids(en_id)
  # French form: English ordinal suffix becomes "e" and "Meeting" is translated.
  french = en_id.sub(/(\d+)(?:st|nd|rd|th)/, '\1e').sub("Meeting", "réunion")
  # Same identifier with the ordinal "e" rendered as superscript.
  french_sup = french.sub(/(\d+)(e)/, '\1<sup>\2</sup>')

  # English, French and combined identifiers, in that order.
  specs = [
    { id: en_id, type: "BIPM", primary: true, language: "en", script: "Latn" },
    { id: french_sup, type: "BIPM", primary: true, language: "fr", script: "Latn" },
    { id: "#{en_id} / #{french_sup}", type: "BIPM", primary: true },
  ]
  specs.map { |attrs| make_docid(**attrs) }
end
|
|
551
566
|
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
module RelatonBipm
  module RawdataBipmMetrologia
    #
    # Collection of organization affiliations parsed from article XML files.
    #
    class Affiliations
      # <aff> texts containing any of these fragments are skipped.
      SKIPPED_FRAGMENTS = [
        "Permanent address:",
        "1005 Southover Lane",
        "Author to whom any correspondence should be addressed",
      ].freeze

      # <aff> texts starting with any of these words are skipped.
      SKIPPED_PREFIXES = %w[Guest Deceased].freeze

      attr_reader :affiliations

      #
      # Initialize the collection
      #
      # @param [Array<RelatonBib::Affiliation>] affiliations parsed affiliations
      #
      def initialize(affiliations)
        @affiliations = affiliations
      end

      #
      # Parse affiliations from the XML files located directly in a directory.
      # Only <aff> elements with an <institution> child are considered, and
      # the result is de-duplicated by organization name.
      #
      # @param [String] dir directory with XML files
      #
      # @return [RelatonBipm::RawdataBipmMetrologia::Affiliations] affiliations
      #
      def self.parse(dir)
        affiliations = Dir["#{dir}/*.xml"].each_with_object([]) do |path, m|
          doc = Nokogiri::XML(File.read(path, encoding: "UTF-8"))
          doc.xpath("//aff").each do |aff|
            next unless aff.at("institution")

            # parse_affiliation returns nil for skipped entries; nil must not
            # reach the uniq block below, which dereferences every element.
            affiliation = parse_affiliation(aff)
            m << affiliation if affiliation
          end
        end.uniq { |a| a.organization.name.first.content }
        new affiliations
      end

      #
      # Parse affiliation organization
      # https://github.com/relaton/relaton-data-bipm/issues/17#issuecomment-1367035444
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [RelatonBib::Affiliation, nil] Organization name, division and
      #   address; nil when the entry is filtered out
      #
      def self.parse_affiliation(aff)
        # An <aff> without a direct text node has nothing to filter on.
        text = aff.at("text()")&.text.to_s
        return if skip?(text)

        args = {}
        institution = aff.at("institution")
        if institution
          name = institution.text
          return if name == "1005 Southover Lane"

          args[:subdivision] = parse_division(aff)
          args[:contact] = parse_address(aff)
        else
          name = aff.text
        end
        args[:name] = [RelatonBib::LocalizedString.new(name)]
        org = RelatonBib::Organization.new(**args)
        RelatonBib::Affiliation.new(organization: org)
      end

      #
      # Tell whether an <aff> text should be ignored.
      #
      # Every condition is checked explicitly. Written without parentheses as
      # `text.include? "a" || text.include?("b") || ...`, the whole `||` chain
      # becomes the argument of the first call and only "a" is ever tested.
      #
      # @param [String] text first text node of the <aff> element
      #
      # @return [Boolean]
      #
      def self.skip?(text)
        text == "Germany" ||
          text.start_with?(*SKIPPED_PREFIXES) ||
          SKIPPED_FRAGMENTS.any? { |fragment| text.include?(fragment) }
      end
      private_class_method :skip?

      #
      # Parse the division: text nodes that precede <institution>, with
      # non-word characters trimmed at the line edges.
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [Array<RelatonBib::LocalizedString>] empty when there is no division
      #
      def self.parse_division(aff)
        div = aff.xpath("text()[following-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
        return [] if div.empty?

        [RelatonBib::LocalizedString.new(div)]
      end

      #
      # Parse the address: text nodes that follow <institution> plus the
      # <country> element, joined with ", ".
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [Array<RelatonBib::Address>] empty when there is no address
      #
      def self.parse_address(aff)
        address = []
        addr = aff.xpath("text()[preceding-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
        address << addr unless addr.empty?
        country = aff.at("country")
        address << country.text if country && !country.text.empty?
        address = address.join(", ")
        return [] if address.empty?

        [RelatonBib::Address.new(formatted_address: address)]
      end

      #
      # Split the text of an <aff> on ", " into organization attributes.
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [Hash, nil] :name and, for two or three elements, :contact;
      #   nil for any other number of elements
      #
      def self.parse_elements(aff)
        elements = aff.xpath("text()").text.strip.split(", ")
        case elements.size
        when 1 then { name: RelatonBib::LocalizedString.new(elements[0]) }
        when 2
          # name, country
          { name: RelatonBib::LocalizedString.new(elements[0]),
            contact: [RelatonBib::Address.new(formatted_address: elements[1])] }
        when 3
          # it can be name, country, city or name, city, country
          # so use formatted_address instead of city and country
          # contact is an array here too, matching the two-element case
          { name: RelatonBib::LocalizedString.new(elements[0]),
            contact: [RelatonBib::Address.new(formatted_address: elements[1, 2].join(", "))] }
        end
      end

      #
      # Find affiliation by organization name
      #
      # @param [String] text string with organization name in it
      #
      # @return [RelatonBib::Affiliation, nil] nil when no organization name
      #   occurs in the text
      #
      def find(text)
        matches = @affiliations.select { |a| text.include?(a.organization.name[0].content) }
        # NOTE(review): this used to be `.sort.last`, which requires the
        # affiliation objects to be comparable; nothing visible here provides
        # that. The longest matching name is taken as the most specific match.
        # Confirm this is the intended ordering.
        matches.max_by { |a| a.organization.name[0].content.size }
      end
    end
  end
end
|
|
@@ -59,12 +59,9 @@ module RelatonBipm
|
|
|
59
59
|
#
|
|
60
60
|
# Parse volume, issue and page
|
|
61
61
|
#
|
|
62
|
-
# @return [
|
|
62
|
+
# @return [String] volume issue page
|
|
63
63
|
#
|
|
64
64
|
def volume_issue_article
  # Missing (nil) parts are dropped before joining, so no stray spaces appear.
  parts = [@journal, @volume, @article].reject(&:nil?)
  parts.join(" ")
end
|
|
70
67
|
|
|
@@ -140,17 +137,53 @@ module RelatonBipm
|
|
|
140
137
|
#
|
|
141
138
|
# @return [Array<RelatonBib::Affiliation>] array of affiliations
|
|
142
139
|
#
|
|
143
|
-
def affiliation(contrib)
|
|
140
|
+
def affiliation(contrib)
  contrib.xpath("./xref[@ref-type='aff']").map do |x|
    aff = @meta.at("./contrib-group/aff[@id='#{x[:rid]}']")
    # An xref may reference an id with no matching <aff> element; skip it
    # rather than handing nil to parse_affiliation.
    parse_affiliation(aff) if aff
  end.compact
end
|
|
146
|
+
|
|
147
|
+
def parse_affiliation(aff)
  # Text, <sup> and <sub> children serialized, then normalized so that the
  # comma-separated pieces are trimmed and empty pieces are removed.
  pieces = aff.xpath("text()|sup|sub").to_xml.split(",").map(&:strip).reject(&:empty?)
  text = CGI::unescapeHTML(pieces.join(", "))

  # Entries that are notes about the author rather than organizations.
  return if text == "Germany" || text.start_with?("Guest", "Deceased")
  return if text.include?("Permanent address:")
  return if text.include?("Author to whom any correspondence should be addressed")

  org_args = {}
  institution = aff.at('institution')
  if institution.nil?
    # No <institution> element: the whole normalized text is the name.
    org_name = text
  else
    org_name = institution.text
    return if org_name == "1005 Southover Lane"

    org_args[:subdivision] = parse_division(aff)
    org_args[:contact] = parse_address(aff)
  end
  org_args[:name] = [RelatonBib::LocalizedString.new(org_name)]
  RelatonBib::Affiliation.new(organization: RelatonBib::Organization.new(**org_args))
end
|
|
169
|
+
|
|
170
|
+
def parse_division(aff)
  # Text nodes located before <institution>, with non-word characters
  # trimmed at the line edges.
  raw = aff.xpath("text()[following-sibling::institution]").text
  division = raw.gsub(/^\W*|\W*$/, "")
  division.empty? ? [] : [RelatonBib::LocalizedString.new(division)]
end
|
|
176
|
+
|
|
177
|
+
def parse_address(aff)
  # Text nodes located after <institution>, with non-word characters
  # trimmed at the line edges.
  street = aff.xpath("text()[preceding-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
  country = aff.at('country')

  parts = []
  parts << street unless street.empty?
  parts << country.text if country && !country.text.empty?
  return [] if parts.empty?

  [RelatonBib::Address.new(formatted_address: parts.join(", "))]
end
|
|
155
188
|
|
|
156
189
|
#
|
|
@@ -173,20 +206,20 @@ module RelatonBipm
|
|
|
173
206
|
#
|
|
174
207
|
# @return [Array<RelatonBib::Forename>] array of forenames
|
|
175
208
|
#
|
|
176
|
-
def forename(given_name) # rubocop:disable Metrics/MethodLength
|
|
177
|
-
|
|
209
|
+
# def forename(given_name) # rubocop:disable Metrics/MethodLength
|
|
210
|
+
# return [] unless given_name
|
|
178
211
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
end
|
|
212
|
+
# given_name.text.scan(/(\w+)(?:\s(\w)(?:\s|$))?/).map do |nm, int|
|
|
213
|
+
# if nm.size == 1
|
|
214
|
+
# name = nil
|
|
215
|
+
# init = nm
|
|
216
|
+
# else
|
|
217
|
+
# name = nm
|
|
218
|
+
# init = int
|
|
219
|
+
# end
|
|
220
|
+
# RelatonBib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
|
|
221
|
+
# end
|
|
222
|
+
# end
|
|
190
223
|
|
|
191
224
|
#
|
|
192
225
|
# Parse date
|
|
@@ -29,8 +29,9 @@ module RelatonBipm
|
|
|
29
29
|
# Fetch articles from rawdata-bipm-metrologia and save to files
|
|
30
30
|
#
|
|
31
31
|
def fetch_articles # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
32
|
+
# aff = Affiliations.parse DIR
|
|
32
33
|
Dir["#{DIR}/**/*.xml"].each do |path|
|
|
33
|
-
item = ArticleParser.parse path
|
|
34
|
+
item = ArticleParser.parse path # , aff
|
|
34
35
|
file = "#{item.docidentifier.first.id.downcase.tr(' ', '-')}.#{@data_fetcher.ext}"
|
|
35
36
|
out_path = File.join(@data_fetcher.output, file)
|
|
36
37
|
key = Id.new.parse(item.docidentifier.first.id).to_hash
|
data/lib/relaton_bipm/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-bipm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.19.
|
|
4
|
+
version: 1.19.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-
|
|
11
|
+
date: 2024-12-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: faraday
|
|
@@ -135,6 +135,7 @@ files:
|
|
|
135
135
|
- lib/relaton_bipm/hash_converter.rb
|
|
136
136
|
- lib/relaton_bipm/id_parser.rb
|
|
137
137
|
- lib/relaton_bipm/processor.rb
|
|
138
|
+
- lib/relaton_bipm/rawdata_bipm_metrologia/affiliations.rb
|
|
138
139
|
- lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb
|
|
139
140
|
- lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb
|
|
140
141
|
- lib/relaton_bipm/structured_identifier.rb
|
|
@@ -149,7 +150,7 @@ licenses:
|
|
|
149
150
|
metadata:
|
|
150
151
|
homepage_uri: https://github.com/relaton/relaton-bipm
|
|
151
152
|
source_code_uri: https://github.com/relaton/relaton-bipm
|
|
152
|
-
post_install_message:
|
|
153
|
+
post_install_message:
|
|
153
154
|
rdoc_options: []
|
|
154
155
|
require_paths:
|
|
155
156
|
- lib
|
|
@@ -164,8 +165,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
164
165
|
- !ruby/object:Gem::Version
|
|
165
166
|
version: '0'
|
|
166
167
|
requirements: []
|
|
167
|
-
rubygems_version: 3.
|
|
168
|
-
signing_key:
|
|
168
|
+
rubygems_version: 3.5.19
|
|
169
|
+
signing_key:
|
|
169
170
|
specification_version: 4
|
|
170
171
|
summary: 'RelatonBipm: retrieve BIPM Standards for bibliographic use using the BibliographicItem
|
|
171
172
|
model'
|