relaton-bipm 1.19.2 → 1.19.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d26cb426c0ebdcd6e4544117b76fabc440ce698ae86511d2fa13022fedc0fa33
4
- data.tar.gz: b0dae1c393906430b924171c4bc9dc0ddc723108372fd7ac351092845153cb6b
3
+ metadata.gz: 3c9261622d7be459730444f639a2b7f91a6606600a8aa1d0e4806036a766235c
4
+ data.tar.gz: 0b285754154788c58a7cfb812c92c5cc54331ea6f52e41dd71c129e59ffa5660
5
5
  SHA512:
6
- metadata.gz: bfcf6fc883da15803965d74ef0faee65f5aabc637b382e55586d63ace8647f988862748a88269fe3bc33c27308c41a2801c5d5bed7c408e736c451e1b7ba1f03
7
- data.tar.gz: 6a1b674e7285565be0fd400f4b27fa9066483fb2500bea43bde9aec6f8c95aa483602d74946b1acb0a669203467a8739bd8e4e5b11eb2ef01958203058651780
6
+ metadata.gz: 573c4e55c1d0cc0ecfc2f96c3dccf31f0a85790a433c9332db3d0f8478bcc2d32330fb7727187102045378cbf9555ede82e05871c54024e04267a16b40629372
7
+ data.tar.gz: 61f92667f0154f11345ca3bec967a2fee19ac2d4795ef81ffc50561f5c554000501e5f0d26a337b903a3be2a2f76cd637026cbb7daaf4206e032bd609aef960e
@@ -88,8 +88,7 @@ module RelatonBipm
88
88
  if @data_fetcher.files.include?(path) && part
89
89
  add_part hash, part
90
90
  item = RelatonBipm::BipmBibliographicItem.new(**hash)
91
- yaml = RelatonBib.parse_yaml(File.read(path, encoding: "UTF-8"), [Date])
92
- has_part_item = RelatonBipm::BipmBibliographicItem.from_hash(yaml)
91
+ has_part_item = parse_file path
93
92
  has_part_item.relation << RelatonBib::DocumentRelation.new(type: "partOf", bibitem: item)
94
93
  @data_fetcher.write_file path, has_part_item, warn_duplicate: false
95
94
  path = File.join dir, "#{num}-#{part}.#{@data_fetcher.ext}"
@@ -111,6 +110,17 @@ module RelatonBipm
111
110
  fetch_resolution body: body, en: en, fr: fr, dir: dir, src: src, num: num
112
111
  end
113
112
 
113
#
# Read a stored document back from disk with the parser that matches the
# data fetcher's output format.
#
# @param [String] path path to the stored document
#
# @return [Object, nil] the parsed item (presumably a
#   RelatonBipm::BipmBibliographicItem - confirm for the XML branch), or nil
#   when the format is neither "yaml" nor "xml"
#
def parse_file(path)
  # The file is only read once a supported format has been selected.
  read = -> { File.read(path, encoding: "UTF-8") }
  case @data_fetcher.format
  when "yaml"
    RelatonBipm::BipmBibliographicItem.from_hash(RelatonBib.parse_yaml(read.call, [Date]))
  when "xml"
    RelatonBipm::XMLParser.from_xml(read.call)
  end
end
123
+
114
124
  #
115
125
  # Read English and French files
116
126
  #
@@ -348,8 +358,12 @@ module RelatonBipm
348
358
  #
349
359
  # @return [Hash] title
350
360
  #
351
def create_title(content, language, format = "text/plain")
  if language == "fr"
    # French ordinals ("26e") are rendered with a superscript suffix.
    # String#sub (not sub!) leaves the caller's string untouched and also
    # works on frozen strings. The trailing \b keeps words such as "1er"
    # from being rewritten to "1<sup>e</sup>r".
    content = content.sub(/(\d+)(e)\b/, '\1<sup>\2</sup>')
    # Any superscript markup (added here or already present) makes it HTML.
    format = "text/html" if content.include?("<sup>")
  end
  { content: content, language: language, script: "Latn", format: format }
end
354
368
 
355
369
  #
@@ -541,11 +555,12 @@ module RelatonBipm
541
555
  end
542
556
 
543
557
  def create_meeting_docids(en_id)
544
- fr_id = en_id.sub(/(\d+)(?:st|nd|rd|th)/, '\1e').sub("Meeting", "Réunion")
558
+ fr_id = en_id.sub(/(\d+)(?:st|nd|rd|th)/, '\1e').sub("Meeting", "réunion")
559
+ fr_id_sup = fr_id.sub(/(\d+)(e)/, '\1<sup>\2</sup>')
545
560
  [
546
561
  make_docid(id: en_id, type: "BIPM", primary: true, language: "en", script: "Latn"),
547
- make_docid(id: fr_id, type: "BIPM", primary: true, language: "fr", script: "Latn"),
548
- make_docid(id: "#{en_id} / #{fr_id}", type: "BIPM", primary: true),
562
+ make_docid(id: fr_id_sup, type: "BIPM", primary: true, language: "fr", script: "Latn"),
563
+ make_docid(id: "#{en_id} / #{fr_id_sup}", type: "BIPM", primary: true),
549
564
  ]
550
565
  end
551
566
 
@@ -0,0 +1,111 @@
1
module RelatonBipm
  module RawdataBipmMetrologia
    #
    # Collection of affiliations harvested from article XML files, searchable
    # by organization name.
    #
    class Affiliations
      # <aff> text containing any of these fragments is skipped
      # (presumably footnotes or personal addresses rather than organizations).
      SKIPPED_FRAGMENTS = [
        "Permanent address:",
        "1005 Southover Lane",
        "Author to whom any correspondence should be addressed",
      ].freeze

      # <aff> text starting with any of these prefixes is skipped.
      SKIPPED_PREFIXES = %w[Guest Deceased].freeze

      attr_reader :affiliations

      #
      # Initialize collection
      #
      # @param [Array<RelatonBib::Affiliation>] affiliations parsed affiliations
      #
      def initialize(affiliations)
        @affiliations = affiliations
      end

      #
      # Parse affiliations from every XML file directly inside a directory
      #
      # @param [String] dir directory with XML files
      #
      # @return [RelatonBipm::RawdataBipmMetrologia::Affiliations] affiliations,
      #   unique by the first organization name
      #
      def self.parse(dir)
        affiliations = Dir["#{dir}/*.xml"].each_with_object([]) do |path, m|
          doc = Nokogiri::XML(File.read(path, encoding: "UTF-8"))
          doc.xpath("//aff").each do |aff|
            next unless aff.at("institution")

            # parse_affiliation returns nil for skipped entries; keeping nil
            # out of the list protects the uniq block below.
            affiliation = parse_affiliation(aff)
            m << affiliation if affiliation
          end
        end
        new(affiliations.uniq { |a| a.organization.name.first.content })
      end

      #
      # Parse affiliation organization
      # https://github.com/relaton/relaton-data-bipm/issues/17#issuecomment-1367035444
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [RelatonBib::Affiliation, nil] organization name, division and
      #   address, or nil when the entry is skipped
      #
      def self.parse_affiliation(aff)
        # An <aff> without a direct text node is treated as having empty text.
        text = aff.at("text()")&.text.to_s
        return if skip_text?(text)

        args = {}
        institution = aff.at("institution")
        if institution
          name = institution.text
          return if name == "1005 Southover Lane"

          args[:subdivision] = parse_division(aff)
          args[:contact] = parse_address(aff)
        else
          name = aff.text
        end
        args[:name] = [RelatonBib::LocalizedString.new(name)]
        org = RelatonBib::Organization.new(**args)
        RelatonBib::Affiliation.new(organization: org)
      end

      #
      # Check whether the leading text of an <aff> marks an entry to skip
      #
      # @param [String] text first text node of the <aff> element
      #
      # @return [Boolean]
      #
      def self.skip_text?(text)
        text == "Germany" || text.start_with?(*SKIPPED_PREFIXES) ||
          SKIPPED_FRAGMENTS.any? { |fragment| text.include?(fragment) }
      end
      private_class_method :skip_text?

      #
      # Parse division: the text that precedes the <institution> element
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [Array<RelatonBib::LocalizedString>] empty when there is no division
      #
      def self.parse_division(aff)
        # Strip non-word characters (separators, spaces) from both ends.
        div = aff.xpath("text()[following-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
        return [] if div.empty?

        [RelatonBib::LocalizedString.new(div)]
      end

      #
      # Parse address: the text that follows the <institution> element plus
      # the <country> element, joined into one formatted address
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [Array<RelatonBib::Address>] empty when there is no address
      #
      def self.parse_address(aff)
        address = []
        addr = aff.xpath("text()[preceding-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
        address << addr unless addr.empty?
        country = aff.at("country")
        address << country.text if country && !country.text.empty?
        address = address.join(", ")
        return [] if address.empty?

        [RelatonBib::Address.new(formatted_address: address)]
      end

      #
      # Split plain affiliation text on ", " into a name and an address
      #
      # @param [Nokogiri::XML::Element] aff
      #
      # @return [Hash, nil] :name and, when present, :contact (always an Array);
      #   nil when the text is empty or has more than three parts
      #
      def self.parse_elements(aff)
        name, *rest = aff.xpath("text()").text.strip.split(", ")
        return if name.nil? || rest.size > 2

        result = { name: RelatonBib::LocalizedString.new(name) }
        # The remainder can be "country", "city, country" or "country, city",
        # so it is kept as a formatted address instead of separate fields.
        unless rest.empty?
          result[:contact] = [RelatonBib::Address.new(formatted_address: rest.join(", "))]
        end
        result
      end

      #
      # Find affiliation by organization name
      #
      # @param [String] text string with organization name in it
      #
      # @return [RelatonBib::Affiliation, nil] the affiliation with the longest
      #   organization name contained in the text, or nil when none matches
      #
      def find(text)
        # Affiliations are not comparable, so candidates are ranked by the
        # length of the matched name instead of being sorted.
        @affiliations
          .select { |a| text.include?(a.organization.name[0].content) }
          .max_by { |a| a.organization.name[0].content.size }
      end
    end
  end
end
@@ -59,12 +59,9 @@ module RelatonBipm
59
59
  #
60
60
  # Parse volume, issue and page
61
61
  #
62
- # @return [Array<String>] array of volume, issue and page
62
+ # @return [String] volume issue page
63
63
  #
64
64
  def volume_issue_article
65
- # volume = @meta.at("./volume").text
66
- # issue = @meta.at("./issue").text
67
- # page = @doc.at("./front/article-meta/fpage")&.text || manuscript
68
65
  [@journal, @volume, @article].compact.join(" ")
69
66
  end
70
67
 
@@ -140,17 +137,53 @@ module RelatonBipm
140
137
  #
141
138
  # @return [Array<RelatonBib::Affiliation>] array of affiliations
142
139
  #
143
def affiliation(contrib)
  contrib.xpath("./xref[@ref-type='aff']").map do |xref|
    aff = @meta.at("./contrib-group/aff[@id='#{xref[:rid]}']")
    # A dangling xref (no <aff> with that id) contributes no affiliation
    # instead of handing nil to the parser.
    parse_affiliation(aff) if aff
  end.compact
end
146
+
147
#
# Parse affiliation organization from an <aff> element
#
# @param [Nokogiri::XML::Element, nil] aff
#
# @return [RelatonBib::Affiliation, nil] affiliation, or nil when the element
#   is missing or its text marks an entry to skip
#
def parse_affiliation(aff)
  return unless aff

  # Serialize direct text plus <sup>/<sub> children, then normalize the
  # comma-separated parts (trim each one, drop empty ones).
  text = aff.xpath("text()|sup|sub").to_xml.split(",").map(&:strip).reject(&:empty?).join(", ")
  text = CGI.unescapeHTML(text)
  return if text.include?("Permanent address:") || text == "Germany" ||
    text.start_with?("Guest") || text.start_with?("Deceased") ||
    text.include?("Author to whom any correspondence should be addressed")

  args = {}
  institution = aff.at("institution")
  if institution
    name = institution.text
    return if name == "1005 Southover Lane"

    args[:subdivision] = parse_division(aff)
    args[:contact] = parse_address(aff)
  else
    # Without an <institution> element the whole normalized text is the name.
    name = text
  end
  args[:name] = [RelatonBib::LocalizedString.new(name)]
  org = RelatonBib::Organization.new(**args)
  RelatonBib::Affiliation.new(organization: org)
end
169
+
170
#
# Parse division: the text that precedes the <institution> element
#
# @param [Nokogiri::XML::Element] aff
#
# @return [Array<RelatonBib::LocalizedString>] empty when there is no division
#
def parse_division(aff)
  raw = aff.xpath("text()[following-sibling::institution]").text
  # Strip non-word characters (separators, spaces) from both ends.
  division = raw.gsub(/^\W*|\W*$/, "")
  division.empty? ? [] : [RelatonBib::LocalizedString.new(division)]
end
176
+
177
#
# Parse address: the text that follows the <institution> element plus the
# <country> element, joined into one formatted address
#
# @param [Nokogiri::XML::Element] aff
#
# @return [Array<RelatonBib::Address>] empty when there is no address
#
def parse_address(aff)
  # Strip non-word characters (separators, spaces) from both ends.
  street = aff.xpath("text()[preceding-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
  country = aff.at('country')
  parts = []
  parts << street unless street.empty?
  parts << country.text if country && !country.text.empty?
  formatted = parts.join(", ")
  formatted.empty? ? [] : [RelatonBib::Address.new(formatted_address: formatted)]
end
155
188
 
156
189
  #
@@ -173,20 +206,20 @@ module RelatonBipm
173
206
  #
174
207
  # @return [Array<RelatonBib::Forename>] array of forenames
175
208
  #
176
- def forename(given_name) # rubocop:disable Metrics/MethodLength
177
- return [] unless given_name
209
+ # def forename(given_name) # rubocop:disable Metrics/MethodLength
210
+ # return [] unless given_name
178
211
 
179
- given_name.text.scan(/(\w+)(?:\s(\w)(?:\s|$))?/).map do |nm, int|
180
- if nm.size == 1
181
- name = nil
182
- init = nm
183
- else
184
- name = nm
185
- init = int
186
- end
187
- RelatonBib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
188
- end
189
- end
212
+ # given_name.text.scan(/(\w+)(?:\s(\w)(?:\s|$))?/).map do |nm, int|
213
+ # if nm.size == 1
214
+ # name = nil
215
+ # init = nm
216
+ # else
217
+ # name = nm
218
+ # init = int
219
+ # end
220
+ # RelatonBib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
221
+ # end
222
+ # end
190
223
 
191
224
  #
192
225
  # Parse date
@@ -29,8 +29,9 @@ module RelatonBipm
29
29
  # Fetch articles from rawdata-bipm-metrologia and save to files
30
30
  #
31
31
  def fetch_articles # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
32
+ # aff = Affiliations.parse DIR
32
33
  Dir["#{DIR}/**/*.xml"].each do |path|
33
- item = ArticleParser.parse path
34
+ item = ArticleParser.parse path # , aff
34
35
  file = "#{item.docidentifier.first.id.downcase.tr(' ', '-')}.#{@data_fetcher.ext}"
35
36
  out_path = File.join(@data_fetcher.output, file)
36
37
  key = Id.new.parse(item.docidentifier.first.id).to_hash
@@ -1,3 +1,3 @@
1
1
  module RelatonBipm
2
- VERSION = "1.19.2".freeze
2
+ VERSION = "1.19.3".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relaton-bipm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.19.2
4
+ version: 1.19.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-08-08 00:00:00.000000000 Z
11
+ date: 2024-12-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: faraday
@@ -135,6 +135,7 @@ files:
135
135
  - lib/relaton_bipm/hash_converter.rb
136
136
  - lib/relaton_bipm/id_parser.rb
137
137
  - lib/relaton_bipm/processor.rb
138
+ - lib/relaton_bipm/rawdata_bipm_metrologia/affiliations.rb
138
139
  - lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb
139
140
  - lib/relaton_bipm/rawdata_bipm_metrologia/fetcher.rb
140
141
  - lib/relaton_bipm/structured_identifier.rb
@@ -149,7 +150,7 @@ licenses:
149
150
  metadata:
150
151
  homepage_uri: https://github.com/relaton/relaton-bipm
151
152
  source_code_uri: https://github.com/relaton/relaton-bipm
152
- post_install_message:
153
+ post_install_message:
153
154
  rdoc_options: []
154
155
  require_paths:
155
156
  - lib
@@ -164,8 +165,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
164
165
  - !ruby/object:Gem::Version
165
166
  version: '0'
166
167
  requirements: []
167
- rubygems_version: 3.3.27
168
- signing_key:
168
+ rubygems_version: 3.5.19
169
+ signing_key:
169
170
  specification_version: 4
170
171
  summary: 'RelatonBipm: retrieve BIPM Standards for bibliographic use using the BibliographicItem
171
172
  model'