iev 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/exe/iev CHANGED
@@ -9,7 +9,7 @@ require "creek"
9
9
  require "glossarist"
10
10
  require "nokogiri"
11
11
  require "relaton"
12
- require "relaton_bib"
12
+ require "relaton/bib"
13
13
  require "sequel"
14
14
  require "thor"
15
15
 
data/iev.gemspec CHANGED
@@ -22,7 +22,7 @@ Gem::Specification.new do |spec|
22
22
  spec.required_ruby_version = Gem::Requirement.new(">= 3.2.0")
23
23
 
24
24
  spec.add_dependency "creek", "~> 2.6"
25
- spec.add_dependency "glossarist", ">= 2.3.0"
25
+ spec.add_dependency "glossarist", "~> 2.6", ">= 2.6.7"
26
26
  spec.add_dependency "ferrum", "~> 0.15"
27
27
  spec.add_dependency "nokogiri", "~> 1.19"
28
28
  spec.add_dependency "plurimath"
@@ -142,6 +142,30 @@ module Iev
142
142
  summary
143
143
  end
144
144
 
145
+ desc "subject_areas", "Fetch IEV subject areas and sections from Electropedia."
146
+ option :output, desc: "Output YAML file (default: stdout)", aliases: :o
147
+ option :refresh, type: :boolean, default: false,
148
+ desc: "Force re-fetch even if cached"
149
# CLI command body: obtains the subject areas from Iev::SubjectAreas.fetch and
# emits them as YAML, either into the file named by --output or onto stdout.
#
# With --refresh, the subject_areas.yaml file under Iev.config.cache_dir is
# deleted before fetching (presumably the cache SubjectAreas.fetch consults --
# confirm against that class).
#
# An Iev::SubjectAreas::FetchError is reported through `error` and ends the
# process with exit status 1.
def subject_areas
  if options[:refresh]
    # rm_f silently ignores a missing file, so no existence check is required.
    FileUtils.rm_f(File.join(Iev.config.cache_dir, "subject_areas.yaml"))
  end

  yaml = YAML.dump(Iev::SubjectAreas.fetch)
  destination = options[:output]

  if destination
    File.write(destination, yaml, encoding: "utf-8")
    puts "Written to #{destination}"
  else
    puts yaml
  end
rescue Iev::SubjectAreas::FetchError => e
  error e.message
  exit 1
end
168
+
145
169
  desc "fetch CODE", "Fetch an IEV concept and output YAML to stdout."
146
170
  option :scrape, type: :boolean, default: false,
147
171
  desc: "Scrape from Electropedia instead of using cached data"
@@ -111,8 +111,7 @@ module Iev
111
111
 
112
112
  definition = entry["definition"]
113
113
  if definition
114
- content = definition.is_a?(String) ? definition : definition
115
- cd.definition = [Glossarist::DetailedDefinition.new(content: content)]
114
+ cd.definition = [Glossarist::DetailedDefinition.new(content: definition)]
116
115
  end
117
116
 
118
117
  l10n = Glossarist::LocalizedConcept.new
data/lib/iev/exporter.rb CHANGED
@@ -28,16 +28,19 @@ module Iev
28
28
  # @param only_concepts [String, nil] SQL LIKE pattern for IEVREF filtering
29
29
  # @param only_languages [String, nil] comma-separated language codes
30
30
  # @param fetch_relaton_links [Boolean] fetch source URLs via Relaton
31
+ # @param include_areas [Boolean] create area/section hierarchy concepts
31
32
  # @param on_progress [Proc, nil] callback (current, total) during build
32
33
  def initialize(input_path, output_dir: Dir.pwd,
33
34
  only_concepts: nil, only_languages: nil,
34
35
  fetch_relaton_links: false,
36
+ include_areas: true,
35
37
  on_progress: nil)
36
38
  @input_path = Pathname.new(input_path)
37
39
  validate_input!
38
40
 
39
41
  @output_dir = Pathname.new(output_dir)
40
42
  @fetch_relaton_links = fetch_relaton_links
43
+ @include_areas = include_areas
41
44
  @on_progress = on_progress
42
45
  @filters = {
43
46
  only_concepts: only_concepts,
@@ -51,6 +54,8 @@ module Iev
51
54
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
52
55
  dataset = load_dataset
53
56
  collection = build_collection(dataset)
57
+ add_subject_area_concepts(collection) if @include_areas
58
+ build_section_narrower_relations(collection) if @include_areas
54
59
  save_collection(collection)
55
60
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
56
61
 
@@ -81,7 +86,7 @@ module Iev
81
86
 
82
87
  exts = (XLSX_EXTENSIONS + SQLITE_EXTENSIONS).join(", ")
83
88
  raise ArgumentError,
84
- "Unsupported format: #{input_path.extname}. Supported: #{exts}"
89
+ "Unsupported format: #{input_path.extname}. Supported: #{exts}"
85
90
  end
86
91
 
87
92
  def input_format
@@ -137,10 +142,16 @@ module Iev
137
142
 
138
143
  concept = concept_index[term.id] ||= begin
139
144
  c = Glossarist::ManagedConcept.new(data: { "id" => term.id })
145
+ c.uuid = term.id
146
+ c.data.domains = domain_references_for(term.id)
147
+ add_section_broader(c, term.id)
140
148
  collection.store(c)
141
149
  c
142
150
  end
143
151
  concept.add_l10n(term)
152
+
153
+ promote_supersession(concept, term)
154
+ set_managed_status(concept, term)
144
155
  end
145
156
 
146
157
  collection
@@ -148,14 +159,109 @@ module Iev
148
159
  SourceParser.relaton_enabled = true
149
160
  end
150
161
 
162
# Delegates to SubjectAreaConcepts.add_to, handing it the collection being
# exported; whatever that call returns is returned unchanged.
def add_subject_area_concepts(collection) = SubjectAreaConcepts.add_to(collection)
165
+
151
166
  def save_collection(collection)
152
167
  concepts_dir = output_dir.expand_path.join("concepts")
153
168
  FileUtils.mkdir_p(concepts_dir)
154
- collection.save_to_files(concepts_dir.to_s)
169
+ collection.save_grouped_concepts_to_files(concepts_dir.to_s)
155
170
  end
156
171
 
157
172
  def localized_count(collection)
158
173
  collection.sum { |c| c.localized_concepts.count }
159
174
  end
175
+
176
# Value passed as `source:` on every domain reference built below.
IEV_SOURCE = "urn:iec:std:iec:60050"

# Builds the "domain" concept references for an IEV reference.
#
# The reference is decomposed with IevCode; one ConceptReference is produced
# for the area (when the code has an area code) followed by one for the
# section (when the code has a section code).
#
# @param ievref [#to_s] IEV reference, e.g. "103-01-02"
# @return [Array<Glossarist::ConceptReference>] zero, one or two references,
#   area first
def domain_references_for(ievref)
  code = IevCode.new(ievref.to_s)

  domain_ids = []
  domain_ids << code.area_uri if code.area_code
  domain_ids << code.section_uri if code.section_code

  domain_ids.map do |concept_id|
    Glossarist::ConceptReference.new(
      concept_id: concept_id,
      source: IEV_SOURCE,
      ref_type: "domain",
    )
  end
end
197
+
198
# Appends a "broader" relation from the concept to the section its IEV
# reference belongs to.
#
# Nothing happens when the reference has no section part. `concept.related`
# is initialised to an empty array if unset, and the relation is not added a
# second time when a "broader" relation to the same section id is present.
#
# @param concept [Glossarist::ManagedConcept] concept to annotate
# @param ievref [#to_s] IEV reference of that concept
def add_section_broader(concept, ievref)
  section_uri = IevCode.new(ievref.to_s).section_uri
  return unless section_uri

  concept.related ||= []
  already_linked = concept.related.any? do |relation|
    relation.type == "broader" && relation.ref&.id == section_uri
  end
  return if already_linked

  section_ref = Glossarist::ConceptRef.new(source: "IEV", id: section_uri)
  concept.related << Glossarist::RelatedConcept.new(
    type: "broader",
    content: section_uri,
    ref: section_ref,
  )
end
213
+
214
# Adds "narrower" relations from each section concept to the concepts whose
# IEV code falls under that section.
#
# Concepts are indexed by `data.id` (entries without an id are ignored).
# Every id is decomposed with IevCode and grouped under its section URI; a
# group is only applied when the collection contains a concept whose id
# equals that section URI. Children are appended in sorted id order.
#
# @param collection [Enumerable] managed concepts, including section concepts
def build_section_narrower_relations(collection)
  by_id = {}
  collection.each do |managed|
    id = managed.data&.id
    by_id[id] = managed if id
  end

  children_by_section =
    by_id.keys
      .group_by { |concept_id| IevCode.new(concept_id).section_uri }
      .reject { |section_uri, _ids| section_uri.nil? }

  children_by_section.each do |section_uri, child_ids|
    section = by_id[section_uri]
    next unless section

    narrower = child_ids.sort.map do |child_id|
      child_ref = Glossarist::ConceptRef.new(source: "IEV", id: child_id)
      Glossarist::RelatedConcept.new(
        type: "narrower",
        content: child_id,
        ref: child_ref,
      )
    end

    section.related ||= []
    section.related.concat(narrower)
  end
end
243
+
244
# Moves the relations stored on a localization (`term.data.related`) up to
# the managed concept.
#
# Every entry of `term.data.related` is considered -- the code does not filter
# by relation type. An entry is skipped when the concept already holds a
# relation with the same type and the same `ref.id`. Afterwards the
# localization's list is cleared (set to nil).
#
# NOTE(review): the intent appears to be supersession only (the REPLACES
# column is per-concept, not per-language); confirm no other relation types
# reach this point.
#
# @param concept [#related, #related=] managed concept receiving the relations
# @param term [#data] localized concept whose relations are promoted
def promote_supersession(concept, term)
  localized_relations = term.data&.related
  return if localized_relations.nil? || localized_relations.none?

  concept.related ||= []
  localized_relations.each do |relation|
    duplicate = concept.related.any? do |existing|
      existing.type == relation.type && existing.ref&.id == relation.ref&.id
    end
    concept.related << relation unless duplicate
  end

  term.data.related = nil
end
258
+
259
# Copies the localization's `entry_status` onto the managed concept.
#
# An existing concept status is never overwritten, and a nil or empty
# `entry_status` is ignored.
#
# @param concept [#status, #status=] managed concept
# @param term [#entry_status] localized concept supplying the status
def set_managed_status(concept, term)
  unless concept.status
    candidate = term.entry_status
    concept.status = candidate unless !candidate || candidate.empty?
  end
end
160
266
  end
161
267
  end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
module Iev
  # Immutable value object that decomposes an IEV concept code
  # into its structural parts: area code, section code, and number.
  #
  # The code is split on "-":
  #   first part            = area code      (e.g. "103")
  #   first two parts       = section code   (e.g. "103-01")
  #   third part            = concept number (e.g. "02")
  #
  # @example Full concept code
  #   code = Iev::IevCode.new("103-01-02")
  #   code.area_code    #=> "103"
  #   code.section_code #=> "103-01"
  #   code.number       #=> "02"
  #   code.area_uri     #=> "area-103"
  #   code.section_uri  #=> "section-103-01"
  #
  # @example Section code (no concept number)
  #   code = Iev::IevCode.new("103-01")
  #   code.area_code    #=> "103"
  #   code.section_code #=> "103-01"
  #   code.number       #=> nil
  #   code.section_uri  #=> "section-103-01"
  #
  class IevCode
    include Comparable

    attr_reader :raw, :area_code, :section_code, :number

    # No validation is performed here: any value is accepted and split on "-".
    #
    # @param code [#to_s] IEV reference, e.g. "103-01-02"
    def initialize(code)
      # Take a private frozen copy. String#to_s returns the receiver itself,
      # so without the copy a caller mutating its own string afterwards would
      # change #raw (and therefore #hash) of this supposedly immutable object.
      @raw = code.to_s.dup.freeze
      area, sub, num = @raw.split("-")
      @area_code = area.freeze
      @section_code = sub && "#{area}-#{sub}".freeze
      @number = num.freeze
      freeze
    end

    # @return [String, nil] "area-<area_code>", or nil when there is no area
    #   code (empty input)
    def area_uri
      "area-#{area_code}" if area_code
    end

    # @return [String, nil] "section-<section_code>", or nil when the code has
    #   fewer than two parts
    def section_uri
      "section-#{section_code}" if section_code
    end

    def to_s
      @raw
    end

    # Allows an IevCode to be used where a String is implicitly expected.
    def to_str
      @raw
    end

    # Equal only to another IevCode with the same raw text (never to a String).
    def ==(other)
      other.is_a?(self.class) && raw == other.raw
    end
    alias_method :eql?, :==

    def hash
      raw.hash
    end

    # Orders by plain string comparison of the textual form.
    def <=>(other)
      to_s <=> other.to_s
    end

    # Safe constructor that returns nil for codes that don't parse.
    #
    # A nil or blank code carries no area code and is reported as nil. The
    # ArgumentError rescue is kept so that a constructor which rejects input
    # is also reported as nil.
    #
    # @param code [#to_s]
    # @return [IevCode, nil]
    def self.parse(code)
      return nil if code.to_s.strip.empty?

      new(code)
    rescue ArgumentError
      nil
    end
  end
end
@@ -6,7 +6,7 @@
6
6
  module Iev
7
7
  # @todo This needs to be rewritten.
8
8
  class Iso639Code
9
- COUNTRY_CODES = YAML.load(IO.read(File.join(__dir__, "iso_639_2.yaml")))
9
+ COUNTRY_CODES = YAML.safe_load(IO.read(File.join(__dir__, "iso_639_2.yaml")), permitted_classes: [Symbol]).freeze
10
10
  # rubocop:disable Style/MutableConstant
11
11
  THREE_CHAR_MEMO = {} # Memoization cache, must be mutable
12
12
  # rubocop:enable Style/MutableConstant
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ferrum"
4
+
5
module Iev
  # Shared headless browser utilities for fetching pages behind AWS WAF.
  module ScraperBrowser
    # Chrome User-Agent strings paired with the matching platform hint and
    # major version; one profile is picked at random per request.
    USER_AGENT_PROFILES = [
      {
        user_agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " \
                    "AppleWebKit/537.36 (KHTML, like Gecko) " \
                    "Chrome/131.0.0.0 Safari/537.36",
        platform: '"macOS"',
        chrome_version: "131",
      },
      {
        user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
                    "AppleWebKit/537.36 (KHTML, like Gecko) " \
                    "Chrome/130.0.0.0 Safari/537.36",
        platform: '"Windows"',
        chrome_version: "130",
      },
      {
        user_agent: "Mozilla/5.0 (X11; Linux x86_64) " \
                    "AppleWebKit/537.36 (KHTML, like Gecko) " \
                    "Chrome/131.0.0.0 Safari/537.36",
        platform: '"Linux"',
        chrome_version: "131",
      },
      {
        user_agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " \
                    "AppleWebKit/537.36 (KHTML, like Gecko) " \
                    "Chrome/129.0.0.0 Safari/537.36",
        platform: '"macOS"',
        chrome_version: "129",
      },
      {
        user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
                    "AppleWebKit/537.36 (KHTML, like Gecko) " \
                    "Chrome/131.0.0.0 Safari/537.36",
        platform: '"Windows"',
        chrome_version: "131",
      },
    ].freeze

    # Chrome command-line flags that are always passed unless the caller
    # overrides the same flag.
    DEFAULT_BROWSER_FLAGS = {
      "disable-blink-features" => "AutomationControlled",
    }.freeze

    # Ferrum::Browser options used when the caller does not override them.
    DEFAULT_LAUNCH_OPTIONS = {
      headless: "new",
      timeout: 30,
      window_size: [1366, 768],
    }.freeze

    # Substrings whose presence in the page body is treated as a WAF block.
    WAF_BLOCK_MARKERS = ["403 ERROR", "Request blocked"].freeze

    # Fetch a URL using headless Chrome, returning the page HTML.
    # Handles AWS WAF challenge pages by waiting for JS execution.
    #
    # @param url [String] page to load
    # @param browser_opts [Hash] extra Ferrum::Browser options; they take
    #   precedence over the defaults. A :browser_options entry is merged into
    #   the default Chrome flags instead of replacing them.
    # @return [String, nil] page HTML, or nil when the body contains a WAF
    #   block marker or the browser raised a Ferrum error (a warning is
    #   printed in both cases)
    def self.fetch(url, browser_opts: {})
      browser = Ferrum::Browser.new(**launch_options(browser_opts))

      browser.headers.set(random_headers)
      browser.go_to(url)
      browser.network.wait_for_idle(timeout: 15)
      html = browser.body

      if WAF_BLOCK_MARKERS.any? { |marker| html.include?(marker) }
        warn "IEV: AWS WAF blocked request for #{url}"
        return nil
      end

      html
    rescue Ferrum::Error, Ferrum::BrowserError => e
      warn "IEV: Browser error fetching #{url}: #{e.message}"
      nil
    ensure
      # browser is nil when the constructor itself raised.
      browser&.quit
    end

    # Builds one set of request headers from a randomly chosen profile so the
    # User-Agent and the Sec-Ch-Ua* client hints agree with each other.
    #
    # @return [Hash{String => String}]
    def self.random_headers
      profile = USER_AGENT_PROFILES.sample
      sec_ch_ua = "\"Google Chrome\";v=\"#{profile[:chrome_version]}\", " \
                  "\"Chromium\";v=\"#{profile[:chrome_version]}\", " \
                  "\"Not_A Brand\";v=\"24\""

      {
        "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9," \
                    "image/avif,image/webp,image/apng,*/*;q=0.8," \
                    "application/signed-exchange;v=b3;q=0.7",
        "Accept-Language" => "en-GB,en-US;q=0.9,en;q=0.8",
        "Cache-Control" => "no-cache",
        "Pragma" => "no-cache",
        "Sec-Ch-Ua" => sec_ch_ua,
        "Sec-Ch-Ua-Mobile" => "?0",
        "Sec-Ch-Ua-Platform" => profile[:platform],
        "Sec-Fetch-Dest" => "document",
        "Sec-Fetch-Mode" => "navigate",
        "Sec-Fetch-Site" => "cross-site",
        "Sec-Fetch-User" => "?1",
        "Upgrade-Insecure-Requests" => "1",
        "User-Agent" => profile[:user_agent],
      }
    end

    # Combines the defaults with the caller's options. Caller values win, but
    # :browser_options is merged key by key so that passing an unrelated
    # Chrome flag does not discard the default flags.
    def self.launch_options(browser_opts)
      overrides = browser_opts.to_h
      flags = DEFAULT_BROWSER_FLAGS.merge(overrides[:browser_options] || {})
      DEFAULT_LAUNCH_OPTIONS.merge(overrides).merge(browser_options: flags)
    end
    private_class_method :launch_options
  end
end
data/lib/iev/scraper.rb CHANGED
@@ -1,59 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "nokogiri"
4
+
3
5
  module Iev
4
- # Scrapes IEV term data from Electropedia (electropedia.org).
5
- #
6
- # Electropedia is behind AWS WAF which requires JavaScript execution,
7
- # so a headless browser (via Ferrum/Chrome) is used to handle the challenge.
8
- #
9
- # @example
10
- # scraper = Iev::Scraper.new
11
- # concept = scraper.fetch_concept("103-01-02")
12
- # doc = scraper.fetch_page("103-01-02")
13
6
  class Scraper
14
7
  BASE_URL = "https://www.electropedia.org/iev/iev.nsf/" \
15
8
  "display?openform&ievref="
16
9
 
17
- # Pool of realistic Chrome User-Agent strings with matching platform hints.
18
- # Rotated per request to reduce fingerprinting by AWS WAF.
19
- USER_AGENT_PROFILES = [
20
- {
21
- user_agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " \
22
- "AppleWebKit/537.36 (KHTML, like Gecko) " \
23
- "Chrome/131.0.0.0 Safari/537.36",
24
- platform: '"macOS"',
25
- chrome_version: "131",
26
- },
27
- {
28
- user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
29
- "AppleWebKit/537.36 (KHTML, like Gecko) " \
30
- "Chrome/130.0.0.0 Safari/537.36",
31
- platform: '"Windows"',
32
- chrome_version: "130",
33
- },
34
- {
35
- user_agent: "Mozilla/5.0 (X11; Linux x86_64) " \
36
- "AppleWebKit/537.36 (KHTML, like Gecko) " \
37
- "Chrome/131.0.0.0 Safari/537.36",
38
- platform: '"Linux"',
39
- chrome_version: "131",
40
- },
41
- {
42
- user_agent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " \
43
- "AppleWebKit/537.36 (KHTML, like Gecko) " \
44
- "Chrome/129.0.0.0 Safari/537.36",
45
- platform: '"macOS"',
46
- chrome_version: "129",
47
- },
48
- {
49
- user_agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
50
- "AppleWebKit/537.36 (KHTML, like Gecko) " \
51
- "Chrome/131.0.0.0 Safari/537.36",
52
- platform: '"Windows"',
53
- chrome_version: "131",
54
- },
55
- ].freeze
56
-
57
10
  def initialize(browser_opts: {})
58
11
  @browser_opts = browser_opts
59
12
  end
@@ -61,37 +14,10 @@ module Iev
61
14
  # Fetch the Electropedia page HTML for a given IEV code.
62
15
  # Returns a Nokogiri document.
63
16
  def fetch_page(code)
64
- require "ferrum"
65
- require "nokogiri"
66
-
67
- url = "#{BASE_URL}#{code}"
68
- browser = Ferrum::Browser.new(
69
- headless: "new",
70
- timeout: 30,
71
- window_size: [1366, 768],
72
- browser_options: {
73
- "disable-blink-features" => "AutomationControlled",
74
- },
75
- **@browser_opts,
76
- )
77
-
78
- browser.headers.set(random_headers)
79
- browser.go_to(url)
80
- browser.network.wait_for_idle(timeout: 15)
81
- html = browser.body
82
-
83
- # Check if we got a real page or a WAF block
84
- if html.include?("403 ERROR") || html.include?("Request blocked")
85
- warn "IEV Scraper: AWS WAF blocked request for #{code}"
86
- return nil
87
- end
17
+ html = ScraperBrowser.fetch("#{BASE_URL}#{code}", browser_opts: @browser_opts)
18
+ return nil unless html
88
19
 
89
20
  Nokogiri::HTML(html)
90
- rescue Ferrum::Error, Ferrum::BrowserError => e
91
- warn "IEV Scraper error for #{code}: #{e.message}"
92
- nil
93
- ensure
94
- browser&.quit
95
21
  end
96
22
 
97
23
  # Fetch and parse concept data for an IEV code.
@@ -102,34 +28,8 @@ module Iev
102
28
 
103
29
  PageParser.new(doc, code).parse
104
30
  end
105
-
106
- private
107
-
108
- def random_headers
109
- profile = USER_AGENT_PROFILES.sample
110
- sec_ch_ua = "\"Google Chrome\";v=\"#{profile[:chrome_version]}\", " \
111
- "\"Chromium\";v=\"#{profile[:chrome_version]}\", " \
112
- "\"Not_A Brand\";v=\"24\""
113
-
114
- {
115
- "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9," \
116
- "image/avif,image/webp,image/apng,*/*;q=0.8," \
117
- "application/signed-exchange;v=b3;q=0.7",
118
- "Accept-Language" => "en-GB,en-US;q=0.9,en;q=0.8",
119
- "Cache-Control" => "no-cache",
120
- "Pragma" => "no-cache",
121
- "Sec-Ch-Ua" => sec_ch_ua,
122
- "Sec-Ch-Ua-Mobile" => "?0",
123
- "Sec-Ch-Ua-Platform" => profile[:platform],
124
- "Sec-Fetch-Dest" => "document",
125
- "Sec-Fetch-Mode" => "navigate",
126
- "Sec-Fetch-Site" => "cross-site",
127
- "Sec-Fetch-User" => "?1",
128
- "Upgrade-Insecure-Requests" => "1",
129
- "User-Agent" => profile[:user_agent],
130
- }
131
- end
132
31
  end
133
32
  end
134
33
 
34
+ require_relative "scraper/browser"
135
35
  require_relative "scraper/page_parser"
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module Iev
  # Immutable value object representing an IEV section (e.g. "103-01").
  #
  # A section belongs to exactly one area, identified by +area_code+.
  # Two sections are equal (and hash alike) when their codes are equal;
  # title and area code do not take part in the comparison.
  class Section
    attr_reader :code, :title, :area_code

    # Each argument is converted with #to_s and stored as a private frozen
    # copy. String#to_s returns the receiver itself, so without the copy a
    # caller mutating its own string later would alter this object (and, for
    # +code+, its #hash).
    #
    # @param code [#to_s] section code, e.g. "103-01"
    # @param title [#to_s] section title, e.g. "General concepts on functions"
    # @param area_code [#to_s] parent area code, e.g. "103"
    def initialize(code:, title:, area_code:)
      @code = code.to_s.dup.freeze
      @title = title.to_s.dup.freeze
      @area_code = area_code.to_s.dup.freeze
      freeze
    end

    # @return [String] "section-<code>"
    def uri
      "section-#{code}"
    end

    # @return [Hash{String => String}] code and title only; the area code is
    #   not included
    def to_h
      { "code" => code, "title" => title }
    end

    def ==(other)
      other.is_a?(self.class) && code == other.code
    end
    alias_method :eql?, :==

    def hash
      code.hash
    end
  end
end
@@ -79,10 +79,11 @@ module Iev
79
79
  relationship = extract_source_relationship(raw_ref)
80
80
  clean_ref = normalize_ref_string(raw_ref)
81
81
  source_ref = extract_source_ref(clean_ref)
82
+ ref_source, ref_id = split_ref(source_ref)
82
83
  clause = extract_source_clause(clean_ref)
83
84
 
84
85
  origin = Glossarist::Citation.new(
85
- ref: source_ref,
86
+ ref: Glossarist::Citation::Ref.new(source: ref_source, id: ref_id),
86
87
  locality: build_locality(clause),
87
88
  link: obtain_source_link(source_ref),
88
89
  original: Iev::Converter.mathml_to_asciimath(
@@ -351,6 +352,52 @@ module Iev
351
352
  )
352
353
  end
353
354
 
355
# Splits a normalized bibliographic reference into [source, id] for
# structured Citation::Ref construction. The full string is still
# passed to Relaton for link resolution — only the Citation::Ref
# model receives the split form.
#
#   "IEC 62302:2007" → ["IEC", "62302:2007"]
#   "ISO/IEC 2382:2015" → ["ISO/IEC", "2382:2015"]
#   "ISO/TS 14812:2022" → ["ISO/TS", "14812:2022"]
#   "IEC CISPR 16-1:2003" → ["IEC CISPR", "16-1:2003"]
#   "ITU-T Recommendation F.791 (11/2015)" → ["ITU-T Recommendation", "F.791 (11/2015)"]
#   "BIPM SI Brochure" → ["BIPM", "SI Brochure"]
#   "IEV" → ["IEV", nil]
#
# Patterns are tried top to bottom and the first match wins, so the longer
# publisher prefixes (e.g. "ISO/IEC Guide") must stay ahead of the shorter
# ones they contain (e.g. "ISO/IEC", "ISO").
#
# @param full_ref [String, nil] normalized reference string
# @return [Array] two elements: the source and the id; the id is nil when
#   the reference has no separable id, and an unrecognised reference is
#   returned whole as the source
def split_ref(full_ref)
  case full_ref
  when %r{\A(ISO/IEC/IEEE)\s+(.+)},
       %r{\A(ISO/IEC\s+Guide)\s+(.+)},
       %r{\A(ISO/IEC)\s+(.+)},
       %r{\A(IEC/IEEE)\s+(.+)},
       %r{\A((?:ISO|IEC)/(?:PAS|TR|TS))\s+(.+)},
       /\A(IEC\s+CISPR)\s+(.+)/,
       /\A(ITU-T\s+Recommendation)\s+(.+)/,
       /\A(ITU-R\s+Recommendation)\s+(.+)/,
       /\A(ITU-R)\s+(.+)/,
       /\A((?:ISO|IEC)\s+Guide)\s+(.+)/,
       /\A(ISO|IEC|IAEA)\s+(.+)/,
       /\A(JCGM)\s+(VIM)\z/
    # The match data belongs to whichever pattern above matched first.
    [Regexp.last_match(1), Regexp.last_match(2)]
  when /\AIEV\z/
    ["IEV", nil]
  when /\ABIPM/
    # Was /\ABBIPM/, which demanded a literal "BBIPM" and so never matched a
    # reference beginning with "BIPM".
    ["BIPM", "SI Brochure"]
  else
    [full_ref, nil]
  end
end
400
+
354
401
  # Uses Relaton to obtain link for given source ref.
355
402
  def obtain_source_link(ref)
356
403
  return nil unless self.class.relaton_enabled