iev 0.4.3 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +7 -4
- data/.github/workflows/release.yml +2 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +4 -2
- data/CLAUDE.md +3 -0
- data/Gemfile +0 -19
- data/README.adoc +412 -0
- data/data/subject_areas.yaml +1920 -0
- data/exe/iev +1 -1
- data/iev.gemspec +1 -1
- data/lib/iev/cli/command.rb +24 -0
- data/lib/iev/cli/command_helper.rb +1 -2
- data/lib/iev/exporter.rb +108 -2
- data/lib/iev/iev_code.rb +80 -0
- data/lib/iev/iso_639_code.rb +1 -1
- data/lib/iev/scraper/browser.rb +102 -0
- data/lib/iev/scraper.rb +5 -105
- data/lib/iev/section.rb +37 -0
- data/lib/iev/source_parser.rb +48 -1
- data/lib/iev/subject_area.rb +46 -0
- data/lib/iev/subject_area_concepts.rb +145 -0
- data/lib/iev/subject_areas.rb +273 -0
- data/lib/iev/supersession_parser.rb +1 -2
- data/lib/iev/term_builder.rb +19 -0
- data/lib/iev/version.rb +1 -1
- data/lib/iev.rb +46 -0
- metadata +17 -4
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Iev
  # Immutable value object representing an IEV subject area (e.g. "102").
  #
  # A subject area is the aggregate root for its sections.
  # Navigation: area → sections (direct), section → area (via registry).
  #
  # Equality and hashing depend on the area code only, so two instances with
  # the same code are interchangeable as Hash keys.
  class SubjectArea
    attr_reader :code, :title, :sections

    # @param code [#to_s] area code, e.g. "103"
    # @param title [#to_s] area title, e.g. "Mathematics - Functions"
    # @param sections [Array<Iev::Section>, nil] child sections; nil is
    #   treated as "no sections"
    def initialize(code:, title:, sections: [])
      # `freeze` on the instance is shallow, so each field is stored as a
      # frozen private copy. Otherwise a caller that keeps a reference to
      # the array or string it passed in could change this object later
      # (and, for `code`, change the value `hash` is derived from).
      @code = code.to_s.dup.freeze
      @title = title.to_s.dup.freeze
      @sections = (sections || []).dup.freeze
      freeze
    end

    # @return [String] concept URI of this area, e.g. "area-102"
    def uri
      "area-#{code}"
    end

    # Looks up one of this area's sections by its section code.
    #
    # @param section_code [#to_s] e.g. "102-01"
    # @return [Iev::Section, nil] the first section whose code matches
    def section(section_code)
      wanted = section_code.to_s
      sections.find { |s| s.code == wanted }
    end

    # @return [Hash] string-keyed hash with "code", "title" and "sections"
    #   (each section converted with its own +to_h+)
    def to_h
      {
        "code" => code,
        "title" => title,
        "sections" => sections.map(&:to_h),
      }
    end

    # Two subject areas are equal when they are of the same class and share
    # the same code; title and sections are not compared.
    def ==(other)
      other.is_a?(self.class) && code == other.code
    end
    alias_method :eql?, :==

    # Consistent with {#==}: derived from the code only.
    def hash
      code.hash
    end
  end
end
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Iev
  # Creates ManagedConcept entries for the IEV subject area hierarchy.
  #
  # The hierarchy has two levels:
  #   - Area (e.g., "102" = "Mathematics - General concepts")
  #   - Section (e.g., "102-01" = "Sets and operations")
  #
  # Linking (all at ManagedConcept#related level):
  #   - Each area has "narrower" relations to its sections
  #   - Each section has "broader" relation to parent area
  #   - Each section gets "narrower" to child concepts (added by Exporter)
  #   - Each regular IEV concept gets "broader" to its section
  #     (added by Exporter)
  #
  # Classification (separate from hierarchy):
  #   - Each concept's ManagedConceptData#domains includes area and
  #     section ConceptReferences
  #   - Each concept's ConceptData#domain references its section URI
  #   - Each section concept's ConceptData#domain references parent area
  module SubjectAreaConcepts
    IEV_SOURCE = "urn:iec:std:iec:60050"

    class << self
      # Build all area and section concepts and add them to the collection.
      #
      # For every area returned by +Iev.subject_areas+ the area concept is
      # stored first, followed by one concept per section of that area.
      #
      # @param collection [Glossarist::ManagedConceptCollection]
      # @return [void]
      def add_to(collection)
        Iev.subject_areas.each do |area|
          collection.store(build_area_concept(area))

          area.sections.each do |section|
            collection.store(build_section_concept(section, area))
          end
        end
      end

      private

      # ConceptReference of type "domain" pointing at +concept_id+ within
      # the IEV source.
      def domain_ref(concept_id)
        Glossarist::ConceptReference.new(
          concept_id: concept_id,
          source: IEV_SOURCE,
          ref_type: "domain",
        )
      end

      # Bare ManagedConcept whose data id and uuid are both +id+.
      def build_managed_concept(id, domains)
        mc = Glossarist::ManagedConcept.new(
          data: Glossarist::ManagedConceptData.new(id: id, domains: domains),
        )
        mc.uuid = id
        mc
      end

      # Concept for a subject area: classified under itself, with one
      # English localization and a "narrower" relation per section.
      def build_area_concept(area)
        id = area.uri
        mc = build_managed_concept(id, [domain_ref(id)])

        mc.add_localization(build_localization(id, area.title, "eng"))

        narrower = area.sections.map { |s| build_narrower_relation(s.uri) }
        # An area without sections carries no relation list at all (nil)
        # rather than an empty one.
        mc.related = narrower.empty? ? nil : narrower

        mc
      end

      # Concept for a section: classified under its area and itself, with
      # ConceptData#domain set to the area URI and a "broader" relation to
      # the area.
      def build_section_concept(section, area)
        id = section.uri
        mc = build_managed_concept(id, [domain_ref(area.uri), domain_ref(id)])

        cd = build_concept_data(id, section.title, "eng")
        cd.domain = area.uri
        mc.add_localization(build_localization_from_data(id, cd))

        mc.related = [build_broader_relation(area.uri)]

        mc
      end

      # ConceptData whose only term is +title+ as a preferred expression.
      def build_concept_data(id, title, lang_code)
        Glossarist::ConceptData.new(
          id: id,
          language_code: lang_code,
          terms: [
            Glossarist::Designation::Expression.new(
              type: "expression",
              designation: title,
              normative_status: "preferred",
            ),
          ],
        )
      end

      # Localization built from scratch for +title+; shares all status
      # handling with {#build_localization_from_data}.
      def build_localization(id, title, lang_code)
        build_localization_from_data(id, build_concept_data(id, title, lang_code))
      end

      # Wraps existing concept data in a LocalizedConcept marked as
      # "valid" / "published".
      def build_localization_from_data(id, concept_data)
        l10n = Glossarist::LocalizedConcept.new
        l10n.data = concept_data
        l10n.id = id
        l10n.entry_status = "valid"
        l10n.data.review_decision_event = "published"
        l10n
      end

      # RelatedConcept of the given type pointing at +target_uri+ in the
      # "IEV" source.
      def build_relation(type, target_uri)
        Glossarist::RelatedConcept.new(
          type: type,
          content: target_uri,
          ref: Glossarist::ConceptRef.new(source: "IEV", id: target_uri),
        )
      end

      def build_broader_relation(target_uri)
        build_relation("broader", target_uri)
      end

      def build_narrower_relation(target_uri)
        build_relation("narrower", target_uri)
      end
    end
  end
end
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "yaml"
|
|
4
|
+
require "nokogiri"
|
|
5
|
+
require "fileutils"
|
|
6
|
+
require "iev/config"
|
|
7
|
+
|
|
8
|
+
module Iev
  # Registry of IEV subject areas and their sections.
  #
  # The query methods (+all+, +find_area+, +find_section+, ...) read the YAML
  # file at DATA_FILE and memoize the typed objects until +reload!+ is
  # called. +fetch+ scrapes Electropedia and stores its (possibly partial)
  # result under +Iev.config.cache_dir+.
  module SubjectAreas
    DATA_FILE = File.expand_path("../../data/subject_areas.yaml", __dir__)

    AREAS_URL = "https://electropedia.org/iev/iev.nsf/" \
                "6d6bdd8667c378f7c12581fa003d80e7?OpenForm"
    SECTIONS_URL_TEMPLATE = "https://electropedia.org/iev/iev.nsf/" \
                            "index?openform&part=%<part>s"

    # File name of the scrape cache inside Iev.config.cache_dir.
    CACHE_FILE = "subject_areas.yaml"

    # Responses shorter than this (in characters) are treated as a
    # WAF/captcha page instead of real content.
    MIN_PAGE_SIZE = 15_000

    # Phrases whose presence marks a response as a WAF/captcha challenge.
    CAPTCHA_MARKERS = [
      "Confirm you are human",
      "solve a puzzle",
      "security check before continuing",
    ].freeze

    # A cached data set only counts as complete with at least this many areas.
    MIN_AREA_COUNT = 99

    # The cache is rewritten after every SAVE_EVERY-th area position.
    SAVE_EVERY = 10

    FETCH_DELAY = 5   # seconds slept between per-area section fetches
    RETRY_DELAY = 30  # seconds; multiplied by the attempt number
    MAX_RETRIES = 2   # total number of fetch attempts per URL

    class FetchError < StandardError; end

    class << self
      # --- URI scheme ---

      # URI for a subject area concept.
      # @param code [String, Integer] e.g. "102"
      # @return [String] e.g. "area-102"
      def area_uri(code)
        "area-#{code}"
      end

      # URI for a section concept.
      # @param code [String] e.g. "103-01"
      # @return [String] e.g. "section-103-01"
      def section_uri(code)
        "section-#{code}"
      end

      # --- Query API (returns typed objects) ---

      # Return all subject areas with their sections.
      # A data file without an "areas" entry yields an empty list.
      # @return [Array<SubjectArea>]
      def all
        @typed_areas ||= (raw_data["areas"] || []).map { |h| build_area(h) }
      end

      # Find a single subject area by its numeric code (hash lookup).
      # @param code [String, Integer] e.g. "102" or 102
      # @return [SubjectArea, nil]
      def find_area(code)
        area_index[code.to_s]
      end

      # Return all sections for a given area code.
      # @param code [String, Integer] area code, e.g. "102"
      # @return [Array<Section>] empty when the area is unknown
      def sections_for(code)
        find_area(code)&.sections || []
      end

      # Find a single section by its section code (hash lookup).
      # @param section_code [String] e.g. "102-01"
      # @return [Section, nil]
      def find_section(section_code)
        section_index[section_code.to_s]
      end

      # Return the parent area for a given section code.
      # @param section_code [String] e.g. "102-01"
      # @return [SubjectArea, nil]
      def area_for_section(section_code)
        sec = find_section(section_code)
        sec ? find_area(sec.area_code) : nil
      end

      # --- Navigation from IEV reference ---

      # Find the subject area for any IEV reference.
      # @param ievref [String] e.g. "103-01-02"
      # @return [SubjectArea, nil]
      def area_for(ievref)
        find_area(IevCode.new(ievref).area_code)
      end

      # Find the section for any IEV reference.
      # @param ievref [String] e.g. "103-01-02"
      # @return [Section, nil]
      def section_for(ievref)
        section_code = IevCode.new(ievref).section_code
        section_code ? find_section(section_code) : nil
      end

      # --- Fetching (network, writes to the cache directory) ---

      # Scrape the area list and each area's sections from Electropedia.
      #
      # A cached result that is already complete is returned as is.
      # Otherwise the cached entries are merged with the freshly listed
      # areas and only areas not yet marked "fetched" are requested.
      # Progress is written to the cache periodically and at the end, so an
      # interrupted or WAF-blocked run can be resumed.
      #
      # @return [Hash] { "areas" => [Hash, ...] }
      # @raise [FetchError] when the area list itself cannot be fetched
      def fetch
        cached = read_cache(CACHE_FILE)
        return cached if cached && complete?(cached)

        cached_areas = cached ? cached["areas"] : []
        fresh_areas = fetch_areas
        puts "Found #{fresh_areas.length} areas (#{cached_areas.length} cached)" if $stdout.tty?

        areas = merge_areas(cached_areas, fresh_areas)

        areas.each_with_index do |area, i|
          next if area["fetched"]

          fetch_sections_into(area)

          puts "[#{i + 1}/#{areas.length}] #{area["code"]}: #{area["title"]} — #{area["sections"].length} sections" if $stdout.tty?

          # Save progress periodically so partial results survive WAF failures
          write_cache(CACHE_FILE, { "areas" => areas }) if ((i + 1) % SAVE_EVERY).zero?

          sleep FETCH_DELAY unless i == areas.length - 1
        end

        result = { "areas" => areas }
        write_cache(CACHE_FILE, result)
        result
      end

      # Fetch the list of subject areas from AREAS_URL.
      #
      # Every link whose href carries a numeric +part=+ parameter and whose
      # text is not blank becomes one area; the first occurrence of a code
      # wins.
      #
      # @return [Array<Hash>] hashes with "code", "title" and empty "sections"
      # @raise [FetchError]
      def fetch_areas
        doc = Nokogiri::HTML(fetch_page_with_retry(AREAS_URL))

        areas = []
        doc.css("a").each do |link|
          code = link["href"].to_s[/part=(\d+)/, 1]
          next unless code

          title = link.text.strip
          next if title.empty?

          areas << { "code" => code, "title" => title, "sections" => [] }
        end

        areas.uniq { |a| a["code"] }
      end

      # Fetch the sections of one area from its index page.
      #
      # A table cell counts as a section heading when its stripped text has
      # the form "Section <code>: <title>".
      #
      # @param part [String, Integer] area code
      # @return [Array<Hash>] hashes with "code" and "title", unique by code
      # @raise [FetchError]
      def fetch_sections(part)
        url = format(SECTIONS_URL_TEMPLATE, part: part)
        doc = Nokogiri::HTML(fetch_page_with_retry(url))

        sections = []
        doc.css("td").each do |td|
          m = td.text.strip.match(/\ASection\s+([\d-]+):\s*(.+)\z/)
          sections << { "code" => m[1], "title" => m[2].strip } if m
        end

        sections.uniq { |s| s["code"] }
      end

      # Drop all memoized state so the next query re-reads DATA_FILE.
      def reload!
        @typed_areas = nil
        @area_index = nil
        @section_index = nil
        @raw_data = nil
      end

      private

      # Turn one raw area hash into a SubjectArea with typed sections.
      def build_area(hash)
        area_code = hash["code"]
        sections = (hash["sections"] || []).map do |s|
          Section.new(code: s["code"], title: s["title"], area_code: area_code)
        end

        SubjectArea.new(
          code: area_code,
          title: hash["title"],
          sections: sections,
        )
      end

      # Memoized contents of DATA_FILE.
      def raw_data
        @raw_data ||= load_data_file
      end

      # Parse DATA_FILE; a missing, empty or non-mapping file is treated as
      # "no areas" so the query API keeps working.
      def load_data_file
        return { "areas" => [] } unless File.exist?(DATA_FILE)

        data = YAML.safe_load(File.read(DATA_FILE, encoding: "utf-8"))
        data.is_a?(Hash) ? data : { "areas" => [] }
      end

      def area_index
        @area_index ||= all.each_with_object({}) { |a, h| h[a.code] = a }
      end

      def section_index
        @section_index ||= all.each_with_object({}) do |area, h|
          area.sections.each { |s| h[s.code] = s }
        end
      end

      # Keep the order and membership of the fresh listing, but reuse a
      # cached entry (with its sections / "fetched" flag) when one exists
      # for the same code.
      def merge_areas(cached_areas, fresh_areas)
        by_code = cached_areas.each_with_object({}) { |a, h| h[a["code"]] = a }
        fresh_areas.map { |fresh| by_code.fetch(fresh["code"], fresh) }
      end

      # Fetch the sections of +area+ in place and mark it as fetched. On a
      # FetchError the area is left unfetched (with at least an empty
      # section list) so that a later run retries it.
      def fetch_sections_into(area)
        area["sections"] = fetch_sections(area["code"])
        area["fetched"] = true
      rescue FetchError
        area["sections"] ||= []
        warn "IEV: Skipping area #{area["code"]} due to WAF"
      end

      # A data set is complete when it lists at least MIN_AREA_COUNT areas
      # and every one of them has been fetched.
      def complete?(data)
        areas = data["areas"]
        return false unless areas.is_a?(Array) && areas.length >= MIN_AREA_COUNT

        areas.all? { |a| a["fetched"] == true }
      end

      # Heuristic: a response is a challenge page when it is suspiciously
      # short or contains one of the known challenge phrases.
      def captcha_page?(html)
        html.length < MIN_PAGE_SIZE ||
          CAPTCHA_MARKERS.any? { |marker| html.include?(marker) }
      end

      # Fetch +url+, waiting and trying again when a WAF challenge page is
      # returned.
      #
      # @param url [String]
      # @param retries [Integer] total number of attempts
      # @return [String] page HTML
      # @raise [FetchError] when the browser returns nothing, when every
      #   attempt hits a challenge page, or when no attempt is made at all
      def fetch_page_with_retry(url, retries: MAX_RETRIES)
        # Loaded lazily, at the point of the first network fetch.
        require "iev/scraper/browser"

        retries.times do |attempt|
          html = ScraperBrowser.fetch(url)
          raise FetchError, "Failed to fetch #{url}" unless html
          return html unless captcha_page?(html)

          raise FetchError, "WAF challenge for #{url}" if attempt >= retries - 1

          wait = RETRY_DELAY * (attempt + 1)
          warn "IEV: WAF challenge for #{url}, retrying in #{wait}s (attempt #{attempt + 1}/#{retries})"
          sleep wait
        end

        # Only reachable when +retries+ is zero or negative. Without this
        # the method would return the Integer from +times+ instead of HTML.
        raise FetchError, "No fetch attempted for #{url} (retries: #{retries})"
      end

      # Read a previously written cache file.
      # @return [Hash, nil] nil when the file is missing or holds no areas
      def read_cache(filename)
        cache_path = File.join(Iev.config.cache_dir, filename)
        return nil unless File.exist?(cache_path)

        d = YAML.safe_load(File.read(cache_path, encoding: "utf-8"))
        return nil unless d.is_a?(Hash) && d["areas"].is_a?(Array) && d["areas"].any?

        d
      end

      # Serialize +d+ as YAML into the cache directory, creating it if needed.
      def write_cache(filename, d)
        cache_path = File.join(Iev.config.cache_dir, filename)
        FileUtils.mkdir_p(File.dirname(cache_path))
        File.write(cache_path, YAML.dump(d), encoding: "utf-8")
      end
    end
  end
end
|
|
@@ -54,10 +54,9 @@ module Iev
|
|
|
54
54
|
def relation_from_match(match_data)
  # The superseded concept is referenced by its IEV identifier only.
  superseded = Glossarist::ConceptRef.new(source: "IEV", id: match_data[:ref])

  Glossarist::RelatedConcept.new(type: "supersedes", ref: superseded)
end
|
data/lib/iev/term_builder.rb
CHANGED
|
@@ -77,6 +77,9 @@ module Iev
|
|
|
77
77
|
cd.notes = extract_notes
|
|
78
78
|
cd.terms = extract_terms
|
|
79
79
|
|
|
80
|
+
domain = extract_domain
|
|
81
|
+
cd.domain = domain if domain
|
|
82
|
+
|
|
80
83
|
sources = extract_authoritative_source
|
|
81
84
|
cd.sources = sources if sources&.any?
|
|
82
85
|
|
|
@@ -98,6 +101,22 @@ module Iev
|
|
|
98
101
|
@term_language ||= find_value_for("LANGUAGE").to_three_char_code
|
|
99
102
|
end
|
|
100
103
|
|
|
104
|
+
# Derives the domain (subject area section) from the IEVREF identifier.
# IEVREF format: "AAA-BB-CC" where AAA = area, AAA-BB = section.
#
# Returns the URI of the section concept (e.g. "section-103-01") when the
# section is known to the registry, otherwise the URI of the area concept
# (e.g. "area-103"). Returns nil when the identifier is missing, has no
# area part, or when the lookup raises.
def extract_domain
  return nil unless term_id

  area_code, section_number = term_id.split("-")
  # A blank identifier (or one starting with "-") has no area part; do not
  # produce a dangling "area-" URI for it.
  return nil if area_code.nil? || area_code.empty?

  section_code = [area_code, section_number].compact.join("-")
  return SubjectAreas.section_uri(section_code) if Iev.find_section(section_code)

  SubjectAreas.area_uri(area_code)
rescue StandardError
  # Best effort: any failure here leaves the domain unset (nil).
  nil
end
|
|
119
|
+
|
|
101
120
|
# Splits unified definition (from the spreadsheet) into separate
|
|
102
121
|
# definition, examples, and notes strings (for YAMLs).
|
|
103
122
|
#
|
data/lib/iev/version.rb
CHANGED
data/lib/iev.rb
CHANGED
|
@@ -29,11 +29,16 @@ module Iev
|
|
|
29
29
|
autoload :DataSource, "iev/data_source"
|
|
30
30
|
autoload :DbWriter, "iev/db_writer"
|
|
31
31
|
autoload :Exporter, "iev/exporter"
|
|
32
|
+
autoload :IevCode, "iev/iev_code"
|
|
32
33
|
autoload :Iso639Code, "iev/iso_639_code"
|
|
33
34
|
autoload :Profiler, "iev/profiler"
|
|
34
35
|
autoload :RelatonDb, "iev/relaton_db"
|
|
35
36
|
autoload :Scraper, "iev/scraper"
|
|
37
|
+
autoload :Section, "iev/section"
|
|
36
38
|
autoload :SourceParser, "iev/source_parser"
|
|
39
|
+
autoload :SubjectArea, "iev/subject_area"
|
|
40
|
+
autoload :SubjectAreas, "iev/subject_areas"
|
|
41
|
+
autoload :SubjectAreaConcepts, "iev/subject_area_concepts"
|
|
37
42
|
autoload :SupersessionParser, "iev/supersession_parser"
|
|
38
43
|
autoload :TermAttrsParser, "iev/term_attrs_parser"
|
|
39
44
|
autoload :TermBuilder, "iev/term_builder"
|
|
@@ -80,4 +85,45 @@ module Iev
|
|
|
80
85
|
def self.scrape_concept(code)
|
|
81
86
|
Scraper.new.fetch_concept(code)
|
|
82
87
|
end
|
|
88
|
+
|
|
89
|
+
# All IEV subject areas, each carrying its sections.
# Delegates to SubjectAreas.all.
# @return [Array<SubjectArea>]
def self.subject_areas
  SubjectAreas.all
end

# Look up one subject area by its code.
# Delegates to SubjectAreas.find_area.
# @param code [String, Integer] e.g. "102"
# @return [SubjectArea, nil]
def self.find_subject_area(code)
  SubjectAreas.find_area(code)
end

# Look up one section by its section code.
# Delegates to SubjectAreas.find_section.
# @param section_code [String] e.g. "102-01"
# @return [Section, nil]
def self.find_section(section_code)
  SubjectAreas.find_section(section_code)
end

# Sections belonging to the given area code.
# Delegates to SubjectAreas.sections_for.
# @param code [String, Integer] e.g. "102"
# @return [Array<Section>]
def self.sections_for(code)
  SubjectAreas.sections_for(code)
end

# Parent subject area of the given section code.
# Delegates to SubjectAreas.area_for_section.
# @param section_code [String] e.g. "102-01"
# @return [SubjectArea, nil]
def self.area_for_section(section_code)
  SubjectAreas.area_for_section(section_code)
end

# Parse an IEV code into its structural components.
# Delegates to IevCode.parse.
# @param code [String] e.g. "103-01-02"
# @return [IevCode, nil] nil if the code is blank
def self.parse_code(code)
  IevCode.parse(code)
end
|
|
83
129
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: iev
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.3
|
|
4
|
+
version: 0.4.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-25 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: creek
|
|
@@ -28,16 +28,22 @@ dependencies:
|
|
|
28
28
|
name: glossarist
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '2.6'
|
|
31
34
|
- - ">="
|
|
32
35
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: 2.
|
|
36
|
+
version: 2.6.7
|
|
34
37
|
type: :runtime
|
|
35
38
|
prerelease: false
|
|
36
39
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
40
|
requirements:
|
|
41
|
+
- - "~>"
|
|
42
|
+
- !ruby/object:Gem::Version
|
|
43
|
+
version: '2.6'
|
|
38
44
|
- - ">="
|
|
39
45
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: 2.
|
|
46
|
+
version: 2.6.7
|
|
41
47
|
- !ruby/object:Gem::Dependency
|
|
42
48
|
name: ferrum
|
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -191,6 +197,7 @@ files:
|
|
|
191
197
|
- Rakefile
|
|
192
198
|
- bin/console
|
|
193
199
|
- bin/setup
|
|
200
|
+
- data/subject_areas.yaml
|
|
194
201
|
- exe/iev
|
|
195
202
|
- iev.gemspec
|
|
196
203
|
- lib/iev.rb
|
|
@@ -205,13 +212,19 @@ files:
|
|
|
205
212
|
- lib/iev/data_source.rb
|
|
206
213
|
- lib/iev/db_writer.rb
|
|
207
214
|
- lib/iev/exporter.rb
|
|
215
|
+
- lib/iev/iev_code.rb
|
|
208
216
|
- lib/iev/iso_639_2.yaml
|
|
209
217
|
- lib/iev/iso_639_code.rb
|
|
210
218
|
- lib/iev/profiler.rb
|
|
211
219
|
- lib/iev/relaton_db.rb
|
|
212
220
|
- lib/iev/scraper.rb
|
|
221
|
+
- lib/iev/scraper/browser.rb
|
|
213
222
|
- lib/iev/scraper/page_parser.rb
|
|
223
|
+
- lib/iev/section.rb
|
|
214
224
|
- lib/iev/source_parser.rb
|
|
225
|
+
- lib/iev/subject_area.rb
|
|
226
|
+
- lib/iev/subject_area_concepts.rb
|
|
227
|
+
- lib/iev/subject_areas.rb
|
|
215
228
|
- lib/iev/supersession_parser.rb
|
|
216
229
|
- lib/iev/term_attrs_parser.rb
|
|
217
230
|
- lib/iev/term_builder.rb
|