iev 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/iev/exporter.rb CHANGED
@@ -56,12 +56,18 @@ module Iev
56
56
  collection = build_collection(dataset)
57
57
  add_subject_area_concepts(collection) if @include_areas
58
58
  build_section_narrower_relations(collection) if @include_areas
59
+ figures = FigureBuilder.extract!(collection)
60
+ enrich_references(collection)
59
61
  save_collection(collection)
62
+ save_figures(figures)
63
+ save_bibliography(BibliographyBuilder.build(collection))
64
+ save_register
60
65
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
61
66
 
62
67
  @stats = {
63
68
  concept_count: collection.count,
64
69
  localized_count: localized_count(collection),
70
+ figure_count: figures.length,
65
71
  elapsed_seconds: elapsed,
66
72
  }
67
73
  collection
@@ -140,11 +146,16 @@ module Iev
140
146
  term = TermBuilder.build_from(row)
141
147
  next unless term
142
148
 
149
+ # Parse IevCode once per concept — used by all helpers below.
150
+ code = IevCode.new(term.id)
151
+
143
152
  concept = concept_index[term.id] ||= begin
144
153
  c = Glossarist::ManagedConcept.new(data: { "id" => term.id })
145
154
  c.uuid = term.id
146
- c.data.domains = domain_references_for(term.id)
147
- add_section_broader(c, term.id)
155
+ c.schema_version = "3"
156
+ c.data.domains = domain_references_for(code)
157
+ c.data.tags = tags_for(code)
158
+ add_section_broader(c, code)
148
159
  collection.store(c)
149
160
  c
150
161
  end
@@ -169,34 +180,125 @@ module Iev
169
180
  collection.save_grouped_concepts_to_files(concepts_dir.to_s)
170
181
  end
171
182
 
172
- def localized_count(collection)
173
- collection.sum { |c| c.localized_concepts.count }
183
# Write every extracted figure to <output_dir>/figures/<figure id>.yaml.
#
# Does nothing for an empty list. A one-line summary is printed only when
# stdout is attached to a terminal.
#
# @param figures [Array] figure entities responding to #id and #to_yaml
# @return [nil]
def save_figures(figures)
  unless figures.empty?
    target_dir = output_dir.expand_path.join("figures")
    FileUtils.mkdir_p(target_dir)

    figures.each do |fig|
      File.write(target_dir.join("#{fig.id}.yaml"), fig.to_yaml,
                 encoding: "utf-8")
    end

    puts "Written #{figures.length} figures to figures/" if $stdout.tty?
  end
end
175
194
 
176
- IEV_SOURCE = "urn:iec:std:iec:60050"
195
# Write the bibliography to <output_dir>/bibliography.yaml.
#
# Skipped entirely when the bibliography has no entries. A one-line
# summary is printed only when stdout is attached to a terminal.
#
# @param bibliography [#entries, #to_yaml] bibliography built for the
#   exported collection
# @return [nil]
def save_bibliography(bibliography)
  unless bibliography.entries.empty?
    target = output_dir.expand_path.join("bibliography.yaml")
    FileUtils.mkdir_p(target.dirname)
    File.write(target, bibliography.to_yaml, encoding: "utf-8")

    total = bibliography.entries.length
    puts "Written bibliography.yaml with #{total} entries" if $stdout.tty?
  end
end
204
+
205
# Hand the collection's concepts to Glossarist::ConceptEnricher so it can
# inject references. Skipped when `collection.none?` is true.
#
# @param collection [Enumerable] the concept collection being exported
def enrich_references(collection)
  return if collection.none?

  enricher = Glossarist::ConceptEnricher.new
  enricher.inject_references(collection.to_a)
end
210
+
211
# Write the dataset register to <output_dir>/register.yaml.
#
# The register carries fixed dataset metadata plus the area/section tree
# built from SubjectAreas.all. A one-line summary is printed only when
# stdout is attached to a terminal.
#
# @return [nil]
def save_register
  section_tree = build_section_tree(SubjectAreas.all)

  attributes = {
    schema_type: "glossarist",
    schema_version: "3",
    id: "iev",
    ref: "IEC 60050:2011",
    year: 2011,
    urn: IEV_SOURCE,
    urn_aliases: ["#{IEV_SOURCE}*"],
    status: "current",
    owner: "IEC",
    source_repo: "https://github.com/glossarist/iev-data",
    tags: %w[electrotechnical vocabulary iec],
    languages: %w[eng fra],
    language_order: %w[eng fra],
    ordering: "systematic",
    sections: section_tree,
  }
  register = Glossarist::DatasetRegister.new(**attributes)

  target = output_dir.expand_path.join("register.yaml")
  FileUtils.mkdir_p(target.dirname)
  File.write(target, register.to_yaml, encoding: "utf-8")
  puts "Written register.yaml with #{section_tree.length} areas" if $stdout.tty?
end
238
+
239
# Turn subject areas into a two-level tree of Glossarist::Section nodes.
#
# Areas are ordered by the integer value of their code. Each area's
# sections are ordered by their code split on "-" and compared
# numerically part by part. An area without sections gets `children: nil`
# rather than an empty list.
#
# @param areas [Enumerable] areas responding to #code, #title, #sections
# @return [Array<Glossarist::Section>] one node per area
def build_section_tree(areas)
  ordered_areas = areas.sort_by { |entry| entry.code.to_i }

  ordered_areas.map do |area|
    ordered_sections = area.sections.sort_by do |section|
      section.code.split("-").map(&:to_i)
    end

    child_nodes = ordered_sections.map do |section|
      Glossarist::Section.new(
        id: section.code,
        names: { "eng" => section.title },
      )
    end

    Glossarist::Section.new(
      id: area.code,
      names: { "eng" => area.title },
      children: (child_nodes unless child_nodes.empty?),
    )
  end
end
257
+
258
# Total number of localized concepts across the whole collection.
#
# @param collection [Enumerable] concepts responding to #localized_concepts
# @return [Integer]
def localized_count(collection)
  collection.inject(0) do |total, concept|
    total + concept.localized_concepts.count
  end
end
261
+
262
+ # Build domain ConceptReferences for a concept.
263
+ #
264
+ # Per the concept model, ConceptReferenceType distinguishes:
265
+ # - "domain" → thematic/subject-area classification (area level)
266
+ # - "section" → structural section membership (section level)
267
+ #
268
+ # Every concept gets both: a "domain" ref to its area and a "section"
269
+ # ref to its section. Concepts with only an area code (no section)
270
+ # get only a "domain" ref.
271
+ #
272
+ # @param code [IevCode] pre-parsed IEV code
273
+ # @return [Array<Glossarist::ConceptReference>]
274
def domain_references_for(code)
  refs = []

  # Domain reference: thematic classification at the area level.
  # Only emitted when the code actually yields an area URI, so a code
  # without an area never produces a reference with a nil target.
  area_uri = code.area_uri
  refs << domain_ref(area_uri) if area_uri

  # Section reference: structural membership in the section.
  refs << section_ref(code.section_uri) if code.section_code

  refs
end
197
287
 
198
- def add_section_broader(concept, ievref)
199
- code = IevCode.new(ievref.to_s)
288
+ # @param code [IevCode] pre-parsed IEV code
289
+ # @return [Array<String>]
290
def tags_for(code)
  # Area title first, then section title; anything SubjectAreas cannot
  # resolve is simply left out.
  area = SubjectAreas.find_area(code.area_code)
  section_code = code.section_code
  section = section_code && SubjectAreas.find_section(section_code)

  [area, section].select(&:itself).map(&:title)
end
298
+
299
+ # @param concept [Glossarist::ManagedConcept]
300
+ # @param code [IevCode] pre-parsed IEV code
301
+ def add_section_broader(concept, code)
200
302
  return unless code.section_uri
201
303
 
202
304
  concept.related ||= []
@@ -249,7 +351,9 @@ module Iev
249
351
 
250
352
  concept.related ||= []
251
353
  related.each do |r|
252
- next if concept.related.any? { |er| er.type == r.type && er.ref&.id == r.ref&.id }
354
+ next if concept.related.any? do |er|
355
+ er.type == r.type && er.ref&.id == r.ref&.id
356
+ end
253
357
 
254
358
  concept.related << r
255
359
  end
@@ -263,5 +367,19 @@ module Iev
263
367
  status = term.entry_status
264
368
  concept.status = status if status && !status.empty?
265
369
  end
370
+
371
+ # --- ConceptReference factory helpers ---
372
+
373
# Build a domain-type ConceptReference to +concept_id+, with its source
# set to IEV_SOURCE.
def domain_ref(concept_id)
  Glossarist::ConceptReference.domain(concept_id).tap do |reference|
    reference.source = IEV_SOURCE
  end
end
378
+
379
# Build a section-type ConceptReference to +concept_id+, with its source
# set to IEV_SOURCE.
def section_ref(concept_id)
  Glossarist::ConceptReference.section(concept_id).tap do |reference|
    reference.source = IEV_SOURCE
  end
end
266
384
  end
267
385
  end
@@ -0,0 +1,186 @@
1
+ # frozen_string_literal: true
2
+
3
module Iev
  # Hoists IEV figure references into dataset-shared Figure entities.
  #
  # IEV source data carries figures as inline SIMG tags, which Utilities
  # rewrites to AsciiDoc image macros (+image::/assets/images/parts/{area}/
  # FILE[Figure N - caption]+). This builder walks every concept's
  # localizations, finds those image macros, promotes each to a
  # dataset-shared Glossarist::Figure entity, and rewrites the inline text
  # to a V3 figure mention (+{{fig:id, display}}+).
  #
  # The Figure entity is shared across concepts and languages: captions
  # from different localizations merge into the same {lang => text} hash,
  # and the "Figure N" identifier is taken from the first occurrence that
  # carries one. The structural link from concept to figure is a
  # FigureReference entry on the concept's data (#figures).
  #
  # Extraction is destructive: it mutates DetailedDefinition#content and
  # appends FigureReference entries. Returns the unique Figure entities so
  # the exporter can persist them to figures/{id}.yaml.
  module FigureBuilder
    # URL path prefix emitted by Utilities when converting SIMG tags.
    # Kept in sync with Utilities::IMAGE_PATH_PREFIX (without the macro).
    PATH_PREFIX = "/assets/images/parts"
    private_constant :PATH_PREFIX

    # Matches AsciiDoc image macros emitted by Utilities#process_simg_figures.
    # Named captures: +area+ (digits), +file+ (file name with extension) and
    # +caption+ (raw text between the square brackets, possibly empty).
    IMAGE_MACRO_REGEX = /
      image::#{Regexp.escape(PATH_PREFIX)}
      \/(?<area>\d+)\/(?<file>[\w.-]+)\[(?<caption>[^\]]*)\]
    /x

    # Captures "Figure N" label and the trailing caption text. The separator
    # may be an en dash or a plain hyphen.
    CAPTION_REGEX = /\A(?<label>Figure\s+\d+)\s*[–-]\s*(?<text>.+)\z/m

    module_function

    # Extract all figures referenced by the collection's localizations.
    #
    # @param collection [Glossarist::ManagedConceptCollection]
    # @return [Array<Glossarist::Figure>] unique figures, sorted by id
    def extract!(collection)
      figures_by_id = {}

      collection.each do |concept|
        concept.localizations.each do |l10n|
          process_localization(l10n, concept, figures_by_id)
        end
      end

      figures_by_id.values.sort_by(&:id)
    end

    # Process every detailed-definition field of one localization.
    # Localizations without a three-letter language code are skipped.
    def process_localization(l10n, concept, figures_by_id)
      lang = l10n.data&.language_code
      return unless lang && lang.length == 3

      Glossarist::ConceptData.detailed_definition_fields.each do |field|
        process_field(l10n, field, lang, concept, figures_by_id)
      end
    end
    private_class_method :process_localization

    # Rewrite image macros in each entry of one field and register the
    # figures found. Array() makes a field that is nil (not populated for
    # this localization) behave like an empty list instead of raising.
    def process_field(l10n, field, lang, concept, figures_by_id)
      Array(l10n.data.public_send(field)).each do |dd|
        # Cheap substring pre-check before running the regex.
        next unless dd.content&.include?("image::")

        rewritten, hits = extract_from_text(dd.content, lang)
        next if hits.empty?

        dd.content = rewritten
        hits.each { |hit| register_figure(hit, concept, figures_by_id) }
      end
    end
    private_class_method :process_field

    # @return [Array<(String, Array<Hash>)>] rewritten text and per-match
    #   figure descriptors ({ id:, identifier:, caption:, lang:, image: })
    def extract_from_text(text, lang)
      hits = []
      rewritten = text.gsub(IMAGE_MACRO_REGEX) do
        hit = build_hit(Regexp.last_match, lang)
        hits << hit
        mention_for(hit)
      end
      [rewritten, hits]
    end
    private_class_method :extract_from_text

    # Turn one regex match into a figure descriptor hash.
    def build_hit(match, lang)
      identifier, caption = parse_caption(match[:caption])
      {
        id: figure_id_for(match[:file]),
        identifier: identifier,
        caption: caption,
        lang: lang,
        image: build_image(match[:area], match[:file]),
      }
    end
    private_class_method :build_hit

    # Build the image entry for a figure from its area and file name.
    def build_image(area, file)
      Glossarist::FigureImage.new(
        src: "#{PATH_PREFIX}/#{area}/#{file}",
        format: format_for(file),
      )
    end
    private_class_method :build_image

    # Split the bracket text of an image macro into label and caption.
    #
    # @param bracket [String, nil] raw text between the square brackets
    # @return [Array<(String, String)>] +[identifier, caption]+; either may
    #   be nil. The identifier always has runs of whitespace collapsed to a
    #   single space, so "Figure  1" and "Figure 1" yield the same label.
    def parse_caption(bracket)
      stripped = bracket.to_s.strip
      return [nil, nil] if stripped.empty?

      if (m = stripped.match(CAPTION_REGEX))
        [m[:label].gsub(/\s+/, " "), m[:text].strip]
      elsif stripped.match?(/\AFigure\s+\d+\z/)
        # Label without caption text: normalize it the same way as above.
        [stripped.gsub(/\s+/, " "), nil]
      else
        [nil, stripped]
      end
    end
    private_class_method :parse_caption

    # Figure id: "fig-" plus the file name without its last extension.
    def figure_id_for(file)
      "fig-#{file.sub(/\.[^.]+\z/, '')}"
    end
    private_class_method :figure_id_for

    # Lower-cased file extension without the leading dot ("" if none).
    def format_for(file)
      File.extname(file).delete_prefix(".").downcase
    end
    private_class_method :format_for

    # Inline mention replacing the image macro: +{{fig:id}}+ when there is
    # nothing to display, otherwise +{{fig:id, label - caption}}+.
    def mention_for(hit)
      parts = [hit[:identifier], hit[:caption]].compact
      return "{{fig:#{hit[:id]}}}" if parts.empty?

      "{{fig:#{hit[:id]}, #{parts.join(' - ')}}}"
    end
    private_class_method :mention_for

    # Add or merge a figure descriptor into the shared index, and ensure the
    # concept carries a FigureReference to it.
    def register_figure(hit, concept, figures_by_id)
      figure = figures_by_id[hit[:id]] ||= build_figure(hit)
      # The first occurrence may lack a "Figure N" label (e.g. a caption-only
      # macro); adopt the label from a later occurrence instead of leaving
      # the shared figure without an identifier.
      figure.identifier ||= hit[:identifier]
      merge_caption!(figure, hit)
      add_image_if_missing(figure, hit[:image])
      add_figure_reference(concept, hit[:id], hit[:identifier])
    end
    private_class_method :register_figure

    # New shared Figure with no images or captions yet.
    def build_figure(hit)
      Glossarist::Figure.new(
        id: hit[:id],
        identifier: hit[:identifier],
        images: [],
        caption: {},
      )
    end
    private_class_method :build_figure

    # Record the caption for the hit's language; the first caption seen for
    # a language wins.
    def merge_caption!(figure, hit)
      return unless hit[:caption]

      figure.caption ||= {}
      figure.caption[hit[:lang]] ||= hit[:caption]
    end
    private_class_method :merge_caption!

    # Append the image unless one with the same src is already attached.
    def add_image_if_missing(figure, image)
      return if figure.images.any? { |i| i.src == image.src }

      figure.images << image
    end
    private_class_method :add_image_if_missing

    # Attach a FigureReference to the concept unless it already references
    # this figure id.
    def add_figure_reference(concept, figure_id, display)
      refs = Array(concept.data.figures)
      return if refs.any? { |r| r.entity_id == figure_id }

      concept.data.figures = refs + [
        Glossarist::FigureReference.new(entity_id: figure_id, display: display),
      ]
    end
    private_class_method :add_figure_reference
  end
end
@@ -6,7 +6,8 @@
6
6
  module Iev
7
7
  # @todo This needs to be rewritten.
8
8
  class Iso639Code
9
- COUNTRY_CODES = YAML.safe_load(IO.read(File.join(__dir__, "iso_639_2.yaml")), permitted_classes: [Symbol]).freeze
9
+ COUNTRY_CODES = YAML.safe_load_file(File.join(__dir__,
10
+ "iso_639_2.yaml"), permitted_classes: [Symbol]).freeze
10
11
  # rubocop:disable Style/MutableConstant
11
12
  THREE_CHAR_MEMO = {} # Memoization cache, must be mutable
12
13
  # rubocop:enable Style/MutableConstant
@@ -36,7 +36,7 @@ module Iev
36
36
  rescue StandardError
37
37
  raise unless curr_attempt <= attempts
38
38
 
39
- sleep(2**curr_attempt * 0.1)
39
+ sleep((2**curr_attempt) * 0.1)
40
40
  curr_attempt += 1
41
41
  retry
42
42
  end
@@ -3,100 +3,102 @@
3
3
  require "ferrum"
4
4
 
5
5
module Iev
  class Scraper
    # Headless-browser helpers used to fetch pages that sit behind AWS WAF.
    module Browser
      # Browser identities to pick from at random. Each entry provides the
      # full User-Agent string, the quoted platform name sent as
      # Sec-Ch-Ua-Platform, and the Chrome major version used in Sec-Ch-Ua.
      USER_AGENT_PROFILES = [
        ["Macintosh; Intel Mac OS X 10_15_7", '"macOS"', "131"],
        ["Windows NT 10.0; Win64; x64", '"Windows"', "130"],
        ["X11; Linux x86_64", '"Linux"', "131"],
        ["Macintosh; Intel Mac OS X 10_15_7", '"macOS"', "129"],
        ["Windows NT 10.0; Win64; x64", '"Windows"', "131"],
      ].map do |system, platform, major|
        {
          user_agent: "Mozilla/5.0 (#{system}) AppleWebKit/537.36 " \
                      "(KHTML, like Gecko) Chrome/#{major}.0.0.0 Safari/537.36",
          platform: platform,
          chrome_version: major,
        }
      end.freeze

      class << self
        # Load +url+ in headless Chrome and return the page HTML.
        #
        # Waits for the network to go idle so a WAF JavaScript challenge has
        # a chance to run. Returns nil (after a warning) when the page looks
        # like a WAF block page or when Ferrum raises. The browser is always
        # quit afterwards.
        #
        # @param url [String] page to fetch
        # @param browser_opts [Hash] extra options merged into
        #   Ferrum::Browser.new
        # @return [String, nil]
        def fetch(url, browser_opts: {})
          browser = Ferrum::Browser.new(
            headless: "new",
            timeout: 30,
            window_size: [1366, 768],
            browser_options: {
              "disable-blink-features" => "AutomationControlled",
            },
            **browser_opts,
          )

          browser.headers.set(random_headers)
          browser.go_to(url)
          browser.network.wait_for_idle(timeout: 15)

          page = browser.body
          blocked = ["403 ERROR", "Request blocked"].any? do |marker|
            page.include?(marker)
          end
          return page unless blocked

          warn "IEV: AWS WAF blocked request for #{url}"
          nil
        rescue Ferrum::Error, Ferrum::BrowserError => e
          warn "IEV: Browser error fetching #{url}: #{e.message}"
          nil
        ensure
          browser&.quit
        end

        # Request headers for one randomly chosen profile from
        # USER_AGENT_PROFILES.
        #
        # @return [Hash{String => String}]
        def random_headers
          profile = USER_AGENT_PROFILES.sample
          major = profile[:chrome_version]
          brands = %("Google Chrome";v="#{major}", "Chromium";v="#{major}", ) +
                   %("Not_A Brand";v="24")
          accept = [
            "text/html,application/xhtml+xml,application/xml;q=0.9",
            "image/avif,image/webp,image/apng,*/*;q=0.8",
            "application/signed-exchange;v=b3;q=0.7",
          ].join(",")

          {
            "Accept" => accept,
            "Accept-Language" => "en-GB,en-US;q=0.9,en;q=0.8",
            "Cache-Control" => "no-cache",
            "Pragma" => "no-cache",
            "Sec-Ch-Ua" => brands,
            "Sec-Ch-Ua-Mobile" => "?0",
            "Sec-Ch-Ua-Platform" => profile[:platform],
            "Sec-Fetch-Dest" => "document",
            "Sec-Fetch-Mode" => "navigate",
            "Sec-Fetch-Site" => "cross-site",
            "Sec-Fetch-User" => "?1",
            "Upgrade-Insecure-Requests" => "1",
            "User-Agent" => profile[:user_agent],
          }
        end
      end
    end
  end
end
data/lib/iev/scraper.rb CHANGED
@@ -4,6 +4,9 @@ require "nokogiri"
4
4
 
5
5
  module Iev
6
6
  class Scraper
7
+ autoload :Browser, "iev/scraper/browser"
8
+ autoload :PageParser, "iev/scraper/page_parser"
9
+
7
10
  BASE_URL = "https://www.electropedia.org/iev/iev.nsf/" \
8
11
  "display?openform&ievref="
9
12
 
@@ -14,7 +17,8 @@ module Iev
14
17
  # Fetch the Electropedia page HTML for a given IEV code.
15
18
  # Returns a Nokogiri document.
16
19
  def fetch_page(code)
17
- html = ScraperBrowser.fetch("#{BASE_URL}#{code}", browser_opts: @browser_opts)
20
+ html = Browser.fetch("#{BASE_URL}#{code}",
21
+ browser_opts: @browser_opts)
18
22
  return nil unless html
19
23
 
20
24
  Nokogiri::HTML(html)
@@ -30,6 +34,3 @@ module Iev
30
34
  end
31
35
  end
32
36
  end
33
-
34
- require_relative "scraper/browser"
35
- require_relative "scraper/page_parser"