iev 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d7d47000d44c3dcf02934083f9d05b0a195491d6ef130cdc9ca198feeb0d8ad
4
- data.tar.gz: 84b2da3efd17c94b8278bdbe963f1d356433fb0d20ccc6e06e1c2bffd05e8851
3
+ metadata.gz: 5100fcef07e984496be43178ab8ebaf48fb6b77a2144ccd722a1d7acc1135e57
4
+ data.tar.gz: 724f02e6431f8a3534a1f28e6894dd5462d65ad67ce736695b8e5f8c1b7f04ad
5
5
  SHA512:
6
- metadata.gz: ba7fbe8bbdcbdec627b43aa62288d2b282a5476a2616ae74a56afcc81c7b983c0190dd43e92378024ad11155c0c9f9b5cf2f2ae4042c4c2536c68cecb4380262
7
- data.tar.gz: ee080f35c1d17e1309217def4db29acc019e58c5ad3d104efbfc351155addc43c38a1e0602033024d213e91ce13692084f868cc1f1b64521145d90a763ae2f85
6
+ metadata.gz: c239187ba2d7d06645dbebca3478bda513dd5e6da8937ff4c9fc51be1647bf2866338d6b79f865e9b3462359b45dce67999e78e8d0cd486c2264c3d3060b6abc
7
+ data.tar.gz: 88aded4d4dcedc76afb3160f7adb7f1746088d250a34750bb681bb69bc8c4f3d025b8d2f4c2d50df4aa66dfb4543e8699c9261ef5a3fd31ee84a6f5600828d52
data/README.adoc CHANGED
@@ -85,19 +85,21 @@ Electropedia administrator.
85
85
  Iev.get("103-01-02", "en")
86
86
  => "functional"
87
87
 
88
- # If code not found
88
+ # If code not found, returns nil (does not raise)
89
89
  Iev.get("111-11-11", "en")
90
- => ""
90
+ => nil
91
91
 
92
- # If language not found
92
+ # If language not found, returns nil
93
93
  Iev.get("103-01-02", "eee")
94
94
  => nil
95
95
 
96
96
  # Fetch full concept data (all languages)
97
+ # Raises Iev::DataSource::NotFoundError if code not found
97
98
  Iev.fetch_concept("103-01-02")
98
99
  => { "id" => "103-01-02", "data" => { ... } }
99
100
 
100
101
  # Fetch localized term data
102
+ # Raises Iev::DataSource::NotFoundError if code not found
101
103
  Iev.fetch_term("103-01-02", "en")
102
104
  => { "term" => "functional", ... }
103
105
  ----
@@ -8,6 +8,11 @@ module Iev
8
8
  class Command < Thor
9
9
  include CommandHelper
10
10
 
11
+ desc "version", "Show iev gem version"
12
+ def version
13
+ puts "iev #{Iev::VERSION}"
14
+ end
15
+
11
16
  desc "export FILE", "Export IEV data to Glossarist YAML format"
12
17
  long_desc <<~DESC
13
18
  Exports IEV data from an Excel (.xlsx/.xls) or SQLite (.sqlite3/.sqlite/.db)
@@ -39,18 +44,25 @@ module Iev
39
44
  def export(file)
40
45
  handle_generic_options(options)
41
46
 
42
- Iev::Exporter.new(
47
+ exporter = Iev::Exporter.new(
43
48
  file,
44
49
  output_dir: options[:output],
45
50
  only_concepts: options[:only_concepts],
46
51
  only_languages: options[:only_languages],
47
52
  fetch_relaton_links: options[:relaton],
48
- ).export
49
-
50
- info "Done!"
53
+ on_progress: method(:export_progress),
54
+ )
55
+ exporter.export
56
+ print_export_summary(exporter.stats)
57
+ rescue ArgumentError => e
58
+ error e.message
59
+ exit 1
60
+ rescue Sequel::Error => e
61
+ error "Database error: #{e.message}"
62
+ exit 1
51
63
  end
52
64
 
53
- desc "xlsx2yaml FILE", "Converts Excel IEV exports to YAMLs."
65
+ desc "xlsx2yaml FILE", "[DEPRECATED] Use 'export' instead."
54
66
  option :output, desc: "Output directory", aliases: :o, default: Dir.pwd
55
67
  option :only_concepts,
56
68
  desc: "Only process concepts with IEVREF matching this argument, " \
@@ -69,6 +81,7 @@ module Iev
69
81
  option :debug_sources, type: :boolean, default: false
70
82
  option :debug_relaton, type: :boolean, default: false
71
83
  def xlsx2yaml(file)
84
+ warn "[DEPRECATED] 'xlsx2yaml' is deprecated. Use 'export' instead."
72
85
  handle_generic_options(options)
73
86
 
74
87
  Iev::Exporter.new(
@@ -81,7 +94,7 @@ module Iev
81
94
  summary
82
95
  end
83
96
 
84
- desc "db2yaml DB_FILE", "Exports SQLite to IEV YAMLs."
97
+ desc "db2yaml DB_FILE", "[DEPRECATED] Use 'export' instead."
85
98
  option :output, desc: "Output directory", aliases: :o, default: Dir.pwd
86
99
  option :only_concepts,
87
100
  desc: "Only process concepts with IEVREF matching this argument, " \
@@ -100,6 +113,7 @@ module Iev
100
113
  option :debug_sources, type: :boolean, default: false
101
114
  option :debug_relaton, type: :boolean, default: false
102
115
  def db2yaml(dbfile)
116
+ warn "[DEPRECATED] 'db2yaml' is deprecated. Use 'export' instead."
103
117
  handle_generic_options(options)
104
118
 
105
119
  Iev::Exporter.new(
@@ -138,13 +152,14 @@ module Iev
138
152
  DataSource.fetch_concept(code)
139
153
  end
140
154
 
141
- unless raw
142
- warn "IEV: concept #{code} not found."
143
- exit 1
144
- end
145
-
146
155
  concept = build_concept_from_raw(code, raw)
147
156
  print_concept_grouped_yaml(concept)
157
+ rescue Iev::DataSource::NotFoundError
158
+ error "IEV concept not found: #{code}"
159
+ exit 1
160
+ rescue Ferrum::Error => e
161
+ error "Scraping failed: #{e.message}"
162
+ exit 1
148
163
  end
149
164
 
150
165
  def self.exit_on_failure?
@@ -24,6 +24,36 @@ module Iev
24
24
  info "Done!"
25
25
  end
26
26
 
27
+ def export_progress(current, total)
28
+ return unless $IEV_PROGRESS
29
+ return if total <= 1 # single-row dataset, skip progress
30
+
31
+ if current == total
32
+ Ui.info "" # clear progress line
33
+ else
34
+ Ui.progress "Processing #{current}/#{total}..."
35
+ end
36
+ end
37
+
38
+ def print_export_summary(stats)
39
+ return unless stats
40
+
41
+ s = stats
42
+ elapsed = format_elapsed(s[:elapsed_seconds])
43
+ info "Exported #{s[:concept_count]} concepts " \
44
+ "(#{s[:localized_count]} localized) in #{elapsed}"
45
+ end
46
+
47
+ def format_elapsed(seconds)
48
+ if seconds < 60
49
+ "%.1fs" % seconds
50
+ else
51
+ mins = (seconds / 60).to_i
52
+ secs = (seconds % 60).round
53
+ "#{mins}m #{secs}s"
54
+ end
55
+ end
56
+
27
57
  def handle_generic_options(options)
28
58
  $IEV_PROFILE = options[:profile]
29
59
  $IEV_PROGRESS = options.fetch(:progress, !ENV["CI"])
data/lib/iev/cli/ui.rb CHANGED
@@ -32,6 +32,11 @@ module Iev
32
32
  print "#{Helper.clear_progress}#{message}\n"
33
33
  end
34
34
 
35
+ # Prints error message to stderr.
36
+ def error(message)
37
+ Kernel.warn "Error: #{message}"
38
+ end
39
+
35
40
  # Sets an UI tag which will be prepended to messages printed with
36
41
  # #debug and #warn.
37
42
  def set_ui_tag(str)
@@ -12,19 +12,21 @@ module Iev
12
12
  # Fetch full concept data (all languages) for a given IEV code.
13
13
  #
14
14
  # @param code [String] IEV code, e.g. "103-01-02"
15
- # @return [Hash, nil] concept data hash or nil if not found
15
+ # @return [Hash] concept data hash
16
+ # @raise [NotFoundError] if the concept does not exist
16
17
  def fetch_concept(code)
17
- fetch_concept_data(code)
18
+ fetch_concept_data(code) ||
19
+ raise(NotFoundError, "IEV concept not found: #{code}")
18
20
  end
19
21
 
20
22
  # Fetch localized term data for a given IEV code and language.
21
23
  #
22
24
  # @param code [String] IEV code, e.g. "103-01-02"
23
25
  # @param lang [String] language code, e.g. "en" or "eng"
24
- # @return [Hash, nil] localized concept data or nil
26
+ # @return [Hash, nil] localized concept data or nil if language not found
27
+ # @raise [NotFoundError] if the concept does not exist
25
28
  def fetch_term(code, lang)
26
29
  concept = fetch_concept(code)
27
- return nil unless concept
28
30
 
29
31
  lang_key = normalize_lang(lang)
30
32
  concept[lang_key]
@@ -35,7 +37,8 @@ module Iev
35
37
  #
36
38
  # @param code [String] IEV code, e.g. "103-01-02"
37
39
  # @param lang [String] language code, e.g. "en"
38
- # @return [String, nil] term designation or nil
40
+ # @return [String, nil] term designation or nil if not found
41
+ # @raise [NotFoundError] if the concept does not exist
39
42
  def fetch_term_designation(code, lang)
40
43
  term_data = fetch_term(code, lang)
41
44
  return nil unless term_data
data/lib/iev/exporter.rb CHANGED
@@ -27,15 +27,18 @@ module Iev
27
27
  # @param output_dir [String, Pathname] destination for YAML files
28
28
  # @param only_concepts [String, nil] SQL LIKE pattern for IEVREF filtering
29
29
  # @param only_languages [String, nil] comma-separated language codes
30
- # @param fetch_relaton_links [Boolean] whether to fetch source URLs via Relaton
30
+ # @param fetch_relaton_links [Boolean] fetch source URLs via Relaton
31
+ # @param on_progress [Proc, nil] callback (current, total) during build
31
32
  def initialize(input_path, output_dir: Dir.pwd,
32
33
  only_concepts: nil, only_languages: nil,
33
- fetch_relaton_links: false)
34
+ fetch_relaton_links: false,
35
+ on_progress: nil)
34
36
  @input_path = Pathname.new(input_path)
35
37
  validate_input!
36
38
 
37
39
  @output_dir = Pathname.new(output_dir)
38
40
  @fetch_relaton_links = fetch_relaton_links
41
+ @on_progress = on_progress
39
42
  @filters = {
40
43
  only_concepts: only_concepts,
41
44
  only_languages: only_languages,
@@ -45,12 +48,23 @@ module Iev
45
48
  # Run the export pipeline: load → transform → save.
46
49
  # @return [Glossarist::ManagedConceptCollection]
47
50
  def export
51
+ start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
48
52
  dataset = load_dataset
49
53
  collection = build_collection(dataset)
50
54
  save_collection(collection)
55
+ elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
56
+
57
+ @stats = {
58
+ concept_count: collection.count,
59
+ localized_count: localized_count(collection),
60
+ elapsed_seconds: elapsed,
61
+ }
51
62
  collection
52
63
  end
53
64
 
65
+ # @return [Hash, nil] stats from last export, or nil if export hasn't run
66
+ attr_reader :stats
67
+
54
68
  private
55
69
 
56
70
  def supported_format?
@@ -111,8 +125,13 @@ module Iev
111
125
  # Glossarist's O(n) fetch_or_initialize which does linear scan.
112
126
  concept_index = {}
113
127
  collection = Glossarist::ManagedConceptCollection.new
128
+ row_count = dataset.count
129
+ current = 0
114
130
 
115
131
  dataset.each do |row|
132
+ current += 1
133
+ @on_progress&.call(current, row_count)
134
+
116
135
  term = TermBuilder.build_from(row)
117
136
  next unless term
118
137
 
@@ -134,5 +153,9 @@ module Iev
134
153
  FileUtils.mkdir_p(concepts_dir)
135
154
  collection.save_to_files(concepts_dir.to_s)
136
155
  end
156
+
157
+ def localized_count(collection)
158
+ collection.sum { |c| c.localized_concepts.count }
159
+ end
137
160
  end
138
161
  end
data/lib/iev/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Iev
4
- VERSION = "0.4.1"
4
+ VERSION = "0.4.2"
5
5
  end
data/lib/iev.rb CHANGED
@@ -45,17 +45,19 @@ module Iev
45
45
  # @param [String] lang language code, for example "en"
46
46
  #
47
47
  # @return [String, nil] if found then term,
48
- # if code not found then nil,
49
- # if language not found then nil.
48
+ # if code or language not found then nil.
50
49
  #
51
50
  def self.get(code, lang)
52
51
  DataSource.fetch_term_designation(code, lang)
52
+ rescue DataSource::NotFoundError
53
+ nil
53
54
  end
54
55
 
55
56
  # Fetch full concept data (all languages) for a given IEV code.
56
57
  #
57
58
  # @param [String] code IEV code, e.g. "103-01-02"
58
- # @return [Hash, nil] concept data hash with all languages
59
+ # @return [Hash] concept data hash with all languages
60
+ # @raise [DataSource::NotFoundError] if concept not found
59
61
  def self.fetch_concept(code)
60
62
  DataSource.fetch_concept(code)
61
63
  end
@@ -64,7 +66,8 @@ module Iev
64
66
  #
65
67
  # @param [String] code IEV code, e.g. "103-01-02"
66
68
  # @param [String] lang language code, e.g. "en" or "eng"
67
- # @return [Hash, nil] localized concept data
69
+ # @return [Hash, nil] localized concept data or nil if not found
70
+ # @raise [DataSource::NotFoundError] if concept not found
68
71
  def self.fetch_term(code, lang)
69
72
  DataSource.fetch_term(code, lang)
70
73
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iev
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.