iev 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +5 -3
- data/lib/iev/cli/command.rb +26 -11
- data/lib/iev/cli/command_helper.rb +30 -0
- data/lib/iev/cli/ui.rb +5 -0
- data/lib/iev/data_source.rb +8 -5
- data/lib/iev/exporter.rb +25 -2
- data/lib/iev/version.rb +1 -1
- data/lib/iev.rb +7 -4
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5100fcef07e984496be43178ab8ebaf48fb6b77a2144ccd722a1d7acc1135e57
|
|
4
|
+
data.tar.gz: 724f02e6431f8a3534a1f28e6894dd5462d65ad67ce736695b8e5f8c1b7f04ad
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c239187ba2d7d06645dbebca3478bda513dd5e6da8937ff4c9fc51be1647bf2866338d6b79f865e9b3462359b45dce67999e78e8d0cd486c2264c3d3060b6abc
|
|
7
|
+
data.tar.gz: 88aded4d4dcedc76afb3160f7adb7f1746088d250a34750bb681bb69bc8c4f3d025b8d2f4c2d50df4aa66dfb4543e8699c9261ef5a3fd31ee84a6f5600828d52
|
data/README.adoc
CHANGED
|
@@ -85,19 +85,21 @@ Electropedia administrator.
|
|
|
85
85
|
Iev.get("103-01-02", "en")
|
|
86
86
|
=> "functional"
|
|
87
87
|
|
|
88
|
-
# If code not found
|
|
88
|
+
# If code not found, returns nil (does not raise)
|
|
89
89
|
Iev.get("111-11-11", "en")
|
|
90
|
-
=>
|
|
90
|
+
=> nil
|
|
91
91
|
|
|
92
|
-
# If language not found
|
|
92
|
+
# If language not found, returns nil
|
|
93
93
|
Iev.get("103-01-02", "eee")
|
|
94
94
|
=> nil
|
|
95
95
|
|
|
96
96
|
# Fetch full concept data (all languages)
|
|
97
|
+
# Raises Iev::DataSource::NotFoundError if code not found
|
|
97
98
|
Iev.fetch_concept("103-01-02")
|
|
98
99
|
=> { "id" => "103-01-02", "data" => { ... } }
|
|
99
100
|
|
|
100
101
|
# Fetch localized term data
|
|
102
|
+
# Raises Iev::DataSource::NotFoundError if code not found
|
|
101
103
|
Iev.fetch_term("103-01-02", "en")
|
|
102
104
|
=> { "term" => "functional", ... }
|
|
103
105
|
----
|
data/lib/iev/cli/command.rb
CHANGED
|
@@ -8,6 +8,11 @@ module Iev
|
|
|
8
8
|
class Command < Thor
|
|
9
9
|
include CommandHelper
|
|
10
10
|
|
|
11
|
+
desc "version", "Show iev gem version"
|
|
12
|
+
def version
|
|
13
|
+
puts "iev #{Iev::VERSION}"
|
|
14
|
+
end
|
|
15
|
+
|
|
11
16
|
desc "export FILE", "Export IEV data to Glossarist YAML format"
|
|
12
17
|
long_desc <<~DESC
|
|
13
18
|
Exports IEV data from an Excel (.xlsx/.xls) or SQLite (.sqlite3/.sqlite/.db)
|
|
@@ -39,18 +44,25 @@ module Iev
|
|
|
39
44
|
def export(file)
|
|
40
45
|
handle_generic_options(options)
|
|
41
46
|
|
|
42
|
-
Iev::Exporter.new(
|
|
47
|
+
exporter = Iev::Exporter.new(
|
|
43
48
|
file,
|
|
44
49
|
output_dir: options[:output],
|
|
45
50
|
only_concepts: options[:only_concepts],
|
|
46
51
|
only_languages: options[:only_languages],
|
|
47
52
|
fetch_relaton_links: options[:relaton],
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
53
|
+
on_progress: method(:export_progress),
|
|
54
|
+
)
|
|
55
|
+
exporter.export
|
|
56
|
+
print_export_summary(exporter.stats)
|
|
57
|
+
rescue ArgumentError => e
|
|
58
|
+
error e.message
|
|
59
|
+
exit 1
|
|
60
|
+
rescue Sequel::Error => e
|
|
61
|
+
error "Database error: #{e.message}"
|
|
62
|
+
exit 1
|
|
51
63
|
end
|
|
52
64
|
|
|
53
|
-
desc "xlsx2yaml FILE", "
|
|
65
|
+
desc "xlsx2yaml FILE", "[DEPRECATED] Use 'export' instead."
|
|
54
66
|
option :output, desc: "Output directory", aliases: :o, default: Dir.pwd
|
|
55
67
|
option :only_concepts,
|
|
56
68
|
desc: "Only process concepts with IEVREF matching this argument, " \
|
|
@@ -69,6 +81,7 @@ module Iev
|
|
|
69
81
|
option :debug_sources, type: :boolean, default: false
|
|
70
82
|
option :debug_relaton, type: :boolean, default: false
|
|
71
83
|
def xlsx2yaml(file)
|
|
84
|
+
warn "[DEPRECATED] 'xlsx2yaml' is deprecated. Use 'export' instead."
|
|
72
85
|
handle_generic_options(options)
|
|
73
86
|
|
|
74
87
|
Iev::Exporter.new(
|
|
@@ -81,7 +94,7 @@ module Iev
|
|
|
81
94
|
summary
|
|
82
95
|
end
|
|
83
96
|
|
|
84
|
-
desc "db2yaml DB_FILE", "
|
|
97
|
+
desc "db2yaml DB_FILE", "[DEPRECATED] Use 'export' instead."
|
|
85
98
|
option :output, desc: "Output directory", aliases: :o, default: Dir.pwd
|
|
86
99
|
option :only_concepts,
|
|
87
100
|
desc: "Only process concepts with IEVREF matching this argument, " \
|
|
@@ -100,6 +113,7 @@ module Iev
|
|
|
100
113
|
option :debug_sources, type: :boolean, default: false
|
|
101
114
|
option :debug_relaton, type: :boolean, default: false
|
|
102
115
|
def db2yaml(dbfile)
|
|
116
|
+
warn "[DEPRECATED] 'db2yaml' is deprecated. Use 'export' instead."
|
|
103
117
|
handle_generic_options(options)
|
|
104
118
|
|
|
105
119
|
Iev::Exporter.new(
|
|
@@ -138,13 +152,14 @@ module Iev
|
|
|
138
152
|
DataSource.fetch_concept(code)
|
|
139
153
|
end
|
|
140
154
|
|
|
141
|
-
unless raw
|
|
142
|
-
warn "IEV: concept #{code} not found."
|
|
143
|
-
exit 1
|
|
144
|
-
end
|
|
145
|
-
|
|
146
155
|
concept = build_concept_from_raw(code, raw)
|
|
147
156
|
print_concept_grouped_yaml(concept)
|
|
157
|
+
rescue Iev::DataSource::NotFoundError
|
|
158
|
+
error "IEV concept not found: #{code}"
|
|
159
|
+
exit 1
|
|
160
|
+
rescue Ferrum::Error => e
|
|
161
|
+
error "Scraping failed: #{e.message}"
|
|
162
|
+
exit 1
|
|
148
163
|
end
|
|
149
164
|
|
|
150
165
|
def self.exit_on_failure?
|
|
@@ -24,6 +24,36 @@ module Iev
|
|
|
24
24
|
info "Done!"
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
+
def export_progress(current, total)
|
|
28
|
+
return unless $IEV_PROGRESS
|
|
29
|
+
return if total <= 1 # single-row dataset, skip progress
|
|
30
|
+
|
|
31
|
+
if current == total
|
|
32
|
+
Ui.info "" # clear progress line
|
|
33
|
+
else
|
|
34
|
+
Ui.progress "Processing #{current}/#{total}..."
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def print_export_summary(stats)
|
|
39
|
+
return unless stats
|
|
40
|
+
|
|
41
|
+
s = stats
|
|
42
|
+
elapsed = format_elapsed(s[:elapsed_seconds])
|
|
43
|
+
info "Exported #{s[:concept_count]} concepts " \
|
|
44
|
+
"(#{s[:localized_count]} localized) in #{elapsed}"
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def format_elapsed(seconds)
|
|
48
|
+
if seconds < 60
|
|
49
|
+
"%.1fs" % seconds
|
|
50
|
+
else
|
|
51
|
+
mins = (seconds / 60).to_i
|
|
52
|
+
secs = (seconds % 60).round
|
|
53
|
+
"#{mins}m #{secs}s"
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
27
57
|
def handle_generic_options(options)
|
|
28
58
|
$IEV_PROFILE = options[:profile]
|
|
29
59
|
$IEV_PROGRESS = options.fetch(:progress, !ENV["CI"])
|
data/lib/iev/cli/ui.rb
CHANGED
|
@@ -32,6 +32,11 @@ module Iev
|
|
|
32
32
|
print "#{Helper.clear_progress}#{message}\n"
|
|
33
33
|
end
|
|
34
34
|
|
|
35
|
+
# Prints error message to stderr.
|
|
36
|
+
def error(message)
|
|
37
|
+
Kernel.warn "Error: #{message}"
|
|
38
|
+
end
|
|
39
|
+
|
|
35
40
|
# Sets an UI tag which will be prepended to messages printed with
|
|
36
41
|
# #debug and #warn.
|
|
37
42
|
def set_ui_tag(str)
|
data/lib/iev/data_source.rb
CHANGED
|
@@ -12,19 +12,21 @@ module Iev
|
|
|
12
12
|
# Fetch full concept data (all languages) for a given IEV code.
|
|
13
13
|
#
|
|
14
14
|
# @param code [String] IEV code, e.g. "103-01-02"
|
|
15
|
-
# @return [Hash
|
|
15
|
+
# @return [Hash] concept data hash
|
|
16
|
+
# @raise [NotFoundError] if the concept does not exist
|
|
16
17
|
def fetch_concept(code)
|
|
17
|
-
fetch_concept_data(code)
|
|
18
|
+
fetch_concept_data(code) ||
|
|
19
|
+
raise(NotFoundError, "IEV concept not found: #{code}")
|
|
18
20
|
end
|
|
19
21
|
|
|
20
22
|
# Fetch localized term data for a given IEV code and language.
|
|
21
23
|
#
|
|
22
24
|
# @param code [String] IEV code, e.g. "103-01-02"
|
|
23
25
|
# @param lang [String] language code, e.g. "en" or "eng"
|
|
24
|
-
# @return [Hash, nil] localized concept data or nil
|
|
26
|
+
# @return [Hash, nil] localized concept data or nil if language not found
|
|
27
|
+
# @raise [NotFoundError] if the concept does not exist
|
|
25
28
|
def fetch_term(code, lang)
|
|
26
29
|
concept = fetch_concept(code)
|
|
27
|
-
return nil unless concept
|
|
28
30
|
|
|
29
31
|
lang_key = normalize_lang(lang)
|
|
30
32
|
concept[lang_key]
|
|
@@ -35,7 +37,8 @@ module Iev
|
|
|
35
37
|
#
|
|
36
38
|
# @param code [String] IEV code, e.g. "103-01-02"
|
|
37
39
|
# @param lang [String] language code, e.g. "en"
|
|
38
|
-
# @return [String, nil] term designation or nil
|
|
40
|
+
# @return [String, nil] term designation or nil if not found
|
|
41
|
+
# @raise [NotFoundError] if the concept does not exist
|
|
39
42
|
def fetch_term_designation(code, lang)
|
|
40
43
|
term_data = fetch_term(code, lang)
|
|
41
44
|
return nil unless term_data
|
data/lib/iev/exporter.rb
CHANGED
|
@@ -27,15 +27,18 @@ module Iev
|
|
|
27
27
|
# @param output_dir [String, Pathname] destination for YAML files
|
|
28
28
|
# @param only_concepts [String, nil] SQL LIKE pattern for IEVREF filtering
|
|
29
29
|
# @param only_languages [String, nil] comma-separated language codes
|
|
30
|
-
# @param fetch_relaton_links [Boolean]
|
|
30
|
+
# @param fetch_relaton_links [Boolean] fetch source URLs via Relaton
|
|
31
|
+
# @param on_progress [Proc, nil] callback (current, total) during build
|
|
31
32
|
def initialize(input_path, output_dir: Dir.pwd,
|
|
32
33
|
only_concepts: nil, only_languages: nil,
|
|
33
|
-
fetch_relaton_links: false
|
|
34
|
+
fetch_relaton_links: false,
|
|
35
|
+
on_progress: nil)
|
|
34
36
|
@input_path = Pathname.new(input_path)
|
|
35
37
|
validate_input!
|
|
36
38
|
|
|
37
39
|
@output_dir = Pathname.new(output_dir)
|
|
38
40
|
@fetch_relaton_links = fetch_relaton_links
|
|
41
|
+
@on_progress = on_progress
|
|
39
42
|
@filters = {
|
|
40
43
|
only_concepts: only_concepts,
|
|
41
44
|
only_languages: only_languages,
|
|
@@ -45,12 +48,23 @@ module Iev
|
|
|
45
48
|
# Run the export pipeline: load → transform → save.
|
|
46
49
|
# @return [Glossarist::ManagedConceptCollection]
|
|
47
50
|
def export
|
|
51
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
48
52
|
dataset = load_dataset
|
|
49
53
|
collection = build_collection(dataset)
|
|
50
54
|
save_collection(collection)
|
|
55
|
+
elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
|
|
56
|
+
|
|
57
|
+
@stats = {
|
|
58
|
+
concept_count: collection.count,
|
|
59
|
+
localized_count: localized_count(collection),
|
|
60
|
+
elapsed_seconds: elapsed,
|
|
61
|
+
}
|
|
51
62
|
collection
|
|
52
63
|
end
|
|
53
64
|
|
|
65
|
+
# @return [Hash, nil] stats from last export, or nil if export hasn't run
|
|
66
|
+
attr_reader :stats
|
|
67
|
+
|
|
54
68
|
private
|
|
55
69
|
|
|
56
70
|
def supported_format?
|
|
@@ -111,8 +125,13 @@ module Iev
|
|
|
111
125
|
# Glossarist's O(n) fetch_or_initialize which does linear scan.
|
|
112
126
|
concept_index = {}
|
|
113
127
|
collection = Glossarist::ManagedConceptCollection.new
|
|
128
|
+
row_count = dataset.count
|
|
129
|
+
current = 0
|
|
114
130
|
|
|
115
131
|
dataset.each do |row|
|
|
132
|
+
current += 1
|
|
133
|
+
@on_progress&.call(current, row_count)
|
|
134
|
+
|
|
116
135
|
term = TermBuilder.build_from(row)
|
|
117
136
|
next unless term
|
|
118
137
|
|
|
@@ -134,5 +153,9 @@ module Iev
|
|
|
134
153
|
FileUtils.mkdir_p(concepts_dir)
|
|
135
154
|
collection.save_to_files(concepts_dir.to_s)
|
|
136
155
|
end
|
|
156
|
+
|
|
157
|
+
def localized_count(collection)
|
|
158
|
+
collection.sum { |c| c.localized_concepts.count }
|
|
159
|
+
end
|
|
137
160
|
end
|
|
138
161
|
end
|
data/lib/iev/version.rb
CHANGED
data/lib/iev.rb
CHANGED
|
@@ -45,17 +45,19 @@ module Iev
|
|
|
45
45
|
# @param [String] lang language code, for example "en"
|
|
46
46
|
#
|
|
47
47
|
# @return [String, nil] if found then term,
|
|
48
|
-
#
|
|
49
|
-
# if language not found then nil.
|
|
48
|
+
# if code or language not found then nil.
|
|
50
49
|
#
|
|
51
50
|
def self.get(code, lang)
|
|
52
51
|
DataSource.fetch_term_designation(code, lang)
|
|
52
|
+
rescue DataSource::NotFoundError
|
|
53
|
+
nil
|
|
53
54
|
end
|
|
54
55
|
|
|
55
56
|
# Fetch full concept data (all languages) for a given IEV code.
|
|
56
57
|
#
|
|
57
58
|
# @param [String] code IEV code, e.g. "103-01-02"
|
|
58
|
-
# @return [Hash
|
|
59
|
+
# @return [Hash] concept data hash with all languages
|
|
60
|
+
# @raise [DataSource::NotFoundError] if concept not found
|
|
59
61
|
def self.fetch_concept(code)
|
|
60
62
|
DataSource.fetch_concept(code)
|
|
61
63
|
end
|
|
@@ -64,7 +66,8 @@ module Iev
|
|
|
64
66
|
#
|
|
65
67
|
# @param [String] code IEV code, e.g. "103-01-02"
|
|
66
68
|
# @param [String] lang language code, e.g. "en" or "eng"
|
|
67
|
-
# @return [Hash, nil] localized concept data
|
|
69
|
+
# @return [Hash, nil] localized concept data or nil if not found
|
|
70
|
+
# @raise [DataSource::NotFoundError] if concept not found
|
|
68
71
|
def self.fetch_term(code, lang)
|
|
69
72
|
DataSource.fetch_term(code, lang)
|
|
70
73
|
end
|