unitsdb 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitmodules +3 -0
- data/.rspec +2 -1
- data/.rubocop_todo.yml +168 -15
- data/Gemfile +3 -2
- data/README.adoc +803 -1
- data/exe/unitsdb +7 -0
- data/lib/unitsdb/cli.rb +88 -0
- data/lib/unitsdb/commands/_modify.rb +22 -0
- data/lib/unitsdb/commands/base.rb +26 -0
- data/lib/unitsdb/commands/check_si.rb +124 -0
- data/lib/unitsdb/commands/get.rb +133 -0
- data/lib/unitsdb/commands/normalize.rb +81 -0
- data/lib/unitsdb/commands/release.rb +73 -0
- data/lib/unitsdb/commands/search.rb +219 -0
- data/lib/unitsdb/commands/si_formatter.rb +485 -0
- data/lib/unitsdb/commands/si_matcher.rb +470 -0
- data/lib/unitsdb/commands/si_ttl_parser.rb +100 -0
- data/lib/unitsdb/commands/si_updater.rb +212 -0
- data/lib/unitsdb/commands/ucum/check.rb +126 -0
- data/lib/unitsdb/commands/ucum/formatter.rb +141 -0
- data/lib/unitsdb/commands/ucum/matcher.rb +301 -0
- data/lib/unitsdb/commands/ucum/update.rb +84 -0
- data/lib/unitsdb/commands/ucum/updater.rb +98 -0
- data/lib/unitsdb/commands/ucum/xml_parser.rb +34 -0
- data/lib/unitsdb/commands/ucum.rb +43 -0
- data/lib/unitsdb/commands/validate/identifiers.rb +42 -0
- data/lib/unitsdb/commands/validate/references.rb +318 -0
- data/lib/unitsdb/commands/validate/si_references.rb +109 -0
- data/lib/unitsdb/commands/validate.rb +40 -0
- data/lib/unitsdb/database.rb +662 -0
- data/lib/unitsdb/dimension.rb +49 -0
- data/lib/unitsdb/dimension_details.rb +20 -0
- data/lib/unitsdb/dimension_reference.rb +8 -0
- data/lib/unitsdb/dimensions.rb +5 -10
- data/lib/unitsdb/errors.rb +13 -0
- data/lib/unitsdb/external_reference.rb +14 -0
- data/lib/unitsdb/identifier.rb +8 -0
- data/lib/unitsdb/localized_string.rb +17 -0
- data/lib/unitsdb/prefix.rb +30 -0
- data/lib/unitsdb/prefix_reference.rb +10 -0
- data/lib/unitsdb/prefixes.rb +5 -11
- data/lib/unitsdb/quantities.rb +5 -31
- data/lib/unitsdb/quantity.rb +21 -0
- data/lib/unitsdb/quantity_reference.rb +10 -0
- data/lib/unitsdb/root_unit_reference.rb +14 -0
- data/lib/unitsdb/scale.rb +17 -0
- data/lib/unitsdb/scale_properties.rb +12 -0
- data/lib/unitsdb/scale_reference.rb +10 -0
- data/lib/unitsdb/scales.rb +12 -0
- data/lib/unitsdb/si_derived_base.rb +19 -0
- data/lib/unitsdb/symbol_presentations.rb +3 -8
- data/lib/unitsdb/ucum.rb +198 -0
- data/lib/unitsdb/unit.rb +63 -0
- data/lib/unitsdb/unit_reference.rb +10 -0
- data/lib/unitsdb/unit_system.rb +15 -0
- data/lib/unitsdb/unit_system_reference.rb +10 -0
- data/lib/unitsdb/unit_systems.rb +5 -10
- data/lib/unitsdb/units.rb +5 -10
- data/lib/unitsdb/utils.rb +84 -0
- data/lib/unitsdb/version.rb +1 -1
- data/lib/unitsdb.rb +12 -2
- data/unitsdb.gemspec +6 -3
- metadata +124 -20
- data/lib/unitsdb/dimensions/dimension.rb +0 -59
- data/lib/unitsdb/dimensions/quantity.rb +0 -32
- data/lib/unitsdb/dimensions/symbol.rb +0 -26
- data/lib/unitsdb/prefixes/prefix.rb +0 -35
- data/lib/unitsdb/prefixes/symbol.rb +0 -17
- data/lib/unitsdb/quantities/quantity.rb +0 -37
- data/lib/unitsdb/quantities/unit_reference.rb +0 -15
- data/lib/unitsdb/unit_systems/unit_system.rb +0 -19
- data/lib/unitsdb/units/quantity_reference.rb +0 -17
- data/lib/unitsdb/units/root_unit.rb +0 -21
- data/lib/unitsdb/units/root_units.rb +0 -18
- data/lib/unitsdb/units/si_derived_base.rb +0 -26
- data/lib/unitsdb/units/symbol.rb +0 -19
- data/lib/unitsdb/units/system.rb +0 -17
- data/lib/unitsdb/units/unit.rb +0 -73
- data/lib/unitsdb/unitsdb.rb +0 -6
@@ -0,0 +1,470 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "si_ttl_parser"
|
4
|
+
|
5
|
+
module Unitsdb
|
6
|
+
module Commands
|
7
|
+
# Matcher for SI entities and UnitsDB entities
|
8
|
+
module SiMatcher
|
9
|
+
SI_AUTHORITY = "si-digital-framework"
|
10
|
+
@match_details = {}
|
11
|
+
|
12
|
+
module_function
|
13
|
+
|
14
|
+
# Match TTL entities to database entities (from_si direction).
#
# Pass 1 collects every database entity that already carries a reference
# whose authority is SI_AUTHORITY. Pass 2 looks for name/symbol matches for
# the TTL entities that no existing reference points at.
#
# @param entity_type [String] forwarded to find_matching_entities and
#   match_entity_names?
# @param ttl_entities [Array<Hash>] hashes read through :uri, :name, :label,
#   :alt_label and :symbol
# @param db_entities [Array] objects responding to #short and, optionally,
#   #references (each reference responding to #authority and #uri)
# @return [Array] three arrays: [matches, missing_matches, unmatched_ttl]
def match_ttl_to_db(entity_type, ttl_entities, db_entities)
  matches = []
  missing_matches = []
  matched_ttl_uris = []
  processed_pairs = {} # "entity_id:uri" => true, avoids duplicate pairs
  entity_matches = {}  # entity_id => SI candidates lacking a reference

  # First pass: find direct references
  db_entities.each do |entity|
    next unless entity.respond_to?(:references) && entity.references

    entity.references.each do |ref|
      next unless ref.authority == SI_AUTHORITY

      matched_ttl_uris << ref.uri
      ttl_entity = ttl_entities.find { |e| e[:uri] == ref.uri }
      next unless ttl_entity

      matches << {
        entity_id: entity.short,
        entity_name: format_entity_name(entity),
        si_uri: ttl_entity[:uri],
        si_name: ttl_entity[:name],
        si_label: ttl_entity[:label],
        si_alt_label: ttl_entity[:alt_label],
        si_symbol: ttl_entity[:symbol],
        entity: entity
      }
    end
  end

  # Second pass: find matching entities
  ttl_entities.each do |ttl_entity|
    ttl_uri = ttl_entity[:uri]
    next if matched_ttl_uris.include?(ttl_uri)

    find_matching_entities(entity_type, ttl_entity, db_entities).each do |entity|
      entity_id = entity.short

      # Create a unique key for this entity-ttl pair to avoid duplicates
      pair_key = "#{entity_id}:#{ttl_uri}"
      next if processed_pairs[pair_key]

      processed_pairs[pair_key] = true

      # Get detailed match information
      match_result = match_entity_names?(entity_type, entity, ttl_entity)
      next unless match_result[:match]

      # Only a confirmed match consumes the TTL entity. Marking it earlier
      # (as soon as a candidate was found) made an unconfirmed TTL entity
      # vanish from matches, missing_matches and unmatched_ttl alike.
      matched_ttl_uris << ttl_uri unless matched_ttl_uris.include?(ttl_uri)

      # Save match details for later use. The store is created lazily because
      # the module-level @match_details only exists on the module itself, not
      # on an object the module has been mixed into.
      (@match_details ||= {})[pair_key] = match_result

      # Check if already has reference. URIs referenced by an entity were
      # already collected in the first pass and skipped above, so this is a
      # defensive check; it is guarded like the first pass.
      references = entity.respond_to?(:references) ? entity.references : nil
      has_reference = references&.any? do |ref|
        ref.uri == ttl_uri && ref.authority == SI_AUTHORITY
      end

      match_data = {
        entity_id: entity_id,
        entity_name: format_entity_name(entity),
        si_uri: ttl_uri,
        si_name: ttl_entity[:name],
        si_label: ttl_entity[:label],
        si_alt_label: ttl_entity[:alt_label],
        si_symbol: ttl_entity[:symbol],
        entity: entity,
        match_type: match_result[:match_type],
        match_details: match_result,
        match_types: { ttl_uri => match_result[:match_type] }
      }

      if has_reference
        matches << match_data
      else
        # Group by entity_id for multiple SI matches; only the first
        # occurrence of an entity is added to missing_matches.
        first_for_entity = !entity_matches.key?(entity_id)
        (entity_matches[entity_id] ||= []) << {
          uri: ttl_uri,
          name: ttl_entity[:name],
          label: ttl_entity[:label]
        }
        missing_matches << match_data if first_for_entity
      end
    end
  end

  # Update missing_matches to include multiple SI entities
  missing_matches.each do |match|
    si_matches = entity_matches[match[:entity_id]]
    match[:multiple_si] = si_matches if si_matches && si_matches.size > 1
  end

  # Find unmatched TTL entities, ignoring the bare collection URIs
  unmatched_ttl = ttl_entities.reject do |entity|
    matched_ttl_uris.include?(entity[:uri]) ||
      entity[:uri].end_with?("/units/", "/quantities/", "/prefixes/")
  end

  [matches, missing_matches, unmatched_ttl]
end
|
127
|
+
|
128
|
+
# Match database entities to TTL entities (to_si direction).
#
# Each database entity is processed once (keyed by find_entity_id). Entities
# that already carry a reference whose authority is SI_AUTHORITY go to
# +matches+; the others are compared against every TTL entity with
# match_entity_names? and, when something matches, go to +missing_refs+.
#
# @param entity_type [String] forwarded to match_entity_names?; for
#   "quantities" and "prefixes" NIST identifiers are displayed as #short
# @param ttl_entities [Array<Hash>] hashes read through :uri
# @param db_entities [Array] database entities
# @return [Array] three arrays: [matches, missing_refs, unmatched_db]
def match_db_to_ttl(entity_type, ttl_entities, db_entities)
  matches = []
  missing_refs = []
  matched_db_ids = []
  processed_db_ids = {} # Track processed entities

  # Map from NIST IDs to display names for original output compatibility.
  # Only quantities and prefixes show the "short" field, so the scan is
  # skipped entirely for other entity types.
  nist_id_to_display = {}
  if %w[quantities prefixes].include?(entity_type)
    db_entities.each do |entity|
      next unless entity.respond_to?(:identifiers)

      nist_id = entity.identifiers&.first&.id
      next unless nist_id&.start_with?("NIST")

      nist_id_to_display[nist_id] = entity.short if entity.respond_to?(:short)
    end
  end

  db_entities.each do |db_entity|
    entity_id = find_entity_id(db_entity)

    # Skip if we've already processed this entity
    next if processed_db_ids[entity_id]

    processed_db_ids[entity_id] = true

    # Every key of the mapping starts with "NIST", so a plain lookup is
    # enough and, unlike calling start_with? on the id, it tolerates a nil id.
    display_id = nist_id_to_display[entity_id] || entity_id

    # Check for existing SI references
    si_refs = []
    if db_entity.respond_to?(:references) && db_entity.references
      si_refs = db_entity.references.select { |ref| ref.authority == SI_AUTHORITY }
    end

    unless si_refs.empty?
      si_refs.each do |ref|
        matches << {
          entity_id: display_id,
          db_entity: db_entity,
          ttl_uri: ref.uri,
          # May be nil when the referenced URI is absent from ttl_entities
          ttl_entity: ttl_entities.find { |e| e[:uri] == ref.uri }
        }
      end

      # Already has a reference, continue to next entity
      matched_db_ids << entity_id
      next
    end

    # Find matching TTL entities
    matching_ttl = []
    match_types = {}

    ttl_entities.each do |ttl_entity|
      match_result = match_entity_names?(entity_type, db_entity, ttl_entity)
      next unless match_result[:match]

      matching_ttl << ttl_entity
      match_types[ttl_entity[:uri]] = match_result[:match_type]

      # Save detailed match info. The store is created lazily because the
      # module-level @match_details only exists on the module itself, not on
      # an object the module has been mixed into.
      (@match_details ||= {})["#{entity_id}:#{ttl_entity[:uri]}"] = match_result
    end

    # If found matches, add to missing_refs
    next if matching_ttl.empty?

    matched_db_ids << entity_id
    missing_refs << {
      entity_id: display_id,
      db_entity: db_entity,
      ttl_entities: matching_ttl,
      match_types: match_types
    }
  end

  # Find unmatched db entities
  unmatched_db = db_entities.reject { |entity| matched_db_ids.include?(find_entity_id(entity)) }

  [matches, missing_refs, unmatched_db]
end
|
220
|
+
|
221
|
+
# Find the identifier used to key an entity.
#
# Preference order: a truthy #id, then the #id of the first element of
# #identifiers (when that element responds to #id), then #short.
#
# @param entity [Object] entity responding to #short and optionally to #id
#   and #identifiers
# @return [Object, nil] the chosen identifier
def find_entity_id(entity)
  return entity.id if entity.respond_to?(:id) && entity.id

  # identifiers may be nil as well as empty; both fall through to #short
  first_identifier = entity.identifiers&.first if entity.respond_to?(:identifiers)
  return first_identifier.id if first_identifier.respond_to?(:id)

  entity.short
end
|
229
|
+
|
230
|
+
# Return the display name of an entity: the first element of #names.
#
# The name is returned unmodified. Earlier special-casing (rewriting
# "sidereal" names into comma format and passing "year (365 days)" through)
# existed here only as commented-out code and is not applied.
#
# @param entity [Object] any object; #names is only read when it is supported
# @return [Object, nil] the first name, or nil when there is none
def format_entity_name(entity)
  return unless entity.respond_to?(:names)

  # A falsy first element is reported as nil
  entity.names&.first || nil
end
|
254
|
+
|
255
|
+
# Find the database entities matching a TTL entity by dispatching to the
# finder for the given entity type.
#
# @param entity_type [String] "units", "quantities" or "prefixes"
# @param ttl_entity [Hash] TTL entity handed to the finder
# @param db_entities [Array] database entities handed to the finder
# @return [Array] the finder's result, or [] for any other entity type
def find_matching_entities(entity_type, ttl_entity, db_entities)
  finder = {
    "units" => :find_matching_units,
    "quantities" => :find_matching_quantities,
    "prefixes" => :find_matching_prefixes
  }[entity_type]
  return [] unless finder

  send(finder, ttl_entity, db_entities)
end
|
268
|
+
|
269
|
+
# Find units matching a TTL unit, comparing case-insensitively.
#
# A unit matches when its #short or any of its #names equals the TTL :name
# or :label, or when any of its #symbols has an #ascii form equal to the TTL
# :symbol.
#
# @param ttl_unit [Hash] TTL entity read through :name, :label and :symbol
# @param units [Array] units responding to #short and optionally to #names
#   and #symbols
# @return [Array] matching units, in input order, without duplicates
def find_matching_units(ttl_unit, units)
  # Missing TTL values are dropped up front. Comparing nil&.downcase on both
  # sides would be true and would match units that have no #short.
  targets = [ttl_unit[:name], ttl_unit[:label]].compact.map(&:downcase)
  ttl_symbol = ttl_unit[:symbol]&.downcase

  units.select do |unit|
    # Match by short
    next true if unit.short && targets.include?(unit.short.downcase)

    # Match by name
    next true if unit.respond_to?(:names) &&
                 unit.names&.any? { |name| targets.include?(name.downcase) }

    # Match by symbol
    ttl_symbol && unit.respond_to?(:symbols) && unit.symbols&.any? do |sym|
      sym.respond_to?(:ascii) && sym.ascii && sym.ascii.downcase == ttl_symbol
    end
  end.uniq
end
|
300
|
+
|
301
|
+
# Find quantities matching a TTL quantity, comparing case-insensitively.
#
# A quantity matches when its #short or any of its #names equals the TTL
# :name, :label or :alt_label.
#
# @param ttl_quantity [Hash] TTL entity read through :name, :label and
#   :alt_label
# @param quantities [Array] quantities responding to #short and optionally
#   to #names
# @return [Array] matching quantities, in input order, without duplicates
def find_matching_quantities(ttl_quantity, quantities)
  # Missing TTL values are dropped up front. Comparing nil&.downcase on both
  # sides would be true and would match quantities that have no #short.
  targets = [ttl_quantity[:name], ttl_quantity[:label], ttl_quantity[:alt_label]]
            .compact.map(&:downcase)

  quantities.select do |quantity|
    # Match by short
    next true if quantity.short && targets.include?(quantity.short.downcase)

    # Match by name
    quantity.respond_to?(:names) &&
      quantity.names&.any? { |name| targets.include?(name.downcase) }
  end.uniq
end
|
326
|
+
|
327
|
+
# Find prefixes matching a TTL prefix, comparing case-insensitively.
#
# A prefix matches when its #short or any of its #names equals the TTL :name
# or :label, or when its #symbol has an #ascii form equal to the TTL :symbol.
#
# @param ttl_prefix [Hash] TTL entity read through :name, :label and :symbol
# @param prefixes [Array] prefixes responding to #short and optionally to
#   #names and #symbol
# @return [Array] matching prefixes, in input order, without duplicates
def find_matching_prefixes(ttl_prefix, prefixes)
  # Missing TTL values are dropped up front. Comparing nil&.downcase on both
  # sides would be true and would match prefixes that have no #short.
  targets = [ttl_prefix[:name], ttl_prefix[:label]].compact.map(&:downcase)
  ttl_symbol = ttl_prefix[:symbol]&.downcase

  prefixes.select do |prefix|
    # Match by short
    next true if prefix.short && targets.include?(prefix.short.downcase)

    # Match by name
    next true if prefix.respond_to?(:names) &&
                 prefix.names&.any? { |name| targets.include?(name.downcase) }

    # Match by symbol
    ttl_symbol && prefix.respond_to?(:symbol) && prefix.symbol &&
      prefix.symbol.respond_to?(:ascii) && prefix.symbol.ascii &&
      prefix.symbol.ascii.downcase == ttl_symbol
  end.uniq
end
|
358
|
+
|
359
|
+
# Match a database entity against a TTL entity and describe how it matched.
#
# Checks run in this order and the first hit wins (all comparisons are
# case-insensitive):
#   1. #short against the TTL :name, then the TTL :label        (exact)
#   2. each of #names against :name, then :label, then :alt_label (exact)
#   3. for "units" / "prefixes" only: ASCII symbol against :symbol (potential)
# An exact match on an entity whose #short contains "sidereal_" is downgraded
# to a potential match unless the TTL :name or :label mentions "sidereal".
#
# Missing TTL values (including a nil :name) never match and never raise.
#
# @param entity_type [String] "units", "quantities" or "prefixes"
# @param db_entity [Object] entity responding to #short and optionally to
#   #names, #symbols (units) or #symbol (prefixes)
# @param ttl_entity [Hash] read through :name, :label, :alt_label, :symbol
# @return [Hash] { match: false }, or a hash with :match, :exact,
#   :match_type, :match_desc and :details
def match_entity_names?(entity_type, db_entity, ttl_entity)
  short = db_entity.short
  si_name = ttl_entity[:name]
  si_label = ttl_entity[:label]
  si_symbol = ttl_entity[:symbol]

  # Case-insensitive equality that is false when either side is missing
  same = ->(left, right) { left && right && left.downcase == right.downcase }
  describe = lambda do |exact, desc, details|
    {
      match: true,
      exact: exact,
      match_type: exact ? "Exact match" : "Potential match",
      match_desc: desc,
      details: details
    }
  end

  match_details = { match: false }

  if same.call(short, si_name)
    # Match by short name - EXACT match
    match_details = describe.call(true, "short_to_name",
                                  "UnitsDB short '#{short}' matches SI name '#{si_name}'")
  elsif same.call(short, si_label)
    # Match by short to label
    match_details = describe.call(true, "short_to_label",
                                  "UnitsDB short '#{short}' matches SI label '#{si_label}'")
  elsif db_entity.respond_to?(:names) && db_entity.names
    # Match by names - EXACT match, trying TTL name, label, then alt_label
    [
      [si_name, "name_to_name", "name"],
      [si_label, "name_to_label", "label"],
      [ttl_entity[:alt_label], "name_to_alt_label", "alt_label"]
    ].each do |si_value, desc, kind|
      db_name_match = db_entity.names.find { |name| same.call(name, si_value) }
      next unless db_name_match

      match_details = describe.call(true, desc,
                                    "UnitsDB name '#{db_name_match}' matches SI #{kind} '#{si_value}'")
      break
    end
  end

  # Special validation for "sidereal_" units
  if match_details[:match] && match_details[:exact] && short&.include?("sidereal_") &&
     !(si_name&.include?("sidereal") || si_label&.include?("sidereal"))
    match_details = describe.call(false, "partial_match",
                                  "UnitsDB '#{short}' partially matches SI '#{si_name}'")
  end

  # Match by symbol if available (units and prefixes) - POTENTIAL match
  if !match_details[:match] && si_symbol && %w[units prefixes].include?(entity_type)
    matching_symbol =
      if entity_type == "units" && db_entity.respond_to?(:symbols) && db_entity.symbols
        db_entity.symbols.find { |sym| sym.respond_to?(:ascii) && same.call(sym.ascii, si_symbol) }
      elsif entity_type == "prefixes" && db_entity.respond_to?(:symbol) && db_entity.symbol
        candidate = db_entity.symbol
        candidate if candidate.respond_to?(:ascii) && same.call(candidate.ascii, si_symbol)
      end

    if matching_symbol
      match_details = describe.call(false, "symbol_match",
                                    "UnitsDB symbol '#{matching_symbol.ascii}' matches SI symbol '#{si_symbol}'")
    end
  end

  match_details
end
|
468
|
+
end
|
469
|
+
end
|
470
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rdf"
|
4
|
+
require "rdf/turtle"
|
5
|
+
|
6
|
+
module Unitsdb
|
7
|
+
module Commands
|
8
|
+
# Parser for SI TTL files
|
9
|
+
module SiTtlParser
|
10
|
+
SI_URI_PREFIX = "http://si-digital-framework.org/SI/"
|
11
|
+
|
12
|
+
module_function
|
13
|
+
|
14
|
+
# Load every "*.ttl" file found directly inside +dir+ into one RDF graph,
# printing progress to standard output.
#
# @param dir [String] directory to scan
# @return [RDF::Graph] graph holding the statements of all loaded files
def parse_ttl_files(dir)
  puts "Parsing TTL files in #{dir}..."

  ttl_paths = Dir.glob(File.join(dir, "*.ttl"))
  ttl_paths.each_with_object(RDF::Graph.new) do |path, graph|
    puts "  Reading #{File.basename(path)}"
    graph.load(path, format: :ttl)
  end
end
|
26
|
+
|
27
|
+
# Extract the entities of one type from a parsed TTL graph.
#
# Entities are selected by URI namespace; units and prefixes additionally get
# their :symbol filled in.
#
# @param entity_type [String] "units", "quantities" or "prefixes"
# @param graph [RDF::Graph] graph to query
# @return [Array<Hash>] entity hashes, or [] for any other entity type
def extract_entities_from_ttl(entity_type, graph)
  namespace_uri = {
    "units" => "http://si-digital-framework.org/SI/units/",
    "quantities" => "http://si-digital-framework.org/quantities/",
    "prefixes" => "http://si-digital-framework.org/SI/prefixes/"
  }[entity_type]
  return [] unless namespace_uri

  skos = RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#")
  si = RDF::Vocabulary.new("http://si-digital-framework.org/SI#")

  entities = extract_base_entities(graph, RDF::Vocabulary.new(namespace_uri), skos)
  has_symbols = %w[units prefixes].include?(entity_type)
  add_symbols_to_entities(entities, graph, si) if has_symbols
  entities
end
|
44
|
+
|
45
|
+
# Collect one hash per distinct subject that has a skos:prefLabel and whose
# URI starts with the given namespace.
#
# @param graph [RDF::Graph] graph to query
# @param namespace [#to_s] URI prefix the entity URIs must start with
# @param skos [RDF::Vocabulary] vocabulary supplying prefLabel and altLabel
# @return [Array<Hash>] hashes with :uri, :name (last path segment of the
#   URI), :label and :alt_label (first value found, or nil)
def extract_base_entities(graph, namespace, skos)
  uri_prefix = namespace.to_s
  seen_uris = {}

  # First value of +predicate+ for the subject, as a string, or nil
  first_value = lambda do |subject_uri, predicate|
    RDF::Query.new({ RDF::URI(subject_uri) => { predicate => :value } })
              .execute(graph).first&.value&.to_s
  end

  labelled = RDF::Query.new({ entity: { skos.prefLabel => :label } }).execute(graph)
  labelled.each_with_object([]) do |solution, entities|
    entity_uri = solution.entity.to_s
    next unless entity_uri.start_with?(uri_prefix)
    next if seen_uris[entity_uri]

    seen_uris[entity_uri] = true

    entities << {
      uri: entity_uri,
      name: entity_uri.split("/").last,
      label: first_value.call(entity_uri, skos.prefLabel),
      alt_label: first_value.call(entity_uri, skos.altLabel)
    }
  end
end
|
74
|
+
|
75
|
+
# Set :symbol on every entity for which the graph holds an si:hasSymbol
# value. Entities are modified in place; entities without a symbol are left
# untouched.
#
# @param entities [Array<Hash>] entity hashes carrying a :uri
# @param graph [RDF::Graph] graph to query
# @param si [RDF::Vocabulary] vocabulary supplying hasSymbol
# @return [Array<Hash>] the same +entities+ array
def add_symbols_to_entities(entities, graph, si)
  entities.each do |entity|
    pattern = { RDF::URI(entity[:uri]) => { si.hasSymbol => :value } }
    found = RDF::Query.new(pattern).execute(graph).first&.value&.to_s
    next unless found

    entity[:symbol] = found
  end
end
|
83
|
+
|
84
|
+
# Shorten an SI URI for display.
#
# URIs containing "/units/" become "units/<last path segment>"; every other
# URI has all occurrences of SI_URI_PREFIX removed.
#
# @param uri [String, nil] URI to shorten
# @return [String] the shortened form, or "" when +uri+ is nil or false
def extract_identifying_suffix(uri)
  return "" unless uri
  return uri.gsub(SI_URI_PREFIX, "") unless uri.include?("/units/")

  # Units keep a single "units/" prefix in front of the final segment
  "units/#{uri.split("/").last}"
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|