unitsdb 2.1.1 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +8 -1
  3. data/.gitignore +2 -0
  4. data/.gitmodules +4 -3
  5. data/.rubocop.yml +13 -8
  6. data/.rubocop_todo.yml +217 -100
  7. data/CLAUDE.md +55 -0
  8. data/Gemfile +4 -1
  9. data/README.adoc +283 -16
  10. data/data/dimensions.yaml +1864 -0
  11. data/data/prefixes.yaml +874 -0
  12. data/data/quantities.yaml +3715 -0
  13. data/data/scales.yaml +97 -0
  14. data/data/schemas/dimensions-schema.yaml +153 -0
  15. data/data/schemas/prefixes-schema.yaml +155 -0
  16. data/data/schemas/quantities-schema.yaml +117 -0
  17. data/data/schemas/scales-schema.yaml +106 -0
  18. data/data/schemas/unit_systems-schema.yaml +116 -0
  19. data/data/schemas/units-schema.yaml +215 -0
  20. data/data/unit_systems.yaml +78 -0
  21. data/data/units.yaml +14052 -0
  22. data/exe/unitsdb +7 -1
  23. data/lib/unitsdb/cli.rb +42 -15
  24. data/lib/unitsdb/commands/_modify.rb +40 -4
  25. data/lib/unitsdb/commands/base.rb +6 -2
  26. data/lib/unitsdb/commands/check_si/si_formatter.rb +488 -0
  27. data/lib/unitsdb/commands/check_si/si_matcher.rb +487 -0
  28. data/lib/unitsdb/commands/check_si/si_ttl_parser.rb +103 -0
  29. data/lib/unitsdb/commands/check_si/si_updater.rb +254 -0
  30. data/lib/unitsdb/commands/check_si.rb +54 -35
  31. data/lib/unitsdb/commands/get.rb +11 -10
  32. data/lib/unitsdb/commands/normalize.rb +21 -7
  33. data/lib/unitsdb/commands/qudt/check.rb +150 -0
  34. data/lib/unitsdb/commands/qudt/formatter.rb +194 -0
  35. data/lib/unitsdb/commands/qudt/matcher.rb +746 -0
  36. data/lib/unitsdb/commands/qudt/ttl_parser.rb +403 -0
  37. data/lib/unitsdb/commands/qudt/update.rb +126 -0
  38. data/lib/unitsdb/commands/qudt/updater.rb +189 -0
  39. data/lib/unitsdb/commands/qudt.rb +82 -0
  40. data/lib/unitsdb/commands/release.rb +12 -9
  41. data/lib/unitsdb/commands/search.rb +12 -11
  42. data/lib/unitsdb/commands/ucum/check.rb +42 -29
  43. data/lib/unitsdb/commands/ucum/formatter.rb +2 -1
  44. data/lib/unitsdb/commands/ucum/matcher.rb +23 -9
  45. data/lib/unitsdb/commands/ucum/update.rb +14 -13
  46. data/lib/unitsdb/commands/ucum/updater.rb +40 -6
  47. data/lib/unitsdb/commands/ucum/xml_parser.rb +0 -2
  48. data/lib/unitsdb/commands/ucum.rb +44 -4
  49. data/lib/unitsdb/commands/validate/identifiers.rb +2 -4
  50. data/lib/unitsdb/commands/validate/qudt_references.rb +111 -0
  51. data/lib/unitsdb/commands/validate/references.rb +36 -19
  52. data/lib/unitsdb/commands/validate/si_references.rb +3 -5
  53. data/lib/unitsdb/commands/validate/ucum_references.rb +105 -0
  54. data/lib/unitsdb/commands/validate.rb +67 -11
  55. data/lib/unitsdb/commands.rb +20 -0
  56. data/lib/unitsdb/database.rb +90 -52
  57. data/lib/unitsdb/dimension.rb +1 -4
  58. data/lib/unitsdb/dimension_details.rb +0 -1
  59. data/lib/unitsdb/dimensions.rb +0 -2
  60. data/lib/unitsdb/errors.rb +7 -0
  61. data/lib/unitsdb/prefix.rb +0 -4
  62. data/lib/unitsdb/prefix_reference.rb +0 -2
  63. data/lib/unitsdb/prefixes.rb +0 -1
  64. data/lib/unitsdb/quantities.rb +0 -2
  65. data/lib/unitsdb/quantity.rb +0 -6
  66. data/lib/unitsdb/qudt.rb +100 -0
  67. data/lib/unitsdb/root_unit_reference.rb +0 -3
  68. data/lib/unitsdb/scale.rb +0 -4
  69. data/lib/unitsdb/scale_reference.rb +0 -2
  70. data/lib/unitsdb/scales.rb +0 -2
  71. data/lib/unitsdb/si_derived_base.rb +0 -2
  72. data/lib/unitsdb/ucum.rb +14 -10
  73. data/lib/unitsdb/unit.rb +0 -10
  74. data/lib/unitsdb/unit_reference.rb +0 -2
  75. data/lib/unitsdb/unit_system.rb +1 -3
  76. data/lib/unitsdb/unit_system_reference.rb +0 -2
  77. data/lib/unitsdb/unit_systems.rb +0 -2
  78. data/lib/unitsdb/units.rb +0 -2
  79. data/lib/unitsdb/utils.rb +32 -21
  80. data/lib/unitsdb/version.rb +5 -1
  81. data/lib/unitsdb.rb +62 -14
  82. data/unitsdb.gemspec +6 -3
  83. metadata +52 -13
  84. data/lib/unitsdb/commands/si_formatter.rb +0 -485
  85. data/lib/unitsdb/commands/si_matcher.rb +0 -470
  86. data/lib/unitsdb/commands/si_ttl_parser.rb +0 -100
  87. data/lib/unitsdb/commands/si_updater.rb +0 -212
@@ -0,0 +1,487 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Unitsdb
4
+ module Commands
5
+ module CheckSi
6
+ # Matcher for SI entities and UnitsDB entities
7
+ module SiMatcher
8
+ SI_AUTHORITY = "si-digital-framework"
9
+ @match_details = {}
10
+
11
+ module_function
12
+
13
+ # Match TTL entities to database entities (from_si direction)
14
# Compares SI TTL entities against UnitsDB entities (from_si direction).
#
# Pass 1 records every database entity that already carries a reference
# whose authority is SI_AUTHORITY. Pass 2 runs the name/symbol matchers
# over the TTL entities that no such reference points at.
#
# @param entity_type [String] "units", "quantities" or "prefixes"
# @param ttl_entities [Array<Hash>] hashes with :uri, :name, :label,
#   :alt_label and (optionally) :symbol; URIs are assumed to be Strings
# @param db_entities [Enumerable] UnitsDB entities responding to #short
# @return [Array] a triple [matches, missing_matches, unmatched_ttl]
def match_ttl_to_db(entity_type, ttl_entities, db_entities)
  # The module normally initialises this at load time; the fallback keeps the
  # method usable when it is mixed into another object.
  @match_details ||= {}

  matches = []
  missing_matches = []
  matched_ttl_uris = {} # URI => true, constant-time membership checks
  processed_pairs = {}  # "entity_id:uri" => true, avoids duplicate pairs
  entity_matches = {}   # entity_id => [{ uri:, name:, label: }, ...]

  # Index TTL entities by URI once; the first entity wins for duplicate URIs.
  ttl_by_uri = {}
  ttl_entities.each { |ttl_entity| ttl_by_uri[ttl_entity[:uri]] ||= ttl_entity }

  # First pass: entities that already reference an SI URI.
  db_entities.each do |entity|
    next unless entity.respond_to?(:references) && entity.references

    entity.references.each do |ref|
      next unless ref.authority == SI_AUTHORITY

      # The URI counts as matched even when no TTL entity exists for it.
      matched_ttl_uris[ref.uri] = true
      ttl_entity = ttl_by_uri[ref.uri]
      next unless ttl_entity

      matches << {
        entity_id: entity.short,
        entity_name: format_entity_name(entity),
        si_uri: ttl_entity[:uri],
        si_name: ttl_entity[:name],
        si_label: ttl_entity[:label],
        si_alt_label: ttl_entity[:alt_label],
        si_symbol: ttl_entity[:symbol],
        entity: entity,
      }
    end
  end

  # Second pass: match the remaining TTL entities by name/label/symbol.
  ttl_entities.each do |ttl_entity|
    uri = ttl_entity[:uri]
    next if matched_ttl_uris.key?(uri)

    candidates = find_matching_entities(entity_type, ttl_entity, db_entities)
    next if candidates.empty?

    matched_ttl_uris[uri] = true

    candidates.each do |entity|
      entity_id = entity.short
      pair_key = "#{entity_id}:#{uri}"
      next if processed_pairs[pair_key]

      processed_pairs[pair_key] = true

      match_result = match_entity_names?(entity_type, entity, ttl_entity)
      next unless match_result[:match]

      # Keep the detailed result for later use by other code.
      @match_details[pair_key] = match_result

      # NOTE(review): pass 1 already marks every SI-referenced URI as matched,
      # so this appears to be always false here; kept for safety.
      has_reference = entity.respond_to?(:references) && entity.references&.any? do |ref|
        ref.uri == uri && ref.authority == SI_AUTHORITY
      end

      match_data = {
        entity_id: entity_id,
        entity_name: format_entity_name(entity),
        si_uri: uri,
        si_name: ttl_entity[:name],
        si_label: ttl_entity[:label],
        si_alt_label: ttl_entity[:alt_label],
        si_symbol: ttl_entity[:symbol],
        entity: entity,
        match_type: match_result[:match_type],
        match_details: match_result,
        match_types: { uri => match_result[:match_type] },
      }

      if has_reference
        matches << match_data
        next
      end

      # Only the first SI match of an entity is reported in missing_matches;
      # every match is remembered so multiples can be attached afterwards.
      first_match_for_entity = !entity_matches.key?(entity_id)
      (entity_matches[entity_id] ||= []) << {
        uri: uri,
        name: ttl_entity[:name],
        label: ttl_entity[:label],
      }
      missing_matches << match_data if first_match_for_entity
    end
  end

  # Attach the full candidate list to entities matching several SI entities.
  missing_matches.each do |match|
    si_matches = entity_matches[match[:entity_id]]
    match[:multiple_si] = si_matches if si_matches && si_matches.size > 1
  end

  # TTL entities nobody matched, ignoring the bare namespace URIs.
  unmatched_ttl = ttl_entities.reject do |ttl_entity|
    matched_ttl_uris.key?(ttl_entity[:uri]) ||
      ttl_entity[:uri].end_with?("/units/", "/quantities/", "/prefixes/")
  end

  [matches, missing_matches, unmatched_ttl]
end
133
+
134
+ # Match database entities to TTL entities (to_si direction)
135
# Compares UnitsDB entities against SI TTL entities (to_si direction).
#
# Entities that already carry a reference whose authority is SI_AUTHORITY
# are reported as matches (one entry per reference). The others are checked
# against every TTL entity with match_entity_names?.
#
# @param entity_type [String] "units", "quantities" or "prefixes"
# @param ttl_entities [Array<Hash>] hashes with at least :uri; URIs are
#   assumed to be Strings
# @param db_entities [Enumerable] UnitsDB entities
# @return [Array] a triple [matches, missing_refs, unmatched_db]
def match_db_to_ttl(entity_type, ttl_entities, db_entities)
  # The module normally initialises this at load time; the fallback keeps the
  # method usable when it is mixed into another object.
  @match_details ||= {}

  matches = []
  missing_refs = []
  matched_db_ids = {}   # entity id => true
  processed_db_ids = {} # entity id => true, each id is handled once

  # Index TTL entities by URI once; the first entity wins for duplicate URIs.
  ttl_by_uri = {}
  ttl_entities.each { |ttl_entity| ttl_by_uri[ttl_entity[:uri]] ||= ttl_entity }

  # Quantities and prefixes are displayed by their "short" field rather than
  # by their NIST identifier.
  nist_id_to_display = {}
  if %w[quantities prefixes].include?(entity_type)
    db_entities.each do |entity|
      next unless entity.respond_to?(:identifiers) && entity.respond_to?(:short)

      nist_id = entity.identifiers&.first&.id
      nist_id_to_display[nist_id] = entity.short if nist_id&.start_with?("NIST")
    end
  end

  db_entities.each do |db_entity|
    entity_id = find_entity_id(db_entity)
    next if processed_db_ids[entity_id]

    processed_db_ids[entity_id] = true

    # Only NIST ids are keys of the map, so a plain lookup is enough; this is
    # also safe when the entity has no id at all.
    display_id = nist_id_to_display[entity_id] || entity_id

    si_refs =
      if db_entity.respond_to?(:references) && db_entity.references
        db_entity.references.select { |ref| ref.authority == SI_AUTHORITY }
      else
        []
      end

    # Already referenced: report each reference and move on.
    unless si_refs.empty?
      si_refs.each do |ref|
        matches << {
          entity_id: display_id,
          db_entity: db_entity,
          ttl_uri: ref.uri,
          ttl_entity: ttl_by_uri[ref.uri], # nil when the URI is not in the TTL data
        }
      end
      matched_db_ids[entity_id] = true
      next
    end

    # Not referenced yet: look for TTL entities that match by name or symbol.
    matching_ttl = []
    match_types = {}

    ttl_entities.each do |ttl_entity|
      match_result = match_entity_names?(entity_type, db_entity, ttl_entity)
      next unless match_result[:match]

      matching_ttl << ttl_entity
      match_types[ttl_entity[:uri]] = match_result[:match_type]
      @match_details["#{entity_id}:#{ttl_entity[:uri]}"] = match_result
    end

    next if matching_ttl.empty?

    matched_db_ids[entity_id] = true
    missing_refs << {
      entity_id: display_id,
      db_entity: db_entity,
      ttl_entities: matching_ttl,
      match_types: match_types,
    }
  end

  unmatched_db = db_entities.reject do |entity|
    matched_db_ids.key?(find_entity_id(entity))
  end

  [matches, missing_refs, unmatched_db]
end
229
+
230
+ # Find entity ID
231
# Resolves the identifier used to track an entity.
#
# Preference order: the entity's own #id, then the #id of its first
# identifier, then its #short. A nil or empty identifier list falls through
# to #short instead of raising.
#
# @param entity [Object] a UnitsDB entity
# @return [Object, nil] the resolved identifier
def find_entity_id(entity)
  return entity.id if entity.respond_to?(:id) && entity.id

  first_identifier = entity.identifiers&.first if entity.respond_to?(:identifiers)
  return first_identifier.id if first_identifier.respond_to?(:id)

  entity.short
end
238
+
239
+ # Format entity name correctly
240
# Returns the display name of an entity: the first element of #names.
# No reformatting is applied to the name.
#
# @param entity [Object] a UnitsDB entity
# @return [Object, nil] the first name, or nil when the entity has no
#   #names method or no names
def format_entity_name(entity)
  return nil unless entity.respond_to?(:names)

  entity.names&.first
end
263
+
264
+ # Find matching entities for a TTL entity
265
# Dispatches to the type-specific finder for one TTL entity.
#
# @param entity_type [String] "units", "quantities" or "prefixes"
# @param ttl_entity [Hash] the TTL entity to look up
# @param db_entities [Enumerable] candidate UnitsDB entities
# @return [Array] matching entities; empty for any other entity type
def find_matching_entities(entity_type, ttl_entity, db_entities)
  case entity_type
  when "units" then find_matching_units(ttl_entity, db_entities)
  when "quantities" then find_matching_quantities(ttl_entity, db_entities)
  when "prefixes" then find_matching_prefixes(ttl_entity, db_entities)
  else []
  end
end
277
+
278
+ # Find exact matches for units
279
# Finds the units that match a TTL unit, comparing case-insensitively.
#
# A unit matches when its #short or any of its #names equals the TTL name
# or label, or when one of its symbols' #ascii equals the TTL symbol.
# Missing values never match each other: a unit without #short is not
# matched just because the TTL entity has no label.
#
# @param ttl_unit [Hash] TTL entity with :name, :label and optional :symbol
# @param units [Enumerable] UnitsDB units
# @return [Array] the matching units, without duplicates
def find_matching_units(ttl_unit, units)
  # Normalise the TTL side once instead of once per unit.
  wanted_names = [ttl_unit[:name], ttl_unit[:label]].compact.map(&:downcase)
  wanted_symbol = ttl_unit[:symbol]&.downcase

  matching_units = units.select do |unit|
    short = unit.short&.downcase
    next true if short && wanted_names.include?(short)

    next true if unit.respond_to?(:names) &&
                 unit.names&.any? { |name| wanted_names.include?(name.downcase) }

    next false unless wanted_symbol && unit.respond_to?(:symbols)

    unit.symbols&.any? do |sym|
      sym.respond_to?(:ascii) && sym.ascii && sym.ascii.downcase == wanted_symbol
    end
  end

  matching_units.uniq
end
309
+
310
+ # Find exact matches for quantities
311
# Finds the quantities that match a TTL quantity, comparing
# case-insensitively.
#
# A quantity matches when its #short or any of its #names equals the TTL
# name, label or alt_label. Missing values never match each other: a
# quantity without #short is not matched just because the TTL entity has no
# alt_label.
#
# @param ttl_quantity [Hash] TTL entity with :name, :label and :alt_label
# @param quantities [Enumerable] UnitsDB quantities
# @return [Array] the matching quantities, without duplicates
def find_matching_quantities(ttl_quantity, quantities)
  # Normalise the TTL side once instead of once per quantity.
  wanted_names = ttl_quantity.values_at(:name, :label, :alt_label).compact.map(&:downcase)

  matching_quantities = quantities.select do |quantity|
    short = quantity.short&.downcase
    next true if short && wanted_names.include?(short)

    quantity.respond_to?(:names) &&
      quantity.names&.any? { |name| wanted_names.include?(name.downcase) }
  end

  matching_quantities.uniq
end
335
+
336
+ # Find exact matches for prefixes
337
# Finds the prefixes that match a TTL prefix, comparing case-insensitively.
#
# A prefix matches when its #short or any of its #names equals the TTL name
# or label, or when its symbol's #ascii equals the TTL symbol. Missing
# values never match each other: a prefix without #short is not matched
# just because the TTL entity has no label.
#
# NOTE(review): the symbol comparison ignores case, so "m" and "M" are
# treated as equal; confirm this is intended.
#
# @param ttl_prefix [Hash] TTL entity with :name, :label and optional :symbol
# @param prefixes [Enumerable] UnitsDB prefixes
# @return [Array] the matching prefixes, without duplicates
def find_matching_prefixes(ttl_prefix, prefixes)
  # Normalise the TTL side once instead of once per prefix.
  wanted_names = [ttl_prefix[:name], ttl_prefix[:label]].compact.map(&:downcase)
  wanted_symbol = ttl_prefix[:symbol]&.downcase

  matching_prefixes = prefixes.select do |prefix|
    short = prefix.short&.downcase
    next true if short && wanted_names.include?(short)

    next true if prefix.respond_to?(:names) &&
                 prefix.names&.any? { |name| wanted_names.include?(name.downcase) }

    next false unless wanted_symbol && prefix.respond_to?(:symbol)

    symbol = prefix.symbol
    ascii = symbol.ascii if symbol.respond_to?(:ascii)
    !ascii.nil? && ascii.downcase == wanted_symbol
  end

  matching_prefixes.uniq
end
367
+
368
+ # Match entity names with detailed type information
369
# Decides whether a UnitsDB entity and an SI TTL entity describe the same
# thing and explains how they matched. All comparisons ignore case.
#
# Checks, in order:
# 1. #short against the TTL name, then the TTL label (exact match);
# 2. each of #names against the TTL name, label, then alt_label (exact);
# 3. an exact match on a "sidereal_" entity whose SI name and label do not
#    mention "sidereal" is downgraded to a potential match;
# 4. for units and prefixes without a match so far, the ASCII symbol
#    against the TTL symbol (potential match).
#
# Missing TTL values (nil name, label, ...) are skipped instead of raising.
#
# @param entity_type [String] "units", "quantities" or "prefixes"
# @param db_entity [Object] UnitsDB entity responding to #short
# @param ttl_entity [Hash] TTL entity with :name, :label, :alt_label, :symbol
# @return [Hash] { match: false } when nothing matched; otherwise a hash
#   with :match, :exact, :match_type, :match_desc and :details. Despite the
#   trailing "?", the return value is always a Hash.
def match_entity_names?(entity_type, db_entity, ttl_entity)
  ttl_name = ttl_entity[:name]
  ttl_label = ttl_entity[:label]
  ttl_symbol = ttl_entity[:symbol]
  short = db_entity.short

  same_text = ->(left, right) { !left.nil? && !right.nil? && left.downcase == right.downcase }
  exact = lambda do |desc, details|
    { match: true, exact: true, match_type: "Exact match", match_desc: desc, details: details }
  end
  potential = lambda do |desc, details|
    { match: true, exact: false, match_type: "Potential match", match_desc: desc, details: details }
  end

  match_details = { match: false }

  if same_text.call(short, ttl_name)
    match_details = exact.call("short_to_name",
                               "UnitsDB short '#{short}' matches SI name '#{ttl_name}'")
  elsif same_text.call(short, ttl_label)
    match_details = exact.call("short_to_label",
                               "UnitsDB short '#{short}' matches SI label '#{ttl_label}'")
  elsif db_entity.respond_to?(:names) && db_entity.names
    # Try the TTL fields in priority order; the first field with a hit wins.
    [
      [ttl_name, "name", "name_to_name"],
      [ttl_label, "label", "name_to_label"],
      [ttl_entity[:alt_label], "alt_label", "name_to_alt_label"],
    ].each do |ttl_value, field, desc|
      next unless ttl_value

      wanted = ttl_value.downcase
      db_name = db_entity.names.find { |name| name.downcase == wanted }
      next unless db_name

      match_details = exact.call(desc,
                                 "UnitsDB name '#{db_name}' matches SI #{field} '#{ttl_value}'")
      break
    end
  end

  # Special validation for "sidereal_" units: the SI side must mention
  # "sidereal" too, otherwise the match is only a potential one.
  if match_details[:match] && match_details[:exact] && short&.include?("sidereal_") &&
     !(ttl_name&.include?("sidereal") || ttl_label&.include?("sidereal"))
    match_details = potential.call("partial_match",
                                   "UnitsDB '#{short}' partially matches SI '#{ttl_name}'")
  end

  # Symbol comparison is the last resort and only yields a potential match.
  if !match_details[:match] && ttl_symbol
    wanted_symbol = ttl_symbol.downcase
    ascii_matches = lambda do |sym|
      sym.respond_to?(:ascii) && sym.ascii && sym.ascii.downcase == wanted_symbol
    end

    db_symbol =
      case entity_type
      when "units"
        db_entity.symbols&.find { |sym| ascii_matches.call(sym) } if db_entity.respond_to?(:symbols)
      when "prefixes"
        db_entity.symbol if db_entity.respond_to?(:symbol) && ascii_matches.call(db_entity.symbol)
      end

    if db_symbol
      match_details = potential.call(
        "symbol_match",
        "UnitsDB symbol '#{db_symbol.ascii}' matches SI symbol '#{ttl_symbol}'",
      )
    end
  end

  match_details
end
484
+ end
485
+ end
486
+ end
487
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rdf"
4
+ require "rdf/turtle"
5
+
6
+ module Unitsdb
7
+ module Commands
8
+ module CheckSi
9
+ # Parser for SI TTL files
10
+ module SiTtlParser
11
+ SI_URI_PREFIX = "http://si-digital-framework.org/SI/"
12
+
13
+ module_function
14
+
15
+ # Parse TTL files and return RDF graph
16
# Loads every "*.ttl" file found directly inside +dir+ into one RDF graph,
# printing progress to standard output.
#
# @param dir [String] directory containing the TTL files
# @return [RDF::Graph] the graph holding the statements of all files
def parse_ttl_files(dir)
  puts "Parsing TTL files in #{dir}..."

  ttl_paths = Dir.glob(File.join(dir, "*.ttl"))
  ttl_paths.each_with_object(RDF::Graph.new) do |path, graph|
    puts " Reading #{File.basename(path)}"
    graph.load(path, format: :ttl)
  end
end
27
+
28
+ # Extract entities from TTL based on entity type
29
# Collects the entities of one type from the RDF graph.
#
# Entities are taken from the namespace that belongs to the entity type;
# symbols are added for units and prefixes only.
#
# @param entity_type [String] "units", "quantities" or "prefixes"
# @param graph [RDF::Graph] graph to query
# @return [Array<Hash>] entity hashes; empty for any other entity type
def extract_entities_from_ttl(entity_type, graph)
  namespace_uri = {
    "units" => "http://si-digital-framework.org/SI/units/",
    "quantities" => "http://si-digital-framework.org/quantities/",
    "prefixes" => "http://si-digital-framework.org/SI/prefixes/",
  }[entity_type]
  return [] unless namespace_uri

  skos = RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#")
  si = RDF::Vocabulary.new("http://si-digital-framework.org/SI#")

  entities = extract_base_entities(graph, RDF::Vocabulary.new(namespace_uri), skos)
  add_symbols_to_entities(entities, graph, si) if %w[units prefixes].include?(entity_type)
  entities
end
46
+
47
+ # Extract base entities from graph
48
# Builds one hash per subject that has a skos:prefLabel and whose URI
# starts with the given namespace. Each URI is reported once.
#
# @param graph [RDF::Graph] graph to query
# @param namespace [#to_s] namespace whose string form prefixes wanted URIs
# @param skos [RDF::Vocabulary] SKOS vocabulary (prefLabel / altLabel)
# @return [Array<Hash>] hashes with :uri, :name (last URI path segment),
#   :label and :alt_label (first value found for each, or nil)
def extract_base_entities(graph, namespace, skos)
  namespace_prefix = namespace.to_s
  seen_uris = {}

  # First value of +predicate+ for the subject, as a String (nil if absent).
  first_value = lambda do |subject_uri, predicate|
    RDF::Query.new({ RDF::URI(subject_uri) => { predicate => :value } })
              .execute(graph).first&.value&.to_s
  end

  labelled = RDF::Query.new({ entity: { skos.prefLabel => :label } }).execute(graph)
  labelled.each_with_object([]) do |solution, entities|
    uri = solution.entity.to_s
    next unless uri.start_with?(namespace_prefix)
    next if seen_uris[uri]

    seen_uris[uri] = true

    entities << {
      uri: uri,
      name: uri.split("/").last,
      label: first_value.call(uri, skos.prefLabel),
      alt_label: first_value.call(uri, skos.altLabel),
    }
  end
end
76
+
77
+ # Add symbols to entities
78
# Sets :symbol on each entity hash for which the graph has an
# si:hasSymbol value; entities without one are left unchanged.
#
# @param entities [Array<Hash>] entity hashes with :uri (mutated in place)
# @param graph [RDF::Graph] graph to query
# @param si [RDF::Vocabulary] SI vocabulary providing hasSymbol
# @return [Array<Hash>] the same +entities+ collection
def add_symbols_to_entities(entities, graph, si)
  entities.each do |entity|
    subject = RDF::URI(entity[:uri])
    solution = RDF::Query.new({ subject => { si.hasSymbol => :value } }).execute(graph).first
    symbol = solution&.value&.to_s
    entity[:symbol] = symbol if symbol
  end
end
85
+
86
+ # Extract suffix from URI for display
87
# Shortens an SI URI for display.
#
# @param uri [String, nil] full entity URI
# @return [String] "" for nil; "units/<last path segment>" when the URI
#   contains "/units/"; otherwise the URI with SI_URI_PREFIX removed
def extract_identifying_suffix(uri)
  return "" unless uri
  # Units are shown as "units/name" without repeating the "units/" segment.
  return "units/#{uri.split('/').last}" if uri.include?("/units/")

  uri.gsub(SI_URI_PREFIX, "")
end
100
+ end
101
+ end
102
+ end
103
+ end