glossarist 2.6.5 → 2.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/release.yml +1 -4
  3. data/.rubocop_todo.yml +53 -2
  4. data/CLAUDE.md +27 -2
  5. data/README.adoc +532 -56
  6. data/config.yml +68 -1
  7. data/glossarist.gemspec +2 -0
  8. data/lib/glossarist/citation.rb +26 -123
  9. data/lib/glossarist/cli/compare_command.rb +106 -0
  10. data/lib/glossarist/cli/export_command.rb +11 -14
  11. data/lib/glossarist/cli/validate_command.rb +111 -20
  12. data/lib/glossarist/cli.rb +18 -0
  13. data/lib/glossarist/collections/bibliography_collection.rb +4 -2
  14. data/lib/glossarist/collections/localization_collection.rb +2 -0
  15. data/lib/glossarist/comparison_result.rb +35 -0
  16. data/lib/glossarist/concept.rb +1 -1
  17. data/lib/glossarist/concept_collector.rb +44 -0
  18. data/lib/glossarist/concept_comparator.rb +72 -0
  19. data/lib/glossarist/concept_data.rb +20 -0
  20. data/lib/glossarist/concept_diff.rb +15 -0
  21. data/lib/glossarist/concept_document.rb +11 -0
  22. data/lib/glossarist/concept_manager.rb +19 -5
  23. data/lib/glossarist/concept_ref.rb +13 -0
  24. data/lib/glossarist/concept_reference.rb +12 -19
  25. data/lib/glossarist/concept_validator.rb +6 -1
  26. data/lib/glossarist/context_configuration.rb +90 -0
  27. data/lib/glossarist/dataset_validator.rb +8 -4
  28. data/lib/glossarist/designation/abbreviation.rb +0 -2
  29. data/lib/glossarist/designation/base.rb +21 -1
  30. data/lib/glossarist/designation/expression.rb +3 -0
  31. data/lib/glossarist/designation/letter_symbol.rb +0 -4
  32. data/lib/glossarist/designation/prefix.rb +17 -0
  33. data/lib/glossarist/designation/suffix.rb +17 -0
  34. data/lib/glossarist/designation/symbol.rb +0 -2
  35. data/lib/glossarist/gcr_metadata.rb +7 -14
  36. data/lib/glossarist/gcr_package.rb +35 -23
  37. data/lib/glossarist/gcr_validator.rb +38 -17
  38. data/lib/glossarist/glossary_definition.rb +5 -0
  39. data/lib/glossarist/localized_concept.rb +8 -0
  40. data/lib/glossarist/managed_concept.rb +39 -6
  41. data/lib/glossarist/managed_concept_data.rb +22 -2
  42. data/lib/glossarist/non_verb_rep.rb +21 -6
  43. data/lib/glossarist/pronunciation.rb +32 -0
  44. data/lib/glossarist/rdf/ext/jsonld_transform_ext.rb +208 -0
  45. data/lib/glossarist/rdf/ext/mapping_ext.rb +37 -0
  46. data/lib/glossarist/rdf/ext/mapping_rule_ext.rb +27 -0
  47. data/lib/glossarist/rdf/ext/member_rule_ext.rb +34 -0
  48. data/lib/glossarist/rdf/ext/turtle_transform_ext.rb +222 -0
  49. data/lib/glossarist/rdf/ext.rb +39 -0
  50. data/lib/glossarist/rdf/gloss_citation.rb +36 -0
  51. data/lib/glossarist/rdf/gloss_concept.rb +58 -0
  52. data/lib/glossarist/rdf/gloss_concept_date.rb +24 -0
  53. data/lib/glossarist/rdf/gloss_concept_reference.rb +29 -0
  54. data/lib/glossarist/rdf/gloss_concept_source.rb +37 -0
  55. data/lib/glossarist/rdf/gloss_designation.rb +146 -0
  56. data/lib/glossarist/rdf/gloss_detailed_definition.rb +24 -0
  57. data/lib/glossarist/rdf/gloss_grammar_info.rb +57 -0
  58. data/lib/glossarist/rdf/gloss_locality.rb +25 -0
  59. data/lib/glossarist/rdf/gloss_localized_concept.rb +67 -0
  60. data/lib/glossarist/rdf/gloss_non_verbal_rep.rb +31 -0
  61. data/lib/glossarist/rdf/gloss_pronunciation.rb +32 -0
  62. data/lib/glossarist/rdf/gloss_reference.rb +55 -0
  63. data/lib/glossarist/rdf/namespaces/glossarist_namespace.rb +12 -0
  64. data/lib/glossarist/rdf/namespaces/iso_thes_namespace.rb +12 -0
  65. data/lib/glossarist/rdf/namespaces/owl_namespace.rb +12 -0
  66. data/lib/glossarist/rdf/namespaces/prov_namespace.rb +12 -0
  67. data/lib/glossarist/rdf/namespaces/rdf_namespace.rb +12 -0
  68. data/lib/glossarist/rdf/namespaces/skosxl_namespace.rb +12 -0
  69. data/lib/glossarist/rdf/namespaces.rb +8 -2
  70. data/lib/glossarist/rdf/relationships.rb +19 -0
  71. data/lib/glossarist/rdf/v3/configuration.rb +15 -0
  72. data/lib/glossarist/rdf/v3.rb +79 -0
  73. data/lib/glossarist/rdf.rb +22 -2
  74. data/lib/glossarist/reference_extractor.rb +15 -24
  75. data/lib/glossarist/reference_resolver.rb +3 -3
  76. data/lib/glossarist/related_concept.rb +2 -10
  77. data/lib/glossarist/schema_migration.rb +39 -0
  78. data/lib/glossarist/sts/term_mapper.rb +2 -2
  79. data/lib/glossarist/transforms/concept_to_gloss_transform.rb +355 -0
  80. data/lib/glossarist/transforms.rb +2 -2
  81. data/lib/glossarist/urn_resolver.rb +13 -1
  82. data/lib/glossarist/v1/concept.rb +18 -11
  83. data/lib/glossarist/v2/citation.rb +36 -0
  84. data/lib/glossarist/v2/concept_data.rb +46 -0
  85. data/lib/glossarist/v2/concept_document.rb +18 -0
  86. data/lib/glossarist/v2/concept_ref.rb +8 -0
  87. data/lib/glossarist/v2/concept_source.rb +16 -0
  88. data/lib/glossarist/v2/configuration.rb +13 -0
  89. data/lib/glossarist/v2/detailed_definition.rb +14 -0
  90. data/lib/glossarist/v2/localized_concept.rb +9 -0
  91. data/lib/glossarist/v2/managed_concept.rb +25 -0
  92. data/lib/glossarist/v2/managed_concept_data.rb +49 -0
  93. data/lib/glossarist/v2/related_concept.rb +15 -0
  94. data/lib/glossarist/v2.rb +28 -0
  95. data/lib/glossarist/v3/bibliography_entry.rb +19 -0
  96. data/lib/glossarist/v3/bibliography_file.rb +27 -0
  97. data/lib/glossarist/v3/citation.rb +30 -0
  98. data/lib/glossarist/v3/concept_data.rb +46 -0
  99. data/lib/glossarist/v3/concept_document.rb +18 -0
  100. data/lib/glossarist/v3/concept_ref.rb +8 -0
  101. data/lib/glossarist/v3/concept_source.rb +16 -0
  102. data/lib/glossarist/v3/configuration.rb +13 -0
  103. data/lib/glossarist/v3/detailed_definition.rb +14 -0
  104. data/lib/glossarist/v3/image_entry.rb +21 -0
  105. data/lib/glossarist/v3/image_file.rb +31 -0
  106. data/lib/glossarist/v3/localized_concept.rb +9 -0
  107. data/lib/glossarist/v3/managed_concept.rb +26 -0
  108. data/lib/glossarist/v3/managed_concept_data.rb +34 -0
  109. data/lib/glossarist/v3/related_concept.rb +15 -0
  110. data/lib/glossarist/v3.rb +36 -0
  111. data/lib/glossarist/validation/asset_index.rb +4 -3
  112. data/lib/glossarist/validation/bibliography_index.rb +61 -30
  113. data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +2 -15
  114. data/lib/glossarist/validation/rules/authoritative_source_rule.rb +2 -15
  115. data/lib/glossarist/validation/rules/base.rb +5 -0
  116. data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +2 -3
  117. data/lib/glossarist/validation/rules/citation_completeness_rule.rb +5 -27
  118. data/lib/glossarist/validation/rules/dataset_context.rb +8 -3
  119. data/lib/glossarist/validation/rules/date_validity_rule.rb +1 -1
  120. data/lib/glossarist/validation/rules/designation_status_rule.rb +0 -1
  121. data/lib/glossarist/validation/rules/designation_type_rule.rb +1 -5
  122. data/lib/glossarist/validation/rules/domain_ref_rule.rb +37 -0
  123. data/lib/glossarist/validation/rules/domain_target_rule.rb +56 -0
  124. data/lib/glossarist/validation/rules/gcr_context.rb +12 -13
  125. data/lib/glossarist/validation/rules/image_reference_rule.rb +2 -17
  126. data/lib/glossarist/validation/rules/locality_completeness_rule.rb +58 -0
  127. data/lib/glossarist/validation/rules/localization_consistency_rule.rb +72 -0
  128. data/lib/glossarist/validation/rules/localization_presence_rule.rb +1 -1
  129. data/lib/glossarist/validation/rules/model_validity_rule.rb +71 -0
  130. data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +1 -13
  131. data/lib/glossarist/validation/rules/orphaned_images_rule.rb +16 -11
  132. data/lib/glossarist/validation/rules/ref_shape_rule.rb +68 -0
  133. data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +1 -3
  134. data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +1 -3
  135. data/lib/glossarist/validation/rules/related_concept_target_rule.rb +64 -0
  136. data/lib/glossarist/validation/rules/schema_version_rule.rb +41 -0
  137. data/lib/glossarist/validation/rules/source_type_rule.rb +1 -15
  138. data/lib/glossarist/validation/rules/source_urn_format_rule.rb +65 -0
  139. data/lib/glossarist/validation/rules/uuid_format_rule.rb +33 -0
  140. data/lib/glossarist/validation/rules.rb +10 -43
  141. data/lib/glossarist/validation/validation_issue.rb +14 -11
  142. data/lib/glossarist/validation_result.rb +12 -22
  143. data/lib/glossarist/version.rb +1 -1
  144. data/lib/glossarist.rb +10 -0
  145. data/memory/project-status.md +43 -0
  146. data/scripts/migrate_dataset.rb +180 -0
  147. data/scripts/migrate_isotc204_to_v3.rb +134 -0
  148. data/scripts/migrate_isotc211_to_v3.rb +153 -0
  149. data/scripts/migrate_osgeo_to_v3.rb +155 -0
  150. data/scripts/upgrade_dataset_to_v3.rb +47 -0
  151. metadata +112 -6
  152. data/TODO.integration/01-gcr-package-cli.md +0 -180
  153. data/lib/glossarist/rdf/skos_concept.rb +0 -43
  154. data/lib/glossarist/rdf/skos_vocabulary.rb +0 -25
  155. data/lib/glossarist/transforms/concept_to_skos_transform.rb +0 -131
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  # Serializable outcome of comparing a "new" concept set against an "old"
  # one: both set sizes, the ids present on both sides or on one side only,
  # and the per-concept diffs.
  class ComparisonResult < Lutaml::Model::Serializable
    attribute :new_count, :integer
    attribute :old_count, :integer
    attribute :matched, :string, collection: true, initialize_empty: true
    attribute :new_only, :string, collection: true, initialize_empty: true
    attribute :old_only, :string, collection: true, initialize_empty: true
    attribute :diffs, ConceptDiff, collection: true, initialize_empty: true

    key_value do
      map :new_count, to: :new_count
      map :old_count, to: :old_count
      map :matched, to: :matched
      map :new_only, to: :new_only
      map :old_only, to: :old_only
      map :diffs, to: :diffs
    end

    # Builds a one-line, human-readable recap of the comparison.
    #
    # @return [String] counts for both sides, the net change, and the sizes
    #   of the matched / new-only / old-only id lists
    def summary
      delta = new_count - old_count

      net_change = "no change"
      net_change = "+#{delta} new" if delta.positive?
      net_change = "#{delta.abs} removed" if delta.negative?

      [
        "#{new_count} new, #{old_count} old (#{net_change})",
        "#{matched.length} matched",
        "#{new_only.length} new-only",
        "#{old_only.length} old-only",
      ].join(", ")
    end
  end
end
@@ -4,7 +4,7 @@ module Glossarist
4
4
  attribute :id, :string
5
5
  attribute :uuid, :string
6
6
  attribute :subject, :string
7
- attribute :non_verb_rep, :string
7
+ attribute :non_verb_rep, NonVerbRep, collection: true
8
8
  attribute :extension_attributes, :string
9
9
  attribute :lineage_source, :string
10
10
  attribute :localizations, :hash
@@ -35,9 +35,37 @@ module Glossarist
35
35
  end
36
36
  end
37
37
 
38
# Counts the concept YAML files under +dir+ by globbing, without parsing
# them.
#
# The layout checks run in the order managed, v2, v1 and the first match
# decides which glob is used.
#
# @param dir [String] dataset directory (expanded before use)
# @return [Integer] number of matching files; 0 when +dir+ is not a
#   directory or matches none of the layouts
def self.count(dir)
  root = File.expand_path(dir)
  return 0 unless File.directory?(root)

  if managed_concepts?(root)
    return Dir.glob(File.join(root, "concepts", "concept", "*.yaml")).length
  end
  return count_v2(root) if v2_concepts?(root)
  return Dir.glob(File.join(root, "concepts", "*.yaml")).length if v1_concepts?(root)

  0
end
52
+
38
53
  class << self
39
54
  private
40
55
 
56
# Counts concept YAML files for a v2 dataset rooted at +dir+.
#
# A flat v2 dataset is counted from "concepts/*.yaml". Otherwise the
# "geolexica-v2" directory is used: its "concepts/concept/*.yaml" files
# when it has a "concepts" subdirectory, else its top-level "*.yaml" files.
#
# @param dir [String] dataset directory
# @return [Integer]
def count_v2(dir)
  return Dir.glob(File.join(dir, "concepts", "*.yaml")).length if v2_flat_concepts?(dir)

  v2_root = File.join(dir, "geolexica-v2")
  pattern =
    if File.directory?(File.join(v2_root, "concepts"))
      File.join(v2_root, "concepts", "concept", "*.yaml")
    else
      File.join(v2_root, "*.yaml")
    end

  Dir.glob(pattern).length
end
68
+
41
69
  def v1_concepts?(dir)
42
70
  concepts_dir = File.join(dir, "concepts")
43
71
  File.directory?(concepts_dir) &&
@@ -124,6 +152,7 @@ module Glossarist
124
152
  def each_grouped_v2_concepts(v2_dir, &block)
125
153
  collection = ManagedConceptCollection.new
126
154
  manager = ConceptManager.new(path: v2_dir)
155
+ manager.version = detect_schema_version(v2_dir)
127
156
  manager.load_from_files(collection: collection)
128
157
  collection.each(&block)
129
158
  end
@@ -131,6 +160,7 @@ module Glossarist
131
160
  def collect_grouped_v2_concepts(v2_dir)
132
161
  collection = ManagedConceptCollection.new
133
162
  manager = ConceptManager.new(path: v2_dir)
163
+ manager.version = detect_schema_version(v2_dir)
134
164
  manager.load_from_files(collection: collection)
135
165
  collection.to_a
136
166
  end
@@ -178,6 +208,20 @@ module Glossarist
178
208
  end
179
209
  nil
180
210
  end
211
+
212
# Best-effort detection of the schema version used by the concept files
# under +dir+.
#
# One sample YAML file is parsed with ConceptDocument.from_yamls; the
# result is "3" when its concept declares schema_version 3, otherwise "2".
# Detection is deliberately forgiving: a missing sample or any
# StandardError while reading/parsing also yields "2".
#
# @param dir [String] dataset directory, or its "concepts" parent
# @return [String] "2" or "3"
def detect_schema_version(dir)
  concepts_dir = File.join(dir, "concepts")
  search_dir = File.directory?(concepts_dir) ? concepts_dir : dir

  # Concept files may sit directly in the directory or one level down in a
  # "concept" subdirectory, so probe both. `min` picks the same file on
  # every run regardless of the order Dir.glob happens to return.
  sample = Dir.glob(File.join(search_dir, "*.yaml")).min ||
    Dir.glob(File.join(search_dir, "concept", "*.yaml")).min
  return "2" unless sample

  raw = File.read(sample, encoding: "utf-8")
  doc = ConceptDocument.from_yamls(raw)
  ver = doc.concept&.schema_version.to_s
  ver == "3" ? "3" : "2"
rescue StandardError
  "2"
end
181
225
  end
182
226
  end
183
227
  end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  # Compares two concept sets by concept id and reports which ids appear on
  # both sides, which on one side only, and (optionally) a per-concept diff
  # for the ids found on both sides.
  class ConceptComparator
    # @param new_concepts [#each, #length] concepts of the "new" side
    # @param old_concepts [#each, #length] concepts of the "old" side
    def initialize(new_concepts:, old_concepts:)
      @new_concepts = new_concepts
      @old_concepts = old_concepts
    end

    # Runs the comparison.
    #
    # @param show_diffs [Boolean] when false, no per-concept diffs are
    #   computed and the result's +diffs+ is empty
    # @return [ComparisonResult]
    def compare(show_diffs: true)
      new_index = build_index(@new_concepts)
      old_index = build_index(@old_concepts)

      matched_ids = new_index.keys & old_index.keys
      new_only_ids = new_index.keys - old_index.keys
      old_only_ids = old_index.keys - new_index.keys

      diffs = show_diffs ? compute_diffs(matched_ids, new_index, old_index) : []

      ComparisonResult.new(
        new_count: @new_concepts.length,
        old_count: @old_concepts.length,
        matched: matched_ids.sort,
        new_only: new_only_ids.sort,
        old_only: old_only_ids.sort,
        diffs: diffs,
      )
    end

    private

    # Maps id => concept. Concepts without an id are left out; when two
    # concepts share an id the later one wins.
    def build_index(concepts)
      concepts.each_with_object({}) do |concept, index|
        id = extract_id(concept)
        index[id] = concept if id
      end
    end

    # Returns the concept's id as a String (or nil when it has none).
    # Normalising to String lets an id stored as 1 on one side match "1" on
    # the other, and keeps the id lists sortable: Array#sort raises
    # ArgumentError when Integers and Strings are mixed.
    def extract_id(concept)
      (concept.data&.id || concept.id)&.to_s
    end

    # Builds one ConceptDiff per matched id, most similar first; ties are
    # ordered by concept id so the output is reproducible.
    def compute_diffs(matched_ids, new_index, old_index)
      diffs = matched_ids.map do |id|
        score, tree = Lutaml::Model::Serialize.diff_with_score(
          new_index[id], old_index[id],
          show_unchanged: false,
          highlight_diff: false,
          indent: ""
        )

        ConceptDiff.new(
          concept_id: id,
          similarity: ((1 - score) * 100).round(2),
          diff_tree: strip_ansi(tree),
        )
      end

      diffs.sort_by { |diff| [-diff.similarity, diff.concept_id] }
    end

    # Removes ANSI colour escape sequences; a nil tree becomes "".
    def strip_ansi(text)
      text.to_s.gsub(/\e\[[0-9;]*m/, "")
    end
  end
end
@@ -30,6 +30,8 @@ module Glossarist
30
30
  # Language code should be exactly 3 char long.
31
31
  # TODO: use min_length, max_length once added in lutaml-model
32
32
  attribute :language_code, :string, pattern: /^.{3}$/
33
+ attribute :script, :string
34
+ attribute :system, :string
33
35
  attribute :entry_status, :string
34
36
 
35
37
  key_value do
@@ -48,6 +50,8 @@ module Glossarist
48
50
  map :references, to: :references
49
51
  map :domain, to: :domain
50
52
  map %i[language_code languageCode], to: :language_code
53
+ map :script, to: :script
54
+ map :system, to: :system
51
55
  map %i[entry_status entryStatus], to: :entry_status
52
56
  map %i[review_date reviewDate], to: :review_date
53
57
  map %i[review_decision_date reviewDecisionDate], to: :review_decision_date
@@ -74,5 +78,21 @@ module Glossarist
74
78
 
75
79
  sources.select(&:authoritative?)
76
80
  end
81
+
82
# Collects the sources attached directly to this object followed by the
# sources of every definition, note and example, in that order.
#
# @return [Array] a new array on every call; the receiver's own +sources+
#   collection is never modified
def all_sources
  # Array#to_a returns the receiver itself, so concatenating onto it would
  # permanently append the nested sources to +sources+ (and grow it again
  # on every call). Work on a copy instead.
  collected = sources.to_a.dup

  [definition, notes, examples].each do |items|
    items.each { |item| collected.concat(item.sources.to_a) }
  end

  collected
end
89
+
90
# Gathers the +content+ of every definition, note and example (in that
# order), skipping entries whose content is nil or false.
#
# @return [Array] the collected content values
def text_content
  [definition, notes, examples].each_with_object([]) do |items, texts|
    items.each do |item|
      body = item.content
      texts << body if body
    end
  end
end
77
97
  end
78
98
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  # Serializable record of the difference between two versions of one
  # concept: the concept's id, a similarity figure, and a textual diff tree.
  class ConceptDiff < Lutaml::Model::Serializable
    attribute :concept_id, :string
    attribute :similarity, :float
    attribute :diff_tree, :string

    # Key/value serialization uses the attribute names unchanged.
    key_value do
      map :concept_id, to: :concept_id
      map :similarity, to: :similarity
      map :diff_tree, to: :diff_tree
    end
  end
end
@@ -20,6 +20,17 @@ module Glossarist
20
20
  )
21
21
  end
22
22
 
23
# Picks the ConceptDocument class for a schema version, loading the
# matching version file on demand.
#
# @param version [#to_s] schema version
# @return [Class] V2::ConceptDocument for "2"; V3::ConceptDocument for any
#   other value
def self.for_version(version)
  if version.to_s == "2"
    require_relative "v2"
    V2::ConceptDocument
  else
    require_relative "v3"
    V3::ConceptDocument
  end
end
33
+
23
34
  def to_managed_concept
24
35
  mc = concept
25
36
  localizations.each { |l10n| mc.add_localization(l10n) }
@@ -2,6 +2,7 @@ module Glossarist
2
2
  class ConceptManager < Lutaml::Model::Serializable
3
3
  attribute :path, :string
4
4
  attribute :localized_concepts_path, :string
5
+ attribute :version, :string, default: -> { "2" }
5
6
 
6
7
  key_value do
7
8
  map :path, to: :path
@@ -9,6 +10,19 @@ module Glossarist
9
10
  to: :localized_concepts_path
10
11
  end
11
12
 
13
# Concept document class matching this manager's +version+, as resolved by
# ConceptDocument.for_version.
def concept_document_class
  ConceptDocument.for_version(version)
end
16
+
17
# Localized concept class matching this manager's +version+:
# V2::LocalizedConcept (loaded on demand) for version "2", otherwise the
# default LocalizedConcept.
def localized_concept_class
  return LocalizedConcept unless version.to_s == "2"

  require_relative "v2"
  V2::LocalizedConcept
end
25
+
12
26
  def load_from_files(collection: nil)
13
27
  collection ||= ManagedConceptCollection.new
14
28
 
@@ -35,7 +49,7 @@ module Glossarist
35
49
 
36
50
  def load_concept_from_file(filename) # rubocop:disable Metrics/CyclomaticComplexity
37
51
  raw = File.read(filename, encoding: "utf-8")
38
- doc = ConceptDocument.from_yamls(raw)
52
+ doc = concept_document_class.from_yamls(raw)
39
53
  concept = doc.concept
40
54
  unless concept
41
55
  raise Glossarist::ParseError.new(filename: filename)
@@ -44,7 +58,7 @@ module Glossarist
44
58
  concept_uuid = concept.identifier || concept.data&.id || File.basename(
45
59
  filename, ".*"
46
60
  )
47
- concept.instance_variable_set(:@uuid, concept_uuid)
61
+ concept.uuid = concept_uuid
48
62
 
49
63
  concept.data.localized_concepts.each_value do |id|
50
64
  localized_concept = load_localized_concept(id, doc.localizations)
@@ -60,15 +74,15 @@ module Glossarist
60
74
  if inline_localizations
61
75
  l10n = inline_localizations.find { |l| l.id == id }
62
76
  if l10n
63
- l10n.instance_variable_set(:@uuid, id)
77
+ l10n.uuid = id
64
78
  return l10n
65
79
  end
66
80
  end
67
81
 
68
- l10n = LocalizedConcept.from_yaml(
82
+ l10n = localized_concept_class.from_yaml(
69
83
  File.read(localized_concept_path(id), encoding: "utf-8"),
70
84
  )
71
- l10n.instance_variable_set(:@uuid, id)
85
+ l10n.uuid = id
72
86
  l10n
73
87
  rescue Psych::SyntaxError => e
74
88
  raise Glossarist::ParseError.new(filename: filename, line: e.line)
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  # Serializable pair of string attributes, +source+ and +id+.
  class ConceptRef < Lutaml::Model::Serializable
    attribute :source, :string
    attribute :id, :string

    # Key/value serialization uses the attribute names unchanged.
    key_value do
      map :source, to: :source
      map :id, to: :id
    end
  end
end
@@ -1,17 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Glossarist
4
+ # A typed reference to another concept, either local (within the same
5
+ # glossary) or external (in another concept registry).
6
+ #
7
+ # Local references use +concept_id+ without +source+. External references
8
+ # use +source+ (a registry URN prefix) and +concept_id+ to identify the
9
+ # target concept, or a direct +urn+ field for formal URN references.
4
10
  class ConceptReference < Lutaml::Model::Serializable
5
11
  attribute :term, :string
6
12
  attribute :concept_id, :string
7
13
  attribute :source, :string
8
14
  attribute :ref_type, :string
15
+ attribute :urn, :string
9
16
 
10
17
  key_value do
11
18
  map :term, to: :term
12
19
  map :concept_id, to: :concept_id
13
20
  map :source, to: :source
14
21
  map :ref_type, to: :ref_type
22
+ map :urn, to: :urn
23
+ end
24
+
25
+ def self.domain(concept_id)
26
+ new(concept_id: concept_id, ref_type: "domain")
15
27
  end
16
28
 
17
29
  def local?
@@ -23,25 +35,6 @@ module Glossarist
23
35
  !local?
24
36
  end
25
37
 
26
- def to_urn
27
- return nil unless external?
28
- return nil unless source && concept_id
29
-
30
- case source
31
- when /\Aurn:iec/ then "#{source}-#{concept_id}"
32
- when /\Aurn:iso/ then "#{source}:term:#{concept_id}"
33
- else "#{source}/#{concept_id}"
34
- end
35
- end
36
-
37
- def to_gcr_hash
38
- h = { "term" => term }
39
- h["concept_id"] = concept_id if concept_id
40
- h["source"] = source if source
41
- h["ref_type"] = ref_type if ref_type
42
- h.compact
43
- end
44
-
45
38
  def dedup_key
46
39
  concept_id ? [source, concept_id] : [source, concept_id, term]
47
40
  end
@@ -4,8 +4,9 @@ module Glossarist
4
4
  class ConceptValidator
5
5
  attr_reader :path, :errors, :warnings
6
6
 
7
- def initialize(path)
7
+ def initialize(path, on_progress: nil)
8
8
  @path = path
9
+ @on_progress = on_progress
9
10
  @errors = []
10
11
  @warnings = []
11
12
  end
@@ -14,9 +15,12 @@ module Glossarist
14
15
  result = ValidationResult.new
15
16
  context = Validation::Rules::DatasetContext.new(@path)
16
17
  concept_rules = Validation::Rules::Registry.for_scope(:concept)
18
+ total = ConceptCollector.count(@path)
17
19
  file_idx = 0
18
20
 
19
21
  ConceptCollector.each_concept(@path) do |concept|
22
+ context.add_concept(concept)
23
+
20
24
  fname = concept_file_name(concept, file_idx)
21
25
  concept_context = Validation::Rules::ConceptContext.new(
22
26
  concept, file_name: fname, collection_context: context
@@ -29,6 +33,7 @@ module Glossarist
29
33
  end
30
34
 
31
35
  file_idx += 1
36
+ @on_progress&.call(file_idx, total)
32
37
  end
33
38
 
34
39
  if file_idx.zero?
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
module Glossarist
  # Helpers for managing a Lutaml::Model type context on behalf of a host
  # that defines a CONTEXT_ID constant (read through +self::CONTEXT_ID+).
  module ContextConfiguration
    # Id of the host's own context.
    def context_id
      self::CONTEXT_ID
    end

    # The host's context as currently known to the global context, if any.
    def context
      Lutaml::Model::GlobalContext.context(context_id)
    end

    # Creates (or replaces) a context named +id+.
    #
    # Asking for the host's own id rebuilds the host context via
    # #populate_context!. Any other id is unregistered first when it already
    # exists, then created with the given registry (a fresh TypeRegistry by
    # default), fallbacks and substitutions.
    def create_context(
      id:,
      registry: nil,
      fallback_to: [context_id],
      substitutions: []
    )
      target_id = id.to_sym
      return populate_context! if target_id == context_id

      global = Lutaml::Model::GlobalContext
      global.unregister_context(target_id) if global.context(target_id)

      create_type_context(
        id: target_id,
        registry: registry || Lutaml::Model::TypeRegistry.new,
        fallback_to: normalize_fallbacks(fallback_to),
        substitutions: substitutions,
      )
    end

    # Drops the host context when present, recreates it, and registers every
    # model recorded so far in the new context.
    def populate_context!
      existing = context
      Lutaml::Model::GlobalContext.unregister_context(context_id) if existing

      register_models_in(base_type_context)
    end

    # Records +klass+ under +id+ and registers it in the host context,
    # creating that context first when it does not exist yet.
    #
    # @return [Class] +klass+
    def register_model(klass, id:)
      key = id.to_sym
      registered_models[key] = klass

      target = context || populate_base_context
      target.registry.register(key, klass)
      klass
    end

    # Resolves +id+ to a type within the host context.
    def resolve_model(id)
      Lutaml::Model::GlobalContext.resolve_type(id, context_id)
    end

    private

    def populate_base_context
      base_type_context
    end

    # Creates a context in the global context, clears the global caches,
    # and returns the created context.
    def create_type_context(id:, registry:, fallback_to:, substitutions: [])
      created = Lutaml::Model::GlobalContext.create_context(
        id: id,
        registry: registry,
        fallback_to: fallback_to,
        substitutions: substitutions,
      )
      Lutaml::Model::GlobalContext.clear_caches
      created
    end

    # Fresh host context with an empty registry that falls back to :default.
    def base_type_context
      create_type_context(
        id: context_id,
        registry: Lutaml::Model::TypeRegistry.new,
        fallback_to: [:default],
      )
    end

    # Registers every recorded model in +type_context+ and returns it.
    def register_models_in(type_context)
      registry = type_context.registry
      registered_models.each_pair do |model_id, klass|
        registry.register(model_id, klass)
      end

      Lutaml::Model::GlobalContext.clear_caches
      type_context
    end

    def normalize_fallbacks(fallback_to)
      Array(fallback_to).map { |fallback| fallback.to_sym }
    end

    # Models recorded through #register_model, keyed by symbol id.
    def registered_models
      @registered_models ||= {}
    end
  end
end
@@ -2,6 +2,10 @@
2
2
 
3
3
  module Glossarist
4
4
  class DatasetValidator
5
# @param on_progress [#call, nil] optional callback handed on to the
#   GcrValidator / ConceptValidator that this validator creates
def initialize(on_progress: nil)
  @on_progress = on_progress
end
8
+
5
9
  def validate(path, strict: false, reference_path: nil)
6
10
  if File.extname(path).downcase == ".gcr"
7
11
  validate_gcr(path, reference_path: reference_path)
@@ -13,7 +17,7 @@ module Glossarist
13
17
  private
14
18
 
15
19
  def validate_gcr(path, reference_path: nil)
16
- result = GcrValidator.new.validate(path)
20
+ result = GcrValidator.new(on_progress: @on_progress).validate(path)
17
21
 
18
22
  if reference_path
19
23
  ref_result = validate_gcr_cross_references(path, reference_path)
@@ -24,7 +28,7 @@ module Glossarist
24
28
  end
25
29
 
26
30
  def validate_directory(path, reference_path: nil)
27
- result = ConceptValidator.new(path).validate_all
31
+ result = ConceptValidator.new(path, on_progress: @on_progress).validate_all
28
32
 
29
33
  if reference_path
30
34
  ref_result = validate_directory_cross_references(path, reference_path)
@@ -38,7 +42,7 @@ module Glossarist
38
42
  extractor = ReferenceExtractor.new
39
43
  resolver = build_resolver(reference_path)
40
44
  pkg = GcrPackage.load(path)
41
- uri_prefix = pkg.metadata&.dig("uri_prefix") || pkg.metadata&.dig("shortname")
45
+ uri_prefix = pkg.metadata&.uri_prefix || pkg.metadata&.shortname
42
46
  resolver.register_self(pkg.concepts)
43
47
  resolver.register_package(pkg, uri_prefix: uri_prefix)
44
48
  resolver.validate_all(pkg, extractor: extractor)
@@ -56,7 +60,7 @@ module Glossarist
56
60
  resolver = ReferenceResolver.new
57
61
  Dir.glob(File.join(reference_path, "*.gcr")).each do |gcr_path|
58
62
  pkg = GcrPackage.load(gcr_path)
59
- uri_prefix = pkg.metadata&.dig("uri_prefix") || pkg.metadata&.dig("shortname")
63
+ uri_prefix = pkg.metadata&.uri_prefix || pkg.metadata&.shortname
60
64
  resolver.register_package(pkg, uri_prefix: uri_prefix)
61
65
  end
62
66
  resolver
@@ -1,7 +1,6 @@
1
1
  module Glossarist
2
2
  module Designation
3
3
  class Abbreviation < Expression
4
- attribute :international, :boolean
5
4
  attribute :type, :string, default: -> { "abbreviation" }
6
5
 
7
6
  Glossarist::GlossaryDefinition::ABBREVIATION_TYPES.each do |name|
@@ -9,7 +8,6 @@ module Glossarist
9
8
  end
10
9
 
11
10
  key_value do
12
- map :international, to: :international
13
11
  map :type, to: :type, render_default: true
14
12
  Glossarist::GlossaryDefinition::ABBREVIATION_TYPES.each do |name|
15
13
  map name.to_sym, to: name.to_sym
@@ -6,12 +6,30 @@ module Glossarist
6
6
  attribute :normative_status, :string,
7
7
  values: Glossarist::GlossaryDefinition::DESIGNATION_BASE_NORMATIVE_STATUSES
8
8
  attribute :type, :string
9
+ attribute :language, :string
10
+ attribute :script, :string
11
+ attribute :system, :string
12
+ attribute :international, :boolean
13
+ attribute :absent, :boolean
14
+ attribute :pronunciation, Pronunciation, collection: true
15
+ attribute :sources, ConceptSource, collection: true
16
+ attribute :term_type, :string
17
+ attribute :related, RelatedConcept, collection: true
9
18
 
10
19
  key_value do
11
20
  map :type, to: :type
12
21
  map %i[normative_status normativeStatus], to: :normative_status
13
22
  map %i[geographical_area geographicalArea], to: :geographical_area
14
23
  map :designation, to: :designation
24
+ map :language, to: :language
25
+ map :script, to: :script
26
+ map :system, to: :system
27
+ map :international, to: :international
28
+ map :absent, to: :absent
29
+ map :pronunciation, to: :pronunciation
30
+ map :sources, to: :sources
31
+ map %i[term_type termType], to: :term_type
32
+ map :related, to: :related
15
33
  end
16
34
 
17
35
  def self.of_yaml(hash, options = {})
@@ -34,7 +52,9 @@ module Glossarist
34
52
  end
35
53
 
36
54
  def self.infer_designation_type(hash)
37
- if hash["international"] || hash["abbreviation_type"]
55
+ if hash["abbreviation_type"]
56
+ "abbreviation"
57
+ elsif hash["international"]
38
58
  "symbol"
39
59
  else
40
60
  "expression"
@@ -5,6 +5,7 @@ module Glossarist
5
5
  class Expression < Base
6
6
  attribute :prefix, :string
7
7
  attribute :usage_info, :string
8
+ attribute :field_of_application, :string
8
9
 
9
10
  attribute :gender, :string
10
11
  attribute :plurality, :string
@@ -16,6 +17,8 @@ module Glossarist
16
17
  map :type, to: :type, render_default: true
17
18
  map :prefix, to: :prefix
18
19
  map %i[usage_info usageInfo], to: :usage_info
20
+ map %i[field_of_application fieldOfApplication],
21
+ to: :field_of_application
19
22
  map %i[grammar_info grammarInfo], to: :grammar_info
20
23
  end
21
24
 
@@ -2,13 +2,9 @@ module Glossarist
2
2
  module Designation
3
3
  class LetterSymbol < Symbol
4
4
  attribute :text, :string
5
- attribute :language, :string
6
- attribute :script, :string
7
5
 
8
6
  key_value do
9
7
  map :text, to: :text
10
- map :language, to: :language
11
- map :script, to: :script
12
8
  end
13
9
 
14
10
  def self.of_yaml(hash, options = {})
@@ -0,0 +1,17 @@
1
module Glossarist
  module Designation
    # Designation whose +type+ defaults to "prefix" and is always rendered.
    class Prefix < Base
      attribute :type, :string, default: -> { "prefix" }

      key_value do
        map :type, to: :type, render_default: true
      end

      # Sets "type" to "prefix" on +hash+ when it has no truthy "type" yet
      # (the passed hash itself is updated), then defers to the parent
      # implementation with the same arguments.
      def self.of_yaml(hash, options = {})
        hash["type"] ||= "prefix"

        super
      end
    end
  end
end