glossarist 2.6.6 → 2.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +90 -29
  3. data/glossarist.gemspec +2 -0
  4. data/lib/glossarist/citation.rb +26 -123
  5. data/lib/glossarist/cli/compare_command.rb +106 -0
  6. data/lib/glossarist/cli/export_command.rb +11 -14
  7. data/lib/glossarist/cli/validate_command.rb +111 -20
  8. data/lib/glossarist/cli.rb +18 -0
  9. data/lib/glossarist/collections/bibliography_collection.rb +4 -2
  10. data/lib/glossarist/collections/localization_collection.rb +2 -0
  11. data/lib/glossarist/comparison_result.rb +35 -0
  12. data/lib/glossarist/concept_collector.rb +44 -0
  13. data/lib/glossarist/concept_comparator.rb +72 -0
  14. data/lib/glossarist/concept_data.rb +16 -0
  15. data/lib/glossarist/concept_diff.rb +15 -0
  16. data/lib/glossarist/concept_document.rb +11 -0
  17. data/lib/glossarist/concept_manager.rb +19 -5
  18. data/lib/glossarist/concept_ref.rb +13 -0
  19. data/lib/glossarist/concept_validator.rb +6 -1
  20. data/lib/glossarist/context_configuration.rb +90 -0
  21. data/lib/glossarist/dataset_validator.rb +8 -4
  22. data/lib/glossarist/designation/prefix.rb +17 -0
  23. data/lib/glossarist/designation/suffix.rb +17 -0
  24. data/lib/glossarist/gcr_metadata.rb +7 -14
  25. data/lib/glossarist/gcr_package.rb +35 -23
  26. data/lib/glossarist/gcr_validator.rb +38 -17
  27. data/lib/glossarist/localized_concept.rb +8 -0
  28. data/lib/glossarist/managed_concept.rb +39 -6
  29. data/lib/glossarist/managed_concept_data.rb +2 -1
  30. data/lib/glossarist/rdf/ext/jsonld_transform_ext.rb +208 -0
  31. data/lib/glossarist/rdf/ext/mapping_ext.rb +37 -0
  32. data/lib/glossarist/rdf/ext/mapping_rule_ext.rb +27 -0
  33. data/lib/glossarist/rdf/ext/member_rule_ext.rb +34 -0
  34. data/lib/glossarist/rdf/ext/turtle_transform_ext.rb +222 -0
  35. data/lib/glossarist/rdf/ext.rb +39 -0
  36. data/lib/glossarist/rdf/gloss_citation.rb +36 -0
  37. data/lib/glossarist/rdf/gloss_concept.rb +58 -0
  38. data/lib/glossarist/rdf/gloss_concept_date.rb +24 -0
  39. data/lib/glossarist/rdf/gloss_concept_reference.rb +29 -0
  40. data/lib/glossarist/rdf/gloss_concept_source.rb +37 -0
  41. data/lib/glossarist/rdf/gloss_designation.rb +146 -0
  42. data/lib/glossarist/rdf/gloss_detailed_definition.rb +24 -0
  43. data/lib/glossarist/rdf/gloss_grammar_info.rb +57 -0
  44. data/lib/glossarist/rdf/gloss_locality.rb +25 -0
  45. data/lib/glossarist/rdf/gloss_localized_concept.rb +67 -0
  46. data/lib/glossarist/rdf/gloss_non_verbal_rep.rb +31 -0
  47. data/lib/glossarist/rdf/gloss_pronunciation.rb +32 -0
  48. data/lib/glossarist/rdf/gloss_reference.rb +55 -0
  49. data/lib/glossarist/rdf/namespaces/glossarist_namespace.rb +12 -0
  50. data/lib/glossarist/rdf/namespaces/iso_thes_namespace.rb +12 -0
  51. data/lib/glossarist/rdf/namespaces/owl_namespace.rb +12 -0
  52. data/lib/glossarist/rdf/namespaces/prov_namespace.rb +12 -0
  53. data/lib/glossarist/rdf/namespaces/rdf_namespace.rb +12 -0
  54. data/lib/glossarist/rdf/namespaces/skosxl_namespace.rb +12 -0
  55. data/lib/glossarist/rdf/namespaces.rb +8 -2
  56. data/lib/glossarist/rdf/relationships.rb +19 -0
  57. data/lib/glossarist/rdf/v3/configuration.rb +15 -0
  58. data/lib/glossarist/rdf/v3.rb +79 -0
  59. data/lib/glossarist/rdf.rb +22 -2
  60. data/lib/glossarist/reference_extractor.rb +12 -19
  61. data/lib/glossarist/reference_resolver.rb +3 -3
  62. data/lib/glossarist/related_concept.rb +2 -10
  63. data/lib/glossarist/schema_migration.rb +39 -0
  64. data/lib/glossarist/sts/term_mapper.rb +2 -2
  65. data/lib/glossarist/transforms/concept_to_gloss_transform.rb +355 -0
  66. data/lib/glossarist/transforms.rb +2 -2
  67. data/lib/glossarist/v1/concept.rb +17 -17
  68. data/lib/glossarist/v2/citation.rb +36 -0
  69. data/lib/glossarist/v2/concept_data.rb +46 -0
  70. data/lib/glossarist/v2/concept_document.rb +18 -0
  71. data/lib/glossarist/v2/concept_ref.rb +8 -0
  72. data/lib/glossarist/v2/concept_source.rb +16 -0
  73. data/lib/glossarist/v2/configuration.rb +13 -0
  74. data/lib/glossarist/v2/detailed_definition.rb +14 -0
  75. data/lib/glossarist/v2/localized_concept.rb +9 -0
  76. data/lib/glossarist/v2/managed_concept.rb +25 -0
  77. data/lib/glossarist/v2/managed_concept_data.rb +49 -0
  78. data/lib/glossarist/v2/related_concept.rb +15 -0
  79. data/lib/glossarist/v2.rb +28 -0
  80. data/lib/glossarist/v3/bibliography_entry.rb +19 -0
  81. data/lib/glossarist/v3/bibliography_file.rb +27 -0
  82. data/lib/glossarist/v3/citation.rb +30 -0
  83. data/lib/glossarist/v3/concept_data.rb +46 -0
  84. data/lib/glossarist/v3/concept_document.rb +18 -0
  85. data/lib/glossarist/v3/concept_ref.rb +8 -0
  86. data/lib/glossarist/v3/concept_source.rb +16 -0
  87. data/lib/glossarist/v3/configuration.rb +13 -0
  88. data/lib/glossarist/v3/detailed_definition.rb +14 -0
  89. data/lib/glossarist/v3/image_entry.rb +21 -0
  90. data/lib/glossarist/v3/image_file.rb +31 -0
  91. data/lib/glossarist/v3/localized_concept.rb +9 -0
  92. data/lib/glossarist/v3/managed_concept.rb +26 -0
  93. data/lib/glossarist/v3/managed_concept_data.rb +34 -0
  94. data/lib/glossarist/v3/related_concept.rb +15 -0
  95. data/lib/glossarist/v3.rb +36 -0
  96. data/lib/glossarist/validation/bibliography_index.rb +61 -30
  97. data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +2 -15
  98. data/lib/glossarist/validation/rules/authoritative_source_rule.rb +2 -15
  99. data/lib/glossarist/validation/rules/base.rb +5 -0
  100. data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +2 -3
  101. data/lib/glossarist/validation/rules/citation_completeness_rule.rb +5 -27
  102. data/lib/glossarist/validation/rules/dataset_context.rb +8 -3
  103. data/lib/glossarist/validation/rules/date_validity_rule.rb +1 -1
  104. data/lib/glossarist/validation/rules/designation_status_rule.rb +0 -1
  105. data/lib/glossarist/validation/rules/designation_type_rule.rb +1 -5
  106. data/lib/glossarist/validation/rules/domain_ref_rule.rb +37 -0
  107. data/lib/glossarist/validation/rules/domain_target_rule.rb +56 -0
  108. data/lib/glossarist/validation/rules/gcr_context.rb +12 -13
  109. data/lib/glossarist/validation/rules/image_reference_rule.rb +2 -17
  110. data/lib/glossarist/validation/rules/locality_completeness_rule.rb +58 -0
  111. data/lib/glossarist/validation/rules/localization_consistency_rule.rb +72 -0
  112. data/lib/glossarist/validation/rules/localization_presence_rule.rb +1 -1
  113. data/lib/glossarist/validation/rules/model_validity_rule.rb +71 -0
  114. data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +1 -13
  115. data/lib/glossarist/validation/rules/orphaned_images_rule.rb +16 -11
  116. data/lib/glossarist/validation/rules/ref_shape_rule.rb +68 -0
  117. data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +1 -3
  118. data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +1 -3
  119. data/lib/glossarist/validation/rules/related_concept_target_rule.rb +64 -0
  120. data/lib/glossarist/validation/rules/schema_version_rule.rb +41 -0
  121. data/lib/glossarist/validation/rules/source_type_rule.rb +1 -15
  122. data/lib/glossarist/validation/rules/source_urn_format_rule.rb +65 -0
  123. data/lib/glossarist/validation/rules/uuid_format_rule.rb +33 -0
  124. data/lib/glossarist/validation/rules.rb +10 -43
  125. data/lib/glossarist/validation/validation_issue.rb +14 -11
  126. data/lib/glossarist/validation_result.rb +12 -22
  127. data/lib/glossarist/version.rb +1 -1
  128. data/lib/glossarist.rb +9 -0
  129. data/memory/project-status.md +43 -0
  130. data/scripts/migrate_dataset.rb +180 -0
  131. data/scripts/migrate_isotc204_to_v3.rb +134 -0
  132. data/scripts/migrate_isotc211_to_v3.rb +153 -0
  133. data/scripts/migrate_osgeo_to_v3.rb +155 -0
  134. data/scripts/upgrade_dataset_to_v3.rb +47 -0
  135. metadata +111 -6
  136. data/TODO.integration/01-gcr-package-cli.md +0 -180
  137. data/lib/glossarist/rdf/skos_concept.rb +0 -43
  138. data/lib/glossarist/rdf/skos_vocabulary.rb +0 -25
  139. data/lib/glossarist/transforms/concept_to_skos_transform.rb +0 -131
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "paint"
4
+ require "table_tennis"
5
+
3
6
  module Glossarist
4
7
  class CLI
5
8
  class ValidateCommand
@@ -9,47 +12,135 @@ module Glossarist
9
12
  end
10
13
 
11
14
  def run
12
- result = DatasetValidator.new.validate(
15
+ text_output = @options[:format] == "text"
16
+ validator = DatasetValidator.new(on_progress: text_output ? method(:print_progress) : nil)
17
+ result = validator.validate(
13
18
  @path,
14
19
  strict: @options[:strict],
15
20
  reference_path: @options[:reference_path],
16
21
  )
22
+
23
+ $stderr.print "\r#{' ' * 60}\r" if text_output
17
24
  report(result)
18
- exit_code = result.errors.any? || (@options[:strict] && result.warnings.any?) ? 1 : 0
19
- exit(exit_code) unless exit_code.zero?
25
+ exit(1) unless result.errors.empty? && !strict_failure?(result)
20
26
  end
21
27
 
22
28
  private
23
29
 
30
+ def strict_failure?(result)
31
+ @options[:strict] && result.warnings.any?
32
+ end
33
+
34
+ def print_progress(current, total)
35
+ pct = (current.to_f / total * 100).round
36
+ bar_width = 30
37
+ filled = (current.to_f / total * bar_width).round
38
+ bar = "#{'█' * filled}#{'░' * (bar_width - filled)}"
39
+
40
+ $stderr.print "\r #{Paint['Validating', :bold]} #{bar} #{current}/#{total} (#{pct}%)"
41
+ $stderr.flush
42
+ end
43
+
24
44
  def report(result)
25
45
  case @options[:format]
26
46
  when "json"
27
- require "json"
28
- puts JSON.pretty_generate(result.to_h)
47
+ puts result.to_json
29
48
  when "yaml"
30
- require "yaml"
31
- puts YAML.dump(result.to_h)
49
+ puts result.to_yaml
32
50
  else
33
- report_text(result)
51
+ print_text_output(result)
52
+ print_table_output(result) if result.issues.any?
34
53
  end
35
54
  end
36
55
 
37
- def report_text(result)
38
- if result.valid?
39
- puts "Valid."
40
- else
41
- puts "Invalid."
42
- result.errors.each { |e| puts " ERROR: #{e}" }
56
+ def print_text_output(result)
57
+ puts
58
+ puts Paint["Validating #{@path}", :bold]
59
+ puts
60
+
61
+ if result.issues.empty?
62
+ puts " #{Paint['No issues found.', :green, :bold]}"
63
+ return
43
64
  end
44
65
 
45
- if result.warnings.any?
46
- result.warnings.each do |w|
47
- puts " WARNING: #{w}"
48
- end
66
+ print_grouped_issues(result)
67
+ print_summary_line(result)
68
+ end
69
+
70
+ def print_grouped_issues(result)
71
+ result.issues
72
+ .group_by { |i| i.location || "(dataset)" }
73
+ .sort_by { |loc, issues| [has_errors?(issues) ? 0 : 1, loc] }
74
+ .each { |location, issues| print_location_group(location, issues) }
75
+ end
76
+
77
+ def has_errors?(issues)
78
+ issues.any?(&:error?)
79
+ end
80
+
81
+ def print_location_group(location, issues)
82
+ puts " #{Paint[location, :cyan, :bold]}"
83
+ issues.sort_by { |i| issue_sort_key(i) }
84
+ .each { |issue| print_issue(issue) }
85
+ puts
86
+ end
87
+
88
+ def issue_sort_key(issue)
89
+ [issue.error? ? 0 : 1, issue.code || "￿", issue.message]
90
+ end
91
+
92
+ def print_issue(issue)
93
+ color = issue.error? ? :red : :yellow
94
+ label = Paint[issue.error? ? "ERROR" : " WARN", color, :bold]
95
+ code = Paint["%-8s" % (issue.code || ""), :magenta]
96
+ msg_col = 21
97
+
98
+ puts " #{label} #{code} #{issue.message}"
99
+ puts "#{' ' * msg_col}#{Paint[issue.suggestion, :green]}" if issue.suggestion
100
+ end
101
+
102
+ def print_summary_line(result)
103
+ error_count = result.issues.count(&:error?)
104
+ warning_count = result.issues.count(&:warning?)
105
+
106
+ status = error_count.positive? ? Paint["INVALID", :red, :bold] : Paint["VALID", :green, :bold]
107
+
108
+ details = []
109
+ details << Paint["#{error_count} error(s)", :red] if error_count.positive?
110
+ details << Paint["#{warning_count} warning(s)", :yellow] if warning_count.positive?
111
+
112
+ puts " #{status} #{details.join(', ')}"
113
+ end
114
+
115
+ def print_table_output(result)
116
+ rows = build_summary_rows(result)
117
+ return if rows.empty?
118
+
119
+ options = {
120
+ title: "Issues by Rule",
121
+ columns: %i[code severity count],
122
+ headers: { code: "Rule", severity: "Level", count: "Count" },
123
+ color_scales: { count: :gw },
124
+ mark: ->(row) { row[:severity] == "error" },
125
+ zebra: true,
126
+ }
127
+ puts
128
+ puts TableTennis.new(rows, options)
129
+ end
130
+
131
+ def build_summary_rows(result)
132
+ counts = Hash.new(0)
133
+ severities = {}
134
+
135
+ result.issues.each do |issue|
136
+ key = issue.code || "unknown"
137
+ counts[key] += 1
138
+ severities[key] ||= issue.severity
49
139
  end
50
140
 
51
- total = result.errors.length + result.warnings.length
52
- puts "#{total} issue(s) found."
141
+ counts.sort_by { |_, c| -c }.map do |code, count|
142
+ { code: code, severity: severities[code], count: count }
143
+ end
53
144
  end
54
145
  end
55
146
  end
@@ -9,6 +9,7 @@ module Glossarist
9
9
  autoload :ValidateCommand, "#{__dir__}/cli/validate_command"
10
10
  autoload :ImportCommand, "#{__dir__}/cli/import_command"
11
11
  autoload :ExportCommand, "#{__dir__}/cli/export_command"
12
+ autoload :CompareCommand, "#{__dir__}/cli/compare_command"
12
13
  desc "generate_latex", "Convert Concepts to Latex format"
13
14
 
14
15
  option :concepts_path, aliases: :p, required: true,
@@ -120,6 +121,23 @@ module Glossarist
120
121
  CLI::ExportCommand.new(path, options).run
121
122
  end
122
123
 
124
+ desc "compare NEW_PATH OLD_PATH", "Compare two concept datasets"
125
+ option :format, type: :string, default: "text",
126
+ enum: %w[text json yaml],
127
+ desc: "Output format"
128
+ option :report, type: :string,
129
+ desc: "Write report to file"
130
+ option :no_diffs, type: :boolean, default: false,
131
+ desc: "Skip per-concept diff computation"
132
+ def compare(new_path, old_path)
133
+ CLI::CompareCommand.new(new_path, old_path, options).run
134
+ end
135
+
136
+ desc "version", "Show Glossarist version"
137
+ def version
138
+ puts Glossarist::VERSION
139
+ end
140
+
123
141
  def method_missing(*args)
124
142
  warn "No method found named: #{args[0]}"
125
143
  warn "Run with `--help` or `-h` to see available options"
@@ -39,9 +39,11 @@ module Glossarist
39
39
  def populate_bibliographies(concepts)
40
40
  concepts.each do |concept|
41
41
  concept.default_lang.sources.each do |source|
42
- next if source.origin.text.nil?
42
+ next if source.origin.ref.nil?
43
+ ref_text = source.origin.ref.source
44
+ next if ref_text.nil?
43
45
 
44
- fetch(source.origin.text)
46
+ fetch(ref_text)
45
47
  end
46
48
  end
47
49
  end
@@ -7,6 +7,8 @@ module Glossarist
7
7
 
8
8
  index_by :language_code
9
9
 
10
+ alias :length :size
11
+
10
12
  def [](lang_code)
11
13
  find_by(:language_code, lang_code.to_s)
12
14
  end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ class ComparisonResult < Lutaml::Model::Serializable
5
+ attribute :new_count, :integer
6
+ attribute :old_count, :integer
7
+ attribute :matched, :string, collection: true, initialize_empty: true
8
+ attribute :new_only, :string, collection: true, initialize_empty: true
9
+ attribute :old_only, :string, collection: true, initialize_empty: true
10
+ attribute :diffs, ConceptDiff, collection: true, initialize_empty: true
11
+
12
+ key_value do
13
+ map :new_count, to: :new_count
14
+ map :old_count, to: :old_count
15
+ map :matched, to: :matched
16
+ map :new_only, to: :new_only
17
+ map :old_only, to: :old_only
18
+ map :diffs, to: :diffs
19
+ end
20
+
21
+ def summary
22
+ diff = new_count - old_count
23
+ change = if diff.positive?
24
+ "+#{diff} new"
25
+ elsif diff.negative?
26
+ "#{diff.abs} removed"
27
+ else
28
+ "no change"
29
+ end
30
+ "#{new_count} new, #{old_count} old (#{change}), " \
31
+ "#{matched.length} matched, #{new_only.length} new-only, " \
32
+ "#{old_only.length} old-only"
33
+ end
34
+ end
35
+ end
@@ -35,9 +35,37 @@ module Glossarist
35
35
  end
36
36
  end
37
37
 
38
+ def self.count(dir)
39
+ dir = File.expand_path(dir)
40
+ return 0 unless File.directory?(dir)
41
+
42
+ if managed_concepts?(dir)
43
+ Dir.glob(File.join(dir, "concepts", "concept", "*.yaml")).length
44
+ elsif v2_concepts?(dir)
45
+ count_v2(dir)
46
+ elsif v1_concepts?(dir)
47
+ Dir.glob(File.join(dir, "concepts", "*.yaml")).length
48
+ else
49
+ 0
50
+ end
51
+ end
52
+
38
53
  class << self
39
54
  private
40
55
 
56
+ def count_v2(dir)
57
+ if v2_flat_concepts?(dir)
58
+ Dir.glob(File.join(dir, "concepts", "*.yaml")).length
59
+ else
60
+ v2_dir = File.join(dir, "geolexica-v2")
61
+ if File.directory?(File.join(v2_dir, "concepts"))
62
+ Dir.glob(File.join(v2_dir, "concepts", "concept", "*.yaml")).length
63
+ else
64
+ Dir.glob(File.join(v2_dir, "*.yaml")).length
65
+ end
66
+ end
67
+ end
68
+
41
69
  def v1_concepts?(dir)
42
70
  concepts_dir = File.join(dir, "concepts")
43
71
  File.directory?(concepts_dir) &&
@@ -124,6 +152,7 @@ module Glossarist
124
152
  def each_grouped_v2_concepts(v2_dir, &block)
125
153
  collection = ManagedConceptCollection.new
126
154
  manager = ConceptManager.new(path: v2_dir)
155
+ manager.version = detect_schema_version(v2_dir)
127
156
  manager.load_from_files(collection: collection)
128
157
  collection.each(&block)
129
158
  end
@@ -131,6 +160,7 @@ module Glossarist
131
160
  def collect_grouped_v2_concepts(v2_dir)
132
161
  collection = ManagedConceptCollection.new
133
162
  manager = ConceptManager.new(path: v2_dir)
163
+ manager.version = detect_schema_version(v2_dir)
134
164
  manager.load_from_files(collection: collection)
135
165
  collection.to_a
136
166
  end
@@ -178,6 +208,20 @@ module Glossarist
178
208
  end
179
209
  nil
180
210
  end
211
+
212
+ def detect_schema_version(dir)
213
+ concepts_dir = File.join(dir, "concepts")
214
+ search_dir = File.directory?(concepts_dir) ? concepts_dir : dir
215
+ sample = Dir.glob(File.join(search_dir, "*.yaml")).first
216
+ return "2" unless sample
217
+
218
+ raw = File.read(sample, encoding: "utf-8")
219
+ doc = ConceptDocument.from_yamls(raw)
220
+ ver = doc.concept&.schema_version.to_s
221
+ ver == "3" ? "3" : "2"
222
+ rescue StandardError
223
+ "2"
224
+ end
181
225
  end
182
226
  end
183
227
  end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ class ConceptComparator
5
+ def initialize(new_concepts:, old_concepts:)
6
+ @new_concepts = new_concepts
7
+ @old_concepts = old_concepts
8
+ end
9
+
10
+ def compare(show_diffs: true)
11
+ new_index = build_index(@new_concepts)
12
+ old_index = build_index(@old_concepts)
13
+
14
+ matched_ids = new_index.keys & old_index.keys
15
+ new_only_ids = new_index.keys - old_index.keys
16
+ old_only_ids = old_index.keys - new_index.keys
17
+
18
+ diffs = if show_diffs
19
+ compute_diffs(matched_ids, new_index, old_index)
20
+ else
21
+ []
22
+ end
23
+
24
+ ComparisonResult.new(
25
+ new_count: @new_concepts.length,
26
+ old_count: @old_concepts.length,
27
+ matched: matched_ids.sort,
28
+ new_only: new_only_ids.sort,
29
+ old_only: old_only_ids.sort,
30
+ diffs: diffs,
31
+ )
32
+ end
33
+
34
+ private
35
+
36
+ def build_index(concepts)
37
+ concepts.each_with_object({}) do |concept, index|
38
+ id = extract_id(concept)
39
+ index[id] = concept if id
40
+ end
41
+ end
42
+
43
+ def extract_id(concept)
44
+ concept.data&.id || concept.id
45
+ end
46
+
47
+ def compute_diffs(matched_ids, new_index, old_index)
48
+ matched_ids.filter_map do |id|
49
+ new_concept = new_index[id]
50
+ old_concept = old_index[id]
51
+
52
+ score, tree = Lutaml::Model::Serialize.diff_with_score(
53
+ new_concept, old_concept,
54
+ show_unchanged: false,
55
+ highlight_diff: false,
56
+ indent: ""
57
+ )
58
+ similarity = ((1 - score) * 100).round(2)
59
+
60
+ ConceptDiff.new(
61
+ concept_id: id,
62
+ similarity: similarity,
63
+ diff_tree: strip_ansi(tree),
64
+ )
65
+ end.sort_by { |d| -d.similarity }
66
+ end
67
+
68
+ def strip_ansi(text)
69
+ text.gsub(/\e\[[0-9;]*m/, "")
70
+ end
71
+ end
72
+ end
@@ -78,5 +78,21 @@ module Glossarist
78
78
 
79
79
  sources.select(&:authoritative?)
80
80
  end
81
+
82
+ def all_sources
83
+ list = sources.to_a
84
+ definition.each { |d| list.concat(d.sources.to_a) }
85
+ notes.each { |n| list.concat(n.sources.to_a) }
86
+ examples.each { |e| list.concat(e.sources.to_a) }
87
+ list
88
+ end
89
+
90
+ def text_content
91
+ texts = []
92
+ definition.each { |d| texts << d.content if d.content }
93
+ notes.each { |n| texts << n.content if n.content }
94
+ examples.each { |e| texts << e.content if e.content }
95
+ texts
96
+ end
81
97
  end
82
98
  end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ class ConceptDiff < Lutaml::Model::Serializable
5
+ attribute :concept_id, :string
6
+ attribute :similarity, :float
7
+ attribute :diff_tree, :string
8
+
9
+ key_value do
10
+ map :concept_id, to: :concept_id
11
+ map :similarity, to: :similarity
12
+ map :diff_tree, to: :diff_tree
13
+ end
14
+ end
15
+ end
@@ -20,6 +20,17 @@ module Glossarist
20
20
  )
21
21
  end
22
22
 
23
+ def self.for_version(version)
24
+ case version.to_s
25
+ when "2"
26
+ require_relative "v2"
27
+ V2::ConceptDocument
28
+ else
29
+ require_relative "v3"
30
+ V3::ConceptDocument
31
+ end
32
+ end
33
+
23
34
  def to_managed_concept
24
35
  mc = concept
25
36
  localizations.each { |l10n| mc.add_localization(l10n) }
@@ -2,6 +2,7 @@ module Glossarist
2
2
  class ConceptManager < Lutaml::Model::Serializable
3
3
  attribute :path, :string
4
4
  attribute :localized_concepts_path, :string
5
+ attribute :version, :string, default: -> { "2" }
5
6
 
6
7
  key_value do
7
8
  map :path, to: :path
@@ -9,6 +10,19 @@ module Glossarist
9
10
  to: :localized_concepts_path
10
11
  end
11
12
 
13
+ def concept_document_class
14
+ ConceptDocument.for_version(version)
15
+ end
16
+
17
+ def localized_concept_class
18
+ if version.to_s == "2"
19
+ require_relative "v2"
20
+ V2::LocalizedConcept
21
+ else
22
+ LocalizedConcept
23
+ end
24
+ end
25
+
12
26
  def load_from_files(collection: nil)
13
27
  collection ||= ManagedConceptCollection.new
14
28
 
@@ -35,7 +49,7 @@ module Glossarist
35
49
 
36
50
  def load_concept_from_file(filename) # rubocop:disable Metrics/CyclomaticComplexity
37
51
  raw = File.read(filename, encoding: "utf-8")
38
- doc = ConceptDocument.from_yamls(raw)
52
+ doc = concept_document_class.from_yamls(raw)
39
53
  concept = doc.concept
40
54
  unless concept
41
55
  raise Glossarist::ParseError.new(filename: filename)
@@ -44,7 +58,7 @@ module Glossarist
44
58
  concept_uuid = concept.identifier || concept.data&.id || File.basename(
45
59
  filename, ".*"
46
60
  )
47
- concept.instance_variable_set(:@uuid, concept_uuid)
61
+ concept.uuid = concept_uuid
48
62
 
49
63
  concept.data.localized_concepts.each_value do |id|
50
64
  localized_concept = load_localized_concept(id, doc.localizations)
@@ -60,15 +74,15 @@ module Glossarist
60
74
  if inline_localizations
61
75
  l10n = inline_localizations.find { |l| l.id == id }
62
76
  if l10n
63
- l10n.instance_variable_set(:@uuid, id)
77
+ l10n.uuid = id
64
78
  return l10n
65
79
  end
66
80
  end
67
81
 
68
- l10n = LocalizedConcept.from_yaml(
82
+ l10n = localized_concept_class.from_yaml(
69
83
  File.read(localized_concept_path(id), encoding: "utf-8"),
70
84
  )
71
- l10n.instance_variable_set(:@uuid, id)
85
+ l10n.uuid = id
72
86
  l10n
73
87
  rescue Psych::SyntaxError => e
74
88
  raise Glossarist::ParseError.new(filename: filename, line: e.line)
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ class ConceptRef < Lutaml::Model::Serializable
5
+ attribute :source, :string
6
+ attribute :id, :string
7
+
8
+ key_value do
9
+ map :source, to: :source
10
+ map :id, to: :id
11
+ end
12
+ end
13
+ end
@@ -4,8 +4,9 @@ module Glossarist
4
4
  class ConceptValidator
5
5
  attr_reader :path, :errors, :warnings
6
6
 
7
- def initialize(path)
7
+ def initialize(path, on_progress: nil)
8
8
  @path = path
9
+ @on_progress = on_progress
9
10
  @errors = []
10
11
  @warnings = []
11
12
  end
@@ -14,9 +15,12 @@ module Glossarist
14
15
  result = ValidationResult.new
15
16
  context = Validation::Rules::DatasetContext.new(@path)
16
17
  concept_rules = Validation::Rules::Registry.for_scope(:concept)
18
+ total = ConceptCollector.count(@path)
17
19
  file_idx = 0
18
20
 
19
21
  ConceptCollector.each_concept(@path) do |concept|
22
+ context.add_concept(concept)
23
+
20
24
  fname = concept_file_name(concept, file_idx)
21
25
  concept_context = Validation::Rules::ConceptContext.new(
22
26
  concept, file_name: fname, collection_context: context
@@ -29,6 +33,7 @@ module Glossarist
29
33
  end
30
34
 
31
35
  file_idx += 1
36
+ @on_progress&.call(file_idx, total)
32
37
  end
33
38
 
34
39
  if file_idx.zero?
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module ContextConfiguration
5
+ def context_id
6
+ self::CONTEXT_ID
7
+ end
8
+
9
+ def context
10
+ Lutaml::Model::GlobalContext.context(context_id)
11
+ end
12
+
13
+ def create_context(
14
+ id:,
15
+ registry: nil,
16
+ fallback_to: [context_id],
17
+ substitutions: []
18
+ )
19
+ normalized_id = id.to_sym
20
+
21
+ return populate_context! if normalized_id == context_id
22
+
23
+ Lutaml::Model::GlobalContext.unregister_context(normalized_id) if Lutaml::Model::GlobalContext.context(normalized_id)
24
+ create_type_context(
25
+ id: normalized_id,
26
+ registry: registry || Lutaml::Model::TypeRegistry.new,
27
+ fallback_to: normalize_fallbacks(fallback_to),
28
+ substitutions: substitutions,
29
+ )
30
+ end
31
+
32
+ def populate_context!
33
+ Lutaml::Model::GlobalContext.unregister_context(context_id) if context
34
+ register_models_in(base_type_context)
35
+ end
36
+
37
+ def register_model(klass, id:)
38
+ normalized_id = id.to_sym
39
+ registered_models[normalized_id] = klass
40
+ (context || populate_base_context).registry.register(normalized_id, klass)
41
+ klass
42
+ end
43
+
44
+ def resolve_model(id)
45
+ Lutaml::Model::GlobalContext.resolve_type(id, context_id)
46
+ end
47
+
48
+ private
49
+
50
+ def populate_base_context
51
+ base_type_context
52
+ end
53
+
54
+ def create_type_context(id:, registry:, fallback_to:, substitutions: [])
55
+ Lutaml::Model::GlobalContext.create_context(
56
+ id: id,
57
+ registry: registry,
58
+ fallback_to: fallback_to,
59
+ substitutions: substitutions,
60
+ ).tap do
61
+ Lutaml::Model::GlobalContext.clear_caches
62
+ end
63
+ end
64
+
65
+ def base_type_context
66
+ create_type_context(
67
+ id: context_id,
68
+ registry: Lutaml::Model::TypeRegistry.new,
69
+ fallback_to: [:default],
70
+ )
71
+ end
72
+
73
+ def register_models_in(type_context)
74
+ registered_models.each do |model_id, klass|
75
+ type_context.registry.register(model_id, klass)
76
+ end
77
+
78
+ Lutaml::Model::GlobalContext.clear_caches
79
+ type_context
80
+ end
81
+
82
+ def normalize_fallbacks(fallback_to)
83
+ Array(fallback_to).map(&:to_sym)
84
+ end
85
+
86
+ def registered_models
87
+ @registered_models ||= {}
88
+ end
89
+ end
90
+ end