glossarist 2.8.18 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +175 -8
  3. data/Rakefile +4 -0
  4. data/config.yml +2 -3
  5. data/data/concept-model/README.md +35 -0
  6. data/data/concept-model/SOURCE.json +5 -0
  7. data/data/concept-model/glossarist.context.jsonld +209 -0
  8. data/data/concept-model/glossarist.ttl +960 -0
  9. data/data/concept-model/shapes/glossarist.shacl.ttl +575 -0
  10. data/glossarist.gemspec +2 -0
  11. data/lib/glossarist/asset_reference.rb +2 -0
  12. data/lib/glossarist/bibliographic_reference.rb +13 -0
  13. data/lib/glossarist/cli/export_command.rb +130 -7
  14. data/lib/glossarist/cli.rb +11 -2
  15. data/lib/glossarist/concept_document.rb +13 -0
  16. data/lib/glossarist/concept_reference.rb +6 -0
  17. data/lib/glossarist/concept_store.rb +2 -3
  18. data/lib/glossarist/dataset_register.rb +2 -2
  19. data/lib/glossarist/figure_reference.rb +0 -4
  20. data/lib/glossarist/formula_reference.rb +0 -4
  21. data/lib/glossarist/non_verbal_reference.rb +10 -0
  22. data/lib/glossarist/rdf/gloss_detailed_definition.rb +6 -1
  23. data/lib/glossarist/rdf/gloss_localized_concept.rb +65 -0
  24. data/lib/glossarist/rdf/lutaml_ext.rb +33 -0
  25. data/lib/glossarist/rdf.rb +2 -0
  26. data/lib/glossarist/reference.rb +33 -0
  27. data/lib/glossarist/table_reference.rb +0 -4
  28. data/lib/glossarist/tasks/shacl.rake +30 -0
  29. data/lib/glossarist/tasks/sync.rake +14 -0
  30. data/lib/glossarist/tasks/sync_model.rb +92 -0
  31. data/lib/glossarist/transforms/concept_to_gloss_transform.rb +3 -2
  32. data/lib/glossarist/utilities/uuid.rb +5 -1
  33. data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +15 -1
  34. data/lib/glossarist/validation/shacl_validator.rb +97 -0
  35. data/lib/glossarist/validation.rb +1 -0
  36. data/lib/glossarist/version.rb +1 -1
  37. data/lib/glossarist.rb +1 -0
  38. metadata +41 -2
@@ -3,8 +3,12 @@
3
3
  module Glossarist
4
4
  class CLI
5
5
  class ExportCommand
6
+ AGGREGATE_FORMATS = %w[jsonld turtle tbx jsonl].freeze
7
+ PER_CONCEPT_FORMATS = %w[json jsonld turtle yaml].freeze
8
+
6
9
  EXTENSIONS = {
7
10
  "json" => "json",
11
+ "yaml" => "yaml",
8
12
  **GcrPackage::COMPILED_EXTENSIONS,
9
13
  }.freeze
10
14
 
@@ -14,20 +18,32 @@ module Glossarist
14
18
  end
15
19
 
16
20
  def run
17
- format = @options[:format]
21
+ formats = parse_formats(@options[:format])
18
22
  output_dir = File.expand_path(@options[:output])
19
23
  FileUtils.mkdir_p(output_dir)
20
24
 
21
25
  concepts = load_concepts
22
26
  name = resolve_shortname
23
27
 
24
- case format
25
- when "json" then export_json(concepts, output_dir)
26
- when "jsonld" then export_jsonld(concepts, name, output_dir)
27
- when "turtle" then export_turtle(concepts, name, output_dir)
28
- when "tbx" then export_tbx(concepts, name, output_dir)
29
- when "jsonl" then export_jsonl(concepts, name, output_dir)
28
+ formats.each do |format|
29
+ if per_concept? && per_concept_supported?(format)
30
+ export_per_concept(concepts, format, output_dir)
31
+ end
32
+
33
+ if aggregate?
34
+ export_aggregate(format, concepts, name, output_dir)
35
+ elsif per_concept? && !per_concept_supported?(format)
36
+ raise ArgumentError,
37
+ format(
38
+ "Per-concept export is not supported for '%<format>s' " \
39
+ "(only aggregate). Drop --per-concept or pick one of: %<formats>s",
40
+ format: format,
41
+ formats: PER_CONCEPT_FORMATS.join(", "),
42
+ )
43
+ end
30
44
  end
45
+
46
+ validate_outputs!(formats, output_dir, name) if validate?
31
47
  rescue ArgumentError => e
32
48
  warn "Error: #{e.message}"
33
49
  exit 1
@@ -63,6 +79,51 @@ module Glossarist
63
79
  }.compact
64
80
  end
65
81
 
82
# Normalises the --format option into a list of validated format names.
#
# Accepts a single string, a comma-separated string, or an array of either.
# Surrounding whitespace and empty entries are dropped, and duplicates are
# removed so that no format is exported (and overwritten) twice.
#
# @param raw [String, Array<String>, nil] raw option value
# @return [Array<String>] unique format names in first-seen order
# @raise [ArgumentError] when no format remains after normalisation, or when
#   validate_format! rejects an entry
def parse_formats(raw)
  formats = Array(raw)
    .flat_map { |value| value.to_s.split(",") }
    .map(&:strip)
    .reject(&:empty?)
    .uniq

  # An empty list would otherwise make the whole export a silent no-op.
  raise ArgumentError, "No export format given" if formats.empty?

  # each returns its receiver, so the validated list is the return value.
  formats.each { |name| validate_format!(name) }
end
87
+
88
# Raises ArgumentError unless +format+ is a key of EXTENSIONS; the message
# lists every valid key. Returns nil for a known format.
# NOTE: the parameter shadows Kernel#format, but the parenthesised
# format(...) call below is still parsed as a method call.
+ def validate_format!(format)
89
+ return if EXTENSIONS.key?(format)
90
+
91
+ raise ArgumentError,
92
+ format("Unknown format '%<format>s'. Valid formats: %<valid>s",
93
+ format: format,
94
+ valid: EXTENSIONS.keys.join(", "))
95
+ end
96
+
97
# Returns the :per_concept option, or false when the key is absent.
+ def per_concept?
98
+ @options.fetch(:per_concept, false)
99
+ end
100
+
101
# Aggregate output is on whenever per-concept mode is off; in per-concept
# mode it is on only when the :aggregate option is truthy.
+ def aggregate?
102
+ !per_concept? || @options.fetch(:aggregate, false)
103
+ end
104
+
105
# Returns the :validate option, or false when the key is absent.
+ def validate?
106
+ @options.fetch(:validate, false)
107
+ end
108
+
109
# True when +format+ is listed in PER_CONCEPT_FORMATS.
+ def per_concept_supported?(format)
110
+ PER_CONCEPT_FORMATS.include?(format)
111
+ end
112
+
113
# True when +format+ is listed in AGGREGATE_FORMATS or is "json".
# NOTE(review): no caller is visible in this chunk — confirm it is used.
+ def aggregate_supported?(format)
114
+ AGGREGATE_FORMATS.include?(format) || format == "json"
115
+ end
116
+
117
# Dispatches +format+ to the matching aggregate exporter.
#
# Some formats accepted by validate_format! (e.g. "yaml") have no aggregate
# exporter. Falling through silently would let an aggregate-only run write
# nothing and still exit successfully, so a warning is printed instead.
# A warning rather than an error keeps mixed runs working, where such a
# format is handled by the per-concept export.
#
# @param format [String] export format name
# @param concepts [Enumerable] concepts to export
# @param name [String] dataset short name passed to the exporters
# @param output_dir [String] directory the exporters write into
# @return [Object, nil] the exporter's result, or nil when nothing was written
def export_aggregate(format, concepts, name, output_dir)
  case format
  when "json" then export_json(concepts, output_dir)
  when "jsonld" then export_jsonld(concepts, name, output_dir)
  when "turtle" then export_turtle(concepts, name, output_dir)
  when "tbx" then export_tbx(concepts, name, output_dir)
  when "jsonl" then export_jsonl(concepts, name, output_dir)
  else
    warn "Warning: no aggregate export for format '#{format}'; nothing written"
  end
end
126
+
66
127
  def export_json(concepts, output_dir)
67
128
  concepts.each do |concept|
68
129
  id = concept.data&.id || concept.identifier
@@ -104,6 +165,68 @@ module Glossarist
104
165
  end
105
166
  end
106
167
  end
168
+
169
# Writes one file per concept into the "concepts" subdirectory of
# +output_dir+, creating that directory first. A +format+ without a branch
# below falls through the case and writes nothing.
+ def export_per_concept(concepts, format, output_dir)
170
+ dir = File.join(output_dir, "concepts")
171
+ FileUtils.mkdir_p(dir)
172
+
173
+ case format
174
+ when "json" then per_concept_json(concepts, dir)
175
+ when "yaml" then per_concept_yaml(concepts, dir)
176
+ when "jsonld" then per_concept_jsonld(concepts, dir)
177
+ when "turtle" then per_concept_turtle(concepts, dir)
178
+ end
179
+ end
180
+
181
# Writes concept.to_json to "<id>.json" inside +dir+ for every concept.
# The id is concept.data.id when available, otherwise concept.identifier.
+ def per_concept_json(concepts, dir)
182
+ concepts.each do |concept|
183
+ id = concept.data&.id || concept.identifier
184
+ File.write(File.join(dir, "#{id}.json"), concept.to_json)
185
+ end
186
+ end
187
+
188
# Writes concept.to_yaml to "<id>.yaml" inside +dir+ for every concept.
# The id is concept.data.id when available, otherwise concept.identifier.
+ def per_concept_yaml(concepts, dir)
189
+ concepts.each do |concept|
190
+ id = concept.data&.id || concept.identifier
191
+ File.write(File.join(dir, "#{id}.yaml"), concept.to_yaml)
192
+ end
193
+ end
194
+
195
# Runs every concept through Transforms::ConceptToGlossTransform (built with
# transform_options) and writes its to_jsonld output to "<id>.jsonld" inside
# +dir+. The transform is required here, at call time, not at file load.
+ def per_concept_jsonld(concepts, dir)
196
+ require "glossarist/transforms/concept_to_gloss_transform"
197
+ concepts.each do |concept|
198
+ id = concept.data&.id || concept.identifier
199
+ transform = Transforms::ConceptToGlossTransform.new(concept,
200
+ transform_options)
201
+ File.write(File.join(dir, "#{id}.jsonld"), transform.to_jsonld)
202
+ end
203
+ end
204
+
205
# Runs every concept through Transforms::ConceptToGlossTransform (built with
# transform_options) and writes its to_turtle output to "<id>.ttl" inside
# +dir+. The transform is required here, at call time, not at file load.
+ def per_concept_turtle(concepts, dir)
206
+ require "glossarist/transforms/concept_to_gloss_transform"
207
+ concepts.each do |concept|
208
+ id = concept.data&.id || concept.identifier
209
+ transform = Transforms::ConceptToGlossTransform.new(concept,
210
+ transform_options)
211
+ File.write(File.join(dir, "#{id}.ttl"), transform.to_turtle)
212
+ end
213
+ end
214
+
215
# Validates emitted Turtle files with Validation::ShaclValidator.
#
# Does nothing unless "turtle" is among +formats+. The file list is the
# aggregate "<name>.ttl" in +output_dir+ (only if it exists) plus every
# "*.ttl" currently present in the "concepts" subdirectory. When the report
# is not conformant it is printed to stderr and the process exits with 1.
# @options[:shapes] is passed through as shapes_path (nil when unset).
+ def validate_outputs!(formats, output_dir, name)
216
+ return unless formats.include?("turtle")
217
+
218
+ require "glossarist/validation/shacl_validator"
219
+ shapes_path = @options[:shapes]
220
+ validator = Validation::ShaclValidator.new(shapes_path:)
221
+ files = []
222
+ files << File.join(output_dir, "#{name}.ttl") if File.exist?(File.join(output_dir, "#{name}.ttl"))
223
+ files.concat(Dir.glob(File.join(output_dir, "concepts", "*.ttl")))
224
+ report = validator.validate_files(files)
225
+ unless report.conformant?
226
+ warn report.to_s
227
+ exit 1
228
+ end
229
+ end
107
230
  end
108
231
  end
109
232
  end
@@ -105,8 +105,7 @@ module Glossarist
105
105
 
106
106
  desc "export PATH", "Export concepts in machine-readable formats"
107
107
  option :format, type: :string, required: true,
108
- enum: %w[json jsonld turtle tbx jsonl],
109
- desc: "Output format"
108
+ desc: "Output format(s), comma-separated (json, jsonld, turtle, tbx, jsonl, yaml)"
110
109
  option :output, aliases: :o, type: :string, required: true,
111
110
  desc: "Output directory"
112
111
  option :shortname, type: :string,
@@ -117,6 +116,16 @@ module Glossarist
117
116
  desc: "Base URL of the glossarist site"
118
117
  option :title, type: :string,
119
118
  desc: "Dataset title for document header"
119
+ option :per_concept, type: :boolean, default: false,
120
+ desc: "Emit one file per concept (in concepts/ subdirectory)"
121
+ option :aggregate, type: :boolean, default: nil,
122
+ desc: "Emit aggregate files (default: true unless --per-concept only)"
123
+ option :validate, type: :boolean, default: false,
124
+ desc: "Run SHACL validation on every emitted .ttl; fail on violation"
125
+ option :shapes, type: :string,
126
+ desc: "Path to SHACL shapes .ttl (default: bundled concept-model shapes)"
127
+ option :context, type: :string,
128
+ desc: "Path to custom JSON-LD context (default: concept-model context)"
120
129
  def export(path)
121
130
  CLI::ExportCommand.new(path, options).run
122
131
  end
@@ -35,5 +35,18 @@ module Glossarist
35
35
  localizations.each { |l10n| mc.add_localization(l10n) }
36
36
  mc
37
37
  end
38
+
39
+ # Set concept.uuid from the document's record key (+id+) when the YAML
40
+ # stream did not provide one. The YAML is the source of truth for the
41
+ # UUID; +id+ is a layout concern — for the grouped layout it is the
42
+ # filename stem, which may be a clause identifier (e.g. 3.1.1.1.yaml)
43
+ # rather than a UUID. Both the load path (ConceptStore#load_glossary)
44
+ # and the round-trip path (ConceptDocumentSerializer#deserialize) call
45
+ # this so the rule lives in one place.
46
+ def ensure_concept_uuid!
47
# Nothing to do without both a concept and a record id.
+ return unless concept && id
48
+
49
# ||= keeps a uuid that is already set; id is only the fallback.
+ concept.uuid ||= id
50
+ end
38
51
  end
39
52
  end
@@ -7,7 +7,13 @@ module Glossarist
7
7
  # Local references use +concept_id+ without +source+. External references
8
8
  # use +source+ (a registry URN prefix) and +concept_id+ to identify the
9
9
  # target concept, or a direct +urn+ field for formal URN references.
10
+ #
11
+ # Includes the {Reference} protocol. Overrides +cite?+, +local?+, and
12
+ # +external?+ because ConceptReference is the only reference kind whose
13
+ # predicates depend on runtime state (ref_type, source).
10
14
  class ConceptReference < Lutaml::Model::Serializable
15
+ include Reference
16
+
11
17
  attribute :term, :string
12
18
  attribute :concept_id, :string
13
19
  attribute :source, :string
@@ -15,8 +15,8 @@ module Glossarist
15
15
  def deserialize(data, model_class)
16
16
  doc = model_class.from_yamls(data["_yamls"])
17
17
  doc.id = data["_id"]
18
+ doc.ensure_concept_uuid!
18
19
  concept = doc.concept
19
- concept.uuid = doc.id if doc.id && concept
20
20
  doc.localizations.each { |l10n| concept&.add_localization(l10n) }
21
21
  doc
22
22
  end
@@ -37,8 +37,7 @@ module Glossarist
37
37
  )
38
38
 
39
39
  documents.each do |doc|
40
- concept = doc.concept
41
- concept.uuid = doc.id
40
+ doc.ensure_concept_uuid!
42
41
  db.save(doc)
43
42
  end
44
43
 
@@ -123,7 +123,7 @@ module Glossarist
123
123
  end
124
124
 
125
125
  def explicit_section_ids(concept)
126
- domains = concept.respond_to?(:data) ? concept.data&.domains : nil
126
+ domains = concept.is_a?(ManagedConcept) ? concept.data&.domains : nil
127
127
  Array(domains).select { |d| d.ref_type == "section" }
128
128
  .filter_map(&:concept_id)
129
129
  end
@@ -133,7 +133,7 @@ module Glossarist
133
133
  # longest registered section prefix.
134
134
  # @example "103-01-01" with section "103" registered → ["103"]
135
135
  def derive_section_ids_from_id(concept)
136
- concept_id = concept.respond_to?(:data) ? concept.data&.id : nil
136
+ concept_id = concept.is_a?(ManagedConcept) ? concept.data&.id : nil
137
137
  return [] unless concept_id
138
138
 
139
139
  all_section_ids = collect_all_section_ids
@@ -10,9 +10,5 @@ module Glossarist
10
10
  map :entity_id, to: :entity_id
11
11
  map :display, to: :display
12
12
  end
13
-
14
- def dedup_key
15
- [self.class.name, entity_id]
16
- end
17
13
  end
18
14
  end
@@ -10,9 +10,5 @@ module Glossarist
10
10
  map :entity_id, to: :entity_id
11
11
  map :display, to: :display
12
12
  end
13
-
14
- def dedup_key
15
- [self.class.name, entity_id]
16
- end
17
13
  end
18
14
  end
@@ -7,7 +7,13 @@ module Glossarist
7
7
  # entity ID and an optional display override. They are produced both by
8
8
  # structural arrays (`figures: [id]` on ManagedConceptData) and by inline
9
9
  # mentions (`{{fig:id}}` in text).
10
+ #
11
+ # Includes the {Reference} protocol so mixed-collection validation rules
12
+ # can iterate these alongside ConceptReference / BibliographicReference /
13
+ # AssetReference without type-checking. All predicates default to false.
10
14
  class NonVerbalReference < Lutaml::Model::Serializable
15
+ include Reference
16
+
11
17
  attribute :entity_id, :string
12
18
  attribute :display, :string
13
19
 
@@ -19,5 +25,9 @@ module Glossarist
19
25
  display: hash["display"] || hash[:display],
20
26
  )
21
27
  end
28
+
29
# Returns [class name, entity_id]. Including the concrete class name keeps
# equal entity ids on different subclasses distinct.
# NOTE(review): presumably used to de-duplicate extracted references —
# the caller is not visible here.
+ def dedup_key
30
+ [self.class.name, entity_id]
31
+ end
22
32
  end
23
33
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "digest"
3
4
  require "lutaml/model"
4
5
 
5
6
  module Glossarist
@@ -12,7 +13,7 @@ module Glossarist
12
13
  rdf do
13
14
  namespace Namespaces::GlossaristNamespace, Namespaces::RdfNamespace
14
15
 
15
- subject { |d| "definition/#{d.content.hash.abs}" }
16
+ subject { |d| "definition/#{GlossDetailedDefinition.deterministic_id(d)}" }
16
17
 
17
18
  types "gloss:DetailedDefinition"
18
19
 
@@ -21,6 +22,10 @@ module Glossarist
21
22
  members :sources
22
23
  members :examples, link: "gloss:hasExample"
23
24
  end
25
+
26
# Returns the first 12 hex characters of the MD5 digest of
# definition.content.to_s, so equal content always yields the same id.
# Nil content is hashed as the empty string.
+ def self.deterministic_id(definition)
27
+ Digest::MD5.hexdigest(definition.content.to_s)[0..11]
28
+ end
24
29
  end
25
30
  end
26
31
  end
@@ -1,10 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "lutaml/model"
4
+ require "rdf"
4
5
 
5
6
  module Glossarist
6
7
  module Rdf
7
8
  class GlossLocalizedConcept < Lutaml::Model::Serializable
9
+ include EmitsExtraTriples
10
+
11
+ SKOS_PREF_LABEL = RDF::URI("http://www.w3.org/2004/02/skos/core#prefLabel")
12
+ SKOS_ALT_LABEL = RDF::URI("http://www.w3.org/2004/02/skos/core#altLabel")
13
+ SKOS_HIDDEN_LABEL = RDF::URI("http://www.w3.org/2004/02/skos/core#hiddenLabel")
14
+ SKOS_DEFINITION = RDF::URI("http://www.w3.org/2004/02/skos/core#definition")
15
+ SKOS_SCOPE_NOTE = RDF::URI("http://www.w3.org/2004/02/skos/core#scopeNote")
16
+ SKOS_EXAMPLE = RDF::URI("http://www.w3.org/2004/02/skos/core#example")
17
+
8
18
  attribute :concept_id, :string
9
19
  attribute :language_code, :string
10
20
  attribute :domain, :string
@@ -72,6 +82,61 @@ module Glossarist
72
82
  else "skosxl:altLabel"
73
83
  end
74
84
  end
85
+
86
+ # Hook invoked by Glossarist::Rdf::LutamlTurtleTransformExt.
87
+ # Emits direct SKOS predicates alongside the reified SKOS-XL / gloss
88
+ # forms, so consumers that only speak plain SKOS (no SKOS-XL) see the
89
+ # labels, definitions, notes, and examples as plain literals.
90
# Returns an array of RDF::Statement objects, all with +subject_uri+ as
# subject: one label per designation, then one statement per definition,
# note and example. +_mapping+ is unused.
+ def emit_extra_triples(subject_uri, _mapping)
91
# lang stays nil when language_code is nil or empty, giving untagged literals.
+ lang = language_code.to_s if language_code && !language_code.to_s.empty?
92
+ triples = []
93
+
94
+ Array(designations).each do |desig|
95
+ predicate = self.class.skos_label_predicate(desig)
96
# Defensive: skos_label_predicate as written always returns a predicate.
+ next unless predicate
97
+
98
+ triples << RDF::Statement.new(subject_uri, predicate,
99
+ self.class.rdf_literal(desig.designation, lang))
100
+ end
101
+
102
+ Array(definitions).each do |d|
103
+ triples << RDF::Statement.new(subject_uri, SKOS_DEFINITION,
104
+ self.class.rdf_literal(d.content, lang))
105
+ end
106
+
107
+ Array(notes).each do |n|
108
+ triples << RDF::Statement.new(subject_uri, SKOS_SCOPE_NOTE,
109
+ self.class.rdf_literal(n.content, lang))
110
+ end
111
+
112
+ Array(examples).each do |e|
113
+ triples << RDF::Statement.new(subject_uri, SKOS_EXAMPLE,
114
+ self.class.rdf_literal(e.content, lang))
115
+ end
116
+
117
+ triples
118
+ end
119
+
120
+ class << self
121
# Maps the last "/"-separated segment of designation.normative_status to a
# SKOS label predicate: "preferred" gives prefLabel, "deprecated" gives
# hiddenLabel, anything else (including a nil status) gives altLabel.
+ def skos_label_predicate(designation)
122
+ status = designation.normative_status.to_s.split("/").last
123
+ case status
124
+ when "preferred" then SKOS_PREF_LABEL
125
+ when "deprecated" then SKOS_HIDDEN_LABEL
126
+ else SKOS_ALT_LABEL
127
+ end
128
+ end
129
+
130
# Builds an RDF::Literal from value.to_s, language-tagged when +lang+ is
# given. A nil value becomes an empty, untagged literal.
# NOTE(review): nil content therefore still produces a triple with "" as
# its object — confirm that is intended rather than skipping the triple.
+ def rdf_literal(value, lang)
131
+ return RDF::Literal.new("") if value.nil?
132
+
133
+ if lang
134
+ RDF::Literal.new(value.to_s, language: lang)
135
+ else
136
+ RDF::Literal.new(value.to_s)
137
+ end
138
+ end
139
+ end
75
140
  end
76
141
  end
77
142
  end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/turtle"
4
+
5
+ # Bridge between lutaml-model's Transform hook and Serializable instances.
6
+ #
7
+ # lutaml-model defines `additional_resource_triples(instance, subject_uri,
8
+ # mapping)` on Lutaml::Turtle::Transform and calls it from `build_graph`.
9
+ # The Transform is a separate object from the model instance, so the model
10
+ # cannot directly emit extra RDF statements.
11
+ #
12
+ # Models that want to emit extra RDF (e.g., direct SKOS alongside reified
13
+ # SKOS-XL) include `EmitsExtraTriples` and override `emit_extra_triples`.
14
+ module Glossarist
15
+ module Rdf
16
# Marker mixin: LutamlTurtleTransformExt checks is_a?(EmitsExtraTriples)
# to decide whether to ask an instance for extra triples.
+ module EmitsExtraTriples
17
# Default implementation: contributes no extra triples.
+ def emit_extra_triples(_subject_uri, _mapping)
18
+ []
19
+ end
20
+ end
21
+
22
# Prepended to Lutaml::Turtle::Transform at the bottom of this file.
+ module LutamlTurtleTransformExt
23
# Returns super's triples unchanged for instances that do not include
# EmitsExtraTriples; otherwise appends the instance's extra triples.
# Array() tolerates a nil or single-value return from the hook.
+ def additional_resource_triples(instance, subject_uri, mapping)
24
+ triples = super
25
+ return triples unless instance.is_a?(EmitsExtraTriples)
26
+
27
+ triples + Array(instance.emit_extra_triples(subject_uri, mapping))
28
+ end
29
+ end
30
+ end
31
+ end
32
+
33
+ Lutaml::Turtle::Transform.prepend(Glossarist::Rdf::LutamlTurtleTransformExt)
@@ -4,6 +4,8 @@ require "lutaml/rdf"
4
4
  require "lutaml/turtle"
5
5
  require "lutaml/jsonld"
6
6
 
7
+ require_relative "rdf/lutaml_ext"
8
+
7
9
  module Glossarist
8
10
  module Rdf
9
11
  autoload :Namespaces, "#{__dir__}/rdf/namespaces"
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ # Shared protocol for all reference kinds produced by ReferenceExtractor.
5
+ #
6
+ # ConceptReference, BibliographicReference, AssetReference, and the
7
+ # NonVerbalReference family (Figure/Table/Formula) all participate in
8
+ # validation rules that iterate a mixed collection extracted from a
9
+ # concept's text fields. Rules such as CiteRefIntegrityRule call
10
+ # `select(&:cite?)` on these mixed collections and must not crash on
11
+ # any member.
12
+ #
13
+ # The defaults here represent the common case: most reference kinds are
14
+ # neither inline `{{cite:...}}` mentions, nor local/external concept
15
+ # cross-refs. ConceptReference overrides all three predicates because
16
+ # its semantics depend on ref_type and source.
17
+ #
18
+ # Including this module in a new reference class is sufficient to make
19
+ # it participate correctly in mixed-collection validation rules.
20
+ module Reference
21
# Default: false. Includers override this to return true where it applies.
+ def cite?
22
+ false
23
+ end
24
+
25
# Default: false. Includers override this to return true where it applies.
+ def local?
26
+ false
27
+ end
28
+
29
# Default: false. Includers override this to return true where it applies.
+ def external?
30
+ false
31
+ end
32
+ end
33
+ end
@@ -10,9 +10,5 @@ module Glossarist
10
10
  map :entity_id, to: :entity_id
11
11
  map :display, to: :display
12
12
  end
13
-
14
- def dedup_key
15
- [self.class.name, entity_id]
16
- end
17
13
  end
18
14
  end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rdf/turtle"
4
+ require "shacl"
5
+ require "pathname"
6
+ require "glossarist/validation/shacl_validator"
7
+
8
namespace :glossarist do
  desc "Validate all .ttl outputs against concept-model SHACL shapes. " \
       "Usage: rake 'glossarist:shacl[root_dir,path/to/shapes.ttl]' " \
       "(or set SHACL_PATH / SHAPES_PATH)."
  # Both positional arguments are declared; with only :path declared,
  # args[:shapes] could never be populated and custom shapes were reachable
  # through SHAPES_PATH alone.
  #
  # Resolution order:
  #   root   - task argument, then SHACL_PATH, then "compiled"
  #   shapes - task argument, then SHAPES_PATH, then nil (the validator
  #            picks its own default)
  #
  # Exits with status 1 when no .ttl file is found or when the report is
  # not conformant.
  task :shacl, %i[path shapes] do |_t, args|
    shapes = args[:shapes] || ENV.fetch("SHAPES_PATH", nil)
    root = args[:path] || ENV.fetch("SHACL_PATH", "compiled")

    files = Pathname.glob("#{root}/**/*.ttl")
    if files.empty?
      warn "No .ttl files found under #{root}"
      exit 1
    end

    validator = Glossarist::Validation::ShaclValidator.new(shapes_path: shapes)
    report = validator.validate_files(files.map(&:to_s))
    if report.conformant?
      puts "All #{files.length} .ttl file(s) conform to SHACL shapes."
    else
      warn report.to_s
      exit 1
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../tasks/sync_model"
4
+
5
namespace :glossarist do
  namespace :sync do
    desc "Sync vendored concept-model data from upstream. " \
         "Pass ref=[tag|branch|sha] to pin a specific version."
    # The ref is resolved from, in order: the task argument
    # (rake 'glossarist:sync:model[v1.2.0]'), the lower-case "ref" variable
    # that rake exports for a `ref=...` command-line assignment, then "REF".
    # A nil ref lets SyncModel pick the version itself.
    task :model, [:ref] do |_t, args|
      # ENV is case-sensitive outside Windows, so `ref=...` on the command
      # line never populates "REF"; check both spellings.
      ref = args[:ref] || ENV["ref"] || ENV.fetch("REF", nil)
      Glossarist::Tasks::SyncModel.call(ref: ref)
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "fileutils"
5
+ require "net/http"
6
+ require "uri"
7
+
8
module Glossarist
  module Tasks
    # Syncs vendored concept-model data artifacts from glossarist/concept-model.
    #
    # concept-model is a data-only repo (TTL, JSON-LD, YAML schemas). It is
    # not a gem. We vendor the small set of artifacts we need (shapes,
    # context, ontology) and load them at runtime via ShaclValidator.
    module SyncModel
      REPO = "glossarist/concept-model"
      OUT_DIR = File.expand_path("data/concept-model", File.join(__dir__, "..", "..", ".."))

      # Output path (relative to OUT_DIR) => upstream paths to try, in order.
      TARGETS = {
        "glossarist.context.jsonld" => %w[
          ontologies/glossarist.context.jsonld
          glossarist.context.jsonld
        ].freeze,
        "glossarist.ttl" => %w[
          ontologies/glossarist.ttl
          glossarist.ttl
        ].freeze,
        "shapes/glossarist.shacl.ttl" => %w[
          ontologies/shapes/glossarist.shacl.ttl
          shapes/glossarist.shacl.ttl
        ].freeze,
      }.freeze

      class << self
        # Downloads every TARGETS entry at +ref+ into OUT_DIR and records the
        # ref in SOURCE.json.
        #
        # @param ref [String, nil] tag, branch or sha; nil resolves to the
        #   latest upstream release tag
        # @raise [RuntimeError] when none of a target's candidates can be fetched
        def call(ref: nil)
          ref ||= latest_tag
          FileUtils.mkdir_p(File.join(OUT_DIR, "shapes"))

          TARGETS.each do |out_rel, candidates|
            content = fetch_any(ref, candidates)
            out_path = File.join(OUT_DIR, out_rel)
            FileUtils.mkdir_p(File.dirname(out_path))
            File.write(out_path, content)
            puts " ✓ #{out_rel} (#{content.bytesize} bytes)"
          end

          write_source_manifest(ref)
          puts "\nSynced #{TARGETS.length} file(s) from #{REPO}@#{ref}."
        end

        private

        # Asks the GitHub API for the latest release tag of REPO.
        # Any failure is reported on stderr and the process exits with 1.
        def latest_tag
          url = URI("https://api.github.com/repos/#{REPO}/releases/latest")
          req = Net::HTTP::Get.new(url)
          req["Accept"] = "application/vnd.github+json"
          response = Net::HTTP.start(url.hostname, url.port, use_ssl: true) do |http|
            http.request(req)
          end
          # Surface the status code instead of a confusing "key not found".
          raise "HTTP #{response.code}" unless response.is_a?(Net::HTTPSuccess)

          JSON.parse(response.body).fetch("tag_name")
        rescue StandardError => e
          warn "Could not determine latest concept-model tag: #{e.message}"
          exit 1
        end

        # Returns the content of the first candidate path that can be fetched.
        #
        # @raise [RuntimeError] listing every candidate and why it failed
        def fetch_any(ref, candidates)
          failures = []
          candidates.each do |path|
            return fetch_file(ref, path)
          rescue StandardError => e
            failures << "#{path} (#{e.message})"
          end
          raise "Could not fetch any of: #{candidates.join(', ')} [#{failures.join('; ')}]"
        end

        # Fetches one file from raw.githubusercontent.com.
        #
        # Net::HTTP.get returns the body whatever the status, so a missing
        # path would hand back the 404 page as if it were file content and
        # fetch_any would never try the next candidate. The status is
        # therefore checked explicitly.
        #
        # @raise [RuntimeError] on any non-2xx response
        def fetch_file(ref, path)
          url = URI("https://raw.githubusercontent.com/#{REPO}/#{ref}/#{path}")
          response = Net::HTTP.get_response(url)
          unless response.is_a?(Net::HTTPSuccess)
            raise "GET #{url} returned HTTP #{response.code}"
          end

          response.body
        end

        # Writes SOURCE.json (repo, ref, sync timestamp) next to the
        # vendored files.
        def write_source_manifest(ref)
          # Time#iso8601 comes from the "time" stdlib on Rubies before 3.4.
          require "time"

          manifest = {
            "repo" => REPO,
            "ref" => ref,
            "syncedAt" => Time.now.utc.iso8601,
          }
          File.write(File.join(OUT_DIR, "SOURCE.json"),
                     JSON.pretty_generate(manifest) + "\n")
        end
      end
    end
  end
end
@@ -122,10 +122,11 @@ module Glossarist
122
122
 
123
123
  dd_attrs = if data
124
124
  data.class.detailed_definition_fields.to_h do |field|
125
- [field, build_gloss_definitions(data.public_send(field))]
125
+ key = field == :definition ? :definitions : field
126
+ [key, build_gloss_definitions(data.public_send(field))]
126
127
  end
127
128
  else
128
- { definition: [], notes: [], examples: [] }
129
+ { definitions: [], notes: [], examples: [] }
129
130
  end
130
131
 
131
132
  sources = build_gloss_sources(data&.sources)