glossarist 2.8.18 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +175 -8
- data/Rakefile +4 -0
- data/config.yml +2 -3
- data/data/concept-model/README.md +35 -0
- data/data/concept-model/SOURCE.json +5 -0
- data/data/concept-model/glossarist.context.jsonld +209 -0
- data/data/concept-model/glossarist.ttl +960 -0
- data/data/concept-model/shapes/glossarist.shacl.ttl +575 -0
- data/glossarist.gemspec +2 -0
- data/lib/glossarist/asset_reference.rb +2 -0
- data/lib/glossarist/bibliographic_reference.rb +13 -0
- data/lib/glossarist/cli/export_command.rb +130 -7
- data/lib/glossarist/cli.rb +11 -2
- data/lib/glossarist/concept_document.rb +13 -0
- data/lib/glossarist/concept_reference.rb +6 -0
- data/lib/glossarist/concept_store.rb +2 -3
- data/lib/glossarist/dataset_register.rb +2 -2
- data/lib/glossarist/figure_reference.rb +0 -4
- data/lib/glossarist/formula_reference.rb +0 -4
- data/lib/glossarist/non_verbal_reference.rb +10 -0
- data/lib/glossarist/rdf/gloss_detailed_definition.rb +6 -1
- data/lib/glossarist/rdf/gloss_localized_concept.rb +65 -0
- data/lib/glossarist/rdf/lutaml_ext.rb +33 -0
- data/lib/glossarist/rdf.rb +2 -0
- data/lib/glossarist/reference.rb +33 -0
- data/lib/glossarist/table_reference.rb +0 -4
- data/lib/glossarist/tasks/shacl.rake +30 -0
- data/lib/glossarist/tasks/sync.rake +14 -0
- data/lib/glossarist/tasks/sync_model.rb +92 -0
- data/lib/glossarist/transforms/concept_to_gloss_transform.rb +3 -2
- data/lib/glossarist/utilities/uuid.rb +5 -1
- data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +15 -1
- data/lib/glossarist/validation/shacl_validator.rb +97 -0
- data/lib/glossarist/validation.rb +1 -0
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +1 -0
- metadata +41 -2
|
@@ -3,8 +3,12 @@
|
|
|
3
3
|
module Glossarist
|
|
4
4
|
class CLI
|
|
5
5
|
class ExportCommand
|
|
6
|
+
AGGREGATE_FORMATS = %w[jsonld turtle tbx jsonl].freeze
|
|
7
|
+
PER_CONCEPT_FORMATS = %w[json jsonld turtle yaml].freeze
|
|
8
|
+
|
|
6
9
|
EXTENSIONS = {
|
|
7
10
|
"json" => "json",
|
|
11
|
+
"yaml" => "yaml",
|
|
8
12
|
**GcrPackage::COMPILED_EXTENSIONS,
|
|
9
13
|
}.freeze
|
|
10
14
|
|
|
@@ -14,20 +18,32 @@ module Glossarist
|
|
|
14
18
|
end
|
|
15
19
|
|
|
16
20
|
def run
|
|
17
|
-
|
|
21
|
+
formats = parse_formats(@options[:format])
|
|
18
22
|
output_dir = File.expand_path(@options[:output])
|
|
19
23
|
FileUtils.mkdir_p(output_dir)
|
|
20
24
|
|
|
21
25
|
concepts = load_concepts
|
|
22
26
|
name = resolve_shortname
|
|
23
27
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
formats.each do |format|
|
|
29
|
+
if per_concept? && per_concept_supported?(format)
|
|
30
|
+
export_per_concept(concepts, format, output_dir)
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
if aggregate?
|
|
34
|
+
export_aggregate(format, concepts, name, output_dir)
|
|
35
|
+
elsif per_concept? && !per_concept_supported?(format)
|
|
36
|
+
raise ArgumentError,
|
|
37
|
+
format(
|
|
38
|
+
"Per-concept export is not supported for '%<format>s' " \
|
|
39
|
+
"(only aggregate). Drop --per-concept or pick one of: %<formats>s",
|
|
40
|
+
format: format,
|
|
41
|
+
formats: PER_CONCEPT_FORMATS.join(", "),
|
|
42
|
+
)
|
|
43
|
+
end
|
|
30
44
|
end
|
|
45
|
+
|
|
46
|
+
validate_outputs!(formats, output_dir, name) if validate?
|
|
31
47
|
rescue ArgumentError => e
|
|
32
48
|
warn "Error: #{e.message}"
|
|
33
49
|
exit 1
|
|
@@ -63,6 +79,51 @@ module Glossarist
|
|
|
63
79
|
}.compact
|
|
64
80
|
end
|
|
65
81
|
|
|
82
|
+
# Normalises the raw --format option into a validated list of format names.
#
# Each entry may itself be a comma-separated list; surrounding whitespace and
# empty segments are dropped, and repeated names are collapsed so a format is
# never exported twice.
#
# @param raw [String, Array<String>, nil] option value as given by the caller
# @return [Array<String>] unique format names in first-seen order
# @raise [ArgumentError] when no format name remains, or when
#   validate_format! rejects one of the names
def parse_formats(raw)
  names = Array(raw)
          .flat_map { |value| value.to_s.split(",") }
          .map(&:strip)
          .reject(&:empty?)
          .uniq

  # An input such as "," would otherwise make the export a silent no-op.
  raise ArgumentError, "No export format given" if names.empty?

  names.each { |name| validate_format!(name) }
end
|
|
87
|
+
|
|
88
|
+
# Ensures +format+ is one of the keys of EXTENSIONS.
#
# @param format [String] format name to check
# @return [nil] when the format is known
# @raise [ArgumentError] naming the unknown format and listing the valid ones
def validate_format!(format)
  return if EXTENSIONS.key?(format)

  # The parameter shadows Kernel#format inside this method, so the formatter
  # is called through Kernel explicitly rather than relying on the
  # parenthesised-call parsing rule.
  message = Kernel.format(
    "Unknown format '%<format>s'. Valid formats: %<valid>s",
    format: format,
    valid: EXTENSIONS.keys.join(", "),
  )
  raise ArgumentError, message
end
|
|
96
|
+
|
|
97
|
+
# Whether the :per_concept option is set; false when the key is absent.
def per_concept?
  flag = @options.fetch(:per_concept, false)
  flag
end
|
|
100
|
+
|
|
101
|
+
# Whether aggregate files should be written: always when per-concept export
# is off, otherwise only when the :aggregate option is set.
def aggregate?
  return true unless per_concept?

  @options.fetch(:aggregate, false)
end
|
|
104
|
+
|
|
105
|
+
# Whether the :validate option is set; false when the key is absent.
def validate?
  flag = @options.fetch(:validate, false)
  flag
end
|
|
108
|
+
|
|
109
|
+
# True when +format+ is listed in PER_CONCEPT_FORMATS.
def per_concept_supported?(format)
  supported = PER_CONCEPT_FORMATS
  supported.include?(format)
end
|
|
112
|
+
|
|
113
|
+
# True when +format+ is listed in AGGREGATE_FORMATS, or is "json" (which is
# accepted here without being part of that list).
def aggregate_supported?(format)
  return true if AGGREGATE_FORMATS.include?(format)

  format == "json"
end
|
|
116
|
+
|
|
117
|
+
# Dispatches an aggregate export to the writer for +format+.
#
# A format without an aggregate writer (for example "yaml", which passes
# format validation) used to fall through the case silently, so the command
# produced no output and no diagnostic. It is now reported on stderr.
#
# @param format [String] validated format name
# @param concepts [Object] concepts collection handed on to the writer
# @param name [String] dataset short name handed on to the writer
# @param output_dir [String] directory handed on to the writer
# @return [Object, nil] the writer's return value; nil when nothing was written
def export_aggregate(format, concepts, name, output_dir)
  case format
  when "json" then export_json(concepts, output_dir)
  when "jsonld" then export_jsonld(concepts, name, output_dir)
  when "turtle" then export_turtle(concepts, name, output_dir)
  when "tbx" then export_tbx(concepts, name, output_dir)
  when "jsonl" then export_jsonl(concepts, name, output_dir)
  else
    warn "Warning: aggregate export is not supported for '#{format}'; skipping"
  end
end
|
|
126
|
+
|
|
66
127
|
def export_json(concepts, output_dir)
|
|
67
128
|
concepts.each do |concept|
|
|
68
129
|
id = concept.data&.id || concept.identifier
|
|
@@ -104,6 +165,68 @@ module Glossarist
|
|
|
104
165
|
end
|
|
105
166
|
end
|
|
106
167
|
end
|
|
168
|
+
|
|
169
|
+
# Writes one file per concept into the "concepts" subdirectory of
# +output_dir+ (created if missing), using the writer that matches +format+.
# Formats without a per-concept writer create the directory and do nothing
# else.
def export_per_concept(concepts, format, output_dir)
  target = File.join(output_dir, "concepts")
  FileUtils.mkdir_p(target)

  writer = {
    "json" => :per_concept_json,
    "yaml" => :per_concept_yaml,
    "jsonld" => :per_concept_jsonld,
    "turtle" => :per_concept_turtle,
  }[format]

  send(writer, concepts, target) if writer
end
|
|
180
|
+
|
|
181
|
+
# Writes each concept's JSON serialisation to "<id>.json" inside +dir+.
# The file stem is the concept data's id when present, otherwise the
# concept's identifier.
def per_concept_json(concepts, dir)
  concepts.each do |entry|
    stem = entry.data&.id || entry.identifier
    destination = File.join(dir, "#{stem}.json")
    File.write(destination, entry.to_json)
  end
end
|
|
187
|
+
|
|
188
|
+
# Writes each concept's YAML serialisation to "<id>.yaml" inside +dir+.
# The file stem is the concept data's id when present, otherwise the
# concept's identifier.
def per_concept_yaml(concepts, dir)
  concepts.each do |entry|
    stem = entry.data&.id || entry.identifier
    destination = File.join(dir, "#{stem}.yaml")
    File.write(destination, entry.to_yaml)
  end
end
|
|
194
|
+
|
|
195
|
+
# Writes each concept as JSON-LD to "<id>.jsonld" inside +dir+, converting it
# through Transforms::ConceptToGlossTransform (loaded lazily here) with the
# command's transform_options.
def per_concept_jsonld(concepts, dir)
  require "glossarist/transforms/concept_to_gloss_transform"

  concepts.each do |entry|
    stem = entry.data&.id || entry.identifier
    converter = Transforms::ConceptToGlossTransform.new(entry, transform_options)
    destination = File.join(dir, "#{stem}.jsonld")
    File.write(destination, converter.to_jsonld)
  end
end
|
|
204
|
+
|
|
205
|
+
# Writes each concept as Turtle to "<id>.ttl" inside +dir+, converting it
# through Transforms::ConceptToGlossTransform (loaded lazily here) with the
# command's transform_options.
def per_concept_turtle(concepts, dir)
  require "glossarist/transforms/concept_to_gloss_transform"

  concepts.each do |entry|
    stem = entry.data&.id || entry.identifier
    converter = Transforms::ConceptToGlossTransform.new(entry, transform_options)
    destination = File.join(dir, "#{stem}.ttl")
    File.write(destination, converter.to_turtle)
  end
end
|
|
214
|
+
|
|
215
|
+
# Runs SHACL validation over the Turtle files found in +output_dir+: the
# aggregate "<name>.ttl" when it exists, plus every "concepts/*.ttl".
# Does nothing unless "turtle" is among +formats+. On a non-conformant report
# the report is printed to stderr and the process exits with status 1.
def validate_outputs!(formats, output_dir, name)
  return unless formats.include?("turtle")

  require "glossarist/validation/shacl_validator"
  checker = Validation::ShaclValidator.new(shapes_path: @options[:shapes])

  aggregate_ttl = File.join(output_dir, "#{name}.ttl")
  targets = File.exist?(aggregate_ttl) ? [aggregate_ttl] : []
  targets += Dir.glob(File.join(output_dir, "concepts", "*.ttl"))

  outcome = checker.validate_files(targets)
  return if outcome.conformant?

  warn outcome.to_s
  exit 1
end
|
|
107
230
|
end
|
|
108
231
|
end
|
|
109
232
|
end
|
data/lib/glossarist/cli.rb
CHANGED
|
@@ -105,8 +105,7 @@ module Glossarist
|
|
|
105
105
|
|
|
106
106
|
desc "export PATH", "Export concepts in machine-readable formats"
|
|
107
107
|
option :format, type: :string, required: true,
|
|
108
|
-
|
|
109
|
-
desc: "Output format"
|
|
108
|
+
desc: "Output format(s), comma-separated (json, jsonld, turtle, tbx, jsonl, yaml)"
|
|
110
109
|
option :output, aliases: :o, type: :string, required: true,
|
|
111
110
|
desc: "Output directory"
|
|
112
111
|
option :shortname, type: :string,
|
|
@@ -117,6 +116,16 @@ module Glossarist
|
|
|
117
116
|
desc: "Base URL of the glossarist site"
|
|
118
117
|
option :title, type: :string,
|
|
119
118
|
desc: "Dataset title for document header"
|
|
119
|
+
option :per_concept, type: :boolean, default: false,
|
|
120
|
+
desc: "Emit one file per concept (in concepts/ subdirectory)"
|
|
121
|
+
option :aggregate, type: :boolean, default: nil,
|
|
122
|
+
desc: "Emit aggregate files (default: true unless --per-concept only)"
|
|
123
|
+
option :validate, type: :boolean, default: false,
|
|
124
|
+
desc: "Run SHACL validation on every emitted .ttl; fail on violation"
|
|
125
|
+
option :shapes, type: :string,
|
|
126
|
+
desc: "Path to SHACL shapes .ttl (default: bundled concept-model shapes)"
|
|
127
|
+
option :context, type: :string,
|
|
128
|
+
desc: "Path to custom JSON-LD context (default: concept-model context)"
|
|
120
129
|
# CLI entry point for `export PATH`: builds a CLI::ExportCommand from the
# path and the parsed options, then runs it.
def export(path)
  command = CLI::ExportCommand.new(path, options)
  command.run
end
|
|
@@ -35,5 +35,18 @@ module Glossarist
|
|
|
35
35
|
localizations.each { |l10n| mc.add_localization(l10n) }
|
|
36
36
|
mc
|
|
37
37
|
end
|
|
38
|
+
|
|
39
|
+
# Fills in concept.uuid from this document's record key (+id+), but only when
# the concept does not already carry a UUID from the YAML stream. The YAML
# remains the source of truth; +id+ is a layout concern and, in the grouped
# layout, is the filename stem, which may be a clause identifier
# (e.g. 3.1.1.1.yaml) rather than a UUID. The load path
# (ConceptStore#load_glossary) and the round-trip path
# (ConceptDocumentSerializer#deserialize) both go through this method so the
# rule is defined once.
#
# No-op when the document has no concept or no id.
def ensure_concept_uuid!
  return if !concept || !id

  owner = concept
  owner.uuid || (owner.uuid = id)
end
|
|
38
51
|
end
|
|
39
52
|
end
|
|
@@ -7,7 +7,13 @@ module Glossarist
|
|
|
7
7
|
# Local references use +concept_id+ without +source+. External references
|
|
8
8
|
# use +source+ (a registry URN prefix) and +concept_id+ to identify the
|
|
9
9
|
# target concept, or a direct +urn+ field for formal URN references.
|
|
10
|
+
#
|
|
11
|
+
# Includes the {Reference} protocol. Overrides +cite?+, +local?+, and
|
|
12
|
+
# +external?+ because ConceptReference is the only reference kind whose
|
|
13
|
+
# predicates depend on runtime state (ref_type, source).
|
|
10
14
|
class ConceptReference < Lutaml::Model::Serializable
|
|
15
|
+
include Reference
|
|
16
|
+
|
|
11
17
|
attribute :term, :string
|
|
12
18
|
attribute :concept_id, :string
|
|
13
19
|
attribute :source, :string
|
|
@@ -15,8 +15,8 @@ module Glossarist
|
|
|
15
15
|
# Rebuilds a document from its stored form: parses the "_yamls" payload with
# +model_class+, restores the record id from "_id", lets the document fill in
# a missing concept UUID, and attaches every localization to the concept
# (when there is one).
#
# @return the rebuilt document
def deserialize(data, model_class)
  model_class.from_yamls(data["_yamls"]).tap do |document|
    document.id = data["_id"]
    document.ensure_concept_uuid!

    owner = document.concept
    document.localizations.each do |localization|
      owner&.add_localization(localization)
    end
  end
end
|
|
@@ -37,8 +37,7 @@ module Glossarist
|
|
|
37
37
|
)
|
|
38
38
|
|
|
39
39
|
documents.each do |doc|
|
|
40
|
-
|
|
41
|
-
concept.uuid = doc.id
|
|
40
|
+
doc.ensure_concept_uuid!
|
|
42
41
|
db.save(doc)
|
|
43
42
|
end
|
|
44
43
|
|
|
@@ -123,7 +123,7 @@ module Glossarist
|
|
|
123
123
|
end
|
|
124
124
|
|
|
125
125
|
# Collects the concept_id of every domain reference on +concept+ whose
# ref_type is "section". Returns an empty array when +concept+ is not a
# ManagedConcept or has no data/domains; references without a concept_id are
# left out.
def explicit_section_ids(concept)
  return [] unless concept.is_a?(ManagedConcept)

  Array(concept.data&.domains).filter_map do |domain|
    domain.concept_id if domain.ref_type == "section"
  end
end
|
|
@@ -133,7 +133,7 @@ module Glossarist
|
|
|
133
133
|
# longest registered section prefix.
|
|
134
134
|
# @example "103-01-01" with section "103" registered → ["103"]
|
|
135
135
|
def derive_section_ids_from_id(concept)
|
|
136
|
-
concept_id = concept.
|
|
136
|
+
concept_id = concept.is_a?(ManagedConcept) ? concept.data&.id : nil
|
|
137
137
|
return [] unless concept_id
|
|
138
138
|
|
|
139
139
|
all_section_ids = collect_all_section_ids
|
|
@@ -7,7 +7,13 @@ module Glossarist
|
|
|
7
7
|
# entity ID and an optional display override. They are produced both by
|
|
8
8
|
# structural arrays (`figures: [id]` on ManagedConceptData) and by inline
|
|
9
9
|
# mentions (`{{fig:id}}` in text).
|
|
10
|
+
#
|
|
11
|
+
# Includes the {Reference} protocol so mixed-collection validation rules
|
|
12
|
+
# can iterate these alongside ConceptReference / BibliographicReference /
|
|
13
|
+
# AssetReference without type-checking. All predicates default to false.
|
|
10
14
|
class NonVerbalReference < Lutaml::Model::Serializable
|
|
15
|
+
include Reference
|
|
16
|
+
|
|
11
17
|
attribute :entity_id, :string
|
|
12
18
|
attribute :display, :string
|
|
13
19
|
|
|
@@ -19,5 +25,9 @@ module Glossarist
|
|
|
19
25
|
display: hash["display"] || hash[:display],
|
|
20
26
|
)
|
|
21
27
|
end
|
|
28
|
+
|
|
29
|
+
# Key used to spot duplicate references: the concrete class name paired with
# the referenced entity id.
def dedup_key
  kind = self.class.name
  [kind, entity_id]
end
|
|
22
32
|
end
|
|
23
33
|
end
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "digest"
|
|
3
4
|
require "lutaml/model"
|
|
4
5
|
|
|
5
6
|
module Glossarist
|
|
@@ -12,7 +13,7 @@ module Glossarist
|
|
|
12
13
|
rdf do
|
|
13
14
|
namespace Namespaces::GlossaristNamespace, Namespaces::RdfNamespace
|
|
14
15
|
|
|
15
|
-
subject { |d| "definition/#{d
|
|
16
|
+
subject { |d| "definition/#{GlossDetailedDefinition.deterministic_id(d)}" }
|
|
16
17
|
|
|
17
18
|
types "gloss:DetailedDefinition"
|
|
18
19
|
|
|
@@ -21,6 +22,10 @@ module Glossarist
|
|
|
21
22
|
members :sources
|
|
22
23
|
members :examples, link: "gloss:hasExample"
|
|
23
24
|
end
|
|
25
|
+
|
|
26
|
+
# Derives a stable identifier for +definition+: the first 12 hex characters
# of the MD5 digest of its content (nil content is treated as an empty
# string).
def self.deterministic_id(definition)
  text = definition.content.to_s
  Digest::MD5.hexdigest(text).slice(0, 12)
end
|
|
24
29
|
end
|
|
25
30
|
end
|
|
26
31
|
end
|
|
@@ -1,10 +1,20 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "lutaml/model"
|
|
4
|
+
require "rdf"
|
|
4
5
|
|
|
5
6
|
module Glossarist
|
|
6
7
|
module Rdf
|
|
7
8
|
class GlossLocalizedConcept < Lutaml::Model::Serializable
|
|
9
|
+
include EmitsExtraTriples
|
|
10
|
+
|
|
11
|
+
SKOS_PREF_LABEL = RDF::URI("http://www.w3.org/2004/02/skos/core#prefLabel")
|
|
12
|
+
SKOS_ALT_LABEL = RDF::URI("http://www.w3.org/2004/02/skos/core#altLabel")
|
|
13
|
+
SKOS_HIDDEN_LABEL = RDF::URI("http://www.w3.org/2004/02/skos/core#hiddenLabel")
|
|
14
|
+
SKOS_DEFINITION = RDF::URI("http://www.w3.org/2004/02/skos/core#definition")
|
|
15
|
+
SKOS_SCOPE_NOTE = RDF::URI("http://www.w3.org/2004/02/skos/core#scopeNote")
|
|
16
|
+
SKOS_EXAMPLE = RDF::URI("http://www.w3.org/2004/02/skos/core#example")
|
|
17
|
+
|
|
8
18
|
attribute :concept_id, :string
|
|
9
19
|
attribute :language_code, :string
|
|
10
20
|
attribute :domain, :string
|
|
@@ -72,6 +82,61 @@ module Glossarist
|
|
|
72
82
|
else "skosxl:altLabel"
|
|
73
83
|
end
|
|
74
84
|
end
|
|
85
|
+
|
|
86
|
+
# Hook invoked by Glossarist::Rdf::LutamlTurtleTransformExt.
#
# Adds plain SKOS statements next to the reified SKOS-XL / gloss forms so
# that consumers limited to plain SKOS still see labels, definitions, notes
# and examples as literals. Literals are language-tagged with language_code
# when it is non-empty.
#
# @param subject_uri subject for every emitted statement
# @param _mapping unused
# @return [Array<RDF::Statement>] one statement per designation, definition,
#   note and example, in that order
def emit_extra_triples(subject_uri, _mapping)
  code = language_code
  lang = code.to_s if code && !code.to_s.empty?
  model = self.class
  statements = []

  Array(designations).each do |designation|
    label_predicate = model.skos_label_predicate(designation)
    next unless label_predicate

    statements << RDF::Statement.new(
      subject_uri, label_predicate,
      model.rdf_literal(designation.designation, lang)
    )
  end

  [
    [SKOS_DEFINITION, definitions],
    [SKOS_SCOPE_NOTE, notes],
    [SKOS_EXAMPLE, examples],
  ].each do |predicate, items|
    Array(items).each do |item|
      statements << RDF::Statement.new(
        subject_uri, predicate, model.rdf_literal(item.content, lang)
      )
    end
  end

  statements
end
|
|
119
|
+
|
|
120
|
+
class << self
  # Picks the plain-SKOS label predicate for +designation+ from the last
  # "/"-separated segment of its normative status: "preferred" maps to
  # prefLabel, "deprecated" to hiddenLabel, anything else to altLabel.
  def skos_label_predicate(designation)
    status = designation.normative_status.to_s.split("/").last
    by_status = {
      "preferred" => SKOS_PREF_LABEL,
      "deprecated" => SKOS_HIDDEN_LABEL,
    }
    by_status.fetch(status, SKOS_ALT_LABEL)
  end

  # Wraps +value+ in an RDF literal. A nil value becomes an untagged empty
  # literal; otherwise the string form of +value+ is used, language-tagged
  # when +lang+ is given.
  def rdf_literal(value, lang)
    return RDF::Literal.new("") if value.nil?

    text = value.to_s
    lang ? RDF::Literal.new(text, language: lang) : RDF::Literal.new(text)
  end
end
|
|
75
140
|
end
|
|
76
141
|
end
|
|
77
142
|
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/turtle"
|
|
4
|
+
|
|
5
|
+
# Bridge between lutaml-model's Transform hook and Serializable instances.
|
|
6
|
+
#
|
|
7
|
+
# lutaml-model defines `additional_resource_triples(instance, subject_uri,
|
|
8
|
+
# mapping)` on Lutaml::Turtle::Transform and calls it from `build_graph`.
|
|
9
|
+
# The Transform is a separate object from the model instance, so the model
|
|
10
|
+
# cannot directly emit extra RDF statements.
|
|
11
|
+
#
|
|
12
|
+
# Models that want to emit extra RDF (e.g., direct SKOS alongside reified
|
|
13
|
+
# SKOS-XL) include `EmitsExtraTriples` and override `emit_extra_triples`.
|
|
14
|
+
module Glossarist
  module Rdf
    # Mixin for models that contribute additional RDF statements during
    # Turtle serialisation. Including classes override #emit_extra_triples.
    module EmitsExtraTriples
      # Default hook: contributes nothing.
      #
      # @return [Array] empty list of statements
      def emit_extra_triples(_subject_uri, _mapping)
        []
      end
    end

    # Extension for the Turtle transform's additional_resource_triples hook:
    # keeps whatever the underlying implementation returns and, for
    # instances that include EmitsExtraTriples, appends the statements from
    # the instance's own #emit_extra_triples.
    module LutamlTurtleTransformExt
      def additional_resource_triples(instance, subject_uri, mapping)
        inherited = super

        if instance.is_a?(EmitsExtraTriples)
          extras = instance.emit_extra_triples(subject_uri, mapping)
          inherited + Array(extras)
        else
          inherited
        end
      end
    end
  end
end
|
|
32
|
+
|
|
33
|
+
Lutaml::Turtle::Transform.prepend(Glossarist::Rdf::LutamlTurtleTransformExt)
|
data/lib/glossarist/rdf.rb
CHANGED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
  # Common protocol for every reference kind that ReferenceExtractor yields.
  #
  # ConceptReference, BibliographicReference, AssetReference and the
  # NonVerbalReference family (Figure/Table/Formula) end up together in mixed
  # collections extracted from a concept's text fields. Validation rules such
  # as CiteRefIntegrityRule run `select(&:cite?)` over those collections, so
  # every member has to answer the predicates below without raising.
  #
  # The defaults cover the common case: a reference is neither an inline
  # `{{cite:...}}` mention nor a local or external concept cross-reference.
  # ConceptReference overrides all three because its answers depend on
  # ref_type and source.
  #
  # A new reference class only needs to include this module to take part in
  # mixed-collection validation rules.
  module Reference
    # @return [false] not an inline cite mention by default
    def cite? = false

    # @return [false] not a local concept cross-reference by default
    def local? = false

    # @return [false] not an external concept cross-reference by default
    def external? = false
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rdf/turtle"
|
|
4
|
+
require "shacl"
|
|
5
|
+
require "pathname"
|
|
6
|
+
require "glossarist/validation/shacl_validator"
|
|
7
|
+
|
|
8
|
+
namespace :glossarist do
  # The usage text previously advertised `path=` / `shapes=`. Rake turns such
  # NAME=value words into environment variables of that exact name, which
  # this task never read, and `shapes` was not a declared task argument, so
  # `args[:shapes]` could never be set. `shapes` is now a real (optional)
  # second task argument and the description names the inputs that work.
  desc "Validate all .ttl outputs against concept-model SHACL shapes. " \
       "Usage: glossarist:shacl[root_dir,path/to/shapes.ttl], or set " \
       "SHACL_PATH / SHAPES_PATH."
  task :shacl, %i[path shapes] do |_t, args|
    # Task arguments win over the environment; the root defaults to "compiled".
    shapes = args[:shapes] || ENV.fetch("SHAPES_PATH", nil)
    root = args[:path] || ENV.fetch("SHACL_PATH", "compiled")

    files = Pathname.glob("#{root}/**/*.ttl")
    if files.empty?
      warn "No .ttl files found under #{root}"
      exit 1
    end

    validator = Glossarist::Validation::ShaclValidator.new(shapes_path: shapes)
    report = validator.validate_files(files.map(&:to_s))
    if report.conformant?
      puts "All #{files.length} .ttl file(s) conform to SHACL shapes."
    else
      # Non-conformance is a failure for the calling build.
      warn report.to_s
      exit 1
    end
  end
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../tasks/sync_model"
|
|
4
|
+
|
|
5
|
+
namespace :glossarist do
  namespace :sync do
    desc "Sync vendored concept-model data from upstream. " \
         "Pass ref=[tag|branch|sha] to pin a specific version."
    task :model, [:ref] do |_t, args|
      # Rake exposes a `ref=...` word on its command line as the environment
      # variable "ref" (exact case), so read that spelling as documented in
      # the description, and keep accepting the task argument and REF.
      ref = args[:ref] || ENV.fetch("ref", nil) || ENV.fetch("REF", nil)
      Glossarist::Tasks::SyncModel.call(ref: ref)
    end
  end
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require "net/http"
|
|
6
|
+
require "uri"
|
|
7
|
+
|
|
8
|
+
module Glossarist
  module Tasks
    # Syncs vendored concept-model data artifacts from glossarist/concept-model.
    #
    # concept-model is a data-only repo (TTL, JSON-LD, YAML schemas). It is
    # not a gem. We vendor the small set of artifacts we need (shapes,
    # context, ontology) and load them at runtime via ShaclValidator.
    module SyncModel
      REPO = "glossarist/concept-model"
      OUT_DIR = File.expand_path("data/concept-model", File.join(__dir__, "..", "..", ".."))

      # Output path (relative to OUT_DIR) => upstream paths to try, in order.
      TARGETS = {
        "glossarist.context.jsonld" => %w[
          ontologies/glossarist.context.jsonld
          glossarist.context.jsonld
        ].freeze,
        "glossarist.ttl" => %w[
          ontologies/glossarist.ttl
          glossarist.ttl
        ].freeze,
        "shapes/glossarist.shacl.ttl" => %w[
          ontologies/shapes/glossarist.shacl.ttl
          shapes/glossarist.shacl.ttl
        ].freeze,
      }.freeze

      class << self
        # Downloads every target at +ref+ into OUT_DIR and records the
        # source in SOURCE.json.
        #
        # @param ref [String, nil] tag, branch or sha; the latest release tag
        #   is looked up when nil
        # @raise [RuntimeError] when none of a target's candidates can be
        #   fetched
        def call(ref: nil)
          ref ||= latest_tag
          FileUtils.mkdir_p(File.join(OUT_DIR, "shapes"))

          TARGETS.each do |out_rel, candidates|
            content = fetch_any(ref, candidates)
            out_path = File.join(OUT_DIR, out_rel)
            FileUtils.mkdir_p(File.dirname(out_path))
            File.write(out_path, content)
            # bytesize, not length: the message reports bytes.
            puts "  ✓ #{out_rel} (#{content.bytesize} bytes)"
          end

          write_source_manifest(ref)
          puts "\nSynced #{TARGETS.length} file(s) from #{REPO}@#{ref}."
        end

        private

        # Tag name of the latest GitHub release of REPO. Prints a message and
        # exits with status 1 when it cannot be determined.
        def latest_tag
          url = URI("https://api.github.com/repos/#{REPO}/releases/latest")
          req = Net::HTTP::Get.new(url)
          req["Accept"] = "application/vnd.github+json"
          response = Net::HTTP.start(url.hostname, url.port, use_ssl: true) do |http|
            http.request(req)
          end
          JSON.parse(response.body).fetch("tag_name")
        rescue StandardError => e
          warn "Could not determine latest concept-model tag: #{e.message}"
          exit 1
        end

        # Returns the content of the first candidate path that can be
        # fetched at +ref+.
        #
        # @raise [RuntimeError] listing every candidate and why it failed
        def fetch_any(ref, candidates)
          failures = []
          candidates.each do |path|
            return fetch_file(ref, path)
          rescue StandardError => e
            failures << "#{path}: #{e.message}"
          end

          raise "Could not fetch any of: #{candidates.join(', ')} " \
                "(#{failures.join('; ')})"
        end

        # Fetches one raw file from REPO at +ref+.
        #
        # Net::HTTP.get hands back the body of any response, including a 404
        # page, which would then be written out as if it were the artifact
        # and would keep fetch_any from ever trying its next candidate. The
        # status is therefore checked explicitly.
        #
        # @raise [RuntimeError] when the response is not a 2xx
        def fetch_file(ref, path)
          url = URI("https://raw.githubusercontent.com/#{REPO}/#{ref}/#{path}")
          response = Net::HTTP.get_response(url)
          unless response.is_a?(Net::HTTPSuccess)
            raise "GET #{url} failed: #{response.code} #{response.message}"
          end

          response.body
        end

        # Writes SOURCE.json (repo, ref, UTC sync time) next to the artifacts.
        def write_source_manifest(ref)
          # Time#iso8601 is provided by the "time" library on Rubies where it
          # is not built in; load it here since this is its only user.
          require "time"

          manifest = {
            "repo" => REPO,
            "ref" => ref,
            "syncedAt" => Time.now.utc.iso8601,
          }
          File.write(File.join(OUT_DIR, "SOURCE.json"),
                     "#{JSON.pretty_generate(manifest)}\n")
        end
      end
    end
  end
end
|
|
@@ -122,10 +122,11 @@ module Glossarist
|
|
|
122
122
|
|
|
123
123
|
dd_attrs = if data
|
|
124
124
|
data.class.detailed_definition_fields.to_h do |field|
|
|
125
|
-
|
|
125
|
+
key = field == :definition ? :definitions : field
|
|
126
|
+
[key, build_gloss_definitions(data.public_send(field))]
|
|
126
127
|
end
|
|
127
128
|
else
|
|
128
|
-
{
|
|
129
|
+
{ definitions: [], notes: [], examples: [] }
|
|
129
130
|
end
|
|
130
131
|
|
|
131
132
|
sources = build_gloss_sources(data&.sources)
|