glossarist 2.5.1 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,15 @@ require "fileutils"
5
5
 
6
6
  module Glossarist
7
7
  class GcrPackage
8
+ COMPILED_EXTENSIONS = {
9
+ "tbx" => "tbx.xml",
10
+ "jsonld" => "jsonld",
11
+ "turtle" => "ttl",
12
+ "jsonl" => "jsonl",
13
+ }.freeze
14
+
15
+ KNOWN_COMPILED_FORMATS = COMPILED_EXTENSIONS.keys.freeze
16
+
8
17
  attr_reader :zip_path, :metadata, :concepts
9
18
 
10
19
  def initialize(zip_path)
@@ -13,16 +22,18 @@ module Glossarist
13
22
  @concepts = []
14
23
  end
15
24
 
16
- def self.create(concepts:, metadata:, output_path:, register_data: nil)
25
+ def self.create(concepts:, metadata:, output_path:, register_data: nil,
26
+ compiled_formats: [], **opts)
17
27
  FileUtils.mkdir_p(File.dirname(output_path))
18
28
  package = new(output_path)
19
- package.send(:write, concepts, metadata, register_data)
29
+ package.write(concepts, metadata, register_data,
30
+ compiled_formats: compiled_formats, **opts)
20
31
  package
21
32
  end
22
33
 
23
34
  def self.load(zip_path)
24
35
  package = new(zip_path)
25
- package.send(:read)
36
+ package.read
26
37
  package
27
38
  end
28
39
 
@@ -30,8 +41,14 @@ module Glossarist
30
41
  title: nil, description: nil, owner: nil,
31
42
  tags: [], register_yaml: nil,
32
43
  uri_prefix: nil, concept_uri_template: nil,
33
- streaming: false)
44
+ streaming: false, compiled_formats: [])
34
45
  dir = File.expand_path(dir)
46
+ formats = Array(compiled_formats).map(&:to_s)
47
+
48
+ if streaming && formats.any?
49
+ raise ArgumentError,
50
+ "Compiled formats require batch mode (streaming: true is incompatible)"
51
+ end
35
52
 
36
53
  if streaming
37
54
  create_streaming(dir, output: output, shortname: shortname, version: version,
@@ -44,7 +61,8 @@ module Glossarist
44
61
  title: title, description: description, owner: owner,
45
62
  tags: tags, register_yaml: register_yaml,
46
63
  uri_prefix: uri_prefix,
47
- concept_uri_template: concept_uri_template)
64
+ concept_uri_template: concept_uri_template,
65
+ compiled_formats: formats)
48
66
  end
49
67
  end
50
68
 
@@ -52,9 +70,8 @@ module Glossarist
52
70
  GcrValidator.new.validate(@zip_path)
53
71
  end
54
72
 
55
- private
56
-
57
- def write(concepts, metadata, register_data)
73
+ def write(concepts, metadata, register_data, compiled_formats: [],
74
+ shortname: nil, **opts)
58
75
  Zip::File.open(@zip_path, create: true) do |zf|
59
76
  zf.get_output_stream("metadata.yaml") do |f|
60
77
  f.write(metadata.to_yaml)
@@ -69,6 +86,11 @@ module Glossarist
69
86
  concepts.each do |mc|
70
87
  write_concept(zf, mc)
71
88
  end
89
+
90
+ if compiled_formats.any?
91
+ write_compiled(zf, concepts, compiled_formats, shortname: shortname,
92
+ **opts)
93
+ end
72
94
  end
73
95
  end
74
96
 
@@ -98,10 +120,67 @@ module Glossarist
98
120
  end
99
121
  end
100
122
 
123
+ def write_compiled(zip_file, concepts, formats, shortname: nil, **opts)
124
+ name = shortname || "glossary"
125
+ transform_opts = { shortname: name }.merge(opts.slice(:site_url,
126
+ :uri_prefix, :title))
127
+
128
+ if formats.include?("tbx")
129
+ write_compiled_tbx(zip_file, concepts, transform_opts, name)
130
+ end
131
+
132
+ skos_formats = formats & %w[jsonld turtle jsonl]
133
+ if skos_formats.any?
134
+ write_compiled_skos(zip_file, concepts, skos_formats, transform_opts,
135
+ name)
136
+ end
137
+
138
+ (formats - KNOWN_COMPILED_FORMATS).each do |fmt|
139
+ warn "Warning: Unknown compiled format '#{fmt}', skipping"
140
+ end
141
+ end
142
+
143
+ def write_compiled_tbx(zip_file, concepts, opts, name)
144
+ require "glossarist/transforms/concept_to_tbx_transform"
145
+ doc = Transforms::ConceptToTbxTransform.transform_document(concepts, opts)
146
+ zip_file.get_output_stream("compiled/#{name}.tbx.xml") do |f|
147
+ f.write(doc.to_xml)
148
+ end
149
+ end
150
+
151
+ def write_compiled_skos(zip_file, concepts, formats, opts, name) # rubocop:disable Metrics/MethodLength
152
+ require "glossarist/transforms/concept_to_skos_transform"
153
+ vocab = Transforms::ConceptToSkosTransform.transform_document(concepts,
154
+ opts)
155
+
156
+ if formats.include?("jsonld")
157
+ zip_file.get_output_stream("compiled/#{name}.jsonld") do |f|
158
+ f.write(vocab.to_jsonld)
159
+ end
160
+ end
161
+
162
+ if formats.include?("turtle")
163
+ zip_file.get_output_stream("compiled/#{name}.ttl") do |f|
164
+ f.write(vocab.to_turtle)
165
+ end
166
+ end
167
+
168
+ return unless formats.include?("jsonl")
169
+
170
+ zip_file.get_output_stream("compiled/#{name}.jsonl") do |f|
171
+ concepts.each do |concept|
172
+ skos = Transforms::ConceptToSkosTransform.transform(concept, opts)
173
+ f.write(skos.to_jsonld)
174
+ f.write("\n")
175
+ end
176
+ end
177
+ end
178
+
101
179
  class << self
102
180
  private
103
181
 
104
- def create_batch(dir, output:, shortname:, version:, **opts)
182
+ def create_batch(dir, output:, shortname:, version:,
183
+ compiled_formats: [], **opts)
105
184
  concepts = ConceptCollector.collect(dir)
106
185
  if concepts.empty?
107
186
  raise ArgumentError,
@@ -117,13 +196,17 @@ module Glossarist
117
196
 
118
197
  register_data = load_register_data(opts[:register_yaml], dir)
119
198
  metadata = build_metadata(concepts, shortname: shortname, version: version,
120
- register_data: register_data, **opts)
199
+ register_data: register_data,
200
+ compiled_formats: compiled_formats, **opts)
121
201
 
122
202
  create(
123
203
  concepts: concepts,
124
204
  metadata: metadata,
125
205
  register_data: register_data,
126
206
  output_path: File.expand_path(output),
207
+ compiled_formats: compiled_formats,
208
+ shortname: shortname,
209
+ **opts,
127
210
  )
128
211
  end
129
212
 
@@ -195,7 +278,7 @@ module Glossarist
195
278
  end
196
279
 
197
280
  def build_metadata(concepts, shortname:, version:, register_data: nil,
198
- **opts)
281
+ compiled_formats: [], **opts)
199
282
  GcrMetadata.from_concepts(
200
283
  concepts,
201
284
  register_data: register_data,
@@ -208,6 +291,7 @@ module Glossarist
208
291
  tags: opts[:tags],
209
292
  uri_prefix: opts[:uri_prefix],
210
293
  concept_uri_template: opts[:concept_uri_template],
294
+ compiled_formats: compiled_formats,
211
295
  },
212
296
  )
213
297
  end
@@ -29,7 +29,11 @@ module Glossarist
29
29
  end
30
30
 
31
31
  def self.count_with(l10ns, attr)
32
- l10ns.count { |l| l.data.send(attr)&.any? }
32
+ case attr
33
+ when :definition then l10ns.count { |l| l.data.definition&.any? }
34
+ when :sources then l10ns.count { |l| l.data.sources&.any? }
35
+ else 0
36
+ end
33
37
  end
34
38
  end
35
39
  end
@@ -126,6 +126,16 @@ module Glossarist
126
126
  # Returns concept localization.
127
127
  # @param lang [String] language code
128
128
  # @return [LocalizedConcept]
129
+ def to_jsonld
130
+ require "glossarist/transforms/concept_to_skos_transform"
131
+ Transforms::ConceptToSkosTransform.transform(self).to_jsonld
132
+ end
133
+
134
+ def to_turtle
135
+ require "glossarist/transforms/concept_to_skos_transform"
136
+ Transforms::ConceptToSkosTransform.transform(self).to_turtle
137
+ end
138
+
129
139
  def default_designation
130
140
  localized = localization("eng") || localizations.values.first
131
141
  terms = localized&.preferred_terms&.first || localized&.terms&.first
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Rdf
5
+ class LocalizedLiteral < Lutaml::Model::Serializable
6
+ include Lutaml::Rdf::LanguageTagged
7
+
8
+ attribute :value, :string
9
+ attribute :language_code, :string
10
+
11
+ key_value do
12
+ map :value, to: :value
13
+ map :language_code, to: :language_code
14
+ end
15
+
16
+ def language_tag
17
+ language_code
18
+ end
19
+
20
+ def to_s
21
+ value.to_s
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Rdf
5
+ module Namespaces
6
+ class DctermsNamespace < Lutaml::Rdf::Namespace
7
+ uri "http://purl.org/dc/terms/"
8
+ prefix "dcterms"
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Rdf
5
+ module Namespaces
6
+ class SkosNamespace < Lutaml::Rdf::Namespace
7
+ uri "http://www.w3.org/2004/02/skos/core#"
8
+ prefix "skos"
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Rdf
5
+ module Namespaces
6
+ autoload :SkosNamespace, "#{__dir__}/namespaces/skos_namespace"
7
+ autoload :DctermsNamespace, "#{__dir__}/namespaces/dcterms_namespace"
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/turtle"
4
+ require "lutaml/jsonld"
5
+ require_relative "../rdf"
6
+
7
+ module Glossarist
8
+ module Rdf
9
+ class SkosConcept < Lutaml::Model::Serializable
10
+ attribute :code, :string
11
+ attribute :labels, LocalizedLiteral, collection: true
12
+ attribute :definitions, LocalizedLiteral, collection: true
13
+ attribute :alt_labels, LocalizedLiteral, collection: true
14
+ attribute :scope_notes, LocalizedLiteral, collection: true
15
+ attribute :sources, :string, collection: true
16
+ attribute :domain, :string
17
+ attribute :date_accepted, :string
18
+
19
+ rdf do
20
+ namespace Namespaces::SkosNamespace, Namespaces::DctermsNamespace
21
+
22
+ subject { |c| "https://glossarist.org/concept/#{c.code}" }
23
+ type "skos:Concept"
24
+
25
+ predicate :notation, namespace: Namespaces::SkosNamespace,
26
+ to: :code
27
+ predicate :prefLabel, namespace: Namespaces::SkosNamespace,
28
+ to: :labels, lang_tagged: true
29
+ predicate :definition, namespace: Namespaces::SkosNamespace,
30
+ to: :definitions, lang_tagged: true
31
+ predicate :altLabel, namespace: Namespaces::SkosNamespace,
32
+ to: :alt_labels, lang_tagged: true
33
+ predicate :scopeNote, namespace: Namespaces::SkosNamespace,
34
+ to: :scope_notes, lang_tagged: true
35
+ predicate :subject, namespace: Namespaces::DctermsNamespace,
36
+ to: :domain
37
+ predicate :source, namespace: Namespaces::DctermsNamespace,
38
+ to: :sources
39
+ predicate :dateAccepted, namespace: Namespaces::DctermsNamespace,
40
+ to: :date_accepted
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/turtle"
4
+ require "lutaml/jsonld"
5
+ require_relative "../rdf"
6
+
7
+ module Glossarist
8
+ module Rdf
9
+ class SkosVocabulary < Lutaml::Model::Serializable
10
+ attribute :id, :string
11
+ attribute :title, :string
12
+ attribute :concepts, SkosConcept, collection: true
13
+
14
+ rdf do
15
+ namespace Namespaces::SkosNamespace, Namespaces::DctermsNamespace
16
+
17
+ subject { |v| "https://glossarist.org/vocab/#{v.id}" }
18
+ type "skos:ConceptScheme"
19
+
20
+ predicate :prefLabel, namespace: Namespaces::SkosNamespace, to: :title
21
+
22
+ members :concepts
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Rdf
5
+ autoload :Namespaces, "#{__dir__}/rdf/namespaces"
6
+ autoload :LocalizedLiteral, "#{__dir__}/rdf/localized_literal"
7
+ autoload :SkosConcept, "#{__dir__}/rdf/skos_concept"
8
+ autoload :SkosVocabulary, "#{__dir__}/rdf/skos_vocabulary"
9
+ end
10
+ end
@@ -41,9 +41,7 @@ module Glossarist
41
41
  def designations_for(concept)
42
42
  if concept.is_a?(ManagedConcept)
43
43
  concept.localizations.flat_map do |l10n|
44
- l10n.data.terms.filter_map do |t|
45
- t.respond_to?(:designation) ? t.designation : nil
46
- end
44
+ l10n.data.terms.filter_map(&:designation)
47
45
  end
48
46
  else
49
47
  concept.each_value.flat_map do |lang_block|
@@ -0,0 +1,133 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../rdf"
4
+
5
+ module Glossarist
6
+ module Transforms
7
+ class ConceptToSkosTransform
8
+ def self.transform(managed_concept, options = {})
9
+ new(managed_concept, options).build
10
+ end
11
+
12
+ def self.transform_document(concepts, options = {})
13
+ Rdf::SkosVocabulary.new(
14
+ id: options[:shortname] || "glossary",
15
+ title: options[:title],
16
+ concepts: concepts.map { |c| transform(c, options) },
17
+ )
18
+ end
19
+
20
+ def initialize(managed_concept, options = {})
21
+ @concept = managed_concept
22
+ @options = options
23
+ end
24
+
25
+ def build
26
+ Rdf::SkosConcept.new(
27
+ code: concept_code,
28
+ labels: build_labels,
29
+ definitions: build_definitions,
30
+ alt_labels: build_alt_labels,
31
+ scope_notes: build_scope_notes,
32
+ domain: build_domain,
33
+ sources: build_sources,
34
+ date_accepted: build_date_accepted,
35
+ )
36
+ end
37
+
38
+ private
39
+
40
+ attr_reader :concept, :options
41
+
42
+ def concept_code
43
+ concept.data&.id || concept.identifier
44
+ end
45
+
46
+ def build_labels
47
+ each_localization.filter_map do |lang, l10n|
48
+ term = l10n.preferred_terms&.first || l10n.terms&.first
49
+ next unless term
50
+
51
+ Rdf::LocalizedLiteral.new(
52
+ value: term.designation.to_s,
53
+ language_code: lang,
54
+ )
55
+ end
56
+ end
57
+
58
+ def build_alt_labels
59
+ each_localization.flat_map do |lang, l10n|
60
+ preferred_term = l10n.preferred_terms&.first || l10n.terms&.first
61
+ (l10n.terms || []).reject do |t|
62
+ t == preferred_term
63
+ end.filter_map do |term|
64
+ next unless term.designation
65
+
66
+ Rdf::LocalizedLiteral.new(
67
+ value: term.designation.to_s,
68
+ language_code: lang,
69
+ )
70
+ end
71
+ end
72
+ end
73
+
74
+ def build_definitions
75
+ each_localization.filter_map do |lang, l10n|
76
+ content = l10n.data&.definition&.first&.content
77
+ next unless content
78
+
79
+ Rdf::LocalizedLiteral.new(
80
+ value: content.to_s,
81
+ language_code: lang,
82
+ )
83
+ end
84
+ end
85
+
86
+ def build_scope_notes
87
+ each_localization.filter_map do |lang, l10n|
88
+ note = l10n.data&.notes&.first&.content
89
+ next unless note
90
+
91
+ Rdf::LocalizedLiteral.new(
92
+ value: note.to_s,
93
+ language_code: lang,
94
+ )
95
+ end
96
+ end
97
+
98
+ def build_domain
99
+ l10n = concept.localizations.first
100
+ l10n&.data&.domain
101
+ end
102
+
103
+ def build_sources
104
+ each_localization.flat_map do |_lang, l10n|
105
+ Array(l10n.data&.sources).select(&:authoritative?).filter_map do |src|
106
+ origin = src.origin
107
+ next unless origin
108
+
109
+ origin.ref || origin.text
110
+ end
111
+ end.uniq
112
+ end
113
+
114
+ def build_date_accepted
115
+ date = concept.date_accepted
116
+ return unless date
117
+
118
+ date.date&.iso8601
119
+ end
120
+
121
+ def each_localization
122
+ return enum_for(:each_localization) unless block_given?
123
+
124
+ concept.localizations.each do |l10n|
125
+ lang = l10n.language_code || l10n.data&.language_code
126
+ next unless lang
127
+
128
+ yield lang, l10n
129
+ end
130
+ end
131
+ end
132
+ end
133
+ end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tbx"
4
+
5
+ module Glossarist
6
+ module Transforms
7
+ class ConceptToTbxTransform
8
+ def self.transform(managed_concept, options = {})
9
+ new(managed_concept, options).build_entry
10
+ end
11
+
12
+ def self.transform_document(concepts, options = {})
13
+ doc = Tbx::Document.new
14
+ body = Tbx::Body.new
15
+ body.concept_entry = concepts.map { |c| transform(c, options) }
16
+ text = Tbx::TextElement.new
17
+ text.body = body
18
+ doc.text = text
19
+
20
+ if options[:title]
21
+ header = doc.tbx_header || Tbx::TbxHeader.new
22
+ file_desc = Tbx::FileDesc.new
23
+ title_stmt = Tbx::TitleStmt.new
24
+ title = Tbx::Title.new
25
+ title.content = options[:title]
26
+ title_stmt.title = title
27
+ file_desc.title_stmt = title_stmt
28
+ header.file_desc = file_desc
29
+ doc.tbx_header = header
30
+ end
31
+
32
+ doc
33
+ end
34
+
35
+ def initialize(managed_concept, options = {})
36
+ @concept = managed_concept
37
+ @options = options
38
+ end
39
+
40
+ def build_entry
41
+ entry = Tbx::ConceptEntry.new
42
+ entry.id = concept_id
43
+ entry.lang_sec = build_lang_sections
44
+ entry
45
+ end
46
+
47
+ private
48
+
49
+ attr_reader :concept, :options
50
+
51
+ def concept_id
52
+ prefix = options[:shortname]
53
+ id = concept.data&.id || concept.identifier
54
+ prefix ? "#{prefix}_#{id}" : id.to_s
55
+ end
56
+
57
+ def build_lang_sections
58
+ concept.localizations.filter_map do |l10n|
59
+ lang = l10n.language_code
60
+ next unless lang
61
+
62
+ ls = Tbx::LangSec.new
63
+ ls.lang = lang
64
+
65
+ term = l10n.preferred_terms&.first || l10n.terms&.first
66
+ if term&.designation
67
+ ts = Tbx::TermSec.new
68
+ t = Tbx::Term.new
69
+ t.content = term.designation.to_s
70
+ ts.term = t
71
+ ls.term_sec = ts
72
+ end
73
+
74
+ definition = l10n.data&.definition&.first&.content
75
+ if definition
76
+ ds = Tbx::Descrip.new
77
+ ds.content = definition.to_s
78
+ ls.descrip = ds
79
+ end
80
+
81
+ ls
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Transforms
5
+ autoload :ConceptToSkosTransform,
6
+ "glossarist/transforms/concept_to_skos_transform"
7
+ autoload :ConceptToTbxTransform,
8
+ "glossarist/transforms/concept_to_tbx_transform"
9
+ end
10
+ end
@@ -4,5 +4,5 @@
4
4
  #
5
5
 
6
6
  module Glossarist
7
- VERSION = "2.5.1"
7
+ VERSION = "2.6.0"
8
8
  end
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file