glossarist 2.6.5 → 2.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/release.yml +1 -4
- data/.rubocop_todo.yml +53 -2
- data/CLAUDE.md +27 -2
- data/README.adoc +532 -56
- data/config.yml +68 -1
- data/glossarist.gemspec +2 -0
- data/lib/glossarist/citation.rb +26 -123
- data/lib/glossarist/cli/compare_command.rb +106 -0
- data/lib/glossarist/cli/export_command.rb +11 -14
- data/lib/glossarist/cli/validate_command.rb +111 -20
- data/lib/glossarist/cli.rb +18 -0
- data/lib/glossarist/collections/bibliography_collection.rb +4 -2
- data/lib/glossarist/collections/localization_collection.rb +2 -0
- data/lib/glossarist/comparison_result.rb +35 -0
- data/lib/glossarist/concept.rb +1 -1
- data/lib/glossarist/concept_collector.rb +44 -0
- data/lib/glossarist/concept_comparator.rb +72 -0
- data/lib/glossarist/concept_data.rb +20 -0
- data/lib/glossarist/concept_diff.rb +15 -0
- data/lib/glossarist/concept_document.rb +11 -0
- data/lib/glossarist/concept_manager.rb +19 -5
- data/lib/glossarist/concept_ref.rb +13 -0
- data/lib/glossarist/concept_reference.rb +12 -19
- data/lib/glossarist/concept_validator.rb +6 -1
- data/lib/glossarist/context_configuration.rb +90 -0
- data/lib/glossarist/dataset_validator.rb +8 -4
- data/lib/glossarist/designation/abbreviation.rb +0 -2
- data/lib/glossarist/designation/base.rb +21 -1
- data/lib/glossarist/designation/expression.rb +3 -0
- data/lib/glossarist/designation/letter_symbol.rb +0 -4
- data/lib/glossarist/designation/prefix.rb +17 -0
- data/lib/glossarist/designation/suffix.rb +17 -0
- data/lib/glossarist/designation/symbol.rb +0 -2
- data/lib/glossarist/gcr_metadata.rb +7 -14
- data/lib/glossarist/gcr_package.rb +35 -23
- data/lib/glossarist/gcr_validator.rb +38 -17
- data/lib/glossarist/glossary_definition.rb +5 -0
- data/lib/glossarist/localized_concept.rb +8 -0
- data/lib/glossarist/managed_concept.rb +39 -6
- data/lib/glossarist/managed_concept_data.rb +22 -2
- data/lib/glossarist/non_verb_rep.rb +21 -6
- data/lib/glossarist/pronunciation.rb +32 -0
- data/lib/glossarist/rdf/ext/jsonld_transform_ext.rb +208 -0
- data/lib/glossarist/rdf/ext/mapping_ext.rb +37 -0
- data/lib/glossarist/rdf/ext/mapping_rule_ext.rb +27 -0
- data/lib/glossarist/rdf/ext/member_rule_ext.rb +34 -0
- data/lib/glossarist/rdf/ext/turtle_transform_ext.rb +222 -0
- data/lib/glossarist/rdf/ext.rb +39 -0
- data/lib/glossarist/rdf/gloss_citation.rb +36 -0
- data/lib/glossarist/rdf/gloss_concept.rb +58 -0
- data/lib/glossarist/rdf/gloss_concept_date.rb +24 -0
- data/lib/glossarist/rdf/gloss_concept_reference.rb +29 -0
- data/lib/glossarist/rdf/gloss_concept_source.rb +37 -0
- data/lib/glossarist/rdf/gloss_designation.rb +146 -0
- data/lib/glossarist/rdf/gloss_detailed_definition.rb +24 -0
- data/lib/glossarist/rdf/gloss_grammar_info.rb +57 -0
- data/lib/glossarist/rdf/gloss_locality.rb +25 -0
- data/lib/glossarist/rdf/gloss_localized_concept.rb +67 -0
- data/lib/glossarist/rdf/gloss_non_verbal_rep.rb +31 -0
- data/lib/glossarist/rdf/gloss_pronunciation.rb +32 -0
- data/lib/glossarist/rdf/gloss_reference.rb +55 -0
- data/lib/glossarist/rdf/namespaces/glossarist_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/iso_thes_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/owl_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/prov_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/rdf_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/skosxl_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces.rb +8 -2
- data/lib/glossarist/rdf/relationships.rb +19 -0
- data/lib/glossarist/rdf/v3/configuration.rb +15 -0
- data/lib/glossarist/rdf/v3.rb +79 -0
- data/lib/glossarist/rdf.rb +22 -2
- data/lib/glossarist/reference_extractor.rb +15 -24
- data/lib/glossarist/reference_resolver.rb +3 -3
- data/lib/glossarist/related_concept.rb +2 -10
- data/lib/glossarist/schema_migration.rb +39 -0
- data/lib/glossarist/sts/term_mapper.rb +2 -2
- data/lib/glossarist/transforms/concept_to_gloss_transform.rb +355 -0
- data/lib/glossarist/transforms.rb +2 -2
- data/lib/glossarist/urn_resolver.rb +13 -1
- data/lib/glossarist/v1/concept.rb +18 -11
- data/lib/glossarist/v2/citation.rb +36 -0
- data/lib/glossarist/v2/concept_data.rb +46 -0
- data/lib/glossarist/v2/concept_document.rb +18 -0
- data/lib/glossarist/v2/concept_ref.rb +8 -0
- data/lib/glossarist/v2/concept_source.rb +16 -0
- data/lib/glossarist/v2/configuration.rb +13 -0
- data/lib/glossarist/v2/detailed_definition.rb +14 -0
- data/lib/glossarist/v2/localized_concept.rb +9 -0
- data/lib/glossarist/v2/managed_concept.rb +25 -0
- data/lib/glossarist/v2/managed_concept_data.rb +49 -0
- data/lib/glossarist/v2/related_concept.rb +15 -0
- data/lib/glossarist/v2.rb +28 -0
- data/lib/glossarist/v3/bibliography_entry.rb +19 -0
- data/lib/glossarist/v3/bibliography_file.rb +27 -0
- data/lib/glossarist/v3/citation.rb +30 -0
- data/lib/glossarist/v3/concept_data.rb +46 -0
- data/lib/glossarist/v3/concept_document.rb +18 -0
- data/lib/glossarist/v3/concept_ref.rb +8 -0
- data/lib/glossarist/v3/concept_source.rb +16 -0
- data/lib/glossarist/v3/configuration.rb +13 -0
- data/lib/glossarist/v3/detailed_definition.rb +14 -0
- data/lib/glossarist/v3/image_entry.rb +21 -0
- data/lib/glossarist/v3/image_file.rb +31 -0
- data/lib/glossarist/v3/localized_concept.rb +9 -0
- data/lib/glossarist/v3/managed_concept.rb +26 -0
- data/lib/glossarist/v3/managed_concept_data.rb +34 -0
- data/lib/glossarist/v3/related_concept.rb +15 -0
- data/lib/glossarist/v3.rb +36 -0
- data/lib/glossarist/validation/asset_index.rb +4 -3
- data/lib/glossarist/validation/bibliography_index.rb +61 -30
- data/lib/glossarist/validation/rules/asciidoc_xref_rule.rb +2 -15
- data/lib/glossarist/validation/rules/authoritative_source_rule.rb +2 -15
- data/lib/glossarist/validation/rules/base.rb +5 -0
- data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +2 -3
- data/lib/glossarist/validation/rules/citation_completeness_rule.rb +5 -27
- data/lib/glossarist/validation/rules/dataset_context.rb +8 -3
- data/lib/glossarist/validation/rules/date_validity_rule.rb +1 -1
- data/lib/glossarist/validation/rules/designation_status_rule.rb +0 -1
- data/lib/glossarist/validation/rules/designation_type_rule.rb +1 -5
- data/lib/glossarist/validation/rules/domain_ref_rule.rb +37 -0
- data/lib/glossarist/validation/rules/domain_target_rule.rb +56 -0
- data/lib/glossarist/validation/rules/gcr_context.rb +12 -13
- data/lib/glossarist/validation/rules/image_reference_rule.rb +2 -17
- data/lib/glossarist/validation/rules/locality_completeness_rule.rb +58 -0
- data/lib/glossarist/validation/rules/localization_consistency_rule.rb +72 -0
- data/lib/glossarist/validation/rules/localization_presence_rule.rb +1 -1
- data/lib/glossarist/validation/rules/model_validity_rule.rb +71 -0
- data/lib/glossarist/validation/rules/orphaned_bibliography_rule.rb +1 -13
- data/lib/glossarist/validation/rules/orphaned_images_rule.rb +16 -11
- data/lib/glossarist/validation/rules/ref_shape_rule.rb +68 -0
- data/lib/glossarist/validation/rules/related_concept_cycle_rule.rb +1 -3
- data/lib/glossarist/validation/rules/related_concept_symmetry_rule.rb +1 -3
- data/lib/glossarist/validation/rules/related_concept_target_rule.rb +64 -0
- data/lib/glossarist/validation/rules/schema_version_rule.rb +41 -0
- data/lib/glossarist/validation/rules/source_type_rule.rb +1 -15
- data/lib/glossarist/validation/rules/source_urn_format_rule.rb +65 -0
- data/lib/glossarist/validation/rules/uuid_format_rule.rb +33 -0
- data/lib/glossarist/validation/rules.rb +10 -43
- data/lib/glossarist/validation/validation_issue.rb +14 -11
- data/lib/glossarist/validation_result.rb +12 -22
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +10 -0
- data/memory/project-status.md +43 -0
- data/scripts/migrate_dataset.rb +180 -0
- data/scripts/migrate_isotc204_to_v3.rb +134 -0
- data/scripts/migrate_isotc211_to_v3.rb +153 -0
- data/scripts/migrate_osgeo_to_v3.rb +155 -0
- data/scripts/upgrade_dataset_to_v3.rb +47 -0
- metadata +112 -6
- data/TODO.integration/01-gcr-package-cli.md +0 -180
- data/lib/glossarist/rdf/skos_concept.rb +0 -43
- data/lib/glossarist/rdf/skos_vocabulary.rb +0 -25
- data/lib/glossarist/transforms/concept_to_skos_transform.rb +0 -131
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
class ComparisonResult < Lutaml::Model::Serializable
|
|
5
|
+
attribute :new_count, :integer
|
|
6
|
+
attribute :old_count, :integer
|
|
7
|
+
attribute :matched, :string, collection: true, initialize_empty: true
|
|
8
|
+
attribute :new_only, :string, collection: true, initialize_empty: true
|
|
9
|
+
attribute :old_only, :string, collection: true, initialize_empty: true
|
|
10
|
+
attribute :diffs, ConceptDiff, collection: true, initialize_empty: true
|
|
11
|
+
|
|
12
|
+
key_value do
|
|
13
|
+
map :new_count, to: :new_count
|
|
14
|
+
map :old_count, to: :old_count
|
|
15
|
+
map :matched, to: :matched
|
|
16
|
+
map :new_only, to: :new_only
|
|
17
|
+
map :old_only, to: :old_only
|
|
18
|
+
map :diffs, to: :diffs
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def summary
|
|
22
|
+
diff = new_count - old_count
|
|
23
|
+
change = if diff.positive?
|
|
24
|
+
"+#{diff} new"
|
|
25
|
+
elsif diff.negative?
|
|
26
|
+
"#{diff.abs} removed"
|
|
27
|
+
else
|
|
28
|
+
"no change"
|
|
29
|
+
end
|
|
30
|
+
"#{new_count} new, #{old_count} old (#{change}), " \
|
|
31
|
+
"#{matched.length} matched, #{new_only.length} new-only, " \
|
|
32
|
+
"#{old_only.length} old-only"
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
data/lib/glossarist/concept.rb
CHANGED
|
@@ -4,7 +4,7 @@ module Glossarist
|
|
|
4
4
|
attribute :id, :string
|
|
5
5
|
attribute :uuid, :string
|
|
6
6
|
attribute :subject, :string
|
|
7
|
-
attribute :non_verb_rep, :
|
|
7
|
+
attribute :non_verb_rep, NonVerbRep, collection: true
|
|
8
8
|
attribute :extension_attributes, :string
|
|
9
9
|
attribute :lineage_source, :string
|
|
10
10
|
attribute :localizations, :hash
|
|
@@ -35,9 +35,37 @@ module Glossarist
|
|
|
35
35
|
end
|
|
36
36
|
end
|
|
37
37
|
|
|
38
|
+
def self.count(dir)
|
|
39
|
+
dir = File.expand_path(dir)
|
|
40
|
+
return 0 unless File.directory?(dir)
|
|
41
|
+
|
|
42
|
+
if managed_concepts?(dir)
|
|
43
|
+
Dir.glob(File.join(dir, "concepts", "concept", "*.yaml")).length
|
|
44
|
+
elsif v2_concepts?(dir)
|
|
45
|
+
count_v2(dir)
|
|
46
|
+
elsif v1_concepts?(dir)
|
|
47
|
+
Dir.glob(File.join(dir, "concepts", "*.yaml")).length
|
|
48
|
+
else
|
|
49
|
+
0
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
38
53
|
class << self
|
|
39
54
|
private
|
|
40
55
|
|
|
56
|
+
def count_v2(dir)
|
|
57
|
+
if v2_flat_concepts?(dir)
|
|
58
|
+
Dir.glob(File.join(dir, "concepts", "*.yaml")).length
|
|
59
|
+
else
|
|
60
|
+
v2_dir = File.join(dir, "geolexica-v2")
|
|
61
|
+
if File.directory?(File.join(v2_dir, "concepts"))
|
|
62
|
+
Dir.glob(File.join(v2_dir, "concepts", "concept", "*.yaml")).length
|
|
63
|
+
else
|
|
64
|
+
Dir.glob(File.join(v2_dir, "*.yaml")).length
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
41
69
|
def v1_concepts?(dir)
|
|
42
70
|
concepts_dir = File.join(dir, "concepts")
|
|
43
71
|
File.directory?(concepts_dir) &&
|
|
@@ -124,6 +152,7 @@ module Glossarist
|
|
|
124
152
|
def each_grouped_v2_concepts(v2_dir, &block)
|
|
125
153
|
collection = ManagedConceptCollection.new
|
|
126
154
|
manager = ConceptManager.new(path: v2_dir)
|
|
155
|
+
manager.version = detect_schema_version(v2_dir)
|
|
127
156
|
manager.load_from_files(collection: collection)
|
|
128
157
|
collection.each(&block)
|
|
129
158
|
end
|
|
@@ -131,6 +160,7 @@ module Glossarist
|
|
|
131
160
|
def collect_grouped_v2_concepts(v2_dir)
|
|
132
161
|
collection = ManagedConceptCollection.new
|
|
133
162
|
manager = ConceptManager.new(path: v2_dir)
|
|
163
|
+
manager.version = detect_schema_version(v2_dir)
|
|
134
164
|
manager.load_from_files(collection: collection)
|
|
135
165
|
collection.to_a
|
|
136
166
|
end
|
|
@@ -178,6 +208,20 @@ module Glossarist
|
|
|
178
208
|
end
|
|
179
209
|
nil
|
|
180
210
|
end
|
|
211
|
+
|
|
212
|
+
def detect_schema_version(dir)
|
|
213
|
+
concepts_dir = File.join(dir, "concepts")
|
|
214
|
+
search_dir = File.directory?(concepts_dir) ? concepts_dir : dir
|
|
215
|
+
sample = Dir.glob(File.join(search_dir, "*.yaml")).first
|
|
216
|
+
return "2" unless sample
|
|
217
|
+
|
|
218
|
+
raw = File.read(sample, encoding: "utf-8")
|
|
219
|
+
doc = ConceptDocument.from_yamls(raw)
|
|
220
|
+
ver = doc.concept&.schema_version.to_s
|
|
221
|
+
ver == "3" ? "3" : "2"
|
|
222
|
+
rescue StandardError
|
|
223
|
+
"2"
|
|
224
|
+
end
|
|
181
225
|
end
|
|
182
226
|
end
|
|
183
227
|
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
class ConceptComparator
|
|
5
|
+
def initialize(new_concepts:, old_concepts:)
|
|
6
|
+
@new_concepts = new_concepts
|
|
7
|
+
@old_concepts = old_concepts
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def compare(show_diffs: true)
|
|
11
|
+
new_index = build_index(@new_concepts)
|
|
12
|
+
old_index = build_index(@old_concepts)
|
|
13
|
+
|
|
14
|
+
matched_ids = new_index.keys & old_index.keys
|
|
15
|
+
new_only_ids = new_index.keys - old_index.keys
|
|
16
|
+
old_only_ids = old_index.keys - new_index.keys
|
|
17
|
+
|
|
18
|
+
diffs = if show_diffs
|
|
19
|
+
compute_diffs(matched_ids, new_index, old_index)
|
|
20
|
+
else
|
|
21
|
+
[]
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
ComparisonResult.new(
|
|
25
|
+
new_count: @new_concepts.length,
|
|
26
|
+
old_count: @old_concepts.length,
|
|
27
|
+
matched: matched_ids.sort,
|
|
28
|
+
new_only: new_only_ids.sort,
|
|
29
|
+
old_only: old_only_ids.sort,
|
|
30
|
+
diffs: diffs,
|
|
31
|
+
)
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
private
|
|
35
|
+
|
|
36
|
+
def build_index(concepts)
|
|
37
|
+
concepts.each_with_object({}) do |concept, index|
|
|
38
|
+
id = extract_id(concept)
|
|
39
|
+
index[id] = concept if id
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def extract_id(concept)
|
|
44
|
+
concept.data&.id || concept.id
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def compute_diffs(matched_ids, new_index, old_index)
|
|
48
|
+
matched_ids.filter_map do |id|
|
|
49
|
+
new_concept = new_index[id]
|
|
50
|
+
old_concept = old_index[id]
|
|
51
|
+
|
|
52
|
+
score, tree = Lutaml::Model::Serialize.diff_with_score(
|
|
53
|
+
new_concept, old_concept,
|
|
54
|
+
show_unchanged: false,
|
|
55
|
+
highlight_diff: false,
|
|
56
|
+
indent: ""
|
|
57
|
+
)
|
|
58
|
+
similarity = ((1 - score) * 100).round(2)
|
|
59
|
+
|
|
60
|
+
ConceptDiff.new(
|
|
61
|
+
concept_id: id,
|
|
62
|
+
similarity: similarity,
|
|
63
|
+
diff_tree: strip_ansi(tree),
|
|
64
|
+
)
|
|
65
|
+
end.sort_by { |d| -d.similarity }
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def strip_ansi(text)
|
|
69
|
+
text.gsub(/\e\[[0-9;]*m/, "")
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
@@ -30,6 +30,8 @@ module Glossarist
|
|
|
30
30
|
# Language code should be exactly 3 char long.
|
|
31
31
|
# TODO: use min_length, max_length once added in lutaml-model
|
|
32
32
|
attribute :language_code, :string, pattern: /^.{3}$/
|
|
33
|
+
attribute :script, :string
|
|
34
|
+
attribute :system, :string
|
|
33
35
|
attribute :entry_status, :string
|
|
34
36
|
|
|
35
37
|
key_value do
|
|
@@ -48,6 +50,8 @@ module Glossarist
|
|
|
48
50
|
map :references, to: :references
|
|
49
51
|
map :domain, to: :domain
|
|
50
52
|
map %i[language_code languageCode], to: :language_code
|
|
53
|
+
map :script, to: :script
|
|
54
|
+
map :system, to: :system
|
|
51
55
|
map %i[entry_status entryStatus], to: :entry_status
|
|
52
56
|
map %i[review_date reviewDate], to: :review_date
|
|
53
57
|
map %i[review_decision_date reviewDecisionDate], to: :review_decision_date
|
|
@@ -74,5 +78,21 @@ module Glossarist
|
|
|
74
78
|
|
|
75
79
|
sources.select(&:authoritative?)
|
|
76
80
|
end
|
|
81
|
+
|
|
82
|
+
def all_sources
|
|
83
|
+
list = sources.to_a
|
|
84
|
+
definition.each { |d| list.concat(d.sources.to_a) }
|
|
85
|
+
notes.each { |n| list.concat(n.sources.to_a) }
|
|
86
|
+
examples.each { |e| list.concat(e.sources.to_a) }
|
|
87
|
+
list
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def text_content
|
|
91
|
+
texts = []
|
|
92
|
+
definition.each { |d| texts << d.content if d.content }
|
|
93
|
+
notes.each { |n| texts << n.content if n.content }
|
|
94
|
+
examples.each { |e| texts << e.content if e.content }
|
|
95
|
+
texts
|
|
96
|
+
end
|
|
77
97
|
end
|
|
78
98
|
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
class ConceptDiff < Lutaml::Model::Serializable
|
|
5
|
+
attribute :concept_id, :string
|
|
6
|
+
attribute :similarity, :float
|
|
7
|
+
attribute :diff_tree, :string
|
|
8
|
+
|
|
9
|
+
key_value do
|
|
10
|
+
map :concept_id, to: :concept_id
|
|
11
|
+
map :similarity, to: :similarity
|
|
12
|
+
map :diff_tree, to: :diff_tree
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
@@ -20,6 +20,17 @@ module Glossarist
|
|
|
20
20
|
)
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
+
def self.for_version(version)
|
|
24
|
+
case version.to_s
|
|
25
|
+
when "2"
|
|
26
|
+
require_relative "v2"
|
|
27
|
+
V2::ConceptDocument
|
|
28
|
+
else
|
|
29
|
+
require_relative "v3"
|
|
30
|
+
V3::ConceptDocument
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
23
34
|
def to_managed_concept
|
|
24
35
|
mc = concept
|
|
25
36
|
localizations.each { |l10n| mc.add_localization(l10n) }
|
|
@@ -2,6 +2,7 @@ module Glossarist
|
|
|
2
2
|
class ConceptManager < Lutaml::Model::Serializable
|
|
3
3
|
attribute :path, :string
|
|
4
4
|
attribute :localized_concepts_path, :string
|
|
5
|
+
attribute :version, :string, default: -> { "2" }
|
|
5
6
|
|
|
6
7
|
key_value do
|
|
7
8
|
map :path, to: :path
|
|
@@ -9,6 +10,19 @@ module Glossarist
|
|
|
9
10
|
to: :localized_concepts_path
|
|
10
11
|
end
|
|
11
12
|
|
|
13
|
+
def concept_document_class
|
|
14
|
+
ConceptDocument.for_version(version)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def localized_concept_class
|
|
18
|
+
if version.to_s == "2"
|
|
19
|
+
require_relative "v2"
|
|
20
|
+
V2::LocalizedConcept
|
|
21
|
+
else
|
|
22
|
+
LocalizedConcept
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
12
26
|
def load_from_files(collection: nil)
|
|
13
27
|
collection ||= ManagedConceptCollection.new
|
|
14
28
|
|
|
@@ -35,7 +49,7 @@ module Glossarist
|
|
|
35
49
|
|
|
36
50
|
def load_concept_from_file(filename) # rubocop:disable Metrics/CyclomaticComplexity
|
|
37
51
|
raw = File.read(filename, encoding: "utf-8")
|
|
38
|
-
doc =
|
|
52
|
+
doc = concept_document_class.from_yamls(raw)
|
|
39
53
|
concept = doc.concept
|
|
40
54
|
unless concept
|
|
41
55
|
raise Glossarist::ParseError.new(filename: filename)
|
|
@@ -44,7 +58,7 @@ module Glossarist
|
|
|
44
58
|
concept_uuid = concept.identifier || concept.data&.id || File.basename(
|
|
45
59
|
filename, ".*"
|
|
46
60
|
)
|
|
47
|
-
concept.
|
|
61
|
+
concept.uuid = concept_uuid
|
|
48
62
|
|
|
49
63
|
concept.data.localized_concepts.each_value do |id|
|
|
50
64
|
localized_concept = load_localized_concept(id, doc.localizations)
|
|
@@ -60,15 +74,15 @@ module Glossarist
|
|
|
60
74
|
if inline_localizations
|
|
61
75
|
l10n = inline_localizations.find { |l| l.id == id }
|
|
62
76
|
if l10n
|
|
63
|
-
l10n.
|
|
77
|
+
l10n.uuid = id
|
|
64
78
|
return l10n
|
|
65
79
|
end
|
|
66
80
|
end
|
|
67
81
|
|
|
68
|
-
l10n =
|
|
82
|
+
l10n = localized_concept_class.from_yaml(
|
|
69
83
|
File.read(localized_concept_path(id), encoding: "utf-8"),
|
|
70
84
|
)
|
|
71
|
-
l10n.
|
|
85
|
+
l10n.uuid = id
|
|
72
86
|
l10n
|
|
73
87
|
rescue Psych::SyntaxError => e
|
|
74
88
|
raise Glossarist::ParseError.new(filename: filename, line: e.line)
|
|
@@ -1,17 +1,29 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Glossarist
|
|
4
|
+
# A typed reference to another concept, either local (within the same
|
|
5
|
+
# glossary) or external (in another concept registry).
|
|
6
|
+
#
|
|
7
|
+
# Local references use +concept_id+ without +source+. External references
|
|
8
|
+
# use +source+ (a registry URN prefix) and +concept_id+ to identify the
|
|
9
|
+
# target concept, or a direct +urn+ field for formal URN references.
|
|
4
10
|
class ConceptReference < Lutaml::Model::Serializable
|
|
5
11
|
attribute :term, :string
|
|
6
12
|
attribute :concept_id, :string
|
|
7
13
|
attribute :source, :string
|
|
8
14
|
attribute :ref_type, :string
|
|
15
|
+
attribute :urn, :string
|
|
9
16
|
|
|
10
17
|
key_value do
|
|
11
18
|
map :term, to: :term
|
|
12
19
|
map :concept_id, to: :concept_id
|
|
13
20
|
map :source, to: :source
|
|
14
21
|
map :ref_type, to: :ref_type
|
|
22
|
+
map :urn, to: :urn
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def self.domain(concept_id)
|
|
26
|
+
new(concept_id: concept_id, ref_type: "domain")
|
|
15
27
|
end
|
|
16
28
|
|
|
17
29
|
def local?
|
|
@@ -23,25 +35,6 @@ module Glossarist
|
|
|
23
35
|
!local?
|
|
24
36
|
end
|
|
25
37
|
|
|
26
|
-
def to_urn
|
|
27
|
-
return nil unless external?
|
|
28
|
-
return nil unless source && concept_id
|
|
29
|
-
|
|
30
|
-
case source
|
|
31
|
-
when /\Aurn:iec/ then "#{source}-#{concept_id}"
|
|
32
|
-
when /\Aurn:iso/ then "#{source}:term:#{concept_id}"
|
|
33
|
-
else "#{source}/#{concept_id}"
|
|
34
|
-
end
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
def to_gcr_hash
|
|
38
|
-
h = { "term" => term }
|
|
39
|
-
h["concept_id"] = concept_id if concept_id
|
|
40
|
-
h["source"] = source if source
|
|
41
|
-
h["ref_type"] = ref_type if ref_type
|
|
42
|
-
h.compact
|
|
43
|
-
end
|
|
44
|
-
|
|
45
38
|
def dedup_key
|
|
46
39
|
concept_id ? [source, concept_id] : [source, concept_id, term]
|
|
47
40
|
end
|
|
@@ -4,8 +4,9 @@ module Glossarist
|
|
|
4
4
|
class ConceptValidator
|
|
5
5
|
attr_reader :path, :errors, :warnings
|
|
6
6
|
|
|
7
|
-
def initialize(path)
|
|
7
|
+
def initialize(path, on_progress: nil)
|
|
8
8
|
@path = path
|
|
9
|
+
@on_progress = on_progress
|
|
9
10
|
@errors = []
|
|
10
11
|
@warnings = []
|
|
11
12
|
end
|
|
@@ -14,9 +15,12 @@ module Glossarist
|
|
|
14
15
|
result = ValidationResult.new
|
|
15
16
|
context = Validation::Rules::DatasetContext.new(@path)
|
|
16
17
|
concept_rules = Validation::Rules::Registry.for_scope(:concept)
|
|
18
|
+
total = ConceptCollector.count(@path)
|
|
17
19
|
file_idx = 0
|
|
18
20
|
|
|
19
21
|
ConceptCollector.each_concept(@path) do |concept|
|
|
22
|
+
context.add_concept(concept)
|
|
23
|
+
|
|
20
24
|
fname = concept_file_name(concept, file_idx)
|
|
21
25
|
concept_context = Validation::Rules::ConceptContext.new(
|
|
22
26
|
concept, file_name: fname, collection_context: context
|
|
@@ -29,6 +33,7 @@ module Glossarist
|
|
|
29
33
|
end
|
|
30
34
|
|
|
31
35
|
file_idx += 1
|
|
36
|
+
@on_progress&.call(file_idx, total)
|
|
32
37
|
end
|
|
33
38
|
|
|
34
39
|
if file_idx.zero?
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
module ContextConfiguration
|
|
5
|
+
def context_id
|
|
6
|
+
self::CONTEXT_ID
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def context
|
|
10
|
+
Lutaml::Model::GlobalContext.context(context_id)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def create_context(
|
|
14
|
+
id:,
|
|
15
|
+
registry: nil,
|
|
16
|
+
fallback_to: [context_id],
|
|
17
|
+
substitutions: []
|
|
18
|
+
)
|
|
19
|
+
normalized_id = id.to_sym
|
|
20
|
+
|
|
21
|
+
return populate_context! if normalized_id == context_id
|
|
22
|
+
|
|
23
|
+
Lutaml::Model::GlobalContext.unregister_context(normalized_id) if Lutaml::Model::GlobalContext.context(normalized_id)
|
|
24
|
+
create_type_context(
|
|
25
|
+
id: normalized_id,
|
|
26
|
+
registry: registry || Lutaml::Model::TypeRegistry.new,
|
|
27
|
+
fallback_to: normalize_fallbacks(fallback_to),
|
|
28
|
+
substitutions: substitutions,
|
|
29
|
+
)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def populate_context!
|
|
33
|
+
Lutaml::Model::GlobalContext.unregister_context(context_id) if context
|
|
34
|
+
register_models_in(base_type_context)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def register_model(klass, id:)
|
|
38
|
+
normalized_id = id.to_sym
|
|
39
|
+
registered_models[normalized_id] = klass
|
|
40
|
+
(context || populate_base_context).registry.register(normalized_id, klass)
|
|
41
|
+
klass
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def resolve_model(id)
|
|
45
|
+
Lutaml::Model::GlobalContext.resolve_type(id, context_id)
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
def populate_base_context
|
|
51
|
+
base_type_context
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def create_type_context(id:, registry:, fallback_to:, substitutions: [])
|
|
55
|
+
Lutaml::Model::GlobalContext.create_context(
|
|
56
|
+
id: id,
|
|
57
|
+
registry: registry,
|
|
58
|
+
fallback_to: fallback_to,
|
|
59
|
+
substitutions: substitutions,
|
|
60
|
+
).tap do
|
|
61
|
+
Lutaml::Model::GlobalContext.clear_caches
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def base_type_context
|
|
66
|
+
create_type_context(
|
|
67
|
+
id: context_id,
|
|
68
|
+
registry: Lutaml::Model::TypeRegistry.new,
|
|
69
|
+
fallback_to: [:default],
|
|
70
|
+
)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def register_models_in(type_context)
|
|
74
|
+
registered_models.each do |model_id, klass|
|
|
75
|
+
type_context.registry.register(model_id, klass)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
Lutaml::Model::GlobalContext.clear_caches
|
|
79
|
+
type_context
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def normalize_fallbacks(fallback_to)
|
|
83
|
+
Array(fallback_to).map(&:to_sym)
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def registered_models
|
|
87
|
+
@registered_models ||= {}
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
module Glossarist
|
|
4
4
|
class DatasetValidator
|
|
5
|
+
def initialize(on_progress: nil)
|
|
6
|
+
@on_progress = on_progress
|
|
7
|
+
end
|
|
8
|
+
|
|
5
9
|
def validate(path, strict: false, reference_path: nil)
|
|
6
10
|
if File.extname(path).downcase == ".gcr"
|
|
7
11
|
validate_gcr(path, reference_path: reference_path)
|
|
@@ -13,7 +17,7 @@ module Glossarist
|
|
|
13
17
|
private
|
|
14
18
|
|
|
15
19
|
def validate_gcr(path, reference_path: nil)
|
|
16
|
-
result = GcrValidator.new.validate(path)
|
|
20
|
+
result = GcrValidator.new(on_progress: @on_progress).validate(path)
|
|
17
21
|
|
|
18
22
|
if reference_path
|
|
19
23
|
ref_result = validate_gcr_cross_references(path, reference_path)
|
|
@@ -24,7 +28,7 @@ module Glossarist
|
|
|
24
28
|
end
|
|
25
29
|
|
|
26
30
|
def validate_directory(path, reference_path: nil)
|
|
27
|
-
result = ConceptValidator.new(path).validate_all
|
|
31
|
+
result = ConceptValidator.new(path, on_progress: @on_progress).validate_all
|
|
28
32
|
|
|
29
33
|
if reference_path
|
|
30
34
|
ref_result = validate_directory_cross_references(path, reference_path)
|
|
@@ -38,7 +42,7 @@ module Glossarist
|
|
|
38
42
|
extractor = ReferenceExtractor.new
|
|
39
43
|
resolver = build_resolver(reference_path)
|
|
40
44
|
pkg = GcrPackage.load(path)
|
|
41
|
-
uri_prefix = pkg.metadata&.
|
|
45
|
+
uri_prefix = pkg.metadata&.uri_prefix || pkg.metadata&.shortname
|
|
42
46
|
resolver.register_self(pkg.concepts)
|
|
43
47
|
resolver.register_package(pkg, uri_prefix: uri_prefix)
|
|
44
48
|
resolver.validate_all(pkg, extractor: extractor)
|
|
@@ -56,7 +60,7 @@ module Glossarist
|
|
|
56
60
|
resolver = ReferenceResolver.new
|
|
57
61
|
Dir.glob(File.join(reference_path, "*.gcr")).each do |gcr_path|
|
|
58
62
|
pkg = GcrPackage.load(gcr_path)
|
|
59
|
-
uri_prefix = pkg.metadata&.
|
|
63
|
+
uri_prefix = pkg.metadata&.uri_prefix || pkg.metadata&.shortname
|
|
60
64
|
resolver.register_package(pkg, uri_prefix: uri_prefix)
|
|
61
65
|
end
|
|
62
66
|
resolver
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
module Glossarist
|
|
2
2
|
module Designation
|
|
3
3
|
class Abbreviation < Expression
|
|
4
|
-
attribute :international, :boolean
|
|
5
4
|
attribute :type, :string, default: -> { "abbreviation" }
|
|
6
5
|
|
|
7
6
|
Glossarist::GlossaryDefinition::ABBREVIATION_TYPES.each do |name|
|
|
@@ -9,7 +8,6 @@ module Glossarist
|
|
|
9
8
|
end
|
|
10
9
|
|
|
11
10
|
key_value do
|
|
12
|
-
map :international, to: :international
|
|
13
11
|
map :type, to: :type, render_default: true
|
|
14
12
|
Glossarist::GlossaryDefinition::ABBREVIATION_TYPES.each do |name|
|
|
15
13
|
map name.to_sym, to: name.to_sym
|
|
@@ -6,12 +6,30 @@ module Glossarist
|
|
|
6
6
|
attribute :normative_status, :string,
|
|
7
7
|
values: Glossarist::GlossaryDefinition::DESIGNATION_BASE_NORMATIVE_STATUSES
|
|
8
8
|
attribute :type, :string
|
|
9
|
+
attribute :language, :string
|
|
10
|
+
attribute :script, :string
|
|
11
|
+
attribute :system, :string
|
|
12
|
+
attribute :international, :boolean
|
|
13
|
+
attribute :absent, :boolean
|
|
14
|
+
attribute :pronunciation, Pronunciation, collection: true
|
|
15
|
+
attribute :sources, ConceptSource, collection: true
|
|
16
|
+
attribute :term_type, :string
|
|
17
|
+
attribute :related, RelatedConcept, collection: true
|
|
9
18
|
|
|
10
19
|
key_value do
|
|
11
20
|
map :type, to: :type
|
|
12
21
|
map %i[normative_status normativeStatus], to: :normative_status
|
|
13
22
|
map %i[geographical_area geographicalArea], to: :geographical_area
|
|
14
23
|
map :designation, to: :designation
|
|
24
|
+
map :language, to: :language
|
|
25
|
+
map :script, to: :script
|
|
26
|
+
map :system, to: :system
|
|
27
|
+
map :international, to: :international
|
|
28
|
+
map :absent, to: :absent
|
|
29
|
+
map :pronunciation, to: :pronunciation
|
|
30
|
+
map :sources, to: :sources
|
|
31
|
+
map %i[term_type termType], to: :term_type
|
|
32
|
+
map :related, to: :related
|
|
15
33
|
end
|
|
16
34
|
|
|
17
35
|
def self.of_yaml(hash, options = {})
|
|
@@ -34,7 +52,9 @@ module Glossarist
|
|
|
34
52
|
end
|
|
35
53
|
|
|
36
54
|
def self.infer_designation_type(hash)
|
|
37
|
-
if hash["
|
|
55
|
+
if hash["abbreviation_type"]
|
|
56
|
+
"abbreviation"
|
|
57
|
+
elsif hash["international"]
|
|
38
58
|
"symbol"
|
|
39
59
|
else
|
|
40
60
|
"expression"
|
|
@@ -5,6 +5,7 @@ module Glossarist
|
|
|
5
5
|
class Expression < Base
|
|
6
6
|
attribute :prefix, :string
|
|
7
7
|
attribute :usage_info, :string
|
|
8
|
+
attribute :field_of_application, :string
|
|
8
9
|
|
|
9
10
|
attribute :gender, :string
|
|
10
11
|
attribute :plurality, :string
|
|
@@ -16,6 +17,8 @@ module Glossarist
|
|
|
16
17
|
map :type, to: :type, render_default: true
|
|
17
18
|
map :prefix, to: :prefix
|
|
18
19
|
map %i[usage_info usageInfo], to: :usage_info
|
|
20
|
+
map %i[field_of_application fieldOfApplication],
|
|
21
|
+
to: :field_of_application
|
|
19
22
|
map %i[grammar_info grammarInfo], to: :grammar_info
|
|
20
23
|
end
|
|
21
24
|
|
|
@@ -2,13 +2,9 @@ module Glossarist
|
|
|
2
2
|
module Designation
|
|
3
3
|
class LetterSymbol < Symbol
|
|
4
4
|
attribute :text, :string
|
|
5
|
-
attribute :language, :string
|
|
6
|
-
attribute :script, :string
|
|
7
5
|
|
|
8
6
|
key_value do
|
|
9
7
|
map :text, to: :text
|
|
10
|
-
map :language, to: :language
|
|
11
|
-
map :script, to: :script
|
|
12
8
|
end
|
|
13
9
|
|
|
14
10
|
def self.of_yaml(hash, options = {})
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Glossarist
|
|
2
|
+
module Designation
|
|
3
|
+
class Prefix < Base
|
|
4
|
+
attribute :type, :string, default: -> { "prefix" }
|
|
5
|
+
|
|
6
|
+
key_value do
|
|
7
|
+
map :type, to: :type, render_default: true
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
def self.of_yaml(hash, options = {})
|
|
11
|
+
hash["type"] = "prefix" unless hash["type"]
|
|
12
|
+
|
|
13
|
+
super
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|