glossarist 2.5.1 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop_todo.yml +87 -7
- data/CLAUDE.md +15 -2
- data/Gemfile +7 -7
- data/README.adoc +86 -0
- data/glossarist.gemspec +2 -1
- data/lib/glossarist/cli/export_command.rb +109 -0
- data/lib/glossarist/cli/package_command.rb +11 -0
- data/lib/glossarist/cli.rb +23 -0
- data/lib/glossarist/concept_validator.rb +0 -13
- data/lib/glossarist/gcr_metadata.rb +3 -0
- data/lib/glossarist/gcr_package.rb +95 -11
- data/lib/glossarist/gcr_statistics.rb +5 -1
- data/lib/glossarist/managed_concept.rb +10 -0
- data/lib/glossarist/rdf/localized_literal.rb +25 -0
- data/lib/glossarist/rdf/namespaces/dcterms_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces/skos_namespace.rb +12 -0
- data/lib/glossarist/rdf/namespaces.rb +10 -0
- data/lib/glossarist/rdf/skos_concept.rb +44 -0
- data/lib/glossarist/rdf/skos_vocabulary.rb +26 -0
- data/lib/glossarist/rdf.rb +10 -0
- data/lib/glossarist/resolution_adapter/local.rb +1 -3
- data/lib/glossarist/transforms/concept_to_skos_transform.rb +133 -0
- data/lib/glossarist/transforms/concept_to_tbx_transform.rb +86 -0
- data/lib/glossarist/transforms.rb +10 -0
- data/lib/glossarist/version.rb +1 -1
- data/relaton-bib-2.0.0.gem +0 -0
- data/relaton-bib-2.1.0.gem +0 -0
- data/relaton-cen-2.0.0.gem +0 -0
- data/relaton-iec-2.0.0.gem +0 -0
- data/relaton-iso-2.0.0.gem +0 -0
- data/relaton-itu-2.0.0.gem +0 -0
- metadata +35 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d8c6a7e6d8df0929ec60fdccdc44fd70a4042afec7ccd8f046cdcf6388839d21
|
|
4
|
+
data.tar.gz: 56d29d76e0fc77885edcbc1c59cd533ec99029b4db957ad5772d78bf96356c1c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: baace51fb6d551075f190957abe4a1736c390ee06dafe7756d691ce35b23770383eb12756bf3790712935879830da929ebf0670007b2f149c26f9b9cdb4bff89
|
|
7
|
+
data.tar.gz: d1faeeaac48cc379ddded9f51fb223b23dc8bcf2f7182e196b6b721d6b2f4bb0674875be5ca9c842a4ddafb343b14a3f46778530d0b19118000b33387437d771
|
data/.gitignore
CHANGED
data/.rubocop_todo.yml
CHANGED
|
@@ -1,23 +1,66 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2026-05-
|
|
3
|
+
# on 2026-05-06 17:07:44 UTC using RuboCop version 1.86.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
|
8
8
|
|
|
9
|
+
# Offense count: 6
|
|
10
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
11
|
+
# Configuration parameters: TreatCommentsAsGroupSeparators, ConsiderPunctuation.
|
|
12
|
+
Bundler/OrderedGems:
|
|
13
|
+
Exclude:
|
|
14
|
+
- 'Gemfile'
|
|
15
|
+
|
|
9
16
|
# Offense count: 1
|
|
10
17
|
Gemspec/RequiredRubyVersion:
|
|
11
18
|
Exclude:
|
|
12
19
|
- 'glossarist.gemspec'
|
|
13
20
|
|
|
14
|
-
# Offense count:
|
|
21
|
+
# Offense count: 10
|
|
22
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
23
|
+
# Configuration parameters: EnforcedStyle, IndentationWidth.
|
|
24
|
+
# SupportedStyles: with_first_argument, with_fixed_indentation
|
|
25
|
+
Layout/ArgumentAlignment:
|
|
26
|
+
Exclude:
|
|
27
|
+
- 'lib/glossarist/cli/export_command.rb'
|
|
28
|
+
- 'lib/glossarist/gcr_package.rb'
|
|
29
|
+
- 'spec/unit/gcr_package_spec.rb'
|
|
30
|
+
|
|
31
|
+
# Offense count: 1
|
|
32
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
33
|
+
Layout/EmptyLinesAroundMethodBody:
|
|
34
|
+
Exclude:
|
|
35
|
+
- 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
|
|
36
|
+
|
|
37
|
+
# Offense count: 1
|
|
38
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
39
|
+
# Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
|
|
40
|
+
# SupportedHashRocketStyles: key, separator, table
|
|
41
|
+
# SupportedColonStyles: key, separator, table
|
|
42
|
+
# SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
|
|
43
|
+
Layout/HashAlignment:
|
|
44
|
+
Exclude:
|
|
45
|
+
- 'lib/glossarist/gcr_package.rb'
|
|
46
|
+
|
|
47
|
+
# Offense count: 214
|
|
15
48
|
# This cop supports safe autocorrection (--autocorrect).
|
|
16
49
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
|
|
17
50
|
# URISchemes: http, https
|
|
18
51
|
Layout/LineLength:
|
|
19
52
|
Enabled: false
|
|
20
53
|
|
|
54
|
+
# Offense count: 12
|
|
55
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
56
|
+
# Configuration parameters: AllowInHeredoc.
|
|
57
|
+
Layout/TrailingWhitespace:
|
|
58
|
+
Exclude:
|
|
59
|
+
- 'lib/glossarist/cli/export_command.rb'
|
|
60
|
+
- 'lib/glossarist/gcr_package.rb'
|
|
61
|
+
- 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
|
|
62
|
+
- 'spec/unit/gcr_package_spec.rb'
|
|
63
|
+
|
|
21
64
|
# Offense count: 1
|
|
22
65
|
# Configuration parameters: AllowedMethods.
|
|
23
66
|
# AllowedMethods: enums
|
|
@@ -30,18 +73,20 @@ Lint/DuplicateMethods:
|
|
|
30
73
|
Exclude:
|
|
31
74
|
- 'lib/glossarist/managed_concept.rb'
|
|
32
75
|
|
|
33
|
-
# Offense count:
|
|
76
|
+
# Offense count: 2
|
|
34
77
|
# This cop supports safe autocorrection (--autocorrect).
|
|
35
78
|
# Configuration parameters: AllowUnusedKeywordArguments, IgnoreEmptyMethods, IgnoreNotImplementedMethods, NotImplementedExceptions.
|
|
36
79
|
# NotImplementedExceptions: NotImplementedError
|
|
37
80
|
Lint/UnusedMethodArgument:
|
|
38
81
|
Exclude:
|
|
82
|
+
- 'lib/glossarist/cli/export_command.rb'
|
|
39
83
|
- 'lib/glossarist/dataset_validator.rb'
|
|
40
84
|
|
|
41
|
-
# Offense count:
|
|
85
|
+
# Offense count: 23
|
|
42
86
|
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
|
|
43
87
|
Metrics/AbcSize:
|
|
44
88
|
Exclude:
|
|
89
|
+
- 'lib/glossarist/cli/export_command.rb'
|
|
45
90
|
- 'lib/glossarist/cli/package_command.rb'
|
|
46
91
|
- 'lib/glossarist/cli/validate_command.rb'
|
|
47
92
|
- 'lib/glossarist/concept_manager.rb'
|
|
@@ -51,6 +96,8 @@ Metrics/AbcSize:
|
|
|
51
96
|
- 'lib/glossarist/reference_resolver.rb'
|
|
52
97
|
- 'lib/glossarist/resolution_adapter/local.rb'
|
|
53
98
|
- 'lib/glossarist/schema_migration.rb'
|
|
99
|
+
- 'lib/glossarist/transforms/concept_to_skos_transform.rb'
|
|
100
|
+
- 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
|
|
54
101
|
- 'lib/glossarist/utilities/uuid.rb'
|
|
55
102
|
- 'spec/unit/concept_collector_spec.rb'
|
|
56
103
|
|
|
@@ -60,25 +107,34 @@ Metrics/AbcSize:
|
|
|
60
107
|
Metrics/BlockLength:
|
|
61
108
|
Max: 28
|
|
62
109
|
|
|
63
|
-
# Offense count:
|
|
110
|
+
# Offense count: 19
|
|
64
111
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
65
112
|
Metrics/CyclomaticComplexity:
|
|
66
113
|
Exclude:
|
|
114
|
+
- 'lib/glossarist/cli/export_command.rb'
|
|
67
115
|
- 'lib/glossarist/concept_validator.rb'
|
|
68
116
|
- 'lib/glossarist/designation/expression.rb'
|
|
69
117
|
- 'lib/glossarist/gcr_metadata.rb'
|
|
118
|
+
- 'lib/glossarist/gcr_statistics.rb'
|
|
70
119
|
- 'lib/glossarist/managed_concept.rb'
|
|
71
120
|
- 'lib/glossarist/reference_extractor.rb'
|
|
72
121
|
- 'lib/glossarist/reference_resolver.rb'
|
|
73
122
|
- 'lib/glossarist/resolution_adapter/local.rb'
|
|
74
123
|
- 'lib/glossarist/schema_migration.rb'
|
|
124
|
+
- 'lib/glossarist/transforms/concept_to_skos_transform.rb'
|
|
125
|
+
- 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
|
|
75
126
|
|
|
76
|
-
# Offense count:
|
|
127
|
+
# Offense count: 35
|
|
77
128
|
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
78
129
|
Metrics/MethodLength:
|
|
79
130
|
Max: 42
|
|
80
131
|
|
|
81
|
-
# Offense count:
|
|
132
|
+
# Offense count: 4
|
|
133
|
+
# Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
|
|
134
|
+
Metrics/ParameterLists:
|
|
135
|
+
Max: 6
|
|
136
|
+
|
|
137
|
+
# Offense count: 13
|
|
82
138
|
# Configuration parameters: AllowedMethods, AllowedPatterns, Max.
|
|
83
139
|
Metrics/PerceivedComplexity:
|
|
84
140
|
Exclude:
|
|
@@ -89,6 +145,8 @@ Metrics/PerceivedComplexity:
|
|
|
89
145
|
- 'lib/glossarist/reference_resolver.rb'
|
|
90
146
|
- 'lib/glossarist/resolution_adapter/local.rb'
|
|
91
147
|
- 'lib/glossarist/schema_migration.rb'
|
|
148
|
+
- 'lib/glossarist/transforms/concept_to_skos_transform.rb'
|
|
149
|
+
- 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
|
|
92
150
|
|
|
93
151
|
# Offense count: 6
|
|
94
152
|
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
|
@@ -97,6 +155,14 @@ Naming/MethodParameterName:
|
|
|
97
155
|
Exclude:
|
|
98
156
|
- 'lib/glossarist/schema_migration.rb'
|
|
99
157
|
|
|
158
|
+
# Offense count: 3
|
|
159
|
+
# Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
|
|
160
|
+
# SupportedStyles: snake_case, normalcase, non_integer
|
|
161
|
+
# AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
|
|
162
|
+
Naming/VariableNumber:
|
|
163
|
+
Exclude:
|
|
164
|
+
- 'spec/unit/rdf/skos_vocabulary_spec.rb'
|
|
165
|
+
|
|
100
166
|
# Offense count: 6
|
|
101
167
|
# This cop supports safe autocorrection (--autocorrect).
|
|
102
168
|
# Configuration parameters: MaxUnannotatedPlaceholdersAllowed, Mode, AllowedMethods, AllowedPatterns.
|
|
@@ -104,6 +170,12 @@ Naming/MethodParameterName:
|
|
|
104
170
|
Style/FormatStringToken:
|
|
105
171
|
EnforcedStyle: unannotated
|
|
106
172
|
|
|
173
|
+
# Offense count: 2
|
|
174
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
175
|
+
Style/MultilineIfModifier:
|
|
176
|
+
Exclude:
|
|
177
|
+
- 'lib/glossarist/gcr_package.rb'
|
|
178
|
+
|
|
107
179
|
# Offense count: 1
|
|
108
180
|
# Configuration parameters: AllowedClasses.
|
|
109
181
|
Style/OneClassPerFile:
|
|
@@ -113,7 +185,15 @@ Style/OneClassPerFile:
|
|
|
113
185
|
- 'lib/glossarist.rb'
|
|
114
186
|
|
|
115
187
|
# Offense count: 1
|
|
188
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
189
|
+
Style/RedundantAssignment:
|
|
190
|
+
Exclude:
|
|
191
|
+
- 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
|
|
192
|
+
|
|
193
|
+
# Offense count: 4
|
|
116
194
|
# Configuration parameters: Max.
|
|
117
195
|
Style/SafeNavigationChainLength:
|
|
118
196
|
Exclude:
|
|
119
197
|
- 'lib/glossarist/managed_concept.rb'
|
|
198
|
+
- 'lib/glossarist/transforms/concept_to_skos_transform.rb'
|
|
199
|
+
- 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
|
data/CLAUDE.md
CHANGED
|
@@ -67,11 +67,24 @@ Three classes handle glossary concept registry (GCR) ZIP packages:
|
|
|
67
67
|
|
|
68
68
|
### CLI
|
|
69
69
|
|
|
70
|
-
The `exe/glossarist` executable uses Thor.
|
|
70
|
+
The `exe/glossarist` executable uses Thor. Commands:
|
|
71
|
+
- `generate_latex` — converts concepts to LaTeX glossary entries
|
|
72
|
+
- `package` — creates `.gcr` ZIP archives with optional compiled formats (`--compiled-formats tbx,jsonld,turtle,jsonl`)
|
|
73
|
+
- `export` — exports concepts in json/tbx/jsonld/turtle/jsonl formats
|
|
74
|
+
- `validate` — validates datasets and `.gcr` files
|
|
75
|
+
- `upgrade` — migrates datasets to current schema version
|
|
76
|
+
|
|
77
|
+
### Export Transforms
|
|
78
|
+
|
|
79
|
+
- **`ConceptToTbxTransform`** (`transforms/concept_to_tbx_transform.rb`) — converts ManagedConcept to TBX-XML using the tbx gem (ISO 30042:2019). Produces `Tbx::ConceptEntry` per concept or `Tbx::Document` for full export.
|
|
80
|
+
- **`ConceptToSkosTransform`** (`transforms/concept_to_skos_transform.rb`) — converts ManagedConcept to SKOS RDF using `Glossarist::Rdf::SkosConcept`. Has `transform` (single) and `transform_document` (batch, returns `SkosVocabulary`). Produces JSON-LD and Turtle via the unified `rdf` DSL.
|
|
81
|
+
- **SKOS/RDF models** (`lib/glossarist/rdf/`) — `SkosConcept`, `SkosVocabulary` (ConceptScheme container), `LocalizedLiteral` (language-tagged value), namespace classes.
|
|
82
|
+
- TBX, Turtle, JSON-LD, JSONL export all write a single document file; JSON writes per-concept files.
|
|
71
83
|
|
|
72
84
|
### Dependencies
|
|
73
85
|
|
|
74
|
-
- `lutaml-model` (~> 0.8) — serialization framework (YAML/XML)
|
|
86
|
+
- `lutaml-model` (~> 0.8.5) — serialization framework (YAML/XML/JSON-LD/Turtle)
|
|
87
|
+
- `tbx` — ISO 30042:2019 TBX model classes
|
|
75
88
|
- `relaton` (>= 2.0.0, < 3) — bibliography database integration
|
|
76
89
|
- `thor` — CLI commands
|
|
77
90
|
|
data/Gemfile
CHANGED
|
@@ -7,6 +7,7 @@ gemspec
|
|
|
7
7
|
gem "canon"
|
|
8
8
|
gem "lutaml-model", "~> 0.8.0"
|
|
9
9
|
gem "rake", "~> 13.0"
|
|
10
|
+
gem "tbx", "~> 0.1"
|
|
10
11
|
gem "rspec", "~> 3.0"
|
|
11
12
|
gem "rubocop"
|
|
12
13
|
gem "rubocop-performance"
|
|
@@ -15,20 +16,19 @@ gem "rubocop-rspec"
|
|
|
15
16
|
|
|
16
17
|
# Override relaton gems with lutaml-model 0.8 compatible versions.
|
|
17
18
|
# Released 2.0.0 gems have untyped lutaml-model attributes that fail with 0.8+.
|
|
18
|
-
#
|
|
19
|
-
# lutaml-integration branches also have typed attributes and work with relaton-bib ~> 2.0.0.
|
|
19
|
+
# lutaml-integration branches have typed attributes and relaton-bib ~> 2.1.0.
|
|
20
20
|
# TODO: Remove once relaton gems release versions with lutaml-model 0.8 support.
|
|
21
21
|
gem "relaton-3gpp", github: "relaton/relaton-3gpp",
|
|
22
|
-
branch: "
|
|
23
|
-
gem "relaton-bib", github: "relaton/relaton-bib", branch: "
|
|
22
|
+
branch: "lutaml-integration"
|
|
23
|
+
gem "relaton-bib", github: "relaton/relaton-bib", branch: "lutaml-integration"
|
|
24
24
|
gem "relaton-bipm", github: "relaton/relaton-bipm",
|
|
25
|
-
branch: "
|
|
26
|
-
gem "relaton-bsi", github: "relaton/relaton-bsi", branch: "
|
|
25
|
+
branch: "lutaml-integration"
|
|
26
|
+
gem "relaton-bsi", github: "relaton/relaton-bsi", branch: "lutaml-integration"
|
|
27
27
|
gem "relaton-calconnect", github: "relaton/relaton-calconnect",
|
|
28
28
|
branch: "lutaml-integration"
|
|
29
29
|
gem "relaton-ccsds", github: "relaton/relaton-ccsds",
|
|
30
30
|
branch: "lutaml-integration"
|
|
31
31
|
gem "relaton-cen", github: "relaton/relaton-cen", branch: "lutaml-integration"
|
|
32
32
|
gem "relaton-iec", github: "relaton/relaton-iec", branch: "lutaml-integration"
|
|
33
|
-
gem "relaton-iso", github: "relaton/relaton-iso", branch: "
|
|
33
|
+
gem "relaton-iso", github: "relaton/relaton-iso", branch: "lutaml-integration"
|
|
34
34
|
gem "relaton-itu", github: "relaton/relaton-itu", branch: "lutaml-integration"
|
data/README.adoc
CHANGED
|
@@ -407,6 +407,12 @@ Options:
|
|
|
407
407
|
|
|
408
408
|
|--tags
|
|
409
409
|
|Tags for the dataset
|
|
410
|
+
|
|
411
|
+
|--compiled-formats
|
|
412
|
+
|Comma-separated compiled formats to bundle (tbx,jsonld,turtle,jsonl)
|
|
413
|
+
|
|
414
|
+
|--concept-uri-template
|
|
415
|
+
|URI template for concept URIs
|
|
410
416
|
|===
|
|
411
417
|
|
|
412
418
|
Ruby API:
|
|
@@ -418,9 +424,89 @@ GcrPackage.create_from_directory(
|
|
|
418
424
|
shortname: "mydataset",
|
|
419
425
|
version: "1.0.0",
|
|
420
426
|
uri_prefix: "urn:iso:std:iso:19111",
|
|
427
|
+
compiled_formats: ["jsonld", "turtle"],
|
|
421
428
|
)
|
|
422
429
|
----
|
|
423
430
|
|
|
431
|
+
=== export
|
|
432
|
+
|
|
433
|
+
Export concepts in machine-readable formats.
|
|
434
|
+
|
|
435
|
+
[,bash]
|
|
436
|
+
----
|
|
437
|
+
glossarist export PATH --format json --output DIR
|
|
438
|
+
glossarist export PATH --format jsonld --output DIR --shortname isotc211
|
|
439
|
+
glossarist export PATH --format turtle --output DIR
|
|
440
|
+
glossarist export PATH --format tbx --output DIR --shortname isotc211
|
|
441
|
+
glossarist export PATH --format jsonl --output DIR
|
|
442
|
+
glossarist export package.gcr --format json --output DIR
|
|
443
|
+
----
|
|
444
|
+
|
|
445
|
+
The path can be either a concept dataset directory or a `.gcr` file. When exporting from a `.gcr`, the `shortname` and `uri_prefix` are automatically resolved from the package metadata.
|
|
446
|
+
|
|
447
|
+
==== Output Formats
|
|
448
|
+
|
|
449
|
+
[cols="1,2,1"]
|
|
450
|
+
|===
|
|
451
|
+
|Format |Output |Files
|
|
452
|
+
|
|
453
|
+
|`json`
|
|
454
|
+
|Per-concept JSON files
|
|
455
|
+
|`{concept_id}.json`
|
|
456
|
+
|
|
457
|
+
|`tbx`
|
|
458
|
+
|Single TBX-XML document (ISO 30042:2019)
|
|
459
|
+
|`{shortname}.tbx.xml`
|
|
460
|
+
|
|
461
|
+
|`jsonld`
|
|
462
|
+
|Single JSON-LD file with `@graph`
|
|
463
|
+
|`{shortname}.jsonld`
|
|
464
|
+
|
|
465
|
+
|`turtle`
|
|
466
|
+
|Single Turtle file with all concept triples
|
|
467
|
+
|`{shortname}.ttl`
|
|
468
|
+
|
|
469
|
+
|`jsonl`
|
|
470
|
+
|JSONL file with one JSON-LD object per line
|
|
471
|
+
|`{shortname}.jsonl`
|
|
472
|
+
|===
|
|
473
|
+
|
|
474
|
+
Options:
|
|
475
|
+
[cols="1,1"]
|
|
476
|
+
|===
|
|
477
|
+
|--format (required)
|
|
478
|
+
|Output format: `json`, `tbx`, `jsonld`, `turtle`, or `jsonl`
|
|
479
|
+
|
|
480
|
+
|o, --output (required)
|
|
481
|
+
|Output directory
|
|
482
|
+
|
|
483
|
+
|--shortname
|
|
484
|
+
|Dataset shortname for concept ID prefixing
|
|
485
|
+
|
|
486
|
+
|--uri-prefix
|
|
487
|
+
|URI/URN prefix for the dataset
|
|
488
|
+
|
|
489
|
+
|--site-url
|
|
490
|
+
|Base URL of the glossarist site
|
|
491
|
+
|
|
492
|
+
|--title
|
|
493
|
+
|Dataset title for document header
|
|
494
|
+
|===
|
|
495
|
+
|
|
496
|
+
Ruby API:
|
|
497
|
+
[,ruby]
|
|
498
|
+
----
|
|
499
|
+
# Export to JSON-LD
|
|
500
|
+
cmd = Glossarist::CLI::ExportCommand.new("path/to/dataset",
|
|
501
|
+
format: "jsonld", output: "/tmp/export", shortname: "isotc211")
|
|
502
|
+
cmd.run
|
|
503
|
+
|
|
504
|
+
# Transform a single concept to SKOS
|
|
505
|
+
skos = Glossarist::Transforms::ConceptToSkosTransform.transform(concept)
|
|
506
|
+
puts skos.to_jsonld
|
|
507
|
+
puts skos.to_turtle
|
|
508
|
+
----
|
|
509
|
+
|
|
424
510
|
=== validate
|
|
425
511
|
|
|
426
512
|
Validate a dataset directory or `.gcr` file for schema compliance.
|
data/glossarist.gemspec
CHANGED
|
@@ -31,8 +31,9 @@ Gem::Specification.new do |spec|
|
|
|
31
31
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
32
32
|
spec.require_paths = ["lib"]
|
|
33
33
|
|
|
34
|
-
spec.add_dependency "lutaml-model", "~> 0.8"
|
|
34
|
+
spec.add_dependency "lutaml-model", "~> 0.8.5"
|
|
35
35
|
spec.add_dependency "relaton", ">= 2.0.0", "< 3"
|
|
36
36
|
spec.add_dependency "rubyzip", ">= 2.3", "< 3"
|
|
37
|
+
spec.add_dependency "tbx", "~> 0.1"
|
|
37
38
|
spec.add_dependency "thor"
|
|
38
39
|
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
class CLI
|
|
5
|
+
class ExportCommand
|
|
6
|
+
EXTENSIONS = {
|
|
7
|
+
"json" => "json",
|
|
8
|
+
"jsonld" => "jsonld",
|
|
9
|
+
"turtle" => "ttl",
|
|
10
|
+
"tbx" => "tbx.xml",
|
|
11
|
+
"jsonl" => "jsonl",
|
|
12
|
+
}.freeze
|
|
13
|
+
|
|
14
|
+
def initialize(path, options)
|
|
15
|
+
@path = path
|
|
16
|
+
@options = options
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def run
|
|
20
|
+
format = @options[:format]
|
|
21
|
+
output_dir = File.expand_path(@options[:output])
|
|
22
|
+
FileUtils.mkdir_p(output_dir)
|
|
23
|
+
|
|
24
|
+
concepts = load_concepts
|
|
25
|
+
name = resolve_shortname(concepts)
|
|
26
|
+
|
|
27
|
+
case format
|
|
28
|
+
when "json" then export_json(concepts, output_dir)
|
|
29
|
+
when "jsonld" then export_document(concepts, name, output_dir, :jsonld)
|
|
30
|
+
when "turtle" then export_document(concepts, name, output_dir, :turtle)
|
|
31
|
+
when "tbx" then export_tbx(concepts, name, output_dir)
|
|
32
|
+
when "jsonl" then export_jsonl(concepts, name, output_dir)
|
|
33
|
+
end
|
|
34
|
+
rescue ArgumentError => e
|
|
35
|
+
warn "Error: #{e.message}"
|
|
36
|
+
exit 1
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
def load_concepts
|
|
42
|
+
if @path.end_with?(".gcr")
|
|
43
|
+
package = GcrPackage.load(@path)
|
|
44
|
+
resolve_metadata_from_package(package)
|
|
45
|
+
package.concepts
|
|
46
|
+
else
|
|
47
|
+
collection = ManagedConceptCollection.new
|
|
48
|
+
collection.load_from_files(@path)
|
|
49
|
+
collection.to_a
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def resolve_metadata_from_package(package)
|
|
54
|
+
@options[:shortname] ||= package.metadata["shortname"]
|
|
55
|
+
@options[:uri_prefix] ||= package.metadata["uri_prefix"]
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def resolve_shortname(_concepts)
|
|
59
|
+
@options[:shortname] || "glossary"
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def transform_options
|
|
63
|
+
{
|
|
64
|
+
shortname: @options[:shortname],
|
|
65
|
+
uri_prefix: @options[:uri_prefix],
|
|
66
|
+
site_url: @options[:site_url],
|
|
67
|
+
title: @options[:title],
|
|
68
|
+
}.compact
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def export_json(concepts, output_dir)
|
|
72
|
+
concepts.each do |concept|
|
|
73
|
+
id = concept.data&.id || concept.identifier
|
|
74
|
+
File.write(File.join(output_dir, "#{id}.json"), concept.to_json)
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def export_document(concepts, name, output_dir, format)
|
|
79
|
+
require "glossarist/transforms/concept_to_skos_transform"
|
|
80
|
+
vocab = Transforms::ConceptToSkosTransform.transform_document(concepts,
|
|
81
|
+
transform_options)
|
|
82
|
+
ext = EXTENSIONS[format.to_s]
|
|
83
|
+
File.write(File.join(output_dir, "#{name}.#{ext}"),
|
|
84
|
+
vocab.public_send(:"to_#{format}"))
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
def export_tbx(concepts, name, output_dir)
|
|
88
|
+
require "glossarist/transforms/concept_to_tbx_transform"
|
|
89
|
+
doc = Transforms::ConceptToTbxTransform.transform_document(concepts,
|
|
90
|
+
transform_options)
|
|
91
|
+
File.write(File.join(output_dir, "#{name}.#{EXTENSIONS['tbx']}"),
|
|
92
|
+
doc.to_xml)
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def export_jsonl(concepts, name, output_dir)
|
|
96
|
+
require "glossarist/transforms/concept_to_skos_transform"
|
|
97
|
+
File.open(File.join(output_dir, "#{name}.#{EXTENSIONS['jsonl']}"),
|
|
98
|
+
"w") do |f|
|
|
99
|
+
concepts.each do |concept|
|
|
100
|
+
skos = Transforms::ConceptToSkosTransform.transform(concept,
|
|
101
|
+
transform_options)
|
|
102
|
+
f.write(skos.to_jsonld)
|
|
103
|
+
f.write("\n")
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -20,6 +20,8 @@ module Glossarist
|
|
|
20
20
|
tags: @options[:tags],
|
|
21
21
|
register_yaml: @options[:register_yaml],
|
|
22
22
|
uri_prefix: @options[:uri_prefix],
|
|
23
|
+
concept_uri_template: @options[:concept_uri_template],
|
|
24
|
+
compiled_formats: parse_compiled_formats,
|
|
23
25
|
)
|
|
24
26
|
|
|
25
27
|
puts "Created #{@options[:output]}"
|
|
@@ -27,6 +29,15 @@ module Glossarist
|
|
|
27
29
|
warn "Error: #{e.message}"
|
|
28
30
|
exit 1
|
|
29
31
|
end
|
|
32
|
+
|
|
33
|
+
private
|
|
34
|
+
|
|
35
|
+
def parse_compiled_formats
|
|
36
|
+
raw = @options[:compiled_formats]
|
|
37
|
+
return [] unless raw
|
|
38
|
+
|
|
39
|
+
raw.split(",").map(&:strip).reject(&:empty?)
|
|
40
|
+
end
|
|
30
41
|
end
|
|
31
42
|
end
|
|
32
43
|
end
|
data/lib/glossarist/cli.rb
CHANGED
|
@@ -57,6 +57,10 @@ module Glossarist
|
|
|
57
57
|
option :register_yaml, type: :string,
|
|
58
58
|
desc: "Path to register.yaml to include in package"
|
|
59
59
|
option :tags, type: :array, desc: "Tags for the dataset"
|
|
60
|
+
option :compiled_formats, type: :string,
|
|
61
|
+
desc: "Comma-separated compiled formats to bundle (tbx,jsonld,turtle,jsonl)"
|
|
62
|
+
option :concept_uri_template, type: :string,
|
|
63
|
+
desc: "URI template for concept URIs"
|
|
60
64
|
def package(dir)
|
|
61
65
|
require_relative "cli/package_command"
|
|
62
66
|
Glossarist::CLI::PackageCommand.new(dir, options).run
|
|
@@ -76,6 +80,25 @@ module Glossarist
|
|
|
76
80
|
Glossarist::CLI::ValidateCommand.new(path, options).run
|
|
77
81
|
end
|
|
78
82
|
|
|
83
|
+
desc "export PATH", "Export concepts in machine-readable formats"
|
|
84
|
+
option :format, type: :string, required: true,
|
|
85
|
+
enum: %w[json jsonld turtle tbx jsonl],
|
|
86
|
+
desc: "Output format"
|
|
87
|
+
option :output, aliases: :o, type: :string, required: true,
|
|
88
|
+
desc: "Output directory"
|
|
89
|
+
option :shortname, type: :string,
|
|
90
|
+
desc: "Dataset shortname for concept ID prefixing"
|
|
91
|
+
option :uri_prefix, type: :string,
|
|
92
|
+
desc: "URI/URN prefix for the dataset"
|
|
93
|
+
option :site_url, type: :string,
|
|
94
|
+
desc: "Base URL of the glossarist site"
|
|
95
|
+
option :title, type: :string,
|
|
96
|
+
desc: "Dataset title for document header"
|
|
97
|
+
def export(path)
|
|
98
|
+
require_relative "cli/export_command"
|
|
99
|
+
Glossarist::CLI::ExportCommand.new(path, options).run
|
|
100
|
+
end
|
|
101
|
+
|
|
79
102
|
def method_missing(*args)
|
|
80
103
|
warn "No method found named: #{args[0]}"
|
|
81
104
|
warn "Run with `--help` or `-h` to see available options"
|
|
@@ -52,7 +52,6 @@ module Glossarist
|
|
|
52
52
|
def validate_concept(concept, fname, seen_ids)
|
|
53
53
|
validate_id(concept, fname, seen_ids)
|
|
54
54
|
validate_localizations(concept, fname)
|
|
55
|
-
validate_definitions(concept, fname)
|
|
56
55
|
validate_entry_statuses(concept, fname)
|
|
57
56
|
end
|
|
58
57
|
|
|
@@ -87,18 +86,6 @@ module Glossarist
|
|
|
87
86
|
end
|
|
88
87
|
end
|
|
89
88
|
|
|
90
|
-
def validate_definitions(concept, fname)
|
|
91
|
-
(concept.localizations&.values || []).each do |l10n|
|
|
92
|
-
lang = l10n.language_code || "unknown"
|
|
93
|
-
next unless l10n.data&.definition
|
|
94
|
-
|
|
95
|
-
defs = l10n.data.definition
|
|
96
|
-
if defs.empty?
|
|
97
|
-
@errors << "#{fname}/#{lang}: definition is empty"
|
|
98
|
-
end
|
|
99
|
-
end
|
|
100
|
-
end
|
|
101
|
-
|
|
102
89
|
def validate_entry_statuses(concept, fname)
|
|
103
90
|
(concept.localizations&.values || []).each do |l10n|
|
|
104
91
|
lang = l10n.language_code || "unknown"
|
|
@@ -19,6 +19,7 @@ module Glossarist
|
|
|
19
19
|
attribute :license, :string
|
|
20
20
|
attribute :uri_prefix, :string
|
|
21
21
|
attribute :concept_uri_template, :string
|
|
22
|
+
attribute :compiled_formats, :string, collection: true
|
|
22
23
|
attribute :external_references, :hash, collection: true
|
|
23
24
|
|
|
24
25
|
key_value do
|
|
@@ -39,6 +40,7 @@ module Glossarist
|
|
|
39
40
|
map :license, to: :license
|
|
40
41
|
map :uri_prefix, to: :uri_prefix
|
|
41
42
|
map :concept_uri_template, to: :concept_uri_template
|
|
43
|
+
map :compiled_formats, to: :compiled_formats
|
|
42
44
|
map :external_references, to: :external_references
|
|
43
45
|
end
|
|
44
46
|
|
|
@@ -59,6 +61,7 @@ module Glossarist
|
|
|
59
61
|
statistics: stats,
|
|
60
62
|
uri_prefix: options[:uri_prefix],
|
|
61
63
|
concept_uri_template: options[:concept_uri_template],
|
|
64
|
+
compiled_formats: options[:compiled_formats] || [],
|
|
62
65
|
external_references: derive_external_references(concepts),
|
|
63
66
|
)
|
|
64
67
|
end
|