glossarist 2.6.2 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a4a05468d25c9ac0d13c748454dc98d7dc031718fe98d298c3db89d6874963a7
4
- data.tar.gz: 203b26205a85b9593942d7d6095dab85faa93ed1e99b9f474f20c67a77e61c5d
3
+ metadata.gz: 1f3a8ec372c1c3e7a93ed7c2bad8ed2837f8f5bcd5ce4ae340bbb9f3b5ddaa75
4
+ data.tar.gz: e7c0672fc648ea748cff12bfc00a1ea62665aeaa20e4cf8a86dde1419a6094df
5
5
  SHA512:
6
- metadata.gz: dc177b1d927f7b309cb47fac5158c151001202ba0332b35f0eaf87a8e1e2d9eda400dd077145b10d4acc5ee968676f39dd95172a32ad16e6078ff3357c299317
7
- data.tar.gz: eb60cfa90dd26008e2287cbafb35aaeb4e48bb60c120cc254fba024d945f4a6cdb41e9b51a2b8c68a30ac6e31e7e40f38243ec44437a5995126eba47f47110dd
6
+ metadata.gz: 5a3654b99b5137104e26830fe77b1b6bad3eb2e0ce4ffa45d479b909399c469c41edbd7460ede72dc4f10bf94cc2f40649e78ce8510113ea9d97f0715750af15
7
+ data.tar.gz: eec5c75fd4a6a434999830038642ce387d74c2b7df976343d4b342aa919d1aaf7c4beccff4f92fbb18a0f0a4acf762885bf837edad7f16dd9889a93b33ed5613
data/.gitignore CHANGED
@@ -16,8 +16,9 @@
16
16
  .rubocop-http---*
17
17
  .rubocop-https---*
18
18
 
19
- # Relaton local cache directory
19
+ # Relaton cache directories
20
20
  localcache
21
+ spec/fixtures/relaton_cache/
21
22
 
22
23
  .vscode
23
24
 
data/.rubocop_todo.yml CHANGED
@@ -1,59 +1,93 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2026-05-07 13:57:54 UTC using RuboCop version 1.86.1.
3
+ # on 2026-05-12 04:13:45 UTC using RuboCop version 1.86.1.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
+ # Offense count: 7
10
+ # This cop supports safe autocorrection (--autocorrect).
11
+ # Configuration parameters: TreatCommentsAsGroupSeparators, ConsiderPunctuation.
12
+ Bundler/OrderedGems:
13
+ Exclude:
14
+ - 'Gemfile'
15
+
9
16
  # Offense count: 1
10
17
  Gemspec/RequiredRubyVersion:
11
18
  Exclude:
12
19
  - 'glossarist.gemspec'
13
20
 
14
- # Offense count: 1
21
+ # Offense count: 4
15
22
  # This cop supports safe autocorrection (--autocorrect).
16
23
  # Configuration parameters: EnforcedStyle, IndentationWidth.
17
24
  # SupportedStyles: with_first_argument, with_fixed_indentation
18
25
  Layout/ArgumentAlignment:
19
26
  Exclude:
20
- - 'spec/unit/gcr_package_spec.rb'
27
+ - 'lib/glossarist/sts/import_result.rb'
28
+ - 'lib/glossarist/sts/importer.rb'
29
+ - 'lib/glossarist/sts/term_mapper.rb'
21
30
 
22
- # Offense count: 2
31
+ # Offense count: 1
32
+ # This cop supports safe autocorrection (--autocorrect).
33
+ # Configuration parameters: IndentationWidth.
34
+ Layout/AssignmentIndentation:
35
+ Exclude:
36
+ - 'lib/glossarist/sts/term_mapper.rb'
37
+
38
+ # Offense count: 6
23
39
  # This cop supports safe autocorrection (--autocorrect).
24
40
  # Configuration parameters: EnforcedStyleAlignWith.
25
41
  # SupportedStylesAlignWith: either, start_of_block, start_of_line
26
42
  Layout/BlockAlignment:
27
43
  Exclude:
28
- - 'lib/glossarist/gcr_validator.rb'
44
+ - 'lib/glossarist/sts/term_extractor.rb'
45
+ - 'spec/unit/sts/term_extractor_spec.rb'
46
+ - 'spec/unit/sts/term_mapper_spec.rb'
29
47
 
30
- # Offense count: 2
48
+ # Offense count: 6
31
49
  # This cop supports safe autocorrection (--autocorrect).
32
50
  Layout/BlockEndNewline:
33
51
  Exclude:
34
- - 'lib/glossarist/gcr_validator.rb'
52
+ - 'lib/glossarist/sts/term_extractor.rb'
53
+ - 'spec/unit/sts/term_extractor_spec.rb'
54
+ - 'spec/unit/sts/term_mapper_spec.rb'
35
55
 
36
- # Offense count: 4
56
+ # Offense count: 1
57
+ # This cop supports safe autocorrection (--autocorrect).
58
+ # Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
59
+ # SupportedHashRocketStyles: key, separator, table
60
+ # SupportedColonStyles: key, separator, table
61
+ # SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
62
+ Layout/HashAlignment:
63
+ Exclude:
64
+ - 'lib/glossarist/sts/importer.rb'
65
+
66
+ # Offense count: 12
37
67
  # This cop supports safe autocorrection (--autocorrect).
38
68
  # Configuration parameters: Width, EnforcedStyleAlignWith, AllowedPatterns.
39
69
  # SupportedStylesAlignWith: start_of_line, relative_to_receiver
40
70
  Layout/IndentationWidth:
41
71
  Exclude:
42
- - 'lib/glossarist/gcr_validator.rb'
72
+ - 'lib/glossarist/sts/term_extractor.rb'
73
+ - 'spec/unit/sts/term_extractor_spec.rb'
74
+ - 'spec/unit/sts/term_mapper_spec.rb'
43
75
 
44
- # Offense count: 214
76
+ # Offense count: 236
45
77
  # This cop supports safe autocorrection (--autocorrect).
46
78
  # Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings.
47
79
  # URISchemes: http, https
48
80
  Layout/LineLength:
49
81
  Enabled: false
50
82
 
51
- # Offense count: 1
83
+ # Offense count: 7
52
84
  # This cop supports safe autocorrection (--autocorrect).
53
85
  # Configuration parameters: AllowInHeredoc.
54
86
  Layout/TrailingWhitespace:
55
87
  Exclude:
56
- - 'spec/unit/gcr_package_spec.rb'
88
+ - 'lib/glossarist/sts/import_result.rb'
89
+ - 'lib/glossarist/sts/importer.rb'
90
+ - 'lib/glossarist/sts/term_mapper.rb'
57
91
 
58
92
  # Offense count: 1
59
93
  # Configuration parameters: AllowedMethods.
@@ -106,12 +140,12 @@ Metrics/CyclomaticComplexity:
106
140
  - 'lib/glossarist/transforms/concept_to_skos_transform.rb'
107
141
  - 'lib/glossarist/transforms/concept_to_tbx_transform.rb'
108
142
 
109
- # Offense count: 35
143
+ # Offense count: 47
110
144
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
111
145
  Metrics/MethodLength:
112
146
  Max: 42
113
147
 
114
- # Offense count: 3
148
+ # Offense count: 4
115
149
  # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
116
150
  Metrics/ParameterLists:
117
151
  Max: 6
@@ -145,7 +179,7 @@ Naming/VariableNumber:
145
179
  Exclude:
146
180
  - 'spec/unit/rdf/skos_vocabulary_spec.rb'
147
181
 
148
- # Offense count: 3
182
+ # Offense count: 9
149
183
  # This cop supports safe autocorrection (--autocorrect).
150
184
  # Configuration parameters: EnforcedStyle, ProceduralMethods, FunctionalMethods, AllowedMethods, AllowedPatterns, AllowBracesOnProceduralOneLiners, BracesRequiredMethods.
151
185
  # SupportedStyles: line_count_based, semantic, braces_for_chaining, always_braces
@@ -154,7 +188,9 @@ Naming/VariableNumber:
154
188
  # AllowedMethods: lambda, proc, it
155
189
  Style/BlockDelimiters:
156
190
  Exclude:
157
- - 'lib/glossarist/gcr_validator.rb'
191
+ - 'lib/glossarist/sts/term_extractor.rb'
192
+ - 'spec/unit/sts/term_extractor_spec.rb'
193
+ - 'spec/unit/sts/term_mapper_spec.rb'
158
194
 
159
195
  # Offense count: 6
160
196
  # This cop supports safe autocorrection (--autocorrect).
@@ -163,6 +199,12 @@ Style/BlockDelimiters:
163
199
  Style/FormatStringToken:
164
200
  EnforcedStyle: unannotated
165
201
 
202
+ # Offense count: 2
203
+ # This cop supports safe autocorrection (--autocorrect).
204
+ Style/MultilineIfModifier:
205
+ Exclude:
206
+ - 'lib/glossarist/sts/importer.rb'
207
+
166
208
  # Offense count: 1
167
209
  # Configuration parameters: AllowedClasses.
168
210
  Style/OneClassPerFile:
data/Gemfile CHANGED
@@ -6,29 +6,13 @@ gemspec
6
6
 
7
7
  gem "canon"
8
8
  gem "lutaml-model", "~> 0.8.0"
9
+ gem "nokogiri"
9
10
  gem "rake", "~> 13.0"
11
+ gem "relaton", "~> 2.1.0"
10
12
  gem "rspec", "~> 3.0"
11
13
  gem "rubocop"
12
14
  gem "rubocop-performance"
13
15
  gem "rubocop-rake"
14
16
  gem "rubocop-rspec"
17
+ gem "sts", "~> 0.5.6"
15
18
  gem "tbx", "~> 0.1"
16
-
17
- # Override relaton gems with lutaml-model 0.8 compatible versions.
18
- # Released 2.0.0 gems have untyped lutaml-model attributes that fail with 0.8+.
19
- # lutaml-integration branches have typed attributes and relaton-bib ~> 2.1.0.
20
- # TODO: Remove once relaton gems release versions with lutaml-model 0.8 support.
21
- gem "relaton-3gpp", github: "relaton/relaton-3gpp",
22
- branch: "lutaml-integration"
23
- gem "relaton-bib", github: "relaton/relaton-bib", branch: "lutaml-integration"
24
- gem "relaton-bipm", github: "relaton/relaton-bipm",
25
- branch: "lutaml-integration"
26
- gem "relaton-bsi", github: "relaton/relaton-bsi", branch: "lutaml-integration"
27
- gem "relaton-calconnect", github: "relaton/relaton-calconnect",
28
- branch: "lutaml-integration"
29
- gem "relaton-ccsds", github: "relaton/relaton-ccsds",
30
- branch: "lutaml-integration"
31
- gem "relaton-cen", github: "relaton/relaton-cen", branch: "lutaml-integration"
32
- gem "relaton-iec", github: "relaton/relaton-iec", branch: "lutaml-integration"
33
- gem "relaton-iso", github: "relaton/relaton-iso", branch: "lutaml-integration"
34
- gem "relaton-itu", github: "relaton/relaton-itu", branch: "lutaml-integration"
data/README.adoc CHANGED
@@ -507,6 +507,123 @@ puts skos.to_jsonld
507
507
  puts skos.to_turtle
508
508
  ----
509
509
 
510
+ === import
511
+
512
+ Import terminology concepts from STS XML files into a new or existing dataset.
513
+
514
+ [,bash]
515
+ ----
516
+ # Import one or more STS XML files into a new dataset directory
517
+ glossarist import iso-8373.xml -o output_dir
518
+
519
+ # Import into a new GCR package (--shortname and --version required)
520
+ glossarist import iso-8373.xml -o iso-8373.gcr \
521
+ --shortname iso-8373 --version 1.0.0 --title "ISO 8373 Robotics"
522
+
523
+ # Import multiple files into a new dataset
524
+ glossarist import iso-8373.xml iso-9000.xml -o combined_dataset
525
+
526
+ # Import into an existing dataset (dedup by designation + domain)
527
+ glossarist import iso-8373.xml --into existing_dataset/
528
+
529
+ # Import into an existing GCR (re-packages automatically)
530
+ glossarist import iso-8373.xml --into existing.gcr
531
+
532
+ # Control duplicate handling
533
+ glossarist import iso-8373.xml --into existing_dataset/ --on-duplicate replace
534
+ ----
535
+
536
+ Deduplication is based on **designation + domain** (case-insensitive). When
537
+ duplicates are found, the `--on-duplicate` strategy determines the behavior:
538
+
539
+ [cols="1,2"]
540
+ |===
541
+ |`skip` (default)
542
+ |Keep the existing concept, skip the new one
543
+
544
+ |`replace`
545
+ |Replace the existing concept with the new one
546
+
547
+ |`merge`
548
+ |Add new localizations to the existing concept (e.g. add French to an English-only concept)
549
+ |===
550
+
551
+ Options:
552
+ [cols="1,1"]
553
+ |===
554
+ |-o, --output
555
+ |Output directory or `.gcr` file path (new dataset)
556
+
557
+ |--into
558
+ |Path to existing dataset directory or `.gcr` file to merge into
559
+
560
+ |--shortname
561
+ |Dataset shortname (required for GCR output)
562
+
563
+ |--version
564
+ |Dataset version (required for GCR output)
565
+
566
+ |--title
567
+ |Dataset title
568
+
569
+ |--description
570
+ |Dataset description
571
+
572
+ |--owner
573
+ |Dataset owner
574
+
575
+ |--uri-prefix
576
+ |URI prefix for the dataset
577
+
578
+ |--on-duplicate
579
+ |How to handle duplicates: `skip`, `replace`, or `merge`
580
+ |===
581
+
582
+ Ruby API:
583
+ [,ruby]
584
+ ----
585
+ require "glossarist/sts"
586
+
587
+ importer = Glossarist::Sts::Importer.new
588
+
589
+ # Import into a new dataset directory
590
+ result = importer.import_new(
591
+ ["iso-8373.xml", "iso-9000.xml"],
592
+ output: "output_dir",
593
+ )
594
+ puts result.concepts.length # total concepts imported
595
+ puts result.conflicts.length # duplicates detected
596
+ puts result.skipped_count # skipped (strategy: skip)
597
+
598
+ # Import into a new GCR package
599
+ result = importer.import_new(
600
+ ["iso-8373.xml"],
601
+ output: "iso-8373.gcr",
602
+ shortname: "iso-8373",
603
+ version: "1.0.0",
604
+ title: "ISO 8373 Robotics Vocabulary",
605
+ )
606
+
607
+ # Import into an existing dataset with merge strategy
608
+ importer = Glossarist::Sts::Importer.new(duplicate_strategy: :merge)
609
+ result = importer.import_into_existing(
610
+ ["french_supplement.xml"],
611
+ "existing_dataset/",
612
+ )
613
+ result.concepts.each do |mc|
614
+ puts "#{mc.data.id}: #{mc.localizations.keys.join(', ')}"
615
+ end
616
+ ----
617
+
618
+ ==== Import result
619
+
620
+ `import_new` and `import_into_existing` return an `ImportResult` with:
621
+
622
+ concepts:: `Array<ManagedConcept>` — the resulting concepts (for `import_into_existing`, all concepts in the updated dataset)
623
+ conflicts:: `Array<DuplicateConflict>` — duplicate pairs detected by designation + domain
624
+ source_files:: `Array<String>` — the input file paths
625
+ skipped_count:: `Integer` — concepts skipped due to duplicates (strategy: skip)
626
+
510
627
  === validate
511
628
 
512
629
  Validate a dataset directory or `.gcr` file for schema compliance.
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ class CLI
5
+ class ImportCommand
6
+ def initialize(files, options)
7
+ @files = files
8
+ @options = options
9
+ end
10
+
11
+ def run
12
+ importer = Sts::Importer.new(
13
+ duplicate_strategy: @options[:on_duplicate]&.to_sym || :skip,
14
+ )
15
+
16
+ result = if @options[:into]
17
+ importer.import_into_existing(@files, @options[:into])
18
+ else
19
+ importer.import_new(@files, **import_new_args)
20
+ end
21
+
22
+ print_summary(result)
23
+ rescue ArgumentError => e
24
+ warn "Error: #{e.message}"
25
+ exit 1
26
+ end
27
+
28
+ private
29
+
30
+ def import_new_args
31
+ {
32
+ output: @options[:output],
33
+ shortname: @options[:shortname],
34
+ version: @options[:version],
35
+ title: @options[:title],
36
+ description: @options[:description],
37
+ owner: @options[:owner],
38
+ uri_prefix: @options[:uri_prefix],
39
+ }
40
+ end
41
+
42
+ def print_summary(result) # rubocop:disable Metrics/AbcSize
43
+ dest = @options[:into] || @options[:output]
44
+ puts "Imported #{result.concepts.length} concepts to #{dest}"
45
+ puts " Source files: #{@files.join(', ')}" if @files.any?
46
+ return unless result.conflict?
47
+
48
+ puts " #{result.conflicts.length} duplicate(s) detected " \
49
+ "(strategy: #{@options[:on_duplicate] || 'skip'})"
50
+ puts " #{result.skipped_count} concept(s) skipped" if result.skipped_count.positive?
51
+ end
52
+ end
53
+ end
54
+ end
@@ -4,6 +4,11 @@ require "thor"
4
4
 
5
5
  module Glossarist
6
6
  class CLI < Thor
7
+ autoload :UpgradeCommand, "#{__dir__}/cli/upgrade_command"
8
+ autoload :PackageCommand, "#{__dir__}/cli/package_command"
9
+ autoload :ValidateCommand, "#{__dir__}/cli/validate_command"
10
+ autoload :ImportCommand, "#{__dir__}/cli/import_command"
11
+ autoload :ExportCommand, "#{__dir__}/cli/export_command"
7
12
  desc "generate_latex", "Convert Concepts to Latex format"
8
13
 
9
14
  option :concepts_path, aliases: :p, required: true,
@@ -38,8 +43,7 @@ module Glossarist
38
43
  option :dry_run, type: :boolean, default: false,
39
44
  desc: "Show what would change without writing"
40
45
  def upgrade(source_dir)
41
- require_relative "cli/upgrade_command"
42
- Glossarist::CLI::UpgradeCommand.new(source_dir, options).run
46
+ CLI::UpgradeCommand.new(source_dir, options).run
43
47
  end
44
48
 
45
49
  desc "package DIR", "Create a .gcr ZIP archive from a schema v1 dataset"
@@ -62,8 +66,7 @@ module Glossarist
62
66
  option :concept_uri_template, type: :string,
63
67
  desc: "URI template for concept URIs"
64
68
  def package(dir)
65
- require_relative "cli/package_command"
66
- Glossarist::CLI::PackageCommand.new(dir, options).run
69
+ CLI::PackageCommand.new(dir, options).run
67
70
  end
68
71
 
69
72
  desc "validate PATH",
@@ -76,8 +79,27 @@ module Glossarist
76
79
  option :reference_path, type: :string,
77
80
  desc: "Path to directory of .gcr files for cross-dataset reference validation"
78
81
  def validate(path)
79
- require_relative "cli/validate_command"
80
- Glossarist::CLI::ValidateCommand.new(path, options).run
82
+ CLI::ValidateCommand.new(path, options).run
83
+ end
84
+
85
+ desc "import FILES...", "Import terms from STS XML files"
86
+ option :output, aliases: :o, type: :string,
87
+ desc: "Output directory or .gcr file path (new dataset)"
88
+ option :into, type: :string,
89
+ desc: "Path to existing dataset directory or .gcr file to merge into"
90
+ option :shortname, type: :string,
91
+ desc: "Dataset shortname (required for GCR output)"
92
+ option :version, type: :string,
93
+ desc: "Dataset version (required for GCR output)"
94
+ option :title, type: :string, desc: "Dataset title"
95
+ option :description, type: :string, desc: "Dataset description"
96
+ option :owner, type: :string, desc: "Dataset owner"
97
+ option :uri_prefix, type: :string, desc: "URI prefix for the dataset"
98
+ option :on_duplicate, type: :string, default: "skip",
99
+ enum: %w[skip replace merge],
100
+ desc: "How to handle duplicate concepts (designation + domain)"
101
+ def import(*files)
102
+ CLI::ImportCommand.new(files, options).run
81
103
  end
82
104
 
83
105
  desc "export PATH", "Export concepts in machine-readable formats"
@@ -95,8 +117,7 @@ module Glossarist
95
117
  option :title, type: :string,
96
118
  desc: "Dataset title for document header"
97
119
  def export(path)
98
- require_relative "cli/export_command"
99
- Glossarist::CLI::ExportCommand.new(path, options).run
120
+ CLI::ExportCommand.new(path, options).run
100
121
  end
101
122
 
102
123
  def method_missing(*args)
@@ -1,5 +1,4 @@
1
- require_relative "base"
2
- require_relative "grammar_info"
1
+ # frozen_string_literal: true
3
2
 
4
3
  module Glossarist
5
4
  module Designation
@@ -1,4 +1,4 @@
1
- require_relative "symbol"
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Glossarist
4
4
  module Designation
@@ -1,4 +1,4 @@
1
- require_relative "localized_concept"
1
+ # frozen_string_literal: true
2
2
 
3
3
  module Glossarist
4
4
  class ManagedConcept < Lutaml::Model::Serializable
@@ -2,7 +2,6 @@
2
2
 
3
3
  require "lutaml/turtle"
4
4
  require "lutaml/jsonld"
5
- require_relative "../rdf"
6
5
 
7
6
  module Glossarist
8
7
  module Rdf
@@ -2,7 +2,6 @@
2
2
 
3
3
  require "lutaml/turtle"
4
4
  require "lutaml/jsonld"
5
- require_relative "../rdf"
6
5
 
7
6
  module Glossarist
8
7
  module Rdf
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Sts
5
+ ExtractedDesignation = Struct.new(
6
+ :term,
7
+ :type,
8
+ :normative_status,
9
+ :part_of_speech,
10
+ :abbreviation_type,
11
+ keyword_init: true,
12
+ )
13
+ end
14
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Sts
5
+ ExtractedLangSet = Struct.new(
6
+ :language_code,
7
+ :definition_text,
8
+ :note_texts,
9
+ :example_texts,
10
+ :source_texts,
11
+ :domain,
12
+ :designations,
13
+ keyword_init: true,
14
+ )
15
+ end
16
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Sts
5
+ ExtractedTerm = Struct.new(
6
+ :id,
7
+ :label,
8
+ :source_ref,
9
+ :lang_sets,
10
+ keyword_init: true,
11
+ )
12
+ end
13
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Sts
5
+ DuplicateConflict = Struct.new(:new_concept, :existing_concept, :key,
6
+ keyword_init: true)
7
+
8
+ class ImportResult
9
+ attr_reader :concepts, :conflicts, :source_files, :skipped_count
10
+
11
+ def initialize(concepts:, conflicts: [], source_files: [],
12
+ skipped_count: 0)
13
+ @concepts = concepts
14
+ @conflicts = conflicts
15
+ @source_files = source_files
16
+ @skipped_count = skipped_count
17
+ end
18
+
19
+ def conflict?
20
+ !conflicts.empty?
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,253 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tmpdir"
4
+ require_relative "import_result"
5
+
6
+ module Glossarist
7
+ module Sts
8
+ class Importer
9
+ STRATEGIES = %i[skip replace merge].freeze
10
+
11
+ attr_reader :duplicate_strategy
12
+
13
+ def initialize(duplicate_strategy: :skip)
14
+ unless STRATEGIES.include?(duplicate_strategy)
15
+ raise ArgumentError,
16
+ "duplicate_strategy must be one of #{STRATEGIES.join(', ')}, got #{duplicate_strategy}"
17
+ end
18
+
19
+ @duplicate_strategy = duplicate_strategy
20
+ @mapper = TermMapper.new
21
+ end
22
+
23
+ def import_new(xml_files, output:, shortname: nil, version: nil, **opts)
24
+ raw_concepts = extract_all_concepts(xml_files)
25
+ concepts, conflicts, skipped = dedup_concepts(raw_concepts)
26
+
27
+ if output.end_with?(".gcr")
28
+ unless shortname
29
+ raise ArgumentError,
30
+ "--shortname is required for GCR output"
31
+ end
32
+ unless version
33
+ raise ArgumentError,
34
+ "--version is required for GCR output"
35
+ end
36
+
37
+ create_gcr(concepts, output, shortname: shortname, version: version,
38
+ **opts)
39
+ else
40
+ save_dataset(concepts, output)
41
+ end
42
+
43
+ ImportResult.new(
44
+ concepts: concepts,
45
+ conflicts: conflicts,
46
+ source_files: xml_files,
47
+ skipped_count: skipped,
48
+ )
49
+ end
50
+
51
+ def import_into_existing(xml_files, dataset_path)
52
+ existing = load_existing(dataset_path)
53
+ new_concepts = extract_all_concepts(xml_files)
54
+ index = build_concept_index(existing)
55
+
56
+ result_state = apply_with_dedup(new_concepts, existing, index)
57
+
58
+ save_to_path(existing, dataset_path)
59
+
60
+ ImportResult.new(
61
+ concepts: existing.managed_concepts,
62
+ conflicts: result_state.conflicts,
63
+ source_files: xml_files,
64
+ skipped_count: result_state.skipped,
65
+ )
66
+ end
67
+
68
+ DedupState = Struct.new(:conflicts, :skipped, keyword_init: true)
69
+
70
+ private
71
+
72
+ def apply_with_dedup(new_concepts, existing, index)
73
+ state = DedupState.new(conflicts: [], skipped: 0)
74
+
75
+ new_concepts.each do |mc|
76
+ key = concept_key(mc)
77
+ existing_mc = index[key]
78
+
79
+ if existing_mc.nil?
80
+ existing.store(mc)
81
+ index[key] = mc
82
+ else
83
+ state.conflicts << DuplicateConflict.new(
84
+ new_concept: mc, existing_concept: existing_mc, key: key,
85
+ )
86
+ handle_duplicate(existing, existing_mc, mc, index, key, state)
87
+ end
88
+ end
89
+
90
+ state
91
+ end
92
+
93
+ def handle_duplicate(existing, old_mc, new_mc, index, key, state)
94
+ case duplicate_strategy
95
+ when :skip
96
+ state.skipped += 1
97
+ when :replace
98
+ replace_in_collection(existing, old_mc, new_mc)
99
+ index[key] = new_mc
100
+ when :merge
101
+ merge_concept(old_mc, new_mc)
102
+ end
103
+ end
104
+
105
+ def extract_all_concepts(xml_files)
106
+ xml_files.flat_map do |path|
107
+ extractor = TermExtractor.new(path)
108
+ terms = extractor.extract
109
+ terms.map { |t| @mapper.map(t) }
110
+ end
111
+ end
112
+
113
+ def dedup_concepts(concepts) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
114
+ seen = {}
115
+ conflicts = []
116
+ skipped = 0
117
+ unique = []
118
+
119
+ concepts.each do |mc|
120
+ key = concept_key(mc)
121
+ if key.first.empty? || seen[key].nil?
122
+ unique << mc
123
+ seen[key] = mc unless key.first.empty?
124
+ else
125
+ conflicts << DuplicateConflict.new(
126
+ new_concept: mc, existing_concept: seen[key], key: key,
127
+ )
128
+ skipped += apply_dedup_to_unique(unique, seen, mc, key)
129
+ end
130
+ end
131
+
132
+ [unique, conflicts, skipped]
133
+ end
134
+
135
+ def apply_dedup_to_unique(unique, seen, new_mc, key)
136
+ case duplicate_strategy
137
+ when :skip
138
+ 1
139
+ when :replace
140
+ unique.delete(seen[key])
141
+ unique << new_mc
142
+ seen[key] = new_mc
143
+ 0
144
+ when :merge
145
+ merge_concept(seen[key], new_mc)
146
+ 0
147
+ end
148
+ end
149
+
150
+ def concept_key(managed_concept)
151
+ designation = managed_concept.default_designation.to_s.downcase.strip
152
+ domain = begin
153
+ l10n = managed_concept.default_lang
154
+ l10n&.data&.domain.to_s.downcase.strip
155
+ end
156
+ [designation, domain]
157
+ end
158
+
159
+ def build_concept_index(collection)
160
+ index = {}
161
+ collection.each do |mc|
162
+ key = concept_key(mc)
163
+ index[key] = mc unless key.first.empty?
164
+ end
165
+ index
166
+ end
167
+
168
+ def merge_concept(existing_mc, new_mc)
169
+ new_mc.localizations.each do |l10n|
170
+ lang = l10n.language_code
171
+ if existing_mc.localization(lang).nil?
172
+ existing_mc.add_localization(l10n)
173
+ end
174
+ end
175
+ end
176
+
177
+ def replace_in_collection(collection, old_mc, new_mc)
178
+ collection.managed_concepts.delete(old_mc)
179
+ collection.store(new_mc)
180
+ end
181
+
182
+ def load_existing(path)
183
+ collection = ManagedConceptCollection.new
184
+ if path.end_with?(".gcr")
185
+ package = GcrPackage.load(path)
186
+ package.concepts.each { |mc| collection.store(mc) }
187
+ else
188
+ concepts = ConceptCollector.collect(path)
189
+ concepts.each { |mc| collection.store(mc) }
190
+ end
191
+ collection
192
+ end
193
+
194
+ def save_to_path(collection, path)
195
+ if path.end_with?(".gcr")
196
+ tmpdir = build_temp_dataset(collection.managed_concepts)
197
+ begin
198
+ GC.start
199
+ tmp_gcr = "#{path}.tmp.#{Process.pid}"
200
+ GcrPackage.create_from_directory(
201
+ tmpdir,
202
+ output: tmp_gcr,
203
+ shortname: File.basename(path, ".gcr"),
204
+ version: "1.0.0",
205
+ )
206
+ FileUtils.rm_f(path)
207
+ FileUtils.mv(tmp_gcr, path)
208
+ ensure
209
+ FileUtils.rm_rf(tmpdir)
210
+ FileUtils.rm_f(tmp_gcr)
211
+ end
212
+ else
213
+ save_dataset(collection.managed_concepts, path)
214
+ end
215
+ end
216
+
217
+ def save_dataset(concepts, dir)
218
+ concepts_dir = File.join(dir, "concepts")
219
+ FileUtils.mkdir_p(concepts_dir)
220
+ collection = ManagedConceptCollection.new
221
+ concepts.each { |mc| collection.store(mc) }
222
+ collection.save_grouped_concepts_to_files(concepts_dir)
223
+ end
224
+
225
+ def create_gcr(concepts, output, shortname:, version:, **opts)
226
+ tmpdir = build_temp_dataset(concepts)
227
+ begin
228
+ GcrPackage.create_from_directory(
229
+ tmpdir,
230
+ output: output,
231
+ shortname: shortname,
232
+ version: version,
233
+ **opts,
234
+ )
235
+ ensure
236
+ FileUtils.rm_rf(tmpdir)
237
+ end
238
+ end
239
+
240
+ def build_temp_dataset(concepts)
241
+ tmpdir = Dir.mktmpdir("glossarist-sts-import")
242
+ concepts_dir = File.join(tmpdir, "concepts")
243
+ FileUtils.mkdir_p(concepts_dir)
244
+
245
+ collection = ManagedConceptCollection.new
246
+ concepts.each { |mc| collection.store(mc) }
247
+ collection.save_grouped_concepts_to_files(concepts_dir)
248
+
249
+ tmpdir
250
+ end
251
+ end
252
+ end
253
+ end
@@ -0,0 +1,186 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Sts
5
+ class TermExtractor
6
+ def initialize(xml_path)
7
+ raw = File.read(xml_path)
8
+ @standard = ::Sts::IsoSts::Standard.from_xml(raw)
9
+ @source_ref = extract_source_ref
10
+ end
11
+
12
+ def extract
13
+ term_secs = collect_term_secs
14
+ term_secs.filter_map do |ts|
15
+ next unless ts.term_entry
16
+
17
+ build_extracted_term(ts)
18
+ end
19
+ end
20
+
21
+ private
22
+
23
+ def collect_term_secs
24
+ secs = []
25
+ walk_sections(@standard.body, secs) if @standard.body
26
+ secs
27
+ end
28
+
29
+ def walk_sections(container, collected)
30
+ collect_term_secs_from(container, collected)
31
+ walk_child_secs(container, collected)
32
+ end
33
+
34
+ def collect_term_secs_from(container, collected)
35
+ secs = container.term_sec
36
+ secs&.each do |ts|
37
+ collected << ts
38
+ walk_sections(ts, collected) if ts.term_sec&.any?
39
+ end
40
+ end
41
+
42
+ def walk_child_secs(container, collected)
43
+ secs = container_child_secs(container)
44
+ secs&.each { |s| walk_sections(s, collected) }
45
+ end
46
+
47
+ def container_child_secs(container)
48
+ case container
49
+ when ::Sts::IsoSts::Body, ::Sts::IsoSts::Sec
50
+ container.sec
51
+ end
52
+ end
53
+
54
+ def build_extracted_term(term_sec)
55
+ entry = term_sec.term_entry
56
+ label_text = extract_label(term_sec)
57
+
58
+ lang_sets = entry.lang_set.filter_map do |ls|
59
+ build_lang_set(ls)
60
+ end
61
+
62
+ Sts::ExtractedTerm.new(
63
+ id: entry.id,
64
+ label: label_text,
65
+ source_ref: @source_ref,
66
+ lang_sets: lang_sets,
67
+ )
68
+ end
69
+
70
+ def extract_label(term_sec)
71
+ label = term_sec.label
72
+ return nil unless label
73
+
74
+ label.content&.join.to_s.strip
75
+ end
76
+
77
+ def build_lang_set(lang_set) # rubocop:disable Metrics/AbcSize
78
+ lang_code = Sts.convert_language_code(lang_set.lang.to_s)
79
+
80
+ Sts::ExtractedLangSet.new(
81
+ language_code: lang_code,
82
+ definition_text: extract_definition_text(lang_set),
83
+ note_texts: extract_note_texts(lang_set),
84
+ example_texts: extract_example_texts(lang_set),
85
+ source_texts: extract_source_texts(lang_set),
86
+ domain: extract_subject_field(lang_set),
87
+ designations: lang_set.tig.filter_map do |tig|
88
+ build_designation(tig)
89
+ end,
90
+ )
91
+ end
92
+
93
+ def extract_definition_text(lang_set)
94
+ definitions = lang_set.definition
95
+ return "" unless definitions&.any?
96
+
97
+ definitions.first.value&.join.to_s.strip
98
+ end
99
+
100
+ def extract_note_texts(lang_set)
101
+ lang_set.note.filter_map do |n|
102
+ text = n.value&.join.to_s.strip
103
+ text unless text.empty?
104
+ end
105
+ end
106
+
107
+ def extract_example_texts(lang_set)
108
+ lang_set.example.filter_map do |e|
109
+ text = e.value&.join.to_s.strip
110
+ text unless text.empty?
111
+ end
112
+ end
113
+
114
+ def extract_source_texts(lang_set)
115
+ lang_set.source.filter_map do |s|
116
+ text = s.value&.join.to_s.strip
117
+ text unless text.empty?
118
+ end
119
+ end
120
+
121
+ def extract_subject_field(lang_set)
122
+ fields = lang_set.subject_field
123
+ return nil unless fields&.any?
124
+
125
+ text = fields.first.value&.join.to_s.strip
126
+ text unless text.empty?
127
+ end
128
+
129
+ def build_designation(tig)
130
+ Sts::ExtractedDesignation.new(
131
+ term: resolve_term_text(tig),
132
+ type: map_term_type(tig),
133
+ normative_status: map_normative_status(tig),
134
+ part_of_speech: tig.pos&.value,
135
+ abbreviation_type: map_abbreviation_type(tig),
136
+ )
137
+ end
138
+
139
+ def resolve_term_text(tig)
140
+ tig.term&.value&.join.to_s.strip
141
+ end
142
+
143
+ def map_term_type(tig)
144
+ raw = tig.term_type&.value.to_s
145
+ mapped = TERM_TYPE_MAP[raw]
146
+ mapped.nil? || raw.empty? ? "expression" : mapped
147
+ end
148
+
149
+ def map_abbreviation_type(tig)
150
+ raw = tig.term_type&.value.to_s
151
+ return nil unless TERM_TYPE_MAP[raw] == "abbreviation"
152
+
153
+ raw == "acronym" ? "acronym" : "truncation"
154
+ end
155
+
156
+ def map_normative_status(tig)
157
+ NORMATIVE_STATUS_MAP[tig.normative_authorization&.value.to_s]
158
+ end
159
+
160
+ def extract_source_ref # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
161
+ front = @standard.front
162
+ return nil unless front
163
+
164
+ meta = front.iso_meta || front.std_meta
165
+ return nil unless meta
166
+
167
+ refs = meta.std_ref
168
+ return nil unless refs&.any?
169
+
170
+ best_ref = refs.find { |r| r.type == "dated" } ||
171
+ refs.find { |r| r.type == "undated" } ||
172
+ refs.first
173
+
174
+ extract_ref_text(best_ref)
175
+ end
176
+
177
+ def extract_ref_text(ref)
178
+ if ref.value.is_a?(String)
179
+ ref.value.to_s.strip
180
+ else
181
+ ref.content&.join.to_s.strip
182
+ end
183
+ end
184
+ end
185
+ end
186
+ end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ module Sts
5
+ class TermMapper
6
+ def map(extracted_term)
7
+ concept_id = extracted_term.label || extracted_term.id
8
+
9
+ mc = Glossarist::ManagedConcept.new(data: { id: concept_id })
10
+
11
+ extracted_term.lang_sets.each do |ls|
12
+ mc.add_localization(build_localized_concept(ls,
13
+ extracted_term.source_ref))
14
+ end
15
+
16
+ mc
17
+ end
18
+
19
+ private
20
+
21
+ def build_localized_concept(lang_set, source_ref)
22
+ terms = lang_set.designations.map { |d| build_designation(d) }
23
+
24
+ Glossarist::LocalizedConcept.of_yaml(
25
+ "data" => {
26
+ "language_code" => lang_set.language_code,
27
+ "terms" => terms,
28
+ "definition" => build_definitions(lang_set.definition_text),
29
+ "notes" => build_detailed_definitions(lang_set.note_texts),
30
+ "examples" => build_detailed_definitions(lang_set.example_texts),
31
+ "sources" => build_sources(lang_set.source_texts, source_ref),
32
+ "domain" => lang_set.domain,
33
+ "entry_status" => "valid",
34
+ },
35
+ )
36
+ end
37
+
38
+ def build_definitions(text)
39
+ return [] unless text && !text.empty?
40
+
41
+ [{ "content" => text }]
42
+ end
43
+
44
+ def build_detailed_definitions(texts)
45
+ texts.filter_map do |text|
46
+ next if text.empty?
47
+
48
+ { "content" => text }
49
+ end
50
+ end
51
+
52
+ def build_designation(ext_desig)
53
+ case ext_desig.type
54
+ when "abbreviation"
55
+ build_abbreviation_designation(ext_desig)
56
+ when "symbol"
57
+ build_symbol_designation(ext_desig)
58
+ else
59
+ build_expression_designation(ext_desig)
60
+ end
61
+ end
62
+
63
+ def build_expression_designation(ext_desig)
64
+ hash = {
65
+ "type" => "expression",
66
+ "designation" => ext_desig.term,
67
+ "normative_status" => ext_desig.normative_status,
68
+ }.compact
69
+
70
+ if ext_desig.part_of_speech
71
+ hash["grammar_info"] =
72
+ [{ "part_of_speech" => ext_desig.part_of_speech }]
73
+ end
74
+
75
+ hash
76
+ end
77
+
78
+ def build_abbreviation_designation(ext_desig)
79
+ {
80
+ "type" => "abbreviation",
81
+ "designation" => ext_desig.term,
82
+ "normative_status" => ext_desig.normative_status,
83
+ "abbreviation_type" => ext_desig.abbreviation_type,
84
+ }.compact
85
+ end
86
+
87
+ def build_symbol_designation(ext_desig)
88
+ {
89
+ "type" => "symbol",
90
+ "designation" => ext_desig.term,
91
+ "normative_status" => ext_desig.normative_status,
92
+ }.compact
93
+ end
94
+
95
+ def build_sources(source_texts, source_ref)
96
+ sources = []
97
+ if source_ref
98
+ sources << {
99
+ "status" => "identical",
100
+ "type" => "authoritative",
101
+ "origin" => { "text" => source_ref },
102
+ }
103
+ end
104
+
105
+ source_texts.each do |text|
106
+ next if text.empty?
107
+
108
+ sources << {
109
+ "type" => "authoritative",
110
+ "origin" => { "text" => text },
111
+ }
112
+ end
113
+
114
+ sources
115
+ end
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sts"
4
+
5
+ module Glossarist
6
+ module Sts
7
+ autoload :ExtractedDesignation, "#{__dir__}/sts/extracted_designation"
8
+ autoload :ExtractedLangSet, "#{__dir__}/sts/extracted_lang_set"
9
+ autoload :ExtractedTerm, "#{__dir__}/sts/extracted_term"
10
+ autoload :ImportResult, "#{__dir__}/sts/import_result"
11
+ autoload :Importer, "#{__dir__}/sts/importer"
12
+ autoload :TermExtractor, "#{__dir__}/sts/term_extractor"
13
+ autoload :TermMapper, "#{__dir__}/sts/term_mapper"
14
+
15
+ ISO_639_1_TO_639_2 = {
16
+ "aa" => "aar", "ab" => "abk", "af" => "afr", "ak" => "aka",
17
+ "am" => "amh", "an" => "arg", "ar" => "ara", "as" => "asm",
18
+ "av" => "ava", "ay" => "aym", "az" => "aze", "ba" => "bak",
19
+ "be" => "bel", "bg" => "bul", "bh" => "bih", "bi" => "bis",
20
+ "bm" => "bam", "bn" => "ben", "bo" => "bod", "br" => "bre",
21
+ "bs" => "bos", "ca" => "cat", "ce" => "che", "ch" => "cha",
22
+ "co" => "cos", "cr" => "cre", "cs" => "ces", "cu" => "chu",
23
+ "cv" => "chv", "cy" => "cym", "da" => "dan", "de" => "deu",
24
+ "dv" => "div", "dz" => "dzo", "ee" => "ewe", "el" => "ell",
25
+ "en" => "eng", "eo" => "epo", "es" => "spa", "et" => "est",
26
+ "eu" => "eus", "fa" => "fas", "ff" => "ful", "fi" => "fin",
27
+ "fj" => "fij", "fo" => "fao", "fr" => "fra", "fy" => "fry",
28
+ "ga" => "gle", "gd" => "gla", "gl" => "glg", "gn" => "grn",
29
+ "gu" => "guj", "gv" => "glv", "ha" => "hau", "he" => "heb",
30
+ "hi" => "hin", "ho" => "hmo", "hr" => "hrv", "ht" => "hat",
31
+ "hu" => "hun", "hy" => "hye", "hz" => "her", "ia" => "ina",
32
+ "id" => "ind", "ie" => "ile", "ig" => "ibo", "ii" => "iii",
33
+ "ik" => "ipk", "io" => "ido", "is" => "isl", "it" => "ita",
34
+ "iu" => "iku", "ja" => "jpn", "jv" => "jav", "ka" => "kat",
35
+ "kg" => "kon", "ki" => "kik", "kj" => "kua", "kk" => "kaz",
36
+ "kl" => "kal", "km" => "khm", "kn" => "kan", "ko" => "kor",
37
+ "kr" => "kau", "ks" => "kas", "ku" => "kur", "kv" => "kom",
38
+ "kw" => "cor", "ky" => "kir", "la" => "lat", "lb" => "ltz",
39
+ "lg" => "lug", "li" => "lim", "ln" => "lin", "lo" => "lao",
40
+ "lt" => "lit", "lu" => "lub", "lv" => "lav", "mg" => "mlg",
41
+ "mh" => "mah", "mi" => "mri", "mk" => "mkd", "ml" => "mal",
42
+ "mn" => "mon", "mr" => "mar", "ms" => "msa", "mt" => "mlt",
43
+ "my" => "mya", "na" => "nau", "nb" => "nob", "nd" => "nde",
44
+ "ne" => "nep", "ng" => "ndo", "nl" => "nld", "nn" => "nno",
45
+ "no" => "nor", "nr" => "nbl", "nv" => "nav", "ny" => "nya",
46
+ "oc" => "oci", "oj" => "oji", "om" => "orm", "or" => "ori",
47
+ "os" => "oss", "pa" => "pan", "pi" => "pli", "pl" => "pol",
48
+ "ps" => "pus", "pt" => "por", "qu" => "que", "rm" => "roh",
49
+ "rn" => "run", "ro" => "ron", "ru" => "rus", "rw" => "kin",
50
+ "sa" => "san", "sc" => "srd", "sd" => "snd", "se" => "sme",
51
+ "sg" => "sag", "si" => "sin", "sk" => "slk", "sl" => "slv",
52
+ "sm" => "smo", "sn" => "sna", "so" => "som", "sq" => "sqi",
53
+ "sr" => "srp", "ss" => "ssw", "st" => "sot", "su" => "sun",
54
+ "sv" => "swe", "sw" => "swa", "ta" => "tam", "te" => "tel",
55
+ "tg" => "tgk", "th" => "tha", "ti" => "tir", "tk" => "tuk",
56
+ "tl" => "tgl", "tn" => "tsn", "to" => "ton", "tr" => "tur",
57
+ "ts" => "tso", "tt" => "tat", "tw" => "twi", "ty" => "tah",
58
+ "ug" => "uig", "uk" => "ukr", "ur" => "urd", "uz" => "uzb",
59
+ "ve" => "ven", "vi" => "vie", "vo" => "vol", "wa" => "wln",
60
+ "wo" => "wol", "xh" => "xho", "yi" => "yid", "yo" => "yor",
61
+ "za" => "zha", "zh" => "zho", "zu" => "zul"
62
+ }.freeze
63
+
64
+ TERM_TYPE_MAP = {
65
+ "acronym" => "abbreviation",
66
+ "abbreviation" => "abbreviation",
67
+ "fullForm" => "expression",
68
+ "symbol" => "symbol",
69
+ "variant" => "expression",
70
+ "equation" => "expression",
71
+ "formula" => "expression",
72
+ }.freeze
73
+
74
+ NORMATIVE_STATUS_MAP = {
75
+ "preferredTerm" => "preferred",
76
+ "admittedTerm" => "admitted",
77
+ "deprecatedTerm" => "deprecated",
78
+ }.freeze
79
+
80
+ def self.convert_language_code(code)
81
+ return code if code.nil?
82
+ return code if code.length == 3
83
+
84
+ ISO_639_1_TO_639_2[code] || code
85
+ end
86
+ end
87
+ end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../rdf"
4
-
5
3
  module Glossarist
6
4
  module Transforms
7
5
  class ConceptToSkosTransform
@@ -4,5 +4,5 @@
4
4
  #
5
5
 
6
6
  module Glossarist
7
- VERSION = "2.6.2"
7
+ VERSION = "2.6.3"
8
8
  end
data/lib/glossarist.rb CHANGED
@@ -7,14 +7,13 @@ require "psych"
7
7
  require "thor"
8
8
  require "lutaml/model"
9
9
 
10
- require_relative "glossarist/glossary_definition"
11
-
12
10
  module Glossarist
13
11
  autoload :Asset, "glossarist/asset"
14
12
  autoload :Citation, "glossarist/citation"
15
13
  autoload :CLI, "glossarist/cli"
16
14
  autoload :CollectionConfig, "glossarist/collection_config"
17
15
  autoload :Collection, "glossarist/collection"
16
+ autoload :Collections, "glossarist/collections"
18
17
  autoload :Concept, "glossarist/concept"
19
18
  autoload :ConceptData, "glossarist/concept_data"
20
19
  autoload :ConceptReference, "glossarist/concept_reference"
@@ -35,10 +34,10 @@ module Glossarist
35
34
  autoload :DetailedDefinition, "glossarist/detailed_definition"
36
35
  autoload :Designation, "glossarist/designation"
37
36
  autoload :Error, "glossarist/error"
38
- autoload :GcrPackage, "glossarist/gcr_package"
39
- autoload :GcrMetadata, "glossarist/gcr_metadata"
40
- autoload :GcrStatistics, "glossarist/gcr_statistics"
41
- autoload :GcrValidator, "glossarist/gcr_validator"
37
+ autoload :GcrPackage, "glossarist/gcr_package"
38
+ autoload :GcrMetadata, "glossarist/gcr_metadata"
39
+ autoload :GcrStatistics, "glossarist/gcr_statistics"
40
+ autoload :GcrValidator, "glossarist/gcr_validator"
42
41
  autoload :InvalidTypeError, "glossarist/error/invalid_type_error"
43
42
  autoload :InvalidLanguageCodeError,
44
43
  "glossarist/error/invalid_language_code_error"
@@ -52,16 +51,20 @@ module Glossarist
52
51
  autoload :ManagedConceptData, "glossarist/managed_concept_data"
53
52
  autoload :NonVerbRep, "glossarist/non_verb_rep"
54
53
  autoload :RelatedConcept, "glossarist/related_concept"
54
+ autoload :Rdf, "glossarist/rdf"
55
+ autoload :Sts, "glossarist/sts"
56
+ autoload :Transforms, "glossarist/transforms"
55
57
  autoload :SchemaMigration, "glossarist/schema_migration"
56
58
  autoload :UrnResolver, "glossarist/urn_resolver"
57
59
  autoload :Utilities, "glossarist/utilities"
58
- autoload :RegisterData, "glossarist/register_data"
60
+ autoload :RegisterData, "glossarist/register_data"
59
61
  autoload :ValidationResult, "glossarist/validation_result"
60
62
  autoload :V1, "glossarist/v1"
61
63
  end
62
64
 
63
65
  require_relative "glossarist/version"
64
66
  require_relative "glossarist/collections"
67
+ require_relative "glossarist/glossary_definition"
65
68
 
66
69
  module Glossarist
67
70
  LANG_CODES = %w[eng ara deu fra spa ita jpn kor pol por srp swe zho rus fin
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: glossarist
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.2
4
+ version: 2.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-05-07 00:00:00.000000000 Z
11
+ date: 2026-05-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: lutaml-model
@@ -122,6 +122,7 @@ files:
122
122
  - lib/glossarist/citation.rb
123
123
  - lib/glossarist/cli.rb
124
124
  - lib/glossarist/cli/export_command.rb
125
+ - lib/glossarist/cli/import_command.rb
125
126
  - lib/glossarist/cli/package_command.rb
126
127
  - lib/glossarist/cli/upgrade_command.rb
127
128
  - lib/glossarist/cli/validate_command.rb
@@ -192,6 +193,14 @@ files:
192
193
  - lib/glossarist/resolution_adapter/remote.rb
193
194
  - lib/glossarist/resolution_adapter/route.rb
194
195
  - lib/glossarist/schema_migration.rb
196
+ - lib/glossarist/sts.rb
197
+ - lib/glossarist/sts/extracted_designation.rb
198
+ - lib/glossarist/sts/extracted_lang_set.rb
199
+ - lib/glossarist/sts/extracted_term.rb
200
+ - lib/glossarist/sts/import_result.rb
201
+ - lib/glossarist/sts/importer.rb
202
+ - lib/glossarist/sts/term_extractor.rb
203
+ - lib/glossarist/sts/term_mapper.rb
195
204
  - lib/glossarist/transforms.rb
196
205
  - lib/glossarist/transforms/concept_to_skos_transform.rb
197
206
  - lib/glossarist/transforms/concept_to_tbx_transform.rb