glossarist 2.5.0 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.rubocop_todo.yml +50 -146
  4. data/CLAUDE.md +33 -7
  5. data/Gemfile +20 -19
  6. data/README.adoc +383 -7
  7. data/TODO.integration/01-gcr-package-cli.md +180 -0
  8. data/exe/glossarist +1 -53
  9. data/glossarist.gemspec +1 -0
  10. data/lib/glossarist/asset.rb +1 -1
  11. data/lib/glossarist/citation.rb +1 -1
  12. data/lib/glossarist/cli/package_command.rb +32 -0
  13. data/lib/glossarist/cli/upgrade_command.rb +34 -0
  14. data/lib/glossarist/cli/validate_command.rb +56 -0
  15. data/lib/glossarist/cli.rb +105 -0
  16. data/lib/glossarist/collection_config.rb +23 -0
  17. data/lib/glossarist/collections.rb +15 -8
  18. data/lib/glossarist/concept.rb +1 -1
  19. data/lib/glossarist/concept_collector.rb +153 -0
  20. data/lib/glossarist/concept_data.rb +3 -1
  21. data/lib/glossarist/concept_date.rb +1 -1
  22. data/lib/glossarist/concept_document.rb +29 -0
  23. data/lib/glossarist/concept_enricher.rb +34 -0
  24. data/lib/glossarist/concept_manager.rb +31 -49
  25. data/lib/glossarist/concept_reference.rb +45 -0
  26. data/lib/glossarist/concept_source.rb +1 -1
  27. data/lib/glossarist/concept_validator.rb +101 -0
  28. data/lib/glossarist/custom_locality.rb +1 -1
  29. data/lib/glossarist/dataset_validator.rb +69 -0
  30. data/lib/glossarist/designation/abbreviation.rb +1 -1
  31. data/lib/glossarist/designation/base.rb +11 -4
  32. data/lib/glossarist/designation/expression.rb +1 -1
  33. data/lib/glossarist/designation/grammar_info.rb +1 -1
  34. data/lib/glossarist/designation/graphical_symbol.rb +1 -1
  35. data/lib/glossarist/designation/letter_symbol.rb +1 -1
  36. data/lib/glossarist/designation/symbol.rb +2 -2
  37. data/lib/glossarist/detailed_definition.rb +1 -1
  38. data/lib/glossarist/gcr_metadata.rb +87 -0
  39. data/lib/glossarist/gcr_package.rb +223 -0
  40. data/lib/glossarist/gcr_statistics.rb +35 -0
  41. data/lib/glossarist/gcr_validator.rb +98 -0
  42. data/lib/glossarist/locality.rb +1 -1
  43. data/lib/glossarist/localized_concept.rb +12 -1
  44. data/lib/glossarist/managed_concept.rb +1 -1
  45. data/lib/glossarist/managed_concept_data.rb +5 -2
  46. data/lib/glossarist/non_verb_rep.rb +1 -1
  47. data/lib/glossarist/reference_extractor.rb +227 -0
  48. data/lib/glossarist/reference_resolver.rb +169 -0
  49. data/lib/glossarist/register_data.rb +39 -0
  50. data/lib/glossarist/related_concept.rb +1 -1
  51. data/lib/glossarist/resolution_adapter/local.rb +73 -0
  52. data/lib/glossarist/resolution_adapter/package.rb +22 -0
  53. data/lib/glossarist/resolution_adapter/remote.rb +60 -0
  54. data/lib/glossarist/resolution_adapter/route.rb +34 -0
  55. data/lib/glossarist/resolution_adapter.rb +14 -0
  56. data/lib/glossarist/schema_migration.rb +334 -0
  57. data/lib/glossarist/urn_resolver.rb +71 -0
  58. data/lib/glossarist/v1/concept.rb +81 -0
  59. data/lib/glossarist/v1/cross_references.rb +41 -0
  60. data/lib/glossarist/v1/register.rb +50 -0
  61. data/lib/glossarist/v1.rb +9 -0
  62. data/lib/glossarist/validation_result.rb +38 -0
  63. data/lib/glossarist/version.rb +1 -1
  64. data/lib/glossarist.rb +29 -4
  65. data/relaton-bib-2.0.0.gem +0 -0
  66. data/relaton-bib-2.1.0.gem +0 -0
  67. data/relaton-cen-2.0.0.gem +0 -0
  68. data/relaton-iec-2.0.0.gem +0 -0
  69. data/relaton-iso-2.0.0.gem +0 -0
  70. data/relaton-itu-2.0.0.gem +0 -0
  71. metadata +60 -7
data/README.adoc CHANGED
@@ -344,30 +344,406 @@ modification:: A description of the modification to the cited definition of the
344
344
 
345
345
  == Commands
346
346
 
347
- `generate_latex`:: Convert Concepts to Latex format
347
+ === generate_latex
348
+
349
+ Convert concepts to LaTeX format.
348
350
 
349
- === Usage:
350
351
  [,bash]
351
352
  ----
352
- glossarist generate_latex p, --concepts-path=CONCEPTS_PATH
353
+ glossarist generate_latex -p PATH_TO_CONCEPTS
353
354
  ----
354
355
 
355
- === Options:
356
+ Options:
356
357
  [cols="1,1"]
357
358
  |===
358
359
  |p, --concepts-path
359
360
  |Path to yaml concepts directory
360
361
 
361
362
  |l, --latex-concepts
362
- |File path having list of concepts that should be converted to LATEX format. If not provided all the concepts will be converted to the latex format
363
+ |Path to a file listing the concepts that should be converted to LaTeX format
363
364
 
364
365
  |o, --output-file
365
- |Output file path. By default the output will pe printed to the console
366
+ |Output file path
366
367
 
367
368
  |e, --extra-attributes
368
- |List of extra attributes that are not in standard Glossarist Concept model. eg -e one two three
369
+ |List of extra attributes that are not in the standard Glossarist Concept model
370
+ |===
371
+
372
+ === package
373
+
374
+ Create a `.gcr` ZIP archive from a concept dataset.
375
+
376
+ [,bash]
377
+ ----
378
+ glossarist package DIR -o output.gcr --shortname mydataset --version 1.0.0 --uri-prefix urn:iso:std:iso:19111
379
+ ----
380
+
381
+ Options:
382
+ [cols="1,1"]
383
+ |===
384
+ |o, --output (required)
385
+ |Output `.gcr` file path
386
+
387
+ |--shortname (required)
388
+ |Machine-readable dataset shortname (e.g. `iev`, `iso19111`)
389
+
390
+ |--version (required)
391
+ |Semantic version (e.g. `1.0.0`)
392
+
393
+ |--title
394
+ |Human-readable dataset title
395
+
396
+ |--description
397
+ |Dataset description
398
+
399
+ |--owner
400
+ |Dataset owner
401
+
402
+ |--register-yaml
403
+ |Path to register.yaml to include in package
404
+
405
+ |--uri-prefix
406
+ |URI namespace this dataset provides (e.g. `urn:iec:std:iec:60050`)
407
+
408
+ |--tags
409
+ |Tags for the dataset
410
+ |===
411
+
412
+ Ruby API:
413
+ [,ruby]
414
+ ----
415
+ GcrPackage.create_from_directory(
416
+ "path/to/dataset",
417
+ output: "output.gcr",
418
+ shortname: "mydataset",
419
+ version: "1.0.0",
420
+ uri_prefix: "urn:iso:std:iso:19111",
421
+ )
422
+ ----
423
+
424
+ === validate
425
+
426
+ Validate a dataset directory or `.gcr` file for schema compliance.
427
+
428
+ [,bash]
429
+ ----
430
+ glossarist validate PATH
431
+ glossarist validate PATH --reference-path path/to/gcrs/
432
+ ----
433
+
434
+ Options:
435
+ [cols="1,1"]
436
+ |===
437
+ |--strict
438
+ |Treat warnings as errors
439
+
440
+ |--format
441
+ |Output format: `text`, `json`, or `yaml`
442
+
443
+ |--reference-path
444
+ |Path to directory of `.gcr` files for cross-dataset reference validation
445
+ |===
446
+
447
+ Ruby API:
448
+ [,ruby]
449
+ ----
450
+ result = DatasetValidator.new.validate("path/to/dataset")
451
+ result = DatasetValidator.new.validate("path/to/dataset", reference_path: "gcrs/")
452
+ result.valid? # => true/false
453
+ result.errors # => [...]
454
+ result.warnings # => [...]
455
+ ----
456
+
457
+ === upgrade
458
+
459
+ Upgrade a dataset to the current schema version.
460
+
461
+ [,bash]
462
+ ----
463
+ glossarist upgrade SOURCE_DIR -o OUTPUT_DIR
464
+ ----
465
+
466
+ == Glossarist Concept Repository (GCR)
467
+
468
+ A **GCR** (Glossarist Concept Repository) is a distributable, versioned ZIP archive containing glossary concepts and metadata. GCR packages are created from v2 datasets.
469
+
470
+ === GCR Package Format
471
+
472
+ A `.gcr` file is a ZIP archive with the following structure:
473
+
474
+ ----
475
+ metadata.yaml # Package metadata
476
+ register.yaml # Optional register information
477
+ concepts/ # Concept YAML files
478
+ 102-01-01.yaml
479
+ 200.yaml
480
+ ----
481
+
482
+ === Creating a GCR Package
483
+
484
+ CLI:
485
+ [,bash]
486
+ ----
487
+ glossarist package path/to/v2-dataset -o mydataset-1.0.0.gcr \
488
+ --shortname mydataset --version 1.0.0 --uri-prefix urn:iso:std:iso:19111
489
+ ----
490
+
491
+ Ruby API:
492
+ [,ruby]
493
+ ----
494
+ GcrPackage.create_from_directory(
495
+ "path/to/v2-dataset",
496
+ output: "mydataset-1.0.0.gcr",
497
+ shortname: "mydataset",
498
+ version: "1.0.0",
499
+ uri_prefix: "urn:iso:std:iso:19111",
500
+ title: "My Dataset",
501
+ description: "A terminology dataset",
502
+ )
503
+ ----
504
+
505
+ === Loading a GCR Package
506
+
507
+ [,ruby]
508
+ ----
509
+ pkg = GcrPackage.load("mydataset-1.0.0.gcr")
510
+ pkg.metadata # => Hash with metadata fields
511
+ pkg.concepts # => Array of concept hashes
512
+ ----
513
+
514
+ === GCR Metadata
515
+
516
+ Metadata fields in `metadata.yaml`:
517
+
518
+ [cols="1,1"]
519
+ |===
520
+ |shortname
521
+ |Machine-readable dataset identifier (e.g. `iev`)
522
+
523
+ |version
524
+ |Semantic version (e.g. `1.0.0`)
525
+
526
+ |title
527
+ |Human-readable title
528
+
529
+ |description
530
+ |Dataset description
531
+
532
+ |owner
533
+ |Dataset owner
534
+
535
+ |tags
536
+ |Array of tags
537
+
538
+ |concept_count
539
+ |Number of concepts in the package
540
+
541
+ |languages
542
+ |Array of language codes present
543
+
544
+ |created_at
545
+ |ISO 8601 timestamp of package creation
546
+
547
+ |glossarist_version
548
+ |Version of the Glossarist gem used
549
+
550
+ |schema_version
551
+ |Schema version of the package format
552
+
553
+ |uri_prefix
554
+ |URI namespace this dataset provides (e.g. `urn:iec:std:iec:60050`)
555
+
556
+ |external_references
557
+ |Array of `{uri: "..."}` for URI namespaces this dataset references
558
+ |===
559
+
560
+ === GCR Statistics
561
+
562
+ [,ruby]
563
+ ----
564
+ stats = GcrStatistics.from_concepts(concepts)
565
+ stats.total_concepts # => 150
566
+ stats.languages # => ["eng", "fra", "deu"]
567
+ stats.concepts_by_status # => { "valid" => 140, "draft" => 10 }
568
+ stats.concepts_with_definitions # => 148
569
+ stats.concepts_with_sources # => 130
570
+ ----
571
+
572
+ == Concept Mentions
573
+
574
+ Concepts can reference other concepts within the same dataset (intra-set) or in different datasets (inter-set) using inline mention syntax. All mentions use double braces `{{...}}`.
575
+
576
+ === Syntax
577
+
578
+ The concept mention syntax mirrors HTML `<a href="id">display_text</a>` — the display text is independent of the target concept's canonical designation.
579
+
580
+ [cols="1,2,2,2"]
581
+ |===
582
+ |Form |Syntax |Example |Resolution
583
+
584
+ |ID only
585
+ |`{{ID}}`
586
+ |`{{200}}`
587
+ |Intra-set: concept 200, auto-display
588
+
589
+ |ID + display
590
+ |`{{TEXT, ID}}`
591
+ |`{{geodetic latitude, 200}}`
592
+ |Intra-set: concept 200, custom display
593
+
594
+ |Designation
595
+ |`{{TEXT}}`
596
+ |`{{geodetic latitude}}`
597
+ |Intra-set: find by designation
598
+
599
+ |URN + display
600
+ |`{{TEXT, URN}}`
601
+ |`{{equality, urn:iec:std:iec:60050-102-01-01}}`
602
+ |Inter-set: resolve by URN
603
+
604
+ |URN only
605
+ |`{{URN}}`
606
+ |`{{urn:iec:std:iec:60050-102-01-01}}`
607
+ |Inter-set: resolve URN, auto-display
369
608
  |===
370
609
 
610
+ === URN Schemes
611
+
612
+ IEC URN (IEV):: `urn:iec:std:iec:60050-{code}` — source is `urn:iec:std:iec:60050`, concept_id is the IEV code
613
+ ISO URN (RFC 5141):: `urn:iso:std:iso:{std}:...:term:{id}` — source is `urn:iso:std:iso:{std}`, concept_id is the term ID
614
+
615
+ === Extracting Mentions (Ruby API)
616
+
617
+ [,ruby]
618
+ ----
619
+ extractor = ReferenceExtractor.new
620
+
621
+ # From a text string
622
+ refs = extractor.extract_from_text("See {{equality, urn:iec:std:iec:60050-102-01-01}} and {{lat, 200}}")
623
+ # => [ConceptReference(term: "equality", concept_id: "102-01-01",
624
+ # source: "urn:iec:std:iec:60050", ref_type: "urn"),
625
+ # ConceptReference(term: "lat", concept_id: "200",
626
+ # source: nil, ref_type: "local")]
627
+
628
+ # From all text fields in a localized concept
629
+ refs = extractor.extract_from_localized(lc_hash)
630
+
631
+ # From all language blocks in a concept
632
+ refs = extractor.extract_from_concept_hash(concept_hash)
633
+ ----
634
+
635
+ === Resolving Mentions (Ruby API)
636
+
637
+ Resolution uses an adapter chain: route overrides → local → package → remote.
638
+
639
+ [,ruby]
640
+ ----
641
+ resolver = ReferenceResolver.new
642
+
643
+ # Register the current dataset for intra-set resolution
644
+ resolver.register_self(concepts)
645
+
646
+ # Register co-loaded GCRs with their URI prefixes
647
+ resolver.register_package(iev_concepts, uri_prefix: "urn:iec:std:iec:60050")
648
+ resolver.register_package(iso_concepts, uri_prefix: "urn:iso:std:iso:19111")
649
+
650
+ # Add URI route overrides (e.g. author used wrong URI)
651
+ resolver.add_route(from: "urn:iso:std:iso:19115", to: "urn:iso:std:iso:19111")
652
+
653
+ # Resolve a single reference
654
+ ref = ConceptReference.new(term: "equality", concept_id: "102-01-01",
655
+ source: "urn:iec:std:iec:60050", ref_type: "urn")
656
+ resolver.resolve(ref) # => concept hash
657
+
658
+ # Validate all references in a package
659
+ result = resolver.validate_all(concepts)
660
+ result.errors # => structural errors
661
+ result.warnings # => unresolvable references
662
+ ----
663
+
664
+ === GCR Collection & Routing
665
+
666
+ When multiple GCRs are placed together in a directory, a `collection.yaml` configures resolution:
667
+
668
+ [,yaml]
669
+ ----
670
+ # collection.yaml
671
+ packages:
672
+ - file: iev-2.0.0.gcr
673
+ - file: iso19111-1.0.0.gcr
674
+
675
+ routes:
676
+ - from: "urn:iso:std:iso:19115"
677
+ to: "urn:iso:std:iso:19111"
678
+
679
+ remote:
680
+ - uri_prefix: "urn:iec:std:iec:60050"
681
+ endpoint: "https://vocabulary.example.org/api/concepts"
682
+ ----
683
+
684
+ [,ruby]
685
+ ----
686
+ resolver = ReferenceResolver.new
687
+ resolver.load_collection("path/to/gcr_collection/")
688
+ # Packages auto-registered with their uri_prefix from metadata
689
+ # Route overrides applied
690
+ # Remote endpoints registered
691
+ ----
692
+
693
+ === Resolution Adapters
694
+
695
+ The resolution framework uses a chain of adapters, each implementing `resolve(reference) → concept_hash | nil`:
696
+
697
+ LocalAdapter:: Resolves intra-set references by concept ID or designation lookup
698
+ PackageAdapter:: Resolves inter-set references by matching `source` URI to a GCR's `uri_prefix`
699
+ RouteAdapter:: Remaps incorrect source URIs before delegation
700
+ RemoteAdapter:: Resolves via HTTP to an online GCR endpoint
701
+
702
+ === URN-to-HTTP Resolution
703
+
704
+ Concept mentions rendered as hyperlinks need HTTP URLs. The `UrnResolver` converts URNs to their canonical web locations:
705
+
706
+ [,ruby]
707
+ ----
708
+ # Class-level convenience
709
+ url = UrnResolver.resolve("urn:iec:std:iec:60050-102-01-01")
710
+ # => "https://www.electropedia.org/iev/iev.nsf/display?openform&ievref=102-01-01"
711
+
712
+ url = UrnResolver.resolve("urn:iso:std:iso:19111:ed-3:v1:en:term:3.1.32")
713
+ # => "https://www.iso.org/obp/ui/#iso:std:iso:19111:ed-3:v1:en:term:3.1.32"
714
+
715
+ # Also accepts ConceptReference objects
716
+ ref = ConceptReference.new(term: "equality", concept_id: "102-01-01",
717
+ source: "urn:iec:std:iec:60050", ref_type: "urn")
718
+ url = UrnResolver.resolve(ref)
719
+ # => "https://www.electropedia.org/iev/iev.nsf/display?openform&ievref=102-01-01"
720
+ ----
721
+
722
+ Built-in mappings:
723
+
724
+ [cols="1,1,1"]
725
+ |===
726
+ |URN Prefix |Target |Example URL
727
+
728
+ |`urn:iec:std:iec:60050-*`
729
+ |IEC Electropedia
730
+ |`electropedia.org/iev/iev.nsf/display?openform&ievref=102-01-01`
731
+
732
+ |`urn:iso:*`
733
+ |ISO Online Browsing Platform
734
+ |`iso.org/obp/ui/#iso:std:iso:19111:term:3.1.32`
735
+ |===
736
+
737
+ Register custom schemes:
738
+
739
+ [,ruby]
740
+ ----
741
+ resolver = UrnResolver.new
742
+ resolver.register_scheme("urn:example:") do |urn|
743
+ "https://example.org/concepts/#{urn.sub('urn:example:', '')}"
744
+ end
745
+ ----
746
+
371
747
  == Credits
372
748
 
373
749
  This gem is developed, maintained and funded by
@@ -0,0 +1,180 @@
1
+ # 01 — GCR Packaging CLI with Versioning
2
+
3
+ ## Goal
4
+
5
+ The `glossarist` Ruby gem provides the canonical way to build versioned GCR packages from concept datasets. Each glossary repo uses `glossarist package` in CI to publish GCR releases.
6
+
7
+ ## Current State
8
+
9
+ - CLI has `package`, `upgrade`, `validate` commands (via Thor)
10
+ - `GcrPackage.create` / `GcrPackage.load` handle ZIP I/O
11
+ - `GcrMetadata` generates metadata with statistics
12
+ - `SchemaMigration` handles v0→v1 harmonization
13
+ - Missing: `shortname` and `version` fields in GcrMetadata
14
+ - Missing: v2 format support in `PackageCommand`
15
+
16
+ ## Status
17
+
18
+ Tasks 1–5 are **implemented** on branch `fix/gemfile-lutaml-model-substring`. Remaining: task 6 (publish gem to RubyGems).
19
+
20
+ ## Tasks
21
+
22
+ ### 1. Add `shortname` and `version` to `GcrMetadata`
23
+
24
+ Edit `lib/glossarist/gcr_metadata.rb`:
25
+
26
+ ```ruby
27
+ attr_accessor :shortname, :version, :title, :description, :owner, :tags,
28
+ :concept_count, :languages,
29
+ :created_at, :glossarist_version, :schema_version,
30
+ :statistics, :homepage, :repository, :license
31
+
32
+ def initialize(attrs = {})
33
+ @shortname = attrs[:shortname]
34
+ @version = attrs[:version]
35
+ # ... existing fields ...
36
+ end
37
+
38
+ def self.from_concepts(concepts, register_data: nil, options: {})
39
+ stats = GcrStatistics.from_concepts(concepts)
40
+ new(
41
+ shortname: options[:shortname],
42
+ version: options[:version],
43
+ title: options[:title] || register_data&.dig("name"),
44
+ # ... existing fields ...
45
+ )
46
+ end
47
+
48
+ def to_h
49
+ h = {
50
+ "shortname" => shortname,
51
+ "version" => version,
52
+ "title" => title,
53
+ # ... existing fields ...
54
+ }
55
+ h.compact
56
+ end
57
+ ```
58
+
59
+ ### 2. Add `--shortname` and `--version` CLI options
60
+
61
+ Edit `lib/glossarist/cli.rb`:
62
+
63
+ ```ruby
64
+ desc "package DIR", "Create a .gcr ZIP archive from a dataset"
65
+ option :output, aliases: :o, required: true, desc: "Output .gcr file path"
66
+ option :shortname, type: :string, required: true, desc: "Machine-readable dataset ID"
67
+ option :version, type: :string, required: true, desc: "Semantic version (e.g. 1.0.0)"
68
+ option :title, type: :string, desc: "Dataset title"
69
+ option :description, type: :string, desc: "Dataset description"
70
+ option :owner, type: :string, desc: "Dataset owner"
71
+ option :register_yaml, type: :string, desc: "Path to register.yaml"
72
+ option :tags, type: :array, desc: "Tags for the dataset"
73
+ def package(dir)
74
+ # ...
75
+ end
76
+ ```
77
+
78
+ ### 3. Add v2 format support to `PackageCommand`
79
+
80
+ Edit `lib/glossarist/cli/package_command.rb`:
81
+
82
+ ```ruby
83
+ def collect_concepts
84
+ if v1_concepts?
85
+ collect_v1_concepts
86
+ elsif v2_concepts?
87
+ collect_v2_concepts
88
+ else
89
+ []
90
+ end
91
+ end
92
+
93
+ def v1_concepts?
94
+ concepts_dir = File.join(@dir, "concepts")
95
+ File.directory?(concepts_dir) && Dir.glob(File.join(concepts_dir, "*.yaml")).any?
96
+ end
97
+
98
+ def v2_concepts?
99
+ File.directory?(File.join(@dir, "geolexica-v2"))
100
+ end
101
+
102
+ def collect_v2_concepts
103
+ collection = Glossarist::ManagedConceptCollection.new
104
+ manager = Glossarist::ConceptManager.new(path: File.join(@dir, "geolexica-v2"))
105
+ manager.load_from_files(collection: collection)
106
+
107
+ collection.map { |concept| concept_to_v1_hash(concept) }
108
+ end
109
+
110
+ def concept_to_v1_hash(concept)
111
+ hash = { "termid" => concept.data.id.to_s }
112
+ concept.localizations.each do |lang, l10n|
113
+ hash[lang] = localized_to_hash(l10n)
114
+ end
115
+ hash["term"] = preferred_designation(hash["eng"]&.dig("terms")) || ""
116
+ hash
117
+ end
118
+
119
+ def localized_to_hash(l10n)
120
+ h = {}
121
+ h["terms"] = l10n.designations.map(&:to_h) if l10n.designations.any?
122
+ h["definition"] = l10n.definition.map { |d| { "content" => d.content } } if l10n.definition.any?
123
+ h["notes"] = l10n.notes.map { |n| { "content" => n.content } } if l10n.notes.any?
124
+ h["examples"] = l10n.examples.map { |e| { "content" => e.content } } if l10n.examples.any?
125
+ h["sources"] = l10n.sources.map(&:to_h) if l10n.sources.any?
126
+ h["language_code"] = l10n.language_code if l10n.language_code
127
+ h["entry_status"] = l10n.entry_status if l10n.entry_status
128
+ h["dates"] = l10n.dates.map(&:to_h) if l10n.dates.any?
129
+ h
130
+ end
131
+ ```
132
+
133
+ ### 4. Auto-derive shortname from `register.yaml` or directory name
134
+
135
+ If `--shortname` is not provided, derive from:
136
+ 1. `register.yaml` → `register["shortname"]` or `register["id"]`
137
+ 2. Directory basename
138
+ 3. Raise error if none available
139
+
140
+ ### 5. Validate filename matches metadata
141
+
142
+ In `GcrPackage.validate`, check that the filename pattern `{shortname}-{version}.gcr` matches the `shortname` and `version` in metadata.yaml.
143
+
144
+ ### 6. Publish gem
145
+
146
+ ```bash
147
+ gem build glossarist.gemspec
148
+ gem push glossarist-2.6.0.gem
149
+ ```
150
+
151
+ ## CLI Usage
152
+
153
+ ```bash
154
+ # Install
155
+ gem install glossarist
156
+
157
+ # Package (v1 format)
158
+ glossarist package ./isotc204-glossary \
159
+ --shortname isotc204 --version 1.0.0 \
160
+ -o isotc204-1.0.0.gcr \
161
+ --title "ISO/TC 204 ITS Vocabulary" --owner "ISO/TC 204"
162
+
163
+ # Package (v2 format, auto-detected)
164
+ glossarist package ./isotc211-glossary \
165
+ --shortname isotc211 --version 2.3.0 \
166
+ -o isotc211-2.3.0.gcr \
167
+ --title "ISO/TC 211 Multi-Lingual Glossary" --owner "ISO/TC 211"
168
+
169
+ # Validate
170
+ glossarist validate isotc204-1.0.0.gcr
171
+ ```
172
+
173
+ ## Acceptance Criteria
174
+
175
+ - [x] `GcrMetadata` includes `shortname` and `version` fields
176
+ - [x] `glossarist package --shortname X --version Y` produces `{X}-{Y}.gcr`
177
+ - [x] `metadata.yaml` contains `shortname` and `version`
178
+ - [x] `glossarist validate` checks metadata has required fields
179
+ - [x] Works with both v1 (`concepts/*.yaml`) and v2 (`geolexica-v2/*.yaml`) datasets
180
+ - [ ] Gem published to RubyGems
data/exe/glossarist CHANGED
@@ -1,56 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require_relative "../lib/glossarist"
4
-
5
- class GlossaristCommand < Thor
6
- desc "generate_latex", "Convert Concepts to Latex format"
7
-
8
- option :concepts_path, aliases: :p, required: true,
9
- desc: "Path to yaml concepts directory"
10
- option :latex_concepts, aliases: :l,
11
- desc: "File path having list of concepts that should be converted to LATEX format. If not provided all the concepts will be converted to the latex format"
12
- option :output_file, aliases: :o,
13
- desc: "Output file path. By default the output will pe printed to the console"
14
- option :extra_attributes, aliases: :e, type: :array,
15
- desc: "List of extra attributes that are not in standard Glossarist Concept model"
16
- def generate_latex
17
- assets = []
18
- latex_concepts_file = options[:latex_concepts]
19
-
20
- if options[:extra_attributes]
21
- Glossarist.configure do |config|
22
- config.register_extension_attributes(options[:extra_attributes])
23
- end
24
- end
25
-
26
- concept_set = Glossarist::ConceptSet.new(options[:concepts_path], assets)
27
- latex_str = concept_set.to_latex(latex_concepts_file)
28
- output_latex(latex_str)
29
- end
30
-
31
- def output_latex(latex_str)
32
- output_file_path = options[:output_file]
33
-
34
- if output_file_path
35
- File.open(output_file_path, "w") { |file| file.puts latex_str }
36
- else
37
- puts latex_str
38
- end
39
- end
40
-
41
- def method_missing(*args)
42
- warn "No method found named: #{args[0]}"
43
- warn "Run with `--help` or `-h` to see available options"
44
- exit 1
45
- end
46
-
47
- def respond_to_missing?
48
- true
49
- end
50
-
51
- def self.exit_on_failure?
52
- true
53
- end
54
- end
55
-
56
- GlossaristCommand.start(ARGV)
4
+ Glossarist::CLI.start(ARGV)
data/glossarist.gemspec CHANGED
@@ -33,5 +33,6 @@ Gem::Specification.new do |spec|
33
33
 
34
34
  spec.add_dependency "lutaml-model", "~> 0.8"
35
35
  spec.add_dependency "relaton", ">= 2.0.0", "< 3"
36
+ spec.add_dependency "rubyzip", ">= 2.3", "< 3"
36
37
  spec.add_dependency "thor"
37
38
  end
@@ -2,7 +2,7 @@ module Glossarist
2
2
  class Asset < Lutaml::Model::Serializable
3
3
  attribute :path, :string
4
4
 
5
- yaml do
5
+ key_value do
6
6
  map :path, to: :path
7
7
  end
8
8
 
@@ -34,7 +34,7 @@ module Glossarist
34
34
 
35
35
  attribute :custom_locality, CustomLocality, collection: true
36
36
 
37
- yaml do
37
+ key_value do
38
38
  map :id, to: :id, with: { from: :id_from_yaml, to: :id_to_yaml }
39
39
  map :text, to: :text, with: { from: :text_from_yaml, to: :text_to_yaml }
40
40
  map :source, to: :source,
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Glossarist
4
+ class CLI
5
+ class PackageCommand
6
+ def initialize(dir, options)
7
+ @dir = dir
8
+ @options = options
9
+ end
10
+
11
+ def run
12
+ GcrPackage.create_from_directory(
13
+ @dir,
14
+ output: @options[:output],
15
+ shortname: @options[:shortname],
16
+ version: @options[:version],
17
+ title: @options[:title],
18
+ description: @options[:description],
19
+ owner: @options[:owner],
20
+ tags: @options[:tags],
21
+ register_yaml: @options[:register_yaml],
22
+ uri_prefix: @options[:uri_prefix],
23
+ )
24
+
25
+ puts "Created #{@options[:output]}"
26
+ rescue ArgumentError => e
27
+ warn "Error: #{e.message}"
28
+ exit 1
29
+ end
30
+ end
31
+ end
32
+ end