glossarist 2.5.0 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop_todo.yml +50 -146
- data/CLAUDE.md +33 -7
- data/Gemfile +20 -19
- data/README.adoc +383 -7
- data/TODO.integration/01-gcr-package-cli.md +180 -0
- data/exe/glossarist +1 -53
- data/glossarist.gemspec +1 -0
- data/lib/glossarist/asset.rb +1 -1
- data/lib/glossarist/citation.rb +1 -1
- data/lib/glossarist/cli/package_command.rb +32 -0
- data/lib/glossarist/cli/upgrade_command.rb +34 -0
- data/lib/glossarist/cli/validate_command.rb +56 -0
- data/lib/glossarist/cli.rb +105 -0
- data/lib/glossarist/collection_config.rb +23 -0
- data/lib/glossarist/collections.rb +15 -8
- data/lib/glossarist/concept.rb +1 -1
- data/lib/glossarist/concept_collector.rb +153 -0
- data/lib/glossarist/concept_data.rb +3 -1
- data/lib/glossarist/concept_date.rb +1 -1
- data/lib/glossarist/concept_document.rb +29 -0
- data/lib/glossarist/concept_enricher.rb +34 -0
- data/lib/glossarist/concept_manager.rb +31 -49
- data/lib/glossarist/concept_reference.rb +45 -0
- data/lib/glossarist/concept_source.rb +1 -1
- data/lib/glossarist/concept_validator.rb +101 -0
- data/lib/glossarist/custom_locality.rb +1 -1
- data/lib/glossarist/dataset_validator.rb +69 -0
- data/lib/glossarist/designation/abbreviation.rb +1 -1
- data/lib/glossarist/designation/base.rb +11 -4
- data/lib/glossarist/designation/expression.rb +1 -1
- data/lib/glossarist/designation/grammar_info.rb +1 -1
- data/lib/glossarist/designation/graphical_symbol.rb +1 -1
- data/lib/glossarist/designation/letter_symbol.rb +1 -1
- data/lib/glossarist/designation/symbol.rb +2 -2
- data/lib/glossarist/detailed_definition.rb +1 -1
- data/lib/glossarist/gcr_metadata.rb +87 -0
- data/lib/glossarist/gcr_package.rb +223 -0
- data/lib/glossarist/gcr_statistics.rb +35 -0
- data/lib/glossarist/gcr_validator.rb +98 -0
- data/lib/glossarist/locality.rb +1 -1
- data/lib/glossarist/localized_concept.rb +12 -1
- data/lib/glossarist/managed_concept.rb +1 -1
- data/lib/glossarist/managed_concept_data.rb +5 -2
- data/lib/glossarist/non_verb_rep.rb +1 -1
- data/lib/glossarist/reference_extractor.rb +227 -0
- data/lib/glossarist/reference_resolver.rb +169 -0
- data/lib/glossarist/register_data.rb +39 -0
- data/lib/glossarist/related_concept.rb +1 -1
- data/lib/glossarist/resolution_adapter/local.rb +73 -0
- data/lib/glossarist/resolution_adapter/package.rb +22 -0
- data/lib/glossarist/resolution_adapter/remote.rb +60 -0
- data/lib/glossarist/resolution_adapter/route.rb +34 -0
- data/lib/glossarist/resolution_adapter.rb +14 -0
- data/lib/glossarist/schema_migration.rb +334 -0
- data/lib/glossarist/urn_resolver.rb +71 -0
- data/lib/glossarist/v1/concept.rb +81 -0
- data/lib/glossarist/v1/cross_references.rb +41 -0
- data/lib/glossarist/v1/register.rb +50 -0
- data/lib/glossarist/v1.rb +9 -0
- data/lib/glossarist/validation_result.rb +38 -0
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +29 -4
- data/relaton-bib-2.0.0.gem +0 -0
- data/relaton-bib-2.1.0.gem +0 -0
- data/relaton-cen-2.0.0.gem +0 -0
- data/relaton-iec-2.0.0.gem +0 -0
- data/relaton-iso-2.0.0.gem +0 -0
- data/relaton-itu-2.0.0.gem +0 -0
- metadata +60 -7
data/README.adoc
CHANGED
|
@@ -344,30 +344,406 @@ modification:: A description of the modification to the cited definition of the
|
|
|
344
344
|
|
|
345
345
|
== Commands
|
|
346
346
|
|
|
347
|
-
|
|
347
|
+
=== generate_latex
|
|
348
|
+
|
|
349
|
+
Convert concepts to LaTeX format.
|
|
348
350
|
|
|
349
|
-
=== Usage:
|
|
350
351
|
[,bash]
|
|
351
352
|
----
|
|
352
|
-
glossarist generate_latex p
|
|
353
|
+
glossarist generate_latex -p PATH_TO_CONCEPTS
|
|
353
354
|
----
|
|
354
355
|
|
|
355
|
-
|
|
356
|
+
Options:
|
|
356
357
|
[cols="1,1"]
|
|
357
358
|
|===
|
|
358
359
|
|p, --concepts-path
|
|
359
360
|
|Path to yaml concepts directory
|
|
360
361
|
|
|
361
362
|
|l, --latex-concepts
|
|
362
|
-
|File path having list of concepts that should be converted to LATEX format
|
|
363
|
+
|Path to a file listing the concepts that should be converted to LaTeX format
|
|
363
364
|
|
|
364
365
|
|o, --output-file
|
|
365
|
-
|Output file path
|
|
366
|
+
|Output file path
|
|
366
367
|
|
|
367
368
|
|e, --extra-attributes
|
|
368
|
-
|List of extra attributes that are not in standard Glossarist Concept model
|
|
369
|
+
|List of extra attributes that are not in standard Glossarist Concept model
|
|
370
|
+
|===
|
|
371
|
+
|
|
372
|
+
=== package
|
|
373
|
+
|
|
374
|
+
Create a `.gcr` ZIP archive from a concept dataset.
|
|
375
|
+
|
|
376
|
+
[,bash]
|
|
377
|
+
----
|
|
378
|
+
glossarist package DIR -o output.gcr --shortname mydataset --version 1.0.0 --uri-prefix urn:iso:std:iso:19111
|
|
379
|
+
----
|
|
380
|
+
|
|
381
|
+
Options:
|
|
382
|
+
[cols="1,1"]
|
|
383
|
+
|===
|
|
384
|
+
|o, --output (required)
|
|
385
|
+
|Output `.gcr` file path
|
|
386
|
+
|
|
387
|
+
|--shortname (required)
|
|
388
|
+
|Machine-readable dataset shortname (e.g. `iev`, `iso19111`)
|
|
389
|
+
|
|
390
|
+
|--version (required)
|
|
391
|
+
|Semantic version (e.g. `1.0.0`)
|
|
392
|
+
|
|
393
|
+
|--title
|
|
394
|
+
|Human-readable dataset title
|
|
395
|
+
|
|
396
|
+
|--description
|
|
397
|
+
|Dataset description
|
|
398
|
+
|
|
399
|
+
|--owner
|
|
400
|
+
|Dataset owner
|
|
401
|
+
|
|
402
|
+
|--register-yaml
|
|
403
|
+
|Path to register.yaml to include in package
|
|
404
|
+
|
|
405
|
+
|--uri-prefix
|
|
406
|
+
|URI namespace this dataset provides (e.g. `urn:iec:std:iec:60050`)
|
|
407
|
+
|
|
408
|
+
|--tags
|
|
409
|
+
|Tags for the dataset
|
|
410
|
+
|===
|
|
411
|
+
|
|
412
|
+
Ruby API:
|
|
413
|
+
[,ruby]
|
|
414
|
+
----
|
|
415
|
+
GcrPackage.create_from_directory(
|
|
416
|
+
"path/to/dataset",
|
|
417
|
+
output: "output.gcr",
|
|
418
|
+
shortname: "mydataset",
|
|
419
|
+
version: "1.0.0",
|
|
420
|
+
uri_prefix: "urn:iso:std:iso:19111",
|
|
421
|
+
)
|
|
422
|
+
----
|
|
423
|
+
|
|
424
|
+
=== validate
|
|
425
|
+
|
|
426
|
+
Validate a dataset directory or `.gcr` file for schema compliance.
|
|
427
|
+
|
|
428
|
+
[,bash]
|
|
429
|
+
----
|
|
430
|
+
glossarist validate PATH
|
|
431
|
+
glossarist validate PATH --reference-path path/to/gcrs/
|
|
432
|
+
----
|
|
433
|
+
|
|
434
|
+
Options:
|
|
435
|
+
[cols="1,1"]
|
|
436
|
+
|===
|
|
437
|
+
|--strict
|
|
438
|
+
|Treat warnings as errors
|
|
439
|
+
|
|
440
|
+
|--format
|
|
441
|
+
|Output format: `text`, `json`, or `yaml`
|
|
442
|
+
|
|
443
|
+
|--reference-path
|
|
444
|
+
|Path to directory of `.gcr` files for cross-dataset reference validation
|
|
445
|
+
|===
|
|
446
|
+
|
|
447
|
+
Ruby API:
|
|
448
|
+
[,ruby]
|
|
449
|
+
----
|
|
450
|
+
result = DatasetValidator.new.validate("path/to/dataset")
|
|
451
|
+
result = DatasetValidator.new.validate("path/to/dataset", reference_path: "gcrs/")
|
|
452
|
+
result.valid? # => true/false
|
|
453
|
+
result.errors # => [...]
|
|
454
|
+
result.warnings # => [...]
|
|
455
|
+
----
|
|
456
|
+
|
|
457
|
+
=== upgrade
|
|
458
|
+
|
|
459
|
+
Upgrade a dataset to the current schema version.
|
|
460
|
+
|
|
461
|
+
[,bash]
|
|
462
|
+
----
|
|
463
|
+
glossarist upgrade SOURCE_DIR -o OUTPUT_DIR
|
|
464
|
+
----
|
|
465
|
+
|
|
466
|
+
== Glossarist Concept Repository (GCR)
|
|
467
|
+
|
|
468
|
+
A **GCR** (Glossarist Concept Repository) is a distributable, versioned ZIP archive containing glossary concepts and metadata. GCR packages are created from v2 datasets.
|
|
469
|
+
|
|
470
|
+
=== GCR Package Format
|
|
471
|
+
|
|
472
|
+
A `.gcr` file is a ZIP archive with the following structure:
|
|
473
|
+
|
|
474
|
+
----
|
|
475
|
+
metadata.yaml # Package metadata
|
|
476
|
+
register.yaml # Optional register information
|
|
477
|
+
concepts/ # Concept YAML files
|
|
478
|
+
102-01-01.yaml
|
|
479
|
+
200.yaml
|
|
480
|
+
----
|
|
481
|
+
|
|
482
|
+
=== Creating a GCR Package
|
|
483
|
+
|
|
484
|
+
CLI:
|
|
485
|
+
[,bash]
|
|
486
|
+
----
|
|
487
|
+
glossarist package path/to/v2-dataset -o mydataset-1.0.0.gcr \
|
|
488
|
+
--shortname mydataset --version 1.0.0 --uri-prefix urn:iso:std:iso:19111
|
|
489
|
+
----
|
|
490
|
+
|
|
491
|
+
Ruby API:
|
|
492
|
+
[,ruby]
|
|
493
|
+
----
|
|
494
|
+
GcrPackage.create_from_directory(
|
|
495
|
+
"path/to/v2-dataset",
|
|
496
|
+
output: "mydataset-1.0.0.gcr",
|
|
497
|
+
shortname: "mydataset",
|
|
498
|
+
version: "1.0.0",
|
|
499
|
+
uri_prefix: "urn:iso:std:iso:19111",
|
|
500
|
+
title: "My Dataset",
|
|
501
|
+
description: "A terminology dataset",
|
|
502
|
+
)
|
|
503
|
+
----
|
|
504
|
+
|
|
505
|
+
=== Loading a GCR Package
|
|
506
|
+
|
|
507
|
+
[,ruby]
|
|
508
|
+
----
|
|
509
|
+
pkg = GcrPackage.load("mydataset-1.0.0.gcr")
|
|
510
|
+
pkg.metadata # => Hash with metadata fields
|
|
511
|
+
pkg.concepts # => Array of concept hashes
|
|
512
|
+
----
|
|
513
|
+
|
|
514
|
+
=== GCR Metadata
|
|
515
|
+
|
|
516
|
+
Metadata fields in `metadata.yaml`:
|
|
517
|
+
|
|
518
|
+
[cols="1,1"]
|
|
519
|
+
|===
|
|
520
|
+
|shortname
|
|
521
|
+
|Machine-readable dataset identifier (e.g. `iev`)
|
|
522
|
+
|
|
523
|
+
|version
|
|
524
|
+
|Semantic version (e.g. `1.0.0`)
|
|
525
|
+
|
|
526
|
+
|title
|
|
527
|
+
|Human-readable title
|
|
528
|
+
|
|
529
|
+
|description
|
|
530
|
+
|Dataset description
|
|
531
|
+
|
|
532
|
+
|owner
|
|
533
|
+
|Dataset owner
|
|
534
|
+
|
|
535
|
+
|tags
|
|
536
|
+
|Array of tags
|
|
537
|
+
|
|
538
|
+
|concept_count
|
|
539
|
+
|Number of concepts in the package
|
|
540
|
+
|
|
541
|
+
|languages
|
|
542
|
+
|Array of language codes present
|
|
543
|
+
|
|
544
|
+
|created_at
|
|
545
|
+
|ISO 8601 timestamp of package creation
|
|
546
|
+
|
|
547
|
+
|glossarist_version
|
|
548
|
+
|Version of the Glossarist gem used
|
|
549
|
+
|
|
550
|
+
|schema_version
|
|
551
|
+
|Schema version of the package format
|
|
552
|
+
|
|
553
|
+
|uri_prefix
|
|
554
|
+
|URI namespace this dataset provides (e.g. `urn:iec:std:iec:60050`)
|
|
555
|
+
|
|
556
|
+
|external_references
|
|
557
|
+
|Array of `{uri: "..."}` for URI namespaces this dataset references
|
|
558
|
+
|===
|
|
559
|
+
|
|
560
|
+
=== GCR Statistics
|
|
561
|
+
|
|
562
|
+
[,ruby]
|
|
563
|
+
----
|
|
564
|
+
stats = GcrStatistics.from_concepts(concepts)
|
|
565
|
+
stats.total_concepts # => 150
|
|
566
|
+
stats.languages # => ["eng", "fra", "deu"]
|
|
567
|
+
stats.concepts_by_status # => { "valid" => 140, "draft" => 10 }
|
|
568
|
+
stats.concepts_with_definitions # => 148
|
|
569
|
+
stats.concepts_with_sources # => 130
|
|
570
|
+
----
|
|
571
|
+
|
|
572
|
+
== Concept Mentions
|
|
573
|
+
|
|
574
|
+
Concepts can reference other concepts within the same dataset (intra-set) or in different datasets (inter-set) using inline mention syntax. All mentions use double braces `{{...}}`.
|
|
575
|
+
|
|
576
|
+
=== Syntax
|
|
577
|
+
|
|
578
|
+
The concept mention syntax mirrors HTML `<a href="id">display_text</a>` — the display text is independent of the target concept's canonical designation.
|
|
579
|
+
|
|
580
|
+
[cols="1,2,2,2"]
|
|
581
|
+
|===
|
|
582
|
+
|Form |Syntax |Example |Resolution
|
|
583
|
+
|
|
584
|
+
|ID only
|
|
585
|
+
|`{{ID}}`
|
|
586
|
+
|`{{200}}`
|
|
587
|
+
|Intra-set: concept 200, auto-display
|
|
588
|
+
|
|
589
|
+
|ID + display
|
|
590
|
+
|`{{TEXT, ID}}`
|
|
591
|
+
|`{{geodetic latitude, 200}}`
|
|
592
|
+
|Intra-set: concept 200, custom display
|
|
593
|
+
|
|
594
|
+
|Designation
|
|
595
|
+
|`{{TEXT}}`
|
|
596
|
+
|`{{geodetic latitude}}`
|
|
597
|
+
|Intra-set: find by designation
|
|
598
|
+
|
|
599
|
+
|URN + display
|
|
600
|
+
|`{{TEXT, URN}}`
|
|
601
|
+
|`{{equality, urn:iec:std:iec:60050-102-01-01}}`
|
|
602
|
+
|Inter-set: resolve by URN
|
|
603
|
+
|
|
604
|
+
|URN only
|
|
605
|
+
|`{{URN}}`
|
|
606
|
+
|`{{urn:iec:std:iec:60050-102-01-01}}`
|
|
607
|
+
|Inter-set: resolve URN, auto-display
|
|
369
608
|
|===
|
|
370
609
|
|
|
610
|
+
=== URN Schemes
|
|
611
|
+
|
|
612
|
+
IEC URN (IEV):: `urn:iec:std:iec:60050-{code}` — source is `urn:iec:std:iec:60050`, concept_id is the IEV code
|
|
613
|
+
ISO URN (RFC 5141):: `urn:iso:std:iso:{std}:...:term:{id}` — source is `urn:iso:std:iso:{std}`, concept_id is the term ID
|
|
614
|
+
|
|
615
|
+
=== Extracting Mentions (Ruby API)
|
|
616
|
+
|
|
617
|
+
[,ruby]
|
|
618
|
+
----
|
|
619
|
+
extractor = ReferenceExtractor.new
|
|
620
|
+
|
|
621
|
+
# From a text string
|
|
622
|
+
refs = extractor.extract_from_text("See {{equality, urn:iec:std:iec:60050-102-01-01}} and {{lat, 200}}")
|
|
623
|
+
# => [ConceptReference(term: "equality", concept_id: "102-01-01",
|
|
624
|
+
# source: "urn:iec:std:iec:60050", ref_type: "urn"),
|
|
625
|
+
# ConceptReference(term: "lat", concept_id: "200",
|
|
626
|
+
# source: nil, ref_type: "local")]
|
|
627
|
+
|
|
628
|
+
# From all text fields in a localized concept
|
|
629
|
+
refs = extractor.extract_from_localized(lc_hash)
|
|
630
|
+
|
|
631
|
+
# From all language blocks in a concept
|
|
632
|
+
refs = extractor.extract_from_concept_hash(concept_hash)
|
|
633
|
+
----
|
|
634
|
+
|
|
635
|
+
=== Resolving Mentions (Ruby API)
|
|
636
|
+
|
|
637
|
+
Resolution uses an adapter chain: route overrides → local → package → remote.
|
|
638
|
+
|
|
639
|
+
[,ruby]
|
|
640
|
+
----
|
|
641
|
+
resolver = ReferenceResolver.new
|
|
642
|
+
|
|
643
|
+
# Register the current dataset for intra-set resolution
|
|
644
|
+
resolver.register_self(concepts)
|
|
645
|
+
|
|
646
|
+
# Register co-loaded GCRs with their URI prefixes
|
|
647
|
+
resolver.register_package(iev_concepts, uri_prefix: "urn:iec:std:iec:60050")
|
|
648
|
+
resolver.register_package(iso_concepts, uri_prefix: "urn:iso:std:iso:19111")
|
|
649
|
+
|
|
650
|
+
# Add URI route overrides (e.g. author used wrong URI)
|
|
651
|
+
resolver.add_route(from: "urn:iso:std:iso:19115", to: "urn:iso:std:iso:19111")
|
|
652
|
+
|
|
653
|
+
# Resolve a single reference
|
|
654
|
+
ref = ConceptReference.new(term: "equality", concept_id: "102-01-01",
|
|
655
|
+
source: "urn:iec:std:iec:60050", ref_type: "urn")
|
|
656
|
+
resolver.resolve(ref) # => concept hash
|
|
657
|
+
|
|
658
|
+
# Validate all references in a package
|
|
659
|
+
result = resolver.validate_all(concepts)
|
|
660
|
+
result.errors # => structural errors
|
|
661
|
+
result.warnings # => unresolvable references
|
|
662
|
+
----
|
|
663
|
+
|
|
664
|
+
=== GCR Collection & Routing
|
|
665
|
+
|
|
666
|
+
When multiple GCRs are placed together in a directory, a `collection.yaml` configures resolution:
|
|
667
|
+
|
|
668
|
+
[,yaml]
|
|
669
|
+
----
|
|
670
|
+
# collection.yaml
|
|
671
|
+
packages:
|
|
672
|
+
- file: iev-2.0.0.gcr
|
|
673
|
+
- file: iso19111-1.0.0.gcr
|
|
674
|
+
|
|
675
|
+
routes:
|
|
676
|
+
- from: "urn:iso:std:iso:19115"
|
|
677
|
+
to: "urn:iso:std:iso:19111"
|
|
678
|
+
|
|
679
|
+
remote:
|
|
680
|
+
- uri_prefix: "urn:iec:std:iec:60050"
|
|
681
|
+
endpoint: "https://vocabulary.example.org/api/concepts"
|
|
682
|
+
----
|
|
683
|
+
|
|
684
|
+
[,ruby]
|
|
685
|
+
----
|
|
686
|
+
resolver = ReferenceResolver.new
|
|
687
|
+
resolver.load_collection("path/to/gcr_collection/")
|
|
688
|
+
# Packages auto-registered with their uri_prefix from metadata
|
|
689
|
+
# Route overrides applied
|
|
690
|
+
# Remote endpoints registered
|
|
691
|
+
----
|
|
692
|
+
|
|
693
|
+
=== Resolution Adapters
|
|
694
|
+
|
|
695
|
+
The resolution framework uses a chain of adapters, each implementing `resolve(reference) → concept_hash | nil`:
|
|
696
|
+
|
|
697
|
+
LocalAdapter:: Resolves intra-set references by concept ID or designation lookup
|
|
698
|
+
PackageAdapter:: Resolves inter-set references by matching `source` URI to a GCR's `uri_prefix`
|
|
699
|
+
RouteAdapter:: Remaps incorrect source URIs before delegation
|
|
700
|
+
RemoteAdapter:: Resolves via HTTP to an online GCR endpoint
|
|
701
|
+
|
|
702
|
+
=== URN-to-HTTP Resolution
|
|
703
|
+
|
|
704
|
+
Concept mentions rendered as hyperlinks need HTTP URLs. The `UrnResolver` converts URNs to their canonical web locations:
|
|
705
|
+
|
|
706
|
+
[,ruby]
|
|
707
|
+
----
|
|
708
|
+
# Class-level convenience
|
|
709
|
+
url = UrnResolver.resolve("urn:iec:std:iec:60050-102-01-01")
|
|
710
|
+
# => "https://www.electropedia.org/iev/iev.nsf/display?openform&ievref=102-01-01"
|
|
711
|
+
|
|
712
|
+
url = UrnResolver.resolve("urn:iso:std:iso:19111:ed-3:v1:en:term:3.1.32")
|
|
713
|
+
# => "https://www.iso.org/obp/ui/#iso:std:iso:19111:ed-3:v1:en:term:3.1.32"
|
|
714
|
+
|
|
715
|
+
# Also accepts ConceptReference objects
|
|
716
|
+
ref = ConceptReference.new(term: "equality", concept_id: "102-01-01",
|
|
717
|
+
source: "urn:iec:std:iec:60050", ref_type: "urn")
|
|
718
|
+
url = UrnResolver.resolve(ref)
|
|
719
|
+
# => "https://www.electropedia.org/iev/iev.nsf/display?openform&ievref=102-01-01"
|
|
720
|
+
----
|
|
721
|
+
|
|
722
|
+
Built-in mappings:
|
|
723
|
+
|
|
724
|
+
[cols="1,1,1"]
|
|
725
|
+
|===
|
|
726
|
+
|URN Prefix |Target |Example URL
|
|
727
|
+
|
|
728
|
+
|`urn:iec:std:iec:60050-*`
|
|
729
|
+
|IEC Electropedia
|
|
730
|
+
|`electropedia.org/iev/iev.nsf/display?openform&ievref=102-01-01`
|
|
731
|
+
|
|
732
|
+
|`urn:iso:*`
|
|
733
|
+
|ISO Online Browsing Platform
|
|
734
|
+
|`iso.org/obp/ui/#iso:std:iso:19111:term:3.1.32`
|
|
735
|
+
|===
|
|
736
|
+
|
|
737
|
+
Register custom schemes:
|
|
738
|
+
|
|
739
|
+
[,ruby]
|
|
740
|
+
----
|
|
741
|
+
resolver = UrnResolver.new
|
|
742
|
+
resolver.register_scheme("urn:example:") do |urn|
|
|
743
|
+
"https://example.org/concepts/#{urn.sub('urn:example:', '')}"
|
|
744
|
+
end
|
|
745
|
+
----
|
|
746
|
+
|
|
371
747
|
== Credits
|
|
372
748
|
|
|
373
749
|
This gem is developed, maintained and funded by
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# 01 — GCR Packaging CLI with Versioning
|
|
2
|
+
|
|
3
|
+
## Goal
|
|
4
|
+
|
|
5
|
+
The `glossarist` Ruby gem provides the canonical way to build versioned GCR packages from concept datasets. Each glossary repo uses `glossarist package` in CI to publish GCR releases.
|
|
6
|
+
|
|
7
|
+
## Current State
|
|
8
|
+
|
|
9
|
+
- CLI has `package`, `upgrade`, `validate` commands (via Thor)
|
|
10
|
+
- `GcrPackage.create` / `GcrPackage.load` handle ZIP I/O
|
|
11
|
+
- `GcrMetadata` generates metadata with statistics
|
|
12
|
+
- `SchemaMigration` handles v0→v1 harmonization
|
|
13
|
+
- Missing: `shortname` and `version` fields in `GcrMetadata`
|
|
14
|
+
- Missing: v2 format support in `PackageCommand`
|
|
15
|
+
|
|
16
|
+
## Status
|
|
17
|
+
|
|
18
|
+
Tasks 1–5 are **implemented** on branch `fix/gemfile-lutaml-model-substring`. Remaining: task 6 (publish gem to RubyGems).
|
|
19
|
+
|
|
20
|
+
## Tasks
|
|
21
|
+
|
|
22
|
+
### 1. Add `shortname` and `version` to `GcrMetadata`
|
|
23
|
+
|
|
24
|
+
Edit `lib/glossarist/gcr_metadata.rb`:
|
|
25
|
+
|
|
26
|
+
```ruby
|
|
27
|
+
attr_accessor :shortname, :version, :title, :description, :owner, :tags,
|
|
28
|
+
:concept_count, :languages,
|
|
29
|
+
:created_at, :glossarist_version, :schema_version,
|
|
30
|
+
:statistics, :homepage, :repository, :license
|
|
31
|
+
|
|
32
|
+
def initialize(attrs = {})
|
|
33
|
+
@shortname = attrs[:shortname]
|
|
34
|
+
@version = attrs[:version]
|
|
35
|
+
# ... existing fields ...
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def self.from_concepts(concepts, register_data: nil, options: {})
|
|
39
|
+
stats = GcrStatistics.from_concepts(concepts)
|
|
40
|
+
new(
|
|
41
|
+
shortname: options[:shortname],
|
|
42
|
+
version: options[:version],
|
|
43
|
+
title: options[:title] || register_data&.dig("name"),
|
|
44
|
+
# ... existing fields ...
|
|
45
|
+
)
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def to_h
|
|
49
|
+
h = {
|
|
50
|
+
"shortname" => shortname,
|
|
51
|
+
"version" => version,
|
|
52
|
+
"title" => title,
|
|
53
|
+
# ... existing fields ...
|
|
54
|
+
}
|
|
55
|
+
h.compact
|
|
56
|
+
end
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### 2. Add `--shortname` and `--version` CLI options
|
|
60
|
+
|
|
61
|
+
Edit `lib/glossarist/cli.rb`:
|
|
62
|
+
|
|
63
|
+
```ruby
|
|
64
|
+
desc "package DIR", "Create a .gcr ZIP archive from a dataset"
|
|
65
|
+
option :output, aliases: :o, required: true, desc: "Output .gcr file path"
|
|
66
|
+
option :shortname, type: :string, required: true, desc: "Machine-readable dataset ID"
|
|
67
|
+
option :version, type: :string, required: true, desc: "Semantic version (e.g. 1.0.0)"
|
|
68
|
+
option :title, type: :string, desc: "Dataset title"
|
|
69
|
+
option :description, type: :string, desc: "Dataset description"
|
|
70
|
+
option :owner, type: :string, desc: "Dataset owner"
|
|
71
|
+
option :register_yaml, type: :string, desc: "Path to register.yaml"
|
|
72
|
+
option :tags, type: :array, desc: "Tags for the dataset"
|
|
73
|
+
def package(dir)
|
|
74
|
+
# ...
|
|
75
|
+
end
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 3. Add v2 format support to `PackageCommand`
|
|
79
|
+
|
|
80
|
+
Edit `lib/glossarist/cli/package_command.rb`:
|
|
81
|
+
|
|
82
|
+
```ruby
|
|
83
|
+
def collect_concepts
|
|
84
|
+
if v1_concepts?
|
|
85
|
+
collect_v1_concepts
|
|
86
|
+
elsif v2_concepts?
|
|
87
|
+
collect_v2_concepts
|
|
88
|
+
else
|
|
89
|
+
[]
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def v1_concepts?
|
|
94
|
+
concepts_dir = File.join(@dir, "concepts")
|
|
95
|
+
File.directory?(concepts_dir) && Dir.glob(File.join(concepts_dir, "*.yaml")).any?
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def v2_concepts?
|
|
99
|
+
File.directory?(File.join(@dir, "geolexica-v2"))
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def collect_v2_concepts
|
|
103
|
+
collection = Glossarist::ManagedConceptCollection.new
|
|
104
|
+
manager = Glossarist::ConceptManager.new(path: File.join(@dir, "geolexica-v2"))
|
|
105
|
+
manager.load_from_files(collection: collection)
|
|
106
|
+
|
|
107
|
+
collection.map { |concept| concept_to_v1_hash(concept) }
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def concept_to_v1_hash(concept)
|
|
111
|
+
hash = { "termid" => concept.data.id.to_s }
|
|
112
|
+
concept.localizations.each do |lang, l10n|
|
|
113
|
+
hash[lang] = localized_to_hash(l10n)
|
|
114
|
+
end
|
|
115
|
+
hash["term"] = preferred_designation(hash["eng"]&.dig("terms")) || ""
|
|
116
|
+
hash
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def localized_to_hash(l10n)
|
|
120
|
+
h = {}
|
|
121
|
+
h["terms"] = l10n.designations.map(&:to_h) if l10n.designations.any?
|
|
122
|
+
h["definition"] = l10n.definition.map { |d| { "content" => d.content } } if l10n.definition.any?
|
|
123
|
+
h["notes"] = l10n.notes.map { |n| { "content" => n.content } } if l10n.notes.any?
|
|
124
|
+
h["examples"] = l10n.examples.map { |e| { "content" => e.content } } if l10n.examples.any?
|
|
125
|
+
h["sources"] = l10n.sources.map(&:to_h) if l10n.sources.any?
|
|
126
|
+
h["language_code"] = l10n.language_code if l10n.language_code
|
|
127
|
+
h["entry_status"] = l10n.entry_status if l10n.entry_status
|
|
128
|
+
h["dates"] = l10n.dates.map(&:to_h) if l10n.dates.any?
|
|
129
|
+
h
|
|
130
|
+
end
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### 4. Auto-derive shortname from directory name
|
|
134
|
+
|
|
135
|
+
If `--shortname` is not provided, derive from:
|
|
136
|
+
1. `register.yaml` → `register["shortname"]` or `register["id"]`
|
|
137
|
+
2. Directory basename
|
|
138
|
+
3. Raise error if none available
|
|
139
|
+
|
|
140
|
+
### 5. Validate filename matches metadata
|
|
141
|
+
|
|
142
|
+
In `GcrPackage.validate`, check that the filename pattern `{shortname}-{version}.gcr` matches the `shortname` and `version` in metadata.yaml.
|
|
143
|
+
|
|
144
|
+
### 6. Publish gem
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
gem build glossarist.gemspec
|
|
148
|
+
gem push glossarist-2.6.0.gem
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## CLI Usage
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Install
|
|
155
|
+
gem install glossarist
|
|
156
|
+
|
|
157
|
+
# Package (v1 format)
|
|
158
|
+
glossarist package ./isotc204-glossary \
|
|
159
|
+
--shortname isotc204 --version 1.0.0 \
|
|
160
|
+
-o isotc204-1.0.0.gcr \
|
|
161
|
+
--title "ISO/TC 204 ITS Vocabulary" --owner "ISO/TC 204"
|
|
162
|
+
|
|
163
|
+
# Package (v2 format, auto-detected)
|
|
164
|
+
glossarist package ./isotc211-glossary \
|
|
165
|
+
--shortname isotc211 --version 2.3.0 \
|
|
166
|
+
-o isotc211-2.3.0.gcr \
|
|
167
|
+
--title "ISO/TC 211 Multi-Lingual Glossary" --owner "ISO/TC 211"
|
|
168
|
+
|
|
169
|
+
# Validate
|
|
170
|
+
glossarist validate isotc204-1.0.0.gcr
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## Acceptance Criteria
|
|
174
|
+
|
|
175
|
+
- [x] `GcrMetadata` includes `shortname` and `version` fields
|
|
176
|
+
- [x] `glossarist package --shortname X --version Y` produces `{X}-{Y}.gcr`
|
|
177
|
+
- [x] `metadata.yaml` contains `shortname` and `version`
|
|
178
|
+
- [x] `glossarist validate` checks metadata has required fields
|
|
179
|
+
- [x] Works with both v1 (`concepts/*.yaml`) and v2 (`geolexica-v2/*.yaml`) datasets
|
|
180
|
+
- [ ] Gem published to RubyGems
|
data/exe/glossarist
CHANGED
|
@@ -1,56 +1,4 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
|
|
3
3
|
require_relative "../lib/glossarist"
|
|
4
|
-
|
|
5
|
-
class GlossaristCommand < Thor
|
|
6
|
-
desc "generate_latex", "Convert Concepts to Latex format"
|
|
7
|
-
|
|
8
|
-
option :concepts_path, aliases: :p, required: true,
|
|
9
|
-
desc: "Path to yaml concepts directory"
|
|
10
|
-
option :latex_concepts, aliases: :l,
|
|
11
|
-
desc: "File path having list of concepts that should be converted to LATEX format. If not provided all the concepts will be converted to the latex format"
|
|
12
|
-
option :output_file, aliases: :o,
|
|
13
|
-
desc: "Output file path. By default the output will pe printed to the console"
|
|
14
|
-
option :extra_attributes, aliases: :e, type: :array,
|
|
15
|
-
desc: "List of extra attributes that are not in standard Glossarist Concept model"
|
|
16
|
-
def generate_latex
|
|
17
|
-
assets = []
|
|
18
|
-
latex_concepts_file = options[:latex_concepts]
|
|
19
|
-
|
|
20
|
-
if options[:extra_attributes]
|
|
21
|
-
Glossarist.configure do |config|
|
|
22
|
-
config.register_extension_attributes(options[:extra_attributes])
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
concept_set = Glossarist::ConceptSet.new(options[:concepts_path], assets)
|
|
27
|
-
latex_str = concept_set.to_latex(latex_concepts_file)
|
|
28
|
-
output_latex(latex_str)
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
def output_latex(latex_str)
|
|
32
|
-
output_file_path = options[:output_file]
|
|
33
|
-
|
|
34
|
-
if output_file_path
|
|
35
|
-
File.open(output_file_path, "w") { |file| file.puts latex_str }
|
|
36
|
-
else
|
|
37
|
-
puts latex_str
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
def method_missing(*args)
|
|
42
|
-
warn "No method found named: #{args[0]}"
|
|
43
|
-
warn "Run with `--help` or `-h` to see available options"
|
|
44
|
-
exit 1
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
def respond_to_missing?
|
|
48
|
-
true
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
def self.exit_on_failure?
|
|
52
|
-
true
|
|
53
|
-
end
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
GlossaristCommand.start(ARGV)
|
|
4
|
+
Glossarist::CLI.start(ARGV)
|
data/glossarist.gemspec
CHANGED
data/lib/glossarist/asset.rb
CHANGED
data/lib/glossarist/citation.rb
CHANGED
|
@@ -34,7 +34,7 @@ module Glossarist
|
|
|
34
34
|
|
|
35
35
|
attribute :custom_locality, CustomLocality, collection: true
|
|
36
36
|
|
|
37
|
-
|
|
37
|
+
key_value do
|
|
38
38
|
map :id, to: :id, with: { from: :id_from_yaml, to: :id_to_yaml }
|
|
39
39
|
map :text, to: :text, with: { from: :text_from_yaml, to: :text_to_yaml }
|
|
40
40
|
map :source, to: :source,
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
class CLI
|
|
5
|
+
class PackageCommand
|
|
6
|
+
def initialize(dir, options)
|
|
7
|
+
@dir = dir
|
|
8
|
+
@options = options
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def run
|
|
12
|
+
GcrPackage.create_from_directory(
|
|
13
|
+
@dir,
|
|
14
|
+
output: @options[:output],
|
|
15
|
+
shortname: @options[:shortname],
|
|
16
|
+
version: @options[:version],
|
|
17
|
+
title: @options[:title],
|
|
18
|
+
description: @options[:description],
|
|
19
|
+
owner: @options[:owner],
|
|
20
|
+
tags: @options[:tags],
|
|
21
|
+
register_yaml: @options[:register_yaml],
|
|
22
|
+
uri_prefix: @options[:uri_prefix],
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
puts "Created #{@options[:output]}"
|
|
26
|
+
rescue ArgumentError => e
|
|
27
|
+
warn "Error: #{e.message}"
|
|
28
|
+
exit 1
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|