glossarist 2.8.15 → 2.8.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +25 -0
- data/README.adoc +62 -8
- data/lib/glossarist/bibliography_data.rb +28 -20
- data/lib/glossarist/bibliography_entry.rb +15 -4
- data/lib/glossarist/figure.rb +1 -14
- data/lib/glossarist/formula.rb +1 -7
- data/lib/glossarist/non_verb_rep.rb +14 -24
- data/lib/glossarist/non_verbal_entity.rb +14 -22
- data/lib/glossarist/reference_extractor.rb +7 -2
- data/lib/glossarist/shared_non_verbal_entity.rb +29 -0
- data/lib/glossarist/table.rb +1 -7
- data/lib/glossarist/transforms/concept_to_gloss_transform.rb +8 -2
- data/lib/glossarist/v3.rb +0 -8
- data/lib/glossarist/validation/asset_index.rb +7 -2
- data/lib/glossarist/validation/bibliography_index.rb +3 -32
- data/lib/glossarist/validation/rules/bibliography_yaml_rule.rb +1 -1
- data/lib/glossarist/validation/rules/gcr_context.rb +0 -1
- data/lib/glossarist/validation/rules/orphaned_images_rule.rb +1 -23
- data/lib/glossarist/version.rb +1 -1
- data/lib/glossarist.rb +1 -0
- metadata +3 -6
- data/lib/glossarist/v3/bibliography_entry.rb +0 -19
- data/lib/glossarist/v3/bibliography_file.rb +0 -27
- data/lib/glossarist/v3/image_entry.rb +0 -21
- data/lib/glossarist/v3/image_file.rb +0 -31
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3dff1f025f20e6cfad5028acb99816984ddef4cd4a231991345a633c42ae5794
|
|
4
|
+
data.tar.gz: 7eed6c0df552c7206225460f212500fac6e4c7f3bc47d04bf75bb11acd53af7d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 01bda07bdfcff32b5526b33e19bd38074f0eeeab585bdf739593c95f9edacdd85cff549ce7430b53a027b18b1cd011a99a33454f71fdb2163c57be6bb1a1affb
|
|
7
|
+
data.tar.gz: f9e036c52fa7fa63dab55c8623bd47bc46af544d1fa3660368c8f41ef9c9155373ca92ac91116c8f8d7ff18b1ca9e093fde4f6e25ce0ca7b8194c9724b2df75d
|
data/CLAUDE.md
CHANGED
|
@@ -71,6 +71,31 @@ Designation inheritance hierarchy (MECE):
|
|
|
71
71
|
- Supports both camelCase and snake_case keys in YAML (e.g., `localizedConcepts` / `localized_concepts`) using `%i[key1 key2]` mapping syntax.
|
|
72
72
|
- Also supports V1 format (`concept-*.yaml` files at root level).
|
|
73
73
|
|
|
74
|
+
### V3 Dataset Syntax (collection files are single-key mappings)
|
|
75
|
+
|
|
76
|
+
A dataset collection file — `bibliography.yaml`, `images.yaml`, and any future
|
|
77
|
+
equivalent — follows the *V3 glossarist dataset syntax*: a YAML **mapping with a
|
|
78
|
+
single wrapper key** whose value is an **array of typed items**. No keyed maps
|
|
79
|
+
(indexing items by an out-of-band reference string), and no stray top-level
|
|
80
|
+
arrays — the array is always grouped under one named key. Each item carries its
|
|
81
|
+
own `id` field. (A keyed bibliography was tried and rejected as wrong — a
|
|
82
|
+
bibliography is an ordered collection, not a map, and keying forced the entry to
|
|
83
|
+
degenerate into `citation_key` + an untyped `data` hash. A bare top-level array
|
|
84
|
+
was also rejected — the user does not want stray arrays at the document root.)
|
|
85
|
+
|
|
86
|
+
Canonical models: `BibliographyData` + typed `BibliographyEntry`;
|
|
87
|
+
the former `V3::ImageFile` + `V3::ImageEntry` models are removed as of 2.8.16. `bibliography.yaml` wraps its entries under a
|
|
88
|
+
single `bibliography:` key. Because the root is a mapping, one `key_value` map
|
|
89
|
+
(`map "bibliography", to: :entries`) drives both the file (`to_yaml`/`from_yaml`)
|
|
90
|
+
and the in-memory store (`to_hash`/`from_hash`) — no overrides, no nested
|
|
91
|
+
Collection, no `YAML.safe_load`. `BibliographyData#shortname` is the
|
|
92
|
+
PackageStore record key only — never serialized. Documentation lives in
|
|
93
|
+
`README.adoc` (`== Bibliography`); this note is internal guidance, not user docs.
|
|
94
|
+
|
|
95
|
+
The remaining `map nil` keyed patterns live only in the **V1 legacy adapters**
|
|
96
|
+
(`v1/register.rb`, `v1/concept.rb`), which are intentional passthroughs for old
|
|
97
|
+
IEV-format datasets — do not "fix" them; that would break reading V1 data.
|
|
98
|
+
|
|
74
99
|
### Configuration & Extensibility
|
|
75
100
|
|
|
76
101
|
- **`Config`** (`config.rb`) — singleton that holds registered classes for `:localized_concept` and `:managed_concept`. Allows swapping implementations via `register_class`.
|
data/README.adoc
CHANGED
|
@@ -911,23 +911,35 @@ result.diffs # Array of ConceptDiff with similarity scores
|
|
|
911
911
|
|
|
912
912
|
=== NonVerbRep
|
|
913
913
|
|
|
914
|
-
Non-verbal representations are associated resources (images, tables, formulas) used to help define a concept (ISO 10241-1 §6.5).
|
|
914
|
+
Non-verbal representations are associated resources (images, tables, formulas) used to help define a concept (ISO 10241-1 §6.5). NonVerbRep is the concept-local form — attached directly to a concept's data; the dataset-shared form is xref:figure[Figure] / Table / Formula. Both share the same accessibility payload via `NonVerbalEntity`.
|
|
915
915
|
|
|
916
|
-
type:: String — the
|
|
917
|
-
|
|
918
|
-
|
|
916
|
+
type:: String — the kind of representation: `image`, `table`, or `formula`.
|
|
917
|
+
images:: Collection of `FigureImage` variants (responsive, format fallbacks, dark/light). Used when `type: image`.
|
|
918
|
+
caption:: Localized hash — short title keyed by ISO 639 code (e.g. `{ eng: "..." }`).
|
|
919
|
+
description:: Localized hash — long description for accessibility.
|
|
920
|
+
alt:: Localized hash — short alternative text for screen readers.
|
|
919
921
|
sources:: Collection of <<concept-source,ConceptSource>> entries — bibliographic sources for the representation.
|
|
920
922
|
|
|
921
923
|
Example:
|
|
922
924
|
+
|
|
923
925
|
[,yaml]
|
|
924
926
|
----
|
|
925
|
-
|
|
927
|
+
non_verb_rep:
|
|
926
928
|
- type: image
|
|
927
|
-
|
|
928
|
-
|
|
929
|
+
images:
|
|
930
|
+
- src: assets/images/figure-1.svg
|
|
931
|
+
format: svg
|
|
932
|
+
role: vector
|
|
933
|
+
- src: assets/images/figure-1.png
|
|
934
|
+
format: png
|
|
935
|
+
role: raster
|
|
936
|
+
caption:
|
|
937
|
+
eng: Concept hierarchy
|
|
938
|
+
alt:
|
|
939
|
+
eng: Diagram showing the concept hierarchy
|
|
929
940
|
- type: formula
|
|
930
|
-
|
|
941
|
+
images:
|
|
942
|
+
- src: urn:gcr:assets:formula-eq1
|
|
931
943
|
sources:
|
|
932
944
|
- type: authoritative
|
|
933
945
|
status: identical
|
|
@@ -996,6 +1008,48 @@ register.concept_section_ids(concept) # => ["3.1", "3"]
|
|
|
996
1008
|
When a concept has no explicit `domains[]` entry with `ref_type: section`, section membership is derived from the concept's identifier using the longest registered section prefix (e.g. `103-01-01` → section `103`).
|
|
997
1009
|
|
|
998
1010
|
|
|
1011
|
+
[[bibliography,Bibliography]]
|
|
1012
|
+
== Bibliography
|
|
1013
|
+
|
|
1014
|
+
A dataset directory may contain a `bibliography.yaml` — the dataset's bibliography, an ordered collection of the bibliographic references cited by its concepts. It is a YAML *mapping with a single key*, `bibliography`, whose value is an array of typed entries. This is the **V3 glossarist dataset syntax** for a collection file: a typed list grouped under one wrapper key, never a keyed map and never a stray top-level array. Each entry carries its own `id` — the identifier is a field on the item, not an out-of-band hash key.
|
|
1015
|
+
|
|
1016
|
+
[,yaml]
|
|
1017
|
+
----
|
|
1018
|
+
bibliography:
|
|
1019
|
+
- id: ref_1
|
|
1020
|
+
reference: ISO 704
|
|
1021
|
+
title: Terminology work — Principles and methods
|
|
1022
|
+
- id: ref_23
|
|
1023
|
+
reference: UNECE TRANS/WP29/1045
|
|
1024
|
+
title: Common definitions of vehicle categories, masses and dimensions
|
|
1025
|
+
link: https://www.unece.org/fileadmin/DAM/trans/doc/2005/wp29/TRANS-WP29-1045e.pdf
|
|
1026
|
+
- id: iso_std_iso_15704_en
|
|
1027
|
+
reference: ISO 15704
|
|
1028
|
+
----
|
|
1029
|
+
|
|
1030
|
+
`BibliographyEntry` fields:
|
|
1031
|
+
|
|
1032
|
+
[cols="1,4"]
|
|
1033
|
+
|===
|
|
1034
|
+
|Field |Description
|
|
1035
|
+
|
|
1036
|
+
|`id` |Entry identifier, dataset-unique. Cited from concept sources and inline `{{cite:...}}` mentions.
|
|
1037
|
+
|`reference` |Publication reference string (e.g. `ISO 704`, `IEC 60050`).
|
|
1038
|
+
|`title` |Title of the referenced document.
|
|
1039
|
+
|`link` |Optional URL to the referenced document.
|
|
1040
|
+
|`type` |Optional document type (e.g. `standard`).
|
|
1041
|
+
|===
|
|
1042
|
+
|
|
1043
|
+
[,ruby]
|
|
1044
|
+
----
|
|
1045
|
+
bib = Glossarist::BibliographyData.from_file("path/to/bibliography.yaml")
|
|
1046
|
+
bib.entries # => [#<BibliographyEntry id: "ref_1", ...>, ...]
|
|
1047
|
+
bib.find("ref_1") # => #<BibliographyEntry ...>
|
|
1048
|
+
bib.keys # => ["ref_1", "ref_23", ...]
|
|
1049
|
+
----
|
|
1050
|
+
|
|
1051
|
+
The same single-key convention applies to every dataset collection file (`images.yaml`, and any future equivalent): the collection is an array of typed items grouped under one wrapper key.
|
|
1052
|
+
|
|
999
1053
|
== Commands
|
|
1000
1054
|
|
|
1001
1055
|
=== generate_latex
|
|
@@ -1,41 +1,49 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Glossarist
|
|
4
|
+
# The bibliography of a dataset, persisted as bibliography.yaml.
|
|
5
|
+
#
|
|
6
|
+
# The file follows the *V3 glossarist dataset syntax* for a collection: a YAML
|
|
7
|
+
# mapping with a single key, +bibliography+, whose value is an array of typed
|
|
8
|
+
# BibliographyEntry items. A bibliography is an ordered collection of
|
|
9
|
+
# references, not a keyed map, so each item carries its own +id+ field rather
|
|
10
|
+
# than being indexed by an out-of-band reference string. The single wrapper
|
|
11
|
+
# key keeps the document root a mapping (no stray top-level array).
|
|
12
|
+
#
|
|
13
|
+
# Because the root is a mapping, a single +key_value+ mapping drives both the
|
|
14
|
+
# file (#to_yaml / .from_yaml) and the in-memory store (#to_hash /
|
|
15
|
+
# .from_hash) — no special-case serialization.
|
|
16
|
+
#
|
|
17
|
+
# +shortname+ is internal bookkeeping only: lutaml-store's PackageStore needs
|
|
18
|
+
# a key field to store the bibliography as a single record. It is never
|
|
19
|
+
# serialized — only the +bibliography+ key appears in the file.
|
|
4
20
|
class BibliographyData < Lutaml::Model::Serializable
|
|
5
21
|
attribute :shortname, :string, default: -> { "bibliography" }
|
|
6
22
|
attribute :entries, BibliographyEntry, collection: true,
|
|
7
23
|
initialize_empty: true
|
|
8
24
|
|
|
9
25
|
key_value do
|
|
10
|
-
map
|
|
11
|
-
with: { from: :entries_from_hash, to: :entries_to_hash }
|
|
26
|
+
map "bibliography", to: :entries
|
|
12
27
|
end
|
|
13
28
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
29
|
+
class << self
|
|
30
|
+
def from_file(path)
|
|
31
|
+
return nil unless File.exist?(path)
|
|
17
32
|
|
|
18
|
-
|
|
19
|
-
|
|
33
|
+
from_yaml(File.read(path, encoding: "utf-8"))
|
|
34
|
+
end
|
|
20
35
|
end
|
|
21
36
|
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
-
entry&.data
|
|
37
|
+
def find(id)
|
|
38
|
+
entries.find { |e| e.id == id.to_s }
|
|
25
39
|
end
|
|
26
40
|
|
|
27
|
-
def
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
model.entries = value.map do |key, data|
|
|
31
|
-
BibliographyEntry.new(citation_key: key, data: data || {})
|
|
32
|
-
end
|
|
41
|
+
def keys
|
|
42
|
+
entries.map(&:id)
|
|
33
43
|
end
|
|
34
44
|
|
|
35
|
-
def
|
|
36
|
-
|
|
37
|
-
doc[entry.citation_key] = entry.data
|
|
38
|
-
end
|
|
45
|
+
def [](id)
|
|
46
|
+
find(id)
|
|
39
47
|
end
|
|
40
48
|
end
|
|
41
49
|
end
|
|
@@ -1,13 +1,24 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Glossarist
|
|
4
|
+
# A single bibliographic item in a dataset's bibliography.
|
|
5
|
+
#
|
|
6
|
+
# A bibliography is an ordered collection of references, so bibliography.yaml
|
|
7
|
+
# wraps a YAML sequence (array) of these typed entries under its single +bibliography+ key. The entry's identifier is
|
|
8
|
+
# the +id+ field on each item — never an out-of-band hash key.
|
|
4
9
|
class BibliographyEntry < Lutaml::Model::Serializable
|
|
5
|
-
attribute :
|
|
6
|
-
attribute :
|
|
10
|
+
attribute :id, :string
|
|
11
|
+
attribute :reference, :string
|
|
12
|
+
attribute :title, :string
|
|
13
|
+
attribute :link, :string
|
|
14
|
+
attribute :type, :string
|
|
7
15
|
|
|
8
16
|
key_value do
|
|
9
|
-
map
|
|
10
|
-
map
|
|
17
|
+
map :id, to: :id
|
|
18
|
+
map :reference, to: :reference
|
|
19
|
+
map :title, to: :title
|
|
20
|
+
map :link, to: :link
|
|
21
|
+
map :type, to: :type
|
|
11
22
|
end
|
|
12
23
|
end
|
|
13
24
|
end
|
data/lib/glossarist/figure.rb
CHANGED
|
@@ -13,25 +13,15 @@ module Glossarist
|
|
|
13
13
|
# subfigures.
|
|
14
14
|
#
|
|
15
15
|
# Caption, description, and alt are localized (hash keyed by ISO 639 code).
|
|
16
|
-
class Figure <
|
|
16
|
+
class Figure < SharedNonVerbalEntity
|
|
17
17
|
attribute :images, FigureImage, collection: true
|
|
18
18
|
attribute :subfigures, Figure, collection: true
|
|
19
19
|
|
|
20
20
|
key_value do
|
|
21
|
-
map :id, to: :id
|
|
22
|
-
map :identifier, to: :identifier
|
|
23
|
-
map :caption, to: :caption
|
|
24
|
-
map :description, to: :description
|
|
25
|
-
map :alt, to: :alt
|
|
26
21
|
map :images, to: :images
|
|
27
|
-
map :sources, to: :sources
|
|
28
22
|
map :subfigures, to: :subfigures
|
|
29
23
|
end
|
|
30
24
|
|
|
31
|
-
# Recursively search for a subfigure (or self) by ID.
|
|
32
|
-
#
|
|
33
|
-
# @param target_id [String] the figure or subfigure ID
|
|
34
|
-
# @return [Figure, nil]
|
|
35
25
|
def find_by_id(target_id)
|
|
36
26
|
return self if id == target_id
|
|
37
27
|
|
|
@@ -42,9 +32,6 @@ module Glossarist
|
|
|
42
32
|
nil
|
|
43
33
|
end
|
|
44
34
|
|
|
45
|
-
# Collect this figure's ID and all descendant subfigure IDs.
|
|
46
|
-
#
|
|
47
|
-
# @return [Array<String>]
|
|
48
35
|
def all_ids
|
|
49
36
|
[id] + Array(subfigures).flat_map(&:all_ids)
|
|
50
37
|
end
|
data/lib/glossarist/formula.rb
CHANGED
|
@@ -7,19 +7,13 @@ module Glossarist
|
|
|
7
7
|
# shared across concepts. The mathematical expression is stored in a
|
|
8
8
|
# notation format (LaTeX, MathML, AsciiMath). Caption, description, and
|
|
9
9
|
# alt are localized for accessibility.
|
|
10
|
-
class Formula <
|
|
10
|
+
class Formula < SharedNonVerbalEntity
|
|
11
11
|
attribute :expression, :hash
|
|
12
12
|
attribute :notation, :string
|
|
13
13
|
|
|
14
14
|
key_value do
|
|
15
|
-
map :id, to: :id
|
|
16
|
-
map :identifier, to: :identifier
|
|
17
|
-
map :caption, to: :caption
|
|
18
|
-
map :description, to: :description
|
|
19
|
-
map :alt, to: :alt
|
|
20
15
|
map :expression, to: :expression
|
|
21
16
|
map :notation, to: :notation
|
|
22
|
-
map :sources, to: :sources
|
|
23
17
|
end
|
|
24
18
|
end
|
|
25
19
|
end
|
|
@@ -1,36 +1,26 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Glossarist
|
|
4
|
-
# A non-verbal representation
|
|
5
|
-
# ISO 10241-1 §6.5.
|
|
4
|
+
# A concept-local non-verbal representation (ISO 10241-1 §6.5).
|
|
6
5
|
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
6
|
+
# NonVerbRep is the inline form attached directly to a concept's data.
|
|
7
|
+
# The dataset-shared form is Figure / Table / Formula. The two share the
|
|
8
|
+
# same a11y + provenance payload via NonVerbalEntity; NonVerbRep differs
|
|
9
|
+
# only in that it has no dataset-wide identity (no +id+, no +identifier+)
|
|
10
|
+
# — its identity is its position inside the parent concept.
|
|
11
11
|
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
|
|
18
|
-
# - +sources+: bibliographic sources for the representation
|
|
19
|
-
class NonVerbRep < Lutaml::Model::Serializable
|
|
12
|
+
# +type+ discriminates the kind of non-verbal content: "image", "table",
|
|
13
|
+
# or "formula". When +type+ is "image", +images+ carries one or more
|
|
14
|
+
# FigureImage variants (responsive, format fallback, dark/light). The
|
|
15
|
+
# caption/description/alt fields are localized (hash keyed by ISO 639
|
|
16
|
+
# code) for accessibility.
|
|
17
|
+
class NonVerbRep < NonVerbalEntity
|
|
20
18
|
attribute :type, :string
|
|
21
|
-
attribute :
|
|
22
|
-
attribute :text, :string
|
|
23
|
-
attribute :caption, :hash
|
|
24
|
-
attribute :description, :hash
|
|
25
|
-
attribute :sources, ConceptSource, collection: true
|
|
19
|
+
attribute :images, FigureImage, collection: true, initialize_empty: true
|
|
26
20
|
|
|
27
21
|
key_value do
|
|
28
22
|
map :type, to: :type
|
|
29
|
-
map :
|
|
30
|
-
map :text, to: :text
|
|
31
|
-
map :caption, to: :caption
|
|
32
|
-
map :description, to: :description
|
|
33
|
-
map :sources, to: :sources
|
|
23
|
+
map :images, to: :images
|
|
34
24
|
end
|
|
35
25
|
end
|
|
36
26
|
end
|
|
@@ -1,47 +1,39 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Glossarist
|
|
4
|
-
#
|
|
4
|
+
# Shared payload for every non-verbal representation, whether it lives
|
|
5
|
+
# inline on a concept (NonVerbRep) or as a dataset-shared file
|
|
6
|
+
# (Figure / Table / Formula).
|
|
5
7
|
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
# Each is authored once at the dataset level and referenced by any number
|
|
9
|
-
# of concepts — the same pattern as bibliography entries.
|
|
8
|
+
# The four attributes here are the common a11y + provenance payload every
|
|
9
|
+
# non-verbal entity carries, regardless of content type or scope:
|
|
10
10
|
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
11
|
+
# - +caption+: localized short title (a11y / indexing).
|
|
12
|
+
# - +description+: localized long description (a11y screen readers).
|
|
13
|
+
# - +alt+: localized alternative text (a11y short screen-reader label).
|
|
14
|
+
# - +sources+: bibliographic sources for the representation.
|
|
13
15
|
#
|
|
14
|
-
#
|
|
16
|
+
# Identity (+id+, +identifier+) belongs on subclasses that have it; see
|
|
17
|
+
# SharedNonVerbalEntity for the dataset-shared variant.
|
|
15
18
|
class NonVerbalEntity < Lutaml::Model::Serializable
|
|
16
|
-
attribute :id, :string
|
|
17
|
-
attribute :identifier, :string
|
|
18
19
|
attribute :caption, :hash
|
|
19
20
|
attribute :description, :hash
|
|
20
21
|
attribute :alt, :hash
|
|
21
22
|
attribute :sources, ConceptSource, collection: true
|
|
22
23
|
|
|
23
24
|
key_value do
|
|
24
|
-
map :id, to: :id
|
|
25
|
-
map :identifier, to: :identifier
|
|
26
25
|
map :caption, to: :caption
|
|
27
26
|
map :description, to: :description
|
|
28
27
|
map :alt, to: :alt
|
|
29
28
|
map :sources, to: :sources
|
|
30
29
|
end
|
|
31
30
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# @param target_id [String]
|
|
35
|
-
# @return [NonVerbalEntity, nil]
|
|
36
|
-
def find_by_id(target_id)
|
|
37
|
-
id == target_id ? self : nil
|
|
31
|
+
def find_by_id(_target_id)
|
|
32
|
+
nil
|
|
38
33
|
end
|
|
39
34
|
|
|
40
|
-
# This entity's IDs. Figure overrides to include subfigure IDs.
|
|
41
|
-
#
|
|
42
|
-
# @return [Array<String>]
|
|
43
35
|
def all_ids
|
|
44
|
-
[
|
|
36
|
+
[]
|
|
45
37
|
end
|
|
46
38
|
|
|
47
39
|
def self.from_file(path)
|
|
@@ -170,9 +170,14 @@ module Glossarist
|
|
|
170
170
|
|
|
171
171
|
concept.localizations.each do |l10n|
|
|
172
172
|
Array(l10n.non_verb_rep).each do |nvr|
|
|
173
|
-
next unless nvr.is_a?(NonVerbRep)
|
|
173
|
+
next unless nvr.is_a?(NonVerbRep)
|
|
174
174
|
|
|
175
|
-
|
|
175
|
+
Array(nvr.images).each do |image|
|
|
176
|
+
next unless image.is_a?(FigureImage)
|
|
177
|
+
next if image.src.nil? || image.src.strip.empty?
|
|
178
|
+
|
|
179
|
+
refs << AssetReference.new(path: image.src.strip)
|
|
180
|
+
end
|
|
176
181
|
end
|
|
177
182
|
|
|
178
183
|
(l10n.data&.terms || []).each do |term|
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Glossarist
|
|
4
|
+
# Dataset-shared non-verbal entity — a NonVerbalEntity with a stable
|
|
5
|
+
# identity. Figure, Table, and Formula inherit from this; NonVerbRep
|
|
6
|
+
# (concept-local, positional) inherits from NonVerbalEntity directly.
|
|
7
|
+
#
|
|
8
|
+
# The +id+ is the stable identifier used for cross-referencing
|
|
9
|
+
# (e.g. +figures/fig_A.23.yaml+ → +id: fig_A.23+). The +identifier+ is
|
|
10
|
+
# the human-readable label (e.g. +"A.23"+) used for display and AsciiDoc
|
|
11
|
+
# xref targets like +<<fig_A.23>>+.
|
|
12
|
+
class SharedNonVerbalEntity < NonVerbalEntity
|
|
13
|
+
attribute :id, :string
|
|
14
|
+
attribute :identifier, :string
|
|
15
|
+
|
|
16
|
+
key_value do
|
|
17
|
+
map :id, to: :id
|
|
18
|
+
map :identifier, to: :identifier
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def find_by_id(target_id)
|
|
22
|
+
id == target_id ? self : nil
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def all_ids
|
|
26
|
+
[id].compact
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
data/lib/glossarist/table.rb
CHANGED
|
@@ -7,19 +7,13 @@ module Glossarist
|
|
|
7
7
|
# across concepts. The content is stored as structured data (rows/columns)
|
|
8
8
|
# or as a markup string (HTML, Markdown, AsciiDoc). Caption, description,
|
|
9
9
|
# and alt are localized for accessibility.
|
|
10
|
-
class Table <
|
|
10
|
+
class Table < SharedNonVerbalEntity
|
|
11
11
|
attribute :content, :hash
|
|
12
12
|
attribute :format, :string
|
|
13
13
|
|
|
14
14
|
key_value do
|
|
15
|
-
map :id, to: :id
|
|
16
|
-
map :identifier, to: :identifier
|
|
17
|
-
map :caption, to: :caption
|
|
18
|
-
map :description, to: :description
|
|
19
|
-
map :alt, to: :alt
|
|
20
15
|
map :content, to: :content
|
|
21
16
|
map :format, to: :format
|
|
22
|
-
map :sources, to: :sources
|
|
23
17
|
end
|
|
24
18
|
end
|
|
25
19
|
end
|
|
@@ -303,8 +303,8 @@ desig_index)
|
|
|
303
303
|
Array(non_verb_reps).each_with_index.map do |nvr, idx|
|
|
304
304
|
Rdf::GlossNonVerbalRep.new(
|
|
305
305
|
representation_type: nvr.type,
|
|
306
|
-
representation_ref: nvr.
|
|
307
|
-
representation_text: nvr.
|
|
306
|
+
representation_ref: nvr.images.first&.src,
|
|
307
|
+
representation_text: localized_alt_for(nvr.alt, lang),
|
|
308
308
|
sources: build_gloss_sources(nvr.sources),
|
|
309
309
|
concept_id: concept_id.to_s,
|
|
310
310
|
lang_code: lang.to_s,
|
|
@@ -313,6 +313,12 @@ desig_index)
|
|
|
313
313
|
end
|
|
314
314
|
end
|
|
315
315
|
|
|
316
|
+
def localized_alt_for(alt, lang)
|
|
317
|
+
return nil unless alt.is_a?(Hash) && !alt.empty?
|
|
318
|
+
|
|
319
|
+
alt[lang.to_s] || alt[lang.to_sym] || alt.values.first
|
|
320
|
+
end
|
|
321
|
+
|
|
316
322
|
def build_gloss_domains(domains, concept_id)
|
|
317
323
|
Array(domains).map do |ref|
|
|
318
324
|
Rdf::GlossConceptReference.new(
|
data/lib/glossarist/v3.rb
CHANGED
|
@@ -13,10 +13,6 @@ module Glossarist
|
|
|
13
13
|
autoload :ManagedConceptData, "glossarist/v3/managed_concept_data"
|
|
14
14
|
autoload :ManagedConcept, "glossarist/v3/managed_concept"
|
|
15
15
|
autoload :ConceptDocument, "glossarist/v3/concept_document"
|
|
16
|
-
autoload :BibliographyEntry, "glossarist/v3/bibliography_entry"
|
|
17
|
-
autoload :BibliographyFile, "glossarist/v3/bibliography_file"
|
|
18
|
-
autoload :ImageEntry, "glossarist/v3/image_entry"
|
|
19
|
-
autoload :ImageFile, "glossarist/v3/image_file"
|
|
20
16
|
|
|
21
17
|
Configuration.register_model(Citation, id: :citation)
|
|
22
18
|
Configuration.register_model(ConceptSource, id: :concept_source)
|
|
@@ -28,9 +24,5 @@ module Glossarist
|
|
|
28
24
|
Configuration.register_model(ManagedConceptData, id: :managed_concept_data)
|
|
29
25
|
Configuration.register_model(ManagedConcept, id: :managed_concept)
|
|
30
26
|
Configuration.register_model(ConceptDocument, id: :concept_document)
|
|
31
|
-
Configuration.register_model(BibliographyEntry, id: :bibliography_entry)
|
|
32
|
-
Configuration.register_model(BibliographyFile, id: :bibliography_file)
|
|
33
|
-
Configuration.register_model(ImageEntry, id: :image_entry)
|
|
34
|
-
Configuration.register_model(ImageFile, id: :image_file)
|
|
35
27
|
end
|
|
36
28
|
end
|
|
@@ -95,9 +95,14 @@ module Glossarist
|
|
|
95
95
|
|
|
96
96
|
def register_non_verb_rep(index, l10n)
|
|
97
97
|
Array(l10n.non_verb_rep).each do |nvr|
|
|
98
|
-
next unless nvr.is_a?(NonVerbRep)
|
|
98
|
+
next unless nvr.is_a?(NonVerbRep)
|
|
99
99
|
|
|
100
|
-
|
|
100
|
+
Array(nvr.images).each do |image|
|
|
101
|
+
next unless image.is_a?(FigureImage)
|
|
102
|
+
next if image.src.nil? || image.src.strip.empty?
|
|
103
|
+
|
|
104
|
+
index.register(image.src.strip)
|
|
105
|
+
end
|
|
101
106
|
end
|
|
102
107
|
end
|
|
103
108
|
|
|
@@ -30,18 +30,15 @@ module Glossarist
|
|
|
30
30
|
|
|
31
31
|
concepts.each { |concept| index_concept_sources(index, concept) }
|
|
32
32
|
index_bibliography_file(index, dataset_path)
|
|
33
|
-
index_images_file(index, dataset_path)
|
|
34
33
|
|
|
35
34
|
index
|
|
36
35
|
end
|
|
37
36
|
|
|
38
|
-
def self.build_from_yaml(concepts, bibliography_yaml: nil
|
|
39
|
-
images_yaml: nil)
|
|
37
|
+
def self.build_from_yaml(concepts, bibliography_yaml: nil)
|
|
40
38
|
index = new
|
|
41
39
|
|
|
42
40
|
concepts.each { |concept| index_concept_sources(index, concept) }
|
|
43
41
|
index_bib_from_yaml_string(index, bibliography_yaml)
|
|
44
|
-
index_images_from_yaml_string(index, images_yaml)
|
|
45
42
|
|
|
46
43
|
index
|
|
47
44
|
end
|
|
@@ -103,7 +100,7 @@ images_yaml: nil)
|
|
|
103
100
|
def index_bibliography_file(index, dataset_path)
|
|
104
101
|
return unless dataset_path
|
|
105
102
|
|
|
106
|
-
bib =
|
|
103
|
+
bib = BibliographyData.from_file(
|
|
107
104
|
File.join(dataset_path, "bibliography.yaml"),
|
|
108
105
|
)
|
|
109
106
|
return unless bib
|
|
@@ -118,27 +115,10 @@ images_yaml: nil)
|
|
|
118
115
|
nil
|
|
119
116
|
end
|
|
120
117
|
|
|
121
|
-
def index_images_file(index, dataset_path)
|
|
122
|
-
return unless dataset_path
|
|
123
|
-
|
|
124
|
-
images = V3::ImageFile.from_file(
|
|
125
|
-
File.join(dataset_path, "images.yaml"),
|
|
126
|
-
)
|
|
127
|
-
return unless images
|
|
128
|
-
|
|
129
|
-
Array(images.entries).each do |entry|
|
|
130
|
-
next unless entry&.id
|
|
131
|
-
|
|
132
|
-
index.register(entry.id, entry)
|
|
133
|
-
end
|
|
134
|
-
rescue StandardError
|
|
135
|
-
nil
|
|
136
|
-
end
|
|
137
|
-
|
|
138
118
|
def index_bib_from_yaml_string(index, yaml_content)
|
|
139
119
|
return unless yaml_content
|
|
140
120
|
|
|
141
|
-
bib =
|
|
121
|
+
bib = BibliographyData.from_yaml(yaml_content)
|
|
142
122
|
bib.entries.each do |entry|
|
|
143
123
|
index.register(entry.id, entry)
|
|
144
124
|
index.register(entry.reference, entry) if entry.reference
|
|
@@ -146,15 +126,6 @@ images_yaml: nil)
|
|
|
146
126
|
rescue StandardError
|
|
147
127
|
nil
|
|
148
128
|
end
|
|
149
|
-
|
|
150
|
-
def index_images_from_yaml_string(index, yaml_content)
|
|
151
|
-
return unless yaml_content
|
|
152
|
-
|
|
153
|
-
images = V3::ImageFile.from_yaml(yaml_content)
|
|
154
|
-
images.entries.each { |entry| index.register(entry.id, entry) }
|
|
155
|
-
rescue StandardError
|
|
156
|
-
nil
|
|
157
|
-
end
|
|
158
129
|
end
|
|
159
130
|
end
|
|
160
131
|
end
|
|
@@ -23,9 +23,7 @@ module Glossarist
|
|
|
23
23
|
next unless text
|
|
24
24
|
|
|
25
25
|
extractor.extract_from_text(text).each do |ref|
|
|
26
|
-
if ref.is_a?(AssetReference)
|
|
27
|
-
referenced_paths.add(ref.path)
|
|
28
|
-
end
|
|
26
|
+
referenced_paths.add(ref.path) if ref.is_a?(AssetReference)
|
|
29
27
|
end
|
|
30
28
|
end
|
|
31
29
|
end
|
|
@@ -35,16 +33,6 @@ module Glossarist
|
|
|
35
33
|
end
|
|
36
34
|
end
|
|
37
35
|
|
|
38
|
-
images_file = load_images_file(context)
|
|
39
|
-
if images_file
|
|
40
|
-
context.bibliography_index.entries.each_value do |entry|
|
|
41
|
-
next unless entry[:source].is_a?(V3::ImageEntry)
|
|
42
|
-
|
|
43
|
-
path = entry[:source].path
|
|
44
|
-
referenced_paths.add(path) if path
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
|
|
48
36
|
issues = []
|
|
49
37
|
context.asset_index.each_path do |path|
|
|
50
38
|
next if referenced_paths.include?(path)
|
|
@@ -59,16 +47,6 @@ module Glossarist
|
|
|
59
47
|
|
|
60
48
|
issues
|
|
61
49
|
end
|
|
62
|
-
|
|
63
|
-
private
|
|
64
|
-
|
|
65
|
-
def load_images_file(context)
|
|
66
|
-
return @load_images_file if defined?(@load_images_file)
|
|
67
|
-
|
|
68
|
-
@load_images_file = V3::ImageFile.from_file(
|
|
69
|
-
File.join(context.path, "images.yaml"),
|
|
70
|
-
)
|
|
71
|
-
end
|
|
72
50
|
end
|
|
73
51
|
end
|
|
74
52
|
end
|
data/lib/glossarist/version.rb
CHANGED
data/lib/glossarist.rb
CHANGED
|
@@ -40,6 +40,7 @@ module Glossarist
|
|
|
40
40
|
autoload :Config, "glossarist/config"
|
|
41
41
|
autoload :LocalizedString, "glossarist/localized_string"
|
|
42
42
|
autoload :NonVerbalEntity, "glossarist/non_verbal_entity"
|
|
43
|
+
autoload :SharedNonVerbalEntity, "glossarist/shared_non_verbal_entity"
|
|
43
44
|
autoload :NonVerbalReference, "glossarist/non_verbal_reference"
|
|
44
45
|
autoload :Figure, "glossarist/figure"
|
|
45
46
|
autoload :FigureImage, "glossarist/figure_image"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: glossarist
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.8.
|
|
4
|
+
version: 2.8.16
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-18 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: lutaml-model
|
|
@@ -302,6 +302,7 @@ files:
|
|
|
302
302
|
- lib/glossarist/schema_migration/v0_to_v1.rb
|
|
303
303
|
- lib/glossarist/schema_migration/v2_to_v3.rb
|
|
304
304
|
- lib/glossarist/section.rb
|
|
305
|
+
- lib/glossarist/shared_non_verbal_entity.rb
|
|
305
306
|
- lib/glossarist/sts.rb
|
|
306
307
|
- lib/glossarist/sts/extracted_designation.rb
|
|
307
308
|
- lib/glossarist/sts/extracted_lang_set.rb
|
|
@@ -336,8 +337,6 @@ files:
|
|
|
336
337
|
- lib/glossarist/v2/managed_concept_data.rb
|
|
337
338
|
- lib/glossarist/v2/related_concept.rb
|
|
338
339
|
- lib/glossarist/v3.rb
|
|
339
|
-
- lib/glossarist/v3/bibliography_entry.rb
|
|
340
|
-
- lib/glossarist/v3/bibliography_file.rb
|
|
341
340
|
- lib/glossarist/v3/citation.rb
|
|
342
341
|
- lib/glossarist/v3/concept_data.rb
|
|
343
342
|
- lib/glossarist/v3/concept_document.rb
|
|
@@ -345,8 +344,6 @@ files:
|
|
|
345
344
|
- lib/glossarist/v3/concept_source.rb
|
|
346
345
|
- lib/glossarist/v3/configuration.rb
|
|
347
346
|
- lib/glossarist/v3/detailed_definition.rb
|
|
348
|
-
- lib/glossarist/v3/image_entry.rb
|
|
349
|
-
- lib/glossarist/v3/image_file.rb
|
|
350
347
|
- lib/glossarist/v3/localized_concept.rb
|
|
351
348
|
- lib/glossarist/v3/managed_concept.rb
|
|
352
349
|
- lib/glossarist/v3/managed_concept_data.rb
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Glossarist
|
|
4
|
-
module V3
|
|
5
|
-
class BibliographyEntry < Lutaml::Model::Serializable
|
|
6
|
-
attribute :id, :string
|
|
7
|
-
attribute :reference, :string
|
|
8
|
-
attribute :title, :string
|
|
9
|
-
attribute :link, :string
|
|
10
|
-
|
|
11
|
-
key_value do
|
|
12
|
-
map :id, to: :id
|
|
13
|
-
map :reference, to: :reference
|
|
14
|
-
map :title, to: :title
|
|
15
|
-
map :link, to: :link
|
|
16
|
-
end
|
|
17
|
-
end
|
|
18
|
-
end
|
|
19
|
-
end
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Glossarist
|
|
4
|
-
module V3
|
|
5
|
-
class BibliographyFile < Lutaml::Model::Collection
|
|
6
|
-
instances :entries, BibliographyEntry
|
|
7
|
-
|
|
8
|
-
key_value do
|
|
9
|
-
map_instances to: :entries
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
def self.from_file(path)
|
|
13
|
-
return nil unless File.exist?(path)
|
|
14
|
-
|
|
15
|
-
from_yaml(File.read(path))
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def resolve?(anchor)
|
|
19
|
-
entries.any? { |e| e.id == anchor.to_s }
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def [](key)
|
|
23
|
-
entries.find { |e| e.id == key.to_s }
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
end
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Glossarist
|
|
4
|
-
module V3
|
|
5
|
-
class ImageEntry < Lutaml::Model::Serializable
|
|
6
|
-
attribute :id, :string
|
|
7
|
-
attribute :path, :string
|
|
8
|
-
attribute :type, :string, default: -> { "image" }
|
|
9
|
-
attribute :title, :string
|
|
10
|
-
attribute :alt, :string
|
|
11
|
-
|
|
12
|
-
key_value do
|
|
13
|
-
map :id, to: :id
|
|
14
|
-
map :path, to: :path
|
|
15
|
-
map :type, to: :type
|
|
16
|
-
map :title, to: :title
|
|
17
|
-
map :alt, to: :alt
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Glossarist
|
|
4
|
-
module V3
|
|
5
|
-
class ImageFile < Lutaml::Model::Collection
|
|
6
|
-
instances :entries, ImageEntry
|
|
7
|
-
|
|
8
|
-
key_value do
|
|
9
|
-
map_instances to: :entries
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
def self.from_file(path)
|
|
13
|
-
return nil unless File.exist?(path)
|
|
14
|
-
|
|
15
|
-
from_yaml(File.read(path))
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
def path_for_anchor(anchor)
|
|
19
|
-
entries.find { |e| e.id == anchor.to_s }&.path
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def anchor_for_path(path)
|
|
23
|
-
entries.find { |e| e.path == path }&.id
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
def path?(path)
|
|
27
|
-
entries.any? { |e| e.path == path }
|
|
28
|
-
end
|
|
29
|
-
end
|
|
30
|
-
end
|
|
31
|
-
end
|