termium 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +106 -5
- data/lib/termium/cli.rb +1 -1
- data/lib/termium/core.rb +7 -3
- data/lib/termium/language_module.rb +11 -2
- data/lib/termium/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 38f41bac67c56e9aa6ec73f6ec79b729216cfdd677ed27d7317ea1bcd2363bc0
|
4
|
+
data.tar.gz: b69ed20d7a04e49f815a2714c4dc2d3c290af3ecce2b89f8eba9d63bedd85a09
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7cdfffe9d0e84f8af6a8724b82233646ac43351b9fa3107941de8a3754e5fe664c06bcf8841db0fe43aee606c7c66fef3c2dc29c0c2a137bba37125ed24dc83
|
7
|
+
data.tar.gz: 386cf6b5a8658f7a91686771f083558ef31483c3fd961cdb58860082505ba8d3fc066b684dc35157607438b36886d99689b36e245ccd71fb9d549de5673303f7
|
data/README.adoc
CHANGED
@@ -42,17 +42,37 @@ Results in:
|
|
42
42
|
|
43
43
|
== Commands
|
44
44
|
|
45
|
-
`termium convert`:: Convert a TERMIUM Plus export XML file to a Glossarist dataset
|
45
|
+
`termium convert`:: Convert a TERMIUM Plus export XML file to a Paneron Glossarist dataset.
|
46
46
|
|
47
47
|
|
48
|
-
===
|
48
|
+
=== `termium convert`
|
49
|
+
|
50
|
+
==== Purpose
|
51
|
+
|
52
|
+
This command converts a TERMIUM Plus export XML (`<ns2:termium_extract>`) file
|
53
|
+
to a Paneron Glossarist dataset.
|
54
|
+
|
55
|
+
The resulting dataset will look like this:
|
56
|
+
|
57
|
+
[source]
|
58
|
+
----
|
59
|
+
{OUTPUT_PATH}/
|
60
|
+
├── concepts/
|
61
|
+
│ ├── {CONCEPT_ID}.yaml
|
62
|
+
│ ├── ...
|
63
|
+
├── localized_concepts/
|
64
|
+
├── {LOCALIZED_CONCEPT_ID}.yaml
|
65
|
+
├── ...
|
66
|
+
----
|
67
|
+
|
68
|
+
==== Usage
|
49
69
|
|
50
70
|
[source,sh]
|
51
71
|
----
|
52
|
-
$ termium convert -i INPUT_XML_FILE [-o OUTPUT_PATH]
|
72
|
+
$ termium convert -i INPUT_XML_FILE [-o OUTPUT_PATH] [--date-accepted DATE_ACCEPTED]
|
53
73
|
----
|
54
74
|
|
55
|
-
|
75
|
+
==== Options
|
56
76
|
|
57
77
|
[cols="a,a",options="header"]
|
58
78
|
|===
|
@@ -61,7 +81,7 @@ $ termium convert -i INPUT_XML_FILE [-o OUTPUT_PATH]
|
|
61
81
|
|`-i`, `--input-path`
|
62
82
|
|
|
63
83
|
Source path to TERMIUM Plus XML export file.
|
64
|
-
The file needs to start with the `<
|
84
|
+
The file needs to start with the `<ns2:termium_extract>` element.
|
65
85
|
|
66
86
|
|`-o`, `--output-path`
|
67
87
|
|
|
@@ -69,9 +89,90 @@ Destination path to Glossarist dataset directory.
|
|
69
89
|
If the directory doesn't exist it will be created.
|
70
90
|
If not provided, defaults to the basename of the input file, e.g. `foo/bar.xml` will export to `foo/bar/`.
|
71
91
|
|
92
|
+
|`--date-accepted`
|
93
|
+
|
|
94
|
+
Date of acceptance for the dataset. This fills in the `date_accepted` value of
|
95
|
+
the universal concept (which is exported to a YAML file).
|
96
|
+
|
72
97
|
|===
|
73
98
|
|
74
99
|
|
100
|
+
==== Examples
|
101
|
+
|
102
|
+
The data structures of these files can be seen in the following examples.
|
103
|
+
|
104
|
+
|
105
|
+
.Sample of `{CONCEPT_ID}.yaml`
|
106
|
+
[example]
|
107
|
+
====
|
108
|
+
This is `88a7dd87-6199-3516-9cec-f4cd79ff09c6.yaml`.
|
109
|
+
|
110
|
+
[source,yaml]
|
111
|
+
----
|
112
|
+
---
|
113
|
+
data:
|
114
|
+
identifier: '2120638'
|
115
|
+
localized_concepts:
|
116
|
+
eng: e114ee44-e601-5623-9099-48cfc2be2224
|
117
|
+
fre: 9a7b88cb-4ee6-5d59-89bb-230425a3c96a
|
118
|
+
related: []
|
119
|
+
date_accepted: 2015-05-01
|
120
|
+
status: valid
|
121
|
+
id: 88a7dd87-6199-3516-9cec-f4cd79ff09c6
|
122
|
+
----
|
123
|
+
====
|
124
|
+
|
125
|
+
.Sample of `{LOCALIZED_CONCEPT_ID}.yaml`
|
126
|
+
[example]
|
127
|
+
====
|
128
|
+
This is `e114ee44-e601-5623-9099-48cfc2be2224.yaml`.
|
129
|
+
|
130
|
+
[source,yaml]
|
131
|
+
----
|
132
|
+
---
|
133
|
+
data:
|
134
|
+
dates: []
|
135
|
+
definition:
|
136
|
+
- content: layer whose nodes directly communicate with external systems
|
137
|
+
examples: []
|
138
|
+
id: '2120638'
|
139
|
+
notes:
|
140
|
+
- content: 'visible layer: term and definition standardized by ISO/IEC [ISO/IEC
|
141
|
+
2382-34:1999].'
|
142
|
+
- content: 34.02.09 (2382)
|
143
|
+
sources:
|
144
|
+
- origin:
|
145
|
+
ref: ISO/IEC 2382-34:1999
|
146
|
+
type: lineage
|
147
|
+
status: identical
|
148
|
+
- origin:
|
149
|
+
ref: Ranger, Natalie * 2006 * Bureau de la traduction / Translation Bureau *
|
150
|
+
Services linguistiques / Linguistic Services * Bur. dir. Centre de traduction
|
151
|
+
et de terminologie / Dir's Office Translation and Terminology Centre * Div.
|
152
|
+
Citoyenneté et Protection civile / Citizen. & Emergency preparedness Div.
|
153
|
+
* Normalisation terminologique / Terminology Standardization
|
154
|
+
type: lineage
|
155
|
+
status: identical
|
156
|
+
terms:
|
157
|
+
- type: expression
|
158
|
+
normative_status: preferred
|
159
|
+
designation: visible layer
|
160
|
+
grammar_info:
|
161
|
+
- preposition: false
|
162
|
+
participle: false
|
163
|
+
adj: false
|
164
|
+
verb: false
|
165
|
+
adverb: false
|
166
|
+
noun: false
|
167
|
+
gender: []
|
168
|
+
number:
|
169
|
+
- singular
|
170
|
+
language_code: eng
|
171
|
+
----
|
172
|
+
====
|
173
|
+
|
174
|
+
|
175
|
+
|
75
176
|
== Library
|
76
177
|
|
77
178
|
=== Usage
|
data/lib/termium/cli.rb
CHANGED
@@ -53,7 +53,7 @@ module Termium
|
|
53
53
|
puts "Converting to Glossarist..."
|
54
54
|
convert_options = {}
|
55
55
|
if options[:date_accepted]
|
56
|
-
convert_options[:date_accepted] = Date.parse(options[:date_accepted])
|
56
|
+
convert_options[:date_accepted] = Date.parse(options[:date_accepted]).iso8601
|
57
57
|
end
|
58
58
|
glossarist_col = termium_extract.to_concept(convert_options)
|
59
59
|
# pp glossarist_col.first
|
data/lib/termium/core.rb
CHANGED
@@ -38,8 +38,8 @@ module Termium
|
|
38
38
|
end
|
39
39
|
|
40
40
|
# Deterministic v3 (name-based, MD5) UUID derived from the number string
|
41
|
-
def uuid
|
42
|
-
UUIDTools::UUID.md5_create(UUIDTools::UUID_DNS_NAMESPACE,
|
41
|
+
def uuid(str = identification_number)
|
42
|
+
UUIDTools::UUID.md5_create(UUIDTools::UUID_DNS_NAMESPACE, str).to_s
|
43
43
|
end
|
44
44
|
|
45
45
|
# TODO: Utilize "subject" in the Glossarist object:
|
@@ -60,11 +60,15 @@ module Termium
|
|
60
60
|
concept.date_accepted = options[:date_accepted]
|
61
61
|
end
|
62
62
|
|
63
|
-
language_module.map
|
63
|
+
language_module.map do |lang_mod|
|
64
|
+
localized_concept = lang_mod.to_concept(options)
|
65
|
+
|
64
66
|
# TODO: This is needed to skip the empty french entries of 10031781 and 10031778
|
65
67
|
next if localized_concept.nil?
|
66
68
|
|
67
69
|
localized_concept.id = identification_number
|
70
|
+
localized_concept.uuid = uuid("#{identification_number}-#{lang_mod.language}")
|
71
|
+
|
68
72
|
universal_entry.each do |entry|
|
69
73
|
localized_concept.notes << entry.value
|
70
74
|
end
|
data/lib/termium/language_module.rb
CHANGED
@@ -60,6 +60,7 @@ module Termium
|
|
60
60
|
"definition" => [{ content: definition }],
|
61
61
|
"notes" => notes,
|
62
62
|
"examples" => examples,
|
63
|
+
"entry_status" => "valid",
|
63
64
|
}
|
64
65
|
|
65
66
|
src["domain"] = domain if domain
|
@@ -67,11 +68,19 @@ module Termium
|
|
67
68
|
src
|
68
69
|
end
|
69
70
|
|
70
|
-
def to_concept
|
71
|
+
def to_concept(options = {})
|
71
72
|
x = to_h
|
72
73
|
return nil unless x
|
73
74
|
|
74
|
-
Glossarist::LocalizedConcept.new(x)
|
75
|
+
Glossarist::LocalizedConcept.new(x).tap do |concept|
|
76
|
+
# Fill in register parameters
|
77
|
+
if options[:date_accepted]
|
78
|
+
puts options[:date_accepted].inspect
|
79
|
+
concept.date_accepted = options[:date_accepted]
|
80
|
+
end
|
81
|
+
|
82
|
+
puts concept.inspect
|
83
|
+
end
|
75
84
|
end
|
76
85
|
end
|
77
86
|
end
|
data/lib/termium/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: termium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-11-
|
11
|
+
date: 2024-11-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: glossarist
|