termium 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 86c7b7ab35fb63b5d7bc8a085125db1452284327970939abd89808bf6e628097
4
- data.tar.gz: 2229305a3891ae26ec4b8ec82ad3ae55735912c7503c19ed5c478c52d3c1db68
3
+ metadata.gz: 38f41bac67c56e9aa6ec73f6ec79b729216cfdd677ed27d7317ea1bcd2363bc0
4
+ data.tar.gz: b69ed20d7a04e49f815a2714c4dc2d3c290af3ecce2b89f8eba9d63bedd85a09
5
5
  SHA512:
6
- metadata.gz: 67d16aa7301c765ec97865d5eff33b98e02de93d8cce9332fb7d8953613fa638719941bc2e5b695a29ae4f0e8e4960864d4ad17a8d1ee8332bc66e877f81c099
7
- data.tar.gz: 97d56933d5946f70209e0c3b054583e683e2e510ff235615d324dbb80c3f76dcdc1acea3fb1fd3003b9f1c417b193636df701685c85dfee0f2f33aee4173bc17
6
+ metadata.gz: f7cdfffe9d0e84f8af6a8724b82233646ac43351b9fa3107941de8a3754e5fe664c06bcf8841db0fe43aee606c7c66fef3c2dc29c0c2a137bba37125ed24dc83
7
+ data.tar.gz: 386cf6b5a8658f7a91686771f083558ef31483c3fd961cdb58860082505ba8d3fc066b684dc35157607438b36886d99689b36e245ccd71fb9d549de5673303f7
data/README.adoc CHANGED
@@ -42,17 +42,37 @@ Results in:
42
42
 
43
43
  == Commands
44
44
 
45
- `termium convert`:: Convert a TERMIUM Plus export XML file to a Glossarist dataset
45
+ `termium convert`:: Convert a TERMIUM Plus export XML file to a Paneron Glossarist dataset.
46
46
 
47
47
 
48
- === Usage
48
+ === `termium convert`
49
+
50
+ ==== Purpose
51
+
52
+ This command converts a TERMIUM Plus export XML (`<ns2:termium_extract>`) file
53
+ to a Paneron Glossarist dataset.
54
+
55
+ The resulting dataset will look like this:
56
+
57
+ [source]
58
+ ----
59
+ {OUTPUT_PATH}/
60
+ ├── concepts/
61
+ │   ├── {CONCEPT_ID}.yaml
62
+ │   ├── ...
63
+ └── localized_concepts/
64
+    ├── {LOCALIZED_CONCEPT_ID}.yaml
65
+    ├── ...
66
+ ----
67
+
68
+ ==== Usage
49
69
 
50
70
  [source,sh]
51
71
  ----
52
- $ termium convert -i INPUT_XML_FILE [-o OUTPUT_PATH]
72
+ $ termium convert -i INPUT_XML_FILE [-o OUTPUT_PATH] [--date-accepted DATE_ACCEPTED]
53
73
  ----
54
74
 
55
- === Options
75
+ ==== Options
56
76
 
57
77
  [cols="a,a",options="header"]
58
78
  |===
@@ -61,7 +81,7 @@ $ termium convert -i INPUT_XML_FILE [-o OUTPUT_PATH]
61
81
  |`-i`, `--input-path`
62
82
  |
63
83
  Source path to TERMIUM Plus XML export file.
64
- The file needs to start with the `<extract>` tag.
84
+ The file needs to start with the `<ns2:termium_extract>` element.
65
85
 
66
86
  |`-o`, `--output-path`
67
87
  |
@@ -69,9 +89,90 @@ Destination path to Glossarist dataset directory.
69
89
  If the directory doesn't exist it will be created.
70
90
  If not provided, defaults to the basename of the input file, e.g. `foo/bar.xml` will export to `foo/bar/`.
71
91
 
92
+ |`--date-accepted`
93
+ |
94
+ Date of acceptance for the dataset. This fills in the `date_accepted` value of
95
+ the universal concept (which is exported to a YAML file).
96
+
72
97
  |===
73
98
 
74
99
 
100
+ ==== Examples
101
+
102
+ The data structures of these files can be seen in the following examples.
103
+
104
+
105
+ .Sample of `{CONCEPT_ID}.yaml`
106
+ [example]
107
+ ====
108
+ This is `88a7dd87-6199-3516-9cec-f4cd79ff09c6.yaml`.
109
+
110
+ [source,yaml]
111
+ ----
112
+ ---
113
+ data:
114
+ identifier: '2120638'
115
+ localized_concepts:
116
+ eng: e114ee44-e601-5623-9099-48cfc2be2224
117
+ fre: 9a7b88cb-4ee6-5d59-89bb-230425a3c96a
118
+ related: []
119
+ date_accepted: 2015-05-01
120
+ status: valid
121
+ id: 88a7dd87-6199-3516-9cec-f4cd79ff09c6
122
+ ----
123
+ ====
124
+
125
+ .Sample of `{LOCALIZED_CONCEPT_ID}.yaml`
126
+ [example]
127
+ ====
128
+ This is `e114ee44-e601-5623-9099-48cfc2be2224.yaml`.
129
+
130
+ [source,yaml]
131
+ ----
132
+ ---
133
+ data:
134
+ dates: []
135
+ definition:
136
+ - content: layer whose nodes directly communicate with external systems
137
+ examples: []
138
+ id: '2120638'
139
+ notes:
140
+ - content: 'visible layer: term and definition standardized by ISO/IEC [ISO/IEC
141
+ 2382-34:1999].'
142
+ - content: 34.02.09 (2382)
143
+ sources:
144
+ - origin:
145
+ ref: ISO/IEC 2382-34:1999
146
+ type: lineage
147
+ status: identical
148
+ - origin:
149
+ ref: Ranger, Natalie * 2006 * Bureau de la traduction / Translation Bureau *
150
+ Services linguistiques / Linguistic Services * Bur. dir. Centre de traduction
151
+ et de terminologie / Dir's Office Translation and Terminology Centre * Div.
152
+ Citoyenneté et Protection civile / Citizen. & Emergency preparedness Div.
153
+ * Normalisation terminologique / Terminology Standardization
154
+ type: lineage
155
+ status: identical
156
+ terms:
157
+ - type: expression
158
+ normative_status: preferred
159
+ designation: visible layer
160
+ grammar_info:
161
+ - preposition: false
162
+ participle: false
163
+ adj: false
164
+ verb: false
165
+ adverb: false
166
+ noun: false
167
+ gender: []
168
+ number:
169
+ - singular
170
+ language_code: eng
171
+ ----
172
+ ====
173
+
174
+
175
+
75
176
  == Library
76
177
 
77
178
  === Usage
data/lib/termium/cli.rb CHANGED
@@ -53,7 +53,7 @@ module Termium
53
53
  puts "Converting to Glossarist..."
54
54
  convert_options = {}
55
55
  if options[:date_accepted]
56
- convert_options[:date_accepted] = Date.parse(options[:date_accepted])
56
+ convert_options[:date_accepted] = Date.parse(options[:date_accepted]).iso8601
57
57
  end
58
58
  glossarist_col = termium_extract.to_concept(convert_options)
59
59
  # pp glossarist_col.first
data/lib/termium/core.rb CHANGED
@@ -38,8 +38,8 @@ module Termium
38
38
  end
39
39
 
40
40
  # Deterministic v3 (name-based, MD5) UUID by using the number string
41
- def uuid
42
- UUIDTools::UUID.md5_create(UUIDTools::UUID_DNS_NAMESPACE, identification_number).to_s
41
+ def uuid(str = identification_number)
42
+ UUIDTools::UUID.md5_create(UUIDTools::UUID_DNS_NAMESPACE, str).to_s
43
43
  end
44
44
 
45
45
  # TODO: Utilize "subject" in the Glossarist object:
@@ -60,11 +60,15 @@ module Termium
60
60
  concept.date_accepted = options[:date_accepted]
61
61
  end
62
62
 
63
- language_module.map(&:to_concept).each do |localized_concept|
63
+ language_module.map do |lang_mod|
64
+ localized_concept = lang_mod.to_concept(options)
65
+
64
66
  # TODO: This is needed to skip the empty french entries of 10031781 and 10031778
65
67
  next if localized_concept.nil?
66
68
 
67
69
  localized_concept.id = identification_number
70
+ localized_concept.uuid = uuid("#{identification_number}-#{lang_mod.language}")
71
+
68
72
  universal_entry.each do |entry|
69
73
  localized_concept.notes << entry.value
70
74
  end
@@ -60,6 +60,7 @@ module Termium
60
60
  "definition" => [{ content: definition }],
61
61
  "notes" => notes,
62
62
  "examples" => examples,
63
+ "entry_status" => "valid",
63
64
  }
64
65
 
65
66
  src["domain"] = domain if domain
@@ -67,11 +68,19 @@ module Termium
67
68
  src
68
69
  end
69
70
 
70
- def to_concept
71
+ def to_concept(options = {})
71
72
  x = to_h
72
73
  return nil unless x
73
74
 
74
- Glossarist::LocalizedConcept.new(x)
75
+ Glossarist::LocalizedConcept.new(x).tap do |concept|
76
+ # Fill in register parameters
77
+ if options[:date_accepted]
78
+ puts options[:date_accepted].inspect
79
+ concept.date_accepted = options[:date_accepted]
80
+ end
81
+
82
+ puts concept.inspect
83
+ end
75
84
  end
76
85
  end
77
86
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Termium
4
- VERSION = "0.3.1"
4
+ VERSION = "0.3.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: termium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-11-18 00:00:00.000000000 Z
11
+ date: 2024-11-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: glossarist