termium 0.3.1 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 86c7b7ab35fb63b5d7bc8a085125db1452284327970939abd89808bf6e628097
4
- data.tar.gz: 2229305a3891ae26ec4b8ec82ad3ae55735912c7503c19ed5c478c52d3c1db68
3
+ metadata.gz: 86902390841b79db99c92c240e1d6f2287cde3bda035c7400483794a0e00cb82
4
+ data.tar.gz: 40a5570756e292e6e3ea806eccd2f10fc979a7f8503f84fb026435148044b31c
5
5
  SHA512:
6
- metadata.gz: 67d16aa7301c765ec97865d5eff33b98e02de93d8cce9332fb7d8953613fa638719941bc2e5b695a29ae4f0e8e4960864d4ad17a8d1ee8332bc66e877f81c099
7
- data.tar.gz: 97d56933d5946f70209e0c3b054583e683e2e510ff235615d324dbb80c3f76dcdc1acea3fb1fd3003b9f1c417b193636df701685c85dfee0f2f33aee4173bc17
6
+ metadata.gz: f59efb1c54b557effc92b7142a97b2c9c4db3155cfa9dbdb2c723257d5bc3cd988566edbefa47d5295fd9d04a77740c5605f9e25f94a0a6eb39ec85ca06abd9a
7
+ data.tar.gz: f45f57601e7e0f19377727ae7ce86d354d4bd7221d4a183b036bca15205719be83b2a9a549b6e9011e8cd1a6b2c8966709eb0dc0c4cc75828f1e277c49f7986a
data/README.adoc CHANGED
@@ -42,17 +42,37 @@ Results in:
42
42
 
43
43
  == Commands
44
44
 
45
- `termium convert`:: Convert a TERMIUM Plus export XML file to a Glossarist dataset
45
+ `termium convert`:: Convert a TERMIUM Plus export XML file to a Paneron Glossarist dataset.
46
46
 
47
47
 
48
- === Usage
48
+ === `termium convert`
49
+
50
+ ==== Purpose
51
+
52
+ This command converts a TERMIUM Plus export XML (`<ns2:termium_extract>`) file
53
+ to a Paneron Glossarist dataset.
54
+
55
+ The resulting dataset will look like this:
56
+
57
+ [source]
58
+ ----
59
+ {OUTPUT_PATH}/
60
+ ├── concepts/
61
+ │   ├── {CONCEPT_ID}.yaml
62
+ │   └── ...
63
+ └── localized_concepts/
64
+    ├── {LOCALIZED_CONCEPT_ID}.yaml
65
+    └── ...
66
+ ----
67
+
68
+ ==== Usage
49
69
 
50
70
  [source,sh]
51
71
  ----
52
- $ termium convert -i INPUT_XML_FILE [-o OUTPUT_PATH]
72
+ $ termium convert -i INPUT_XML_FILE [-o OUTPUT_PATH] [--date-accepted DATE_ACCEPTED]
53
73
  ----
54
74
 
55
- === Options
75
+ ==== Options
56
76
 
57
77
  [cols="a,a",options="header"]
58
78
  |===
@@ -61,7 +81,7 @@ $ termium convert -i INPUT_XML_FILE [-o OUTPUT_PATH]
61
81
  |`-i`, `--input-path`
62
82
  |
63
83
  Source path to TERMIUM Plus XML export file.
64
- The file needs to start with the `<extract>` tag.
84
+ The file needs to start with the `<ns2:termium_extract>` element.
65
85
 
66
86
  |`-o`, `--output-path`
67
87
  |
@@ -69,9 +89,90 @@ Destination path to Glossarist dataset directory.
69
89
  If the directory doesn't exist it will be created.
70
90
  If not provided, defaults to the basename of the input file, e.g. `foo/bar.xml` will export to `foo/bar/`.
71
91
 
92
+ |`--date-accepted`
93
+ |
94
+ Date of acceptance for the dataset. This fills in the `date_accepted` value of
95
+ the universal concept (which is exported to a YAML file).
96
+
72
97
  |===
73
98
 
74
99
 
100
+ ==== Examples
101
+
102
+ The data structures of these files can be seen in the following examples.
103
+
104
+
105
+ .Sample of `{CONCEPT_ID}.yaml`
106
+ [example]
107
+ ====
108
+ This is `88a7dd87-6199-3516-9cec-f4cd79ff09c6.yaml`.
109
+
110
+ [source,yaml]
111
+ ----
112
+ ---
113
+ data:
114
+ identifier: '2120638'
115
+ localized_concepts:
116
+ eng: e114ee44-e601-5623-9099-48cfc2be2224
117
+ fre: 9a7b88cb-4ee6-5d59-89bb-230425a3c96a
118
+ related: []
119
+ date_accepted: 2015-05-01
120
+ status: valid
121
+ id: 88a7dd87-6199-3516-9cec-f4cd79ff09c6
122
+ ----
123
+ ====
124
+
125
+ .Sample of `{LOCALIZED_CONCEPT_ID}.yaml`
126
+ [example]
127
+ ====
128
+ This is `e114ee44-e601-5623-9099-48cfc2be2224.yaml`.
129
+
130
+ [source,yaml]
131
+ ----
132
+ ---
133
+ data:
134
+ dates: []
135
+ definition:
136
+ - content: layer whose nodes directly communicate with external systems
137
+ examples: []
138
+ id: '2120638'
139
+ notes:
140
+ - content: 'visible layer: term and definition standardized by ISO/IEC [ISO/IEC
141
+ 2382-34:1999].'
142
+ - content: 34.02.09 (2382)
143
+ sources:
144
+ - origin:
145
+ ref: ISO/IEC 2382-34:1999
146
+ type: lineage
147
+ status: identical
148
+ - origin:
149
+ ref: Ranger, Natalie * 2006 * Bureau de la traduction / Translation Bureau *
150
+ Services linguistiques / Linguistic Services * Bur. dir. Centre de traduction
151
+ et de terminologie / Dir's Office Translation and Terminology Centre * Div.
152
+ Citoyenneté et Protection civile / Citizen. & Emergency preparedness Div.
153
+ * Normalisation terminologique / Terminology Standardization
154
+ type: lineage
155
+ status: identical
156
+ terms:
157
+ - type: expression
158
+ normative_status: preferred
159
+ designation: visible layer
160
+ grammar_info:
161
+ - preposition: false
162
+ participle: false
163
+ adj: false
164
+ verb: false
165
+ adverb: false
166
+ noun: false
167
+ gender: []
168
+ number:
169
+ - singular
170
+ language_code: eng
171
+ ----
172
+ ====
173
+
174
+
175
+
75
176
  == Library
76
177
 
77
178
  === Usage
data/lib/termium/cli.rb CHANGED
@@ -53,7 +53,7 @@ module Termium
53
53
  puts "Converting to Glossarist..."
54
54
  convert_options = {}
55
55
  if options[:date_accepted]
56
- convert_options[:date_accepted] = Date.parse(options[:date_accepted])
56
+ convert_options[:date_accepted] = Date.parse(options[:date_accepted]).iso8601
57
57
  end
58
58
  glossarist_col = termium_extract.to_concept(convert_options)
59
59
  # pp glossarist_col.first
data/lib/termium/core.rb CHANGED
@@ -38,8 +38,8 @@ module Termium
38
38
  end
39
39
 
40
40
  # Deterministic v3 (MD5, name-based) UUID by using the number string
41
- def uuid
42
- UUIDTools::UUID.md5_create(UUIDTools::UUID_DNS_NAMESPACE, identification_number).to_s
41
+ def uuid(str = identification_number)
42
+ UUIDTools::UUID.md5_create(UUIDTools::UUID_DNS_NAMESPACE, str).to_s
43
43
  end
44
44
 
45
45
  # TODO: Utilize "subject" in the Glossarist object:
@@ -60,11 +60,15 @@ module Termium
60
60
  concept.date_accepted = options[:date_accepted]
61
61
  end
62
62
 
63
- language_module.map(&:to_concept).each do |localized_concept|
63
+ language_module.map do |lang_mod|
64
+ localized_concept = lang_mod.to_concept(options)
65
+
64
66
  # TODO: This is needed to skip the empty French entries of 10031781 and 10031778
65
67
  next if localized_concept.nil?
66
68
 
67
69
  localized_concept.id = identification_number
70
+ localized_concept.uuid = uuid("#{identification_number}-#{lang_mod.language}")
71
+
68
72
  universal_entry.each do |entry|
69
73
  localized_concept.notes << entry.value
70
74
  end
@@ -60,6 +60,7 @@ module Termium
60
60
  "definition" => [{ content: definition }],
61
61
  "notes" => notes,
62
62
  "examples" => examples,
63
+ "entry_status" => "valid",
63
64
  }
64
65
 
65
66
  src["domain"] = domain if domain
@@ -67,11 +68,19 @@ module Termium
67
68
  src
68
69
  end
69
70
 
70
- def to_concept
71
+ def to_concept(options = {})
71
72
  x = to_h
72
73
  return nil unless x
73
74
 
74
- Glossarist::LocalizedConcept.new(x)
75
+ Glossarist::LocalizedConcept.new(x).tap do |concept|
76
+ # Fill in register parameters
77
+ if options[:date_accepted]
78
+ puts options[:date_accepted].inspect
79
+ concept.date_accepted = options[:date_accepted]
80
+ end
81
+
82
+ puts concept.inspect
83
+ end
75
84
  end
76
85
  end
77
86
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Termium
4
- VERSION = "0.3.1"
4
+ VERSION = "0.3.3"
5
5
  end
data/termium.gemspec CHANGED
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
30
30
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
31
31
  spec.require_paths = ["lib"]
32
32
 
33
- spec.add_dependency "glossarist", "~> 2.2"
33
+ spec.add_dependency "glossarist", "~> 2.2.0"
34
34
  spec.add_dependency "lutaml-model"
35
35
  spec.add_dependency "thor"
36
36
  spec.add_dependency "uuidtools"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: termium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-11-18 00:00:00.000000000 Z
11
+ date: 2025-01-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: glossarist
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.2'
19
+ version: 2.2.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.2'
26
+ version: 2.2.0
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: lutaml-model
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -66,7 +66,7 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
- description:
69
+ description:
70
70
  email:
71
71
  - open.source@ribose.com
72
72
  executables:
@@ -108,7 +108,7 @@ metadata:
108
108
  homepage_uri: https://github.com/glossarist/termium
109
109
  source_code_uri: https://github.com/glossarist/termium
110
110
  bug_tracker_uri: https://github.com/glossarist/termium/issues
111
- post_install_message:
111
+ post_install_message:
112
112
  rdoc_options: []
113
113
  require_paths:
114
114
  - lib
@@ -124,7 +124,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
124
124
  version: '0'
125
125
  requirements: []
126
126
  rubygems_version: 3.3.27
127
- signing_key:
127
+ signing_key:
128
128
  specification_version: 4
129
129
  summary: Parser for the TERMIUM Plus terminology database of the Government of Canada
130
130
  test_files: []