termium 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/termium/cli.rb +32 -26
- data/lib/termium/core.rb +7 -0
- data/lib/termium/entry_term.rb +7 -1
- data/lib/termium/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b6e0e5aefeb1e77b272a066f4e530779c522fa1d58d1e4890beda19db2e2c203
|
4
|
+
data.tar.gz: eccef83149a4c29c751ada19d6d1db07fa961d983322986b49ff40d33954255d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2feb9eef3d42a4f9d07e710c2608ab00bb903484a98ba890ed259a7e7cbc7dbcea68b00bf69e493d9921f4362b79bfc314e961ccb147a559c5e835b287942b62
|
7
|
+
data.tar.gz: 70544c5334f85c6b8a7d7087840169a98578d57249d6ac3eb29412c70ae04759f0f28bc488ebba333a1ac3dbeac55122e44f9dd63baa74757baf9f08ddb3f7ee
|
data/lib/termium/cli.rb
CHANGED
@@ -5,53 +5,59 @@ require_relative "../termium"
|
|
5
5
|
module Termium
|
6
6
|
# Command-line interface
|
7
7
|
class Cli < Thor
|
8
|
-
desc "convert", "Convert
|
8
|
+
desc "convert", "Convert TERMIUM entries into a Glossarist dataset"
|
9
9
|
|
10
10
|
option :input_file, aliases: :i, required: true, desc: "Path to TERMIUM Plus XML extract"
|
11
11
|
option :output_file, aliases: :o, desc: "Output file path"
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
no_commands do
|
14
|
+
def input_file_as_path(input_file)
|
15
|
+
input_path = Pathname.new(Dir.pwd).join(Pathname.new(input_file))
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
input_path
|
22
|
-
end
|
17
|
+
unless input_path.exist?
|
18
|
+
error "TERMIUM export file `#{options[:input_file]}` does not exist."
|
19
|
+
exit 1
|
20
|
+
end
|
23
21
|
|
24
|
-
|
25
|
-
|
26
|
-
output_path ||= input_path.dirname.join(input_path.basename(input_path.extname))
|
22
|
+
input_path
|
23
|
+
end
|
27
24
|
|
28
|
-
|
25
|
+
def output_dir_as_path(output_path, input_path)
|
26
|
+
output_path ||= input_path.dirname.join(input_path.basename(input_path.extname))
|
29
27
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
puts "Created directory: #{output_path.relative_path_from(Dir.pwd)}"
|
34
|
-
output_path.mkdir
|
28
|
+
output_path = Pathname.new(Dir.pwd).join(output_path)
|
29
|
+
create_or_use_output_path(output_path)
|
30
|
+
output_path
|
35
31
|
end
|
36
32
|
|
37
|
-
output_path
|
33
|
+
def create_or_use_output_path(output_path)
|
34
|
+
output_path_rel = output_path.relative_path_from(Dir.pwd)
|
35
|
+
if output_path.exist?
|
36
|
+
puts "Using existing directory: #{output_path_rel}"
|
37
|
+
else # and is directory
|
38
|
+
puts "Created directory: #{output_path_rel}"
|
39
|
+
output_path.mkdir
|
40
|
+
end
|
41
|
+
end
|
38
42
|
end
|
39
43
|
|
40
44
|
def convert
|
41
|
-
input_path = input_file_as_path
|
42
|
-
|
45
|
+
input_path = input_file_as_path(options[:input_file])
|
46
|
+
|
47
|
+
puts "Reading TERMIUM export file: #{input_path.relative_path_from(Dir.pwd)}"
|
43
48
|
termium_extract = Termium::Extract.from_xml(IO.read(input_path.expand_path))
|
44
49
|
|
45
|
-
puts "Size of dataset: #{termium_extract.core.size}"
|
50
|
+
puts "Size of TERMIUM dataset: #{termium_extract.core.size}"
|
46
51
|
|
47
52
|
puts "Converting to Glossarist..."
|
48
53
|
glossarist_col = termium_extract.to_concept
|
49
54
|
# pp glossarist_col.first
|
50
55
|
|
51
|
-
output_path =
|
52
|
-
puts "Writing Glossarist dataset
|
56
|
+
output_path = output_dir_as_path(options[:output_file], input_path)
|
57
|
+
puts "Writing Glossarist dataset to: #{output_path.relative_path_from(Dir.pwd)}"
|
53
58
|
glossarist_col.save_to_files(output_path.expand_path)
|
54
|
-
puts "
|
59
|
+
puts "Done."
|
60
|
+
exit 0
|
55
61
|
end
|
56
62
|
|
57
63
|
def method_missing(*args)
|
data/lib/termium/core.rb
CHANGED
@@ -28,10 +28,17 @@ module Termium
|
|
28
28
|
|
29
29
|
# TODO: In Termium XML, each definition per lang or note can be linked to a
|
30
30
|
# particular source via the sourceRef number.
|
31
|
+
# We should utilize "source" order ID in the Glossarist object:
|
32
|
+
# <source order="1" details="ISO-2382-6 * 1987 * * * " />
|
33
|
+
# <source order="2"
|
34
|
+
# details="Ranger, Natalie * 2006 * Bureau de la traduction..." />
|
31
35
|
def concept_sources
|
32
36
|
source.map(&:to_concept_source)
|
33
37
|
end
|
34
38
|
|
39
|
+
# TODO: Utilize "subject" in the Glossarist object:
|
40
|
+
# <subject abbreviation="YBB"
|
41
|
+
# details="Compartment - ISO/IEC JTC 1 Information Technology Vocabulary" />
|
35
42
|
def to_concept
|
36
43
|
concept = Glossarist::ManagedConcept.new(id: identification_number)
|
37
44
|
|
data/lib/termium/entry_term.rb
CHANGED
@@ -62,11 +62,17 @@ module Termium
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
|
+
def normative_status
|
66
|
+
return "deprecated" if deprecated
|
67
|
+
|
68
|
+
order == 1 ? "preferred" : "admitted"
|
69
|
+
end
|
70
|
+
|
65
71
|
def to_h
|
66
72
|
set = {
|
67
73
|
"designation" => value,
|
68
74
|
"type" => "expression",
|
69
|
-
"normative_status" =>
|
75
|
+
"normative_status" => normative_status
|
70
76
|
}
|
71
77
|
|
72
78
|
set["geographical_area"] = geographical_area if geographical_area
|
data/lib/termium/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: termium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: glossarist
|