tc211-termbase 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +3 -0
- data/Gemfile.lock +59 -7
- data/README.adoc +10 -4
- data/db/iso/iso_1087_1_2000,_3.4.16,_modified_/342/200/224_the_note_1_to_entry_has_been_added..xml +72 -0
- data/db/iso/iso_1087_1_2000,_3.4.9.xml +72 -0
- data/db/iso/iso_19101_1_2014,_4.1.1.xml +62 -0
- data/db/iso/iso_19101_1_2014,_4.1.2.xml +62 -0
- data/db/iso/iso_19105.xml +96 -0
- data/db/iso/iso_19105_2000.xml +55 -0
- data/db/iso/iso_19116.xml +97 -0
- data/db/iso/iso_19116_2004.xml +56 -0
- data/db/iso/iso_19117_2012,_4.1.xml +60 -0
- data/db/iso/iso_3534_1.xml +112 -0
- data/db/iso/iso_3534_1_1993.xml +71 -0
- data/db/iso/iso_iec_19501.xml +105 -0
- data/db/iso/iso_iec_19501_2005_(adapted_from.xml +60 -0
- data/db/iso/iso_iec_2382_17_1999.xml +77 -0
- data/db/version +1 -0
- data/lib/tc211/termbase/relaton_db.rb +21 -0
- data/lib/tc211/termbase/terms_section.rb +149 -126
- data/lib/tc211/termbase/version.rb +1 -1
- data/tc211-termbase.gemspec +4 -1
- data/vcr_cassettes/terms.yml +491 -0
- metadata +65 -5
data/db/version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.4.1
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "singleton"
|
2
|
+
require "relaton"
|
3
|
+
|
4
|
+
module Tc211
|
5
|
+
module Termbase
|
6
|
+
# Relaton cache singleton.
|
7
|
+
class RelatonDb
|
8
|
+
include Singleton
|
9
|
+
|
10
|
+
def initialize
|
11
|
+
@db = Relaton::Db.new "db", nil
|
12
|
+
end
|
13
|
+
|
14
|
+
# @param code [String] reference
|
15
|
+
# @return [RelatonIso::IsoBibliographicItem]
|
16
|
+
def fetch(code)
|
17
|
+
@db.fetch code
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -1,151 +1,174 @@
|
|
1
1
|
require_relative "sheet_section"
|
2
2
|
require_relative "term"
|
3
|
+
require_relative "relaton_db"
|
3
4
|
|
4
5
|
module Tc211::Termbase
|
5
6
|
|
6
|
-
class TermsSection < SheetSection
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
7
|
+
class TermsSection < SheetSection
|
8
|
+
attr_accessor :structure
|
9
|
+
attr_accessor :header_row
|
10
|
+
|
11
|
+
TERM_HEADER_ROW_MATCH = {
|
12
|
+
"A" => ["ISO 19135 Field\nRE_RegisterItem.itemIdentifier"],
|
13
|
+
"B" => ["ISO 19135 Field\nRE_RegisterItem.name"],
|
14
|
+
"C" => ["ISO 19135 Field\nRE_RegisterItem.\nalternativeExpression"],
|
15
|
+
"D" => ["Country_Code"],
|
16
|
+
# ... We don't need to match all the cells
|
17
|
+
}
|
18
|
+
|
19
|
+
TERM_BODY_COLUMN_MAP = {
|
20
|
+
"Term_ID" => "id",
|
21
|
+
"Term" => "term",
|
22
|
+
"Term .OPERATING LANGUAGE." => "term",
|
23
|
+
# In the English sheet, column is named "Term Abbreviation"
|
24
|
+
"Term Abbreviation" => "abbrev",
|
25
|
+
# In other sheets, the column is named "Term_Abbreviation"
|
26
|
+
"Term_Abbreviation .OPERATING LANGUAGE." => "abbrev",
|
27
|
+
"Country code" => "country-code",
|
28
|
+
"Definition" => "definition",
|
29
|
+
"Term .OPERATING LANGUAGE - ALTERNATIVE CHARACTER SET." => "alt",
|
30
|
+
"Term in English" => nil,
|
31
|
+
"Entry Status" => "entry-status",
|
32
|
+
## Must be one of 'notValid' 'valid' 'superseded' 'retired'
|
33
|
+
"Term Clasification" => "classification",
|
34
|
+
## Must be one of the following 'preferred' 'admitted' 'deprecated'
|
35
|
+
"Review Indicator" => "review-indicator",
|
36
|
+
## Must be one of the following: <empty field>, 'Under Review in Source Document'
|
37
|
+
"Authoritative Source" => "authoritative-source",
|
38
|
+
"Similarity to Authoritative Source" => "authoritative-source-similarity",
|
39
|
+
## Must be one of the following codes: 'identical' = 1, 'restyled' = 2, 'context added' = 3, 'generalisation' = 4, 'specialisation' = 5, 'unspecified' = 6
|
40
|
+
"Lineage Source" => "lineage-source",
|
41
|
+
"Similarity to Lineage Source" => "lineage-source-similarity",
|
42
|
+
## Must be one of the following codes: 'identical' = 1, 'restyled' = 2, 'context added' = 3, 'generalisation' = 4, 'specialisation' = 5, 'unspecified' = 6
|
43
|
+
"Term Synonyms" => "synonyms",
|
44
|
+
"Date Accepted" => "date-accepted", # yyyy-mm-dd,
|
45
|
+
"Date Amended" => "date-amended", # yyyy-mm-dd,
|
46
|
+
"Review Date" => "review-date", # yyyy-mm-dd,
|
47
|
+
"Review Status" => "review-status", ## Must be one of 'pending' 'tentative' 'final'",
|
48
|
+
"Review Type" => "review-type", ## Must be one of 'supersession', 'retirement'",
|
49
|
+
"Review Decision" => "review-decision", ## Must be one of 'withdrawn', 'accepted' 'notAccepted'",
|
50
|
+
"Review Decision Date" => "review-decision-date", # yyyy-mm-dd
|
51
|
+
"Review Decision Event" => "review-decision-event",
|
52
|
+
"Review Decision Notes" => "review-decision-notes",
|
53
|
+
"Example_1" => "example-1",
|
54
|
+
"Note_1" => "note-1",
|
55
|
+
"Example_2" => "example-2",
|
56
|
+
"Note_2" => "note-2",
|
57
|
+
"Example_3" => "example-3",
|
58
|
+
"Note_3" => "note-3",
|
59
|
+
"Example_4" => "example-4",
|
60
|
+
"Note_4" => "note-4",
|
61
|
+
"Example_5" => "example-5",
|
62
|
+
"Note_5" => "note-5",
|
63
|
+
"Example_6" => "example-6",
|
64
|
+
"Note_6" => "note-6",
|
65
|
+
"Example_7" => "example-7",
|
66
|
+
"Note_7" => "note-7",
|
67
|
+
"Example_8" => "example-8",
|
68
|
+
"Note_8" => "note-8",
|
69
|
+
"Glossary Release" => "release"
|
70
|
+
## Must be one of the following codes: 'release1' = 1, 'release1_retired' = -1, 'release2' = 2, 'release2_retired' = -2, etc.
|
71
|
+
}
|
72
|
+
|
73
|
+
def initialize(rows, options={})
|
74
|
+
super
|
75
|
+
raise StandardError.new("Does not match TermsSection header!") unless self.class.match_header(@rows[0])
|
76
|
+
@mapping_rows = @rows[0..1]
|
77
|
+
@header_row = @rows[2]
|
78
|
+
@body_rows = @rows[3..-1]
|
79
|
+
@language_code = options.delete(:language_code)
|
80
|
+
self
|
81
|
+
end
|
82
|
+
|
83
|
+
def structure
|
84
|
+
@structure ||= @header_row.inject({}) do |acc, (key, value)|
|
85
|
+
# puts "#{key}, #{value}, #{GLOSSARY_HEADER_TITLES[value]}"
|
81
86
|
|
82
|
-
|
83
|
-
|
84
|
-
# puts "#{key}, #{value}, #{GLOSSARY_HEADER_TITLES[value]}"
|
87
|
+
# convert whitespace to a single space
|
88
|
+
cleaned_value = value.gsub(/\s+/, ' ')
|
85
89
|
|
86
|
-
|
87
|
-
|
90
|
+
matches = TERM_BODY_COLUMN_MAP.map do |key, value|
|
91
|
+
# puts "key #{key}, value #{value}"
|
92
|
+
if cleaned_value[Regexp.new("^#{key}")]
|
93
|
+
[key, value]
|
94
|
+
end
|
95
|
+
end.compact
|
88
96
|
|
89
|
-
|
90
|
-
|
91
|
-
if cleaned_value[Regexp.new("^#{key}")]
|
92
|
-
[key, value]
|
97
|
+
discard, longest_match_key = matches.max_by do |(a, b)|
|
98
|
+
a.length
|
93
99
|
end
|
94
|
-
end.compact
|
95
100
|
|
96
|
-
|
97
|
-
|
98
|
-
|
101
|
+
# Here we need to skip "Term in English"
|
102
|
+
if key && longest_match_key
|
103
|
+
acc.merge!({ key => longest_match_key })
|
104
|
+
else
|
105
|
+
acc
|
106
|
+
end
|
99
107
|
|
100
|
-
# Here we need to skip "Term in English"
|
101
|
-
if key && longest_match_key
|
102
|
-
acc.merge!({ key => longest_match_key })
|
103
|
-
else
|
104
|
-
acc
|
105
108
|
end
|
106
|
-
|
107
109
|
end
|
108
|
-
end
|
109
110
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
111
|
+
def self.match_header(row)
|
112
|
+
# puts "row #{row}"
|
113
|
+
row.inject(true) do |acc, (key, value)|
|
114
|
+
# puts "#{key}, #{value}"
|
115
|
+
if TERM_HEADER_ROW_MATCH[key]
|
116
|
+
acc && TERM_HEADER_ROW_MATCH[key].include?(value)
|
117
|
+
else
|
118
|
+
acc
|
119
|
+
end
|
118
120
|
end
|
119
121
|
end
|
120
|
-
end
|
121
122
|
|
122
|
-
|
123
|
-
|
124
|
-
attributes = {}
|
123
|
+
def parse_row(row)
|
124
|
+
return nil if row.empty?
|
125
125
|
|
126
|
-
|
127
|
-
# puts "#{key}, #{value}, #{row[key]}"
|
128
|
-
attribute_key = value
|
129
|
-
attribute_value = row[key]
|
130
|
-
next if attribute_value.nil?
|
131
|
-
attributes[attribute_key] = attribute_value
|
132
|
-
end
|
126
|
+
attributes = {}
|
133
127
|
|
134
|
-
|
135
|
-
|
128
|
+
structure.each_pair do |key, value|
|
129
|
+
# puts "#{key}, #{value}, #{row[key]}"
|
130
|
+
attribute_key = value
|
131
|
+
next if row[key].nil?
|
132
|
+
|
133
|
+
attribute_value = fetch_attribute row[key], attribute_key
|
134
|
+
attributes[attribute_key] = attribute_value
|
135
|
+
end
|
136
136
|
|
137
|
-
|
138
|
-
@terms ||= @body_rows.map do |row|
|
139
|
-
Term.new(parse_row(row).merge("language_code" => @language_code))
|
137
|
+
attributes
|
140
138
|
end
|
141
|
-
end
|
142
139
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
140
|
+
def terms
|
141
|
+
@terms ||= @body_rows.map do |row|
|
142
|
+
Term.new(parse_row(row).merge("language_code" => @language_code))
|
143
|
+
end
|
144
|
+
end
|
148
145
|
|
149
|
-
|
146
|
+
def to_hash
|
147
|
+
{
|
148
|
+
"terms" => terms.map(&:to_hash)
|
149
|
+
}
|
150
|
+
end
|
150
151
|
|
152
|
+
private
|
153
|
+
|
154
|
+
# @param value [String]
|
155
|
+
# @param key [String]
|
156
|
+
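# @return [Hash, Object] a Hash with "ref" (and "link" when the reference resolves) for "authoritative-source"; otherwise value unchanged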
|
157
|
+
def fetch_attribute(value, key)
|
158
|
+
case key
|
159
|
+
when "authoritative-source"
|
160
|
+
begin
|
161
|
+
src = { "ref" => value }
|
162
|
+
item = RelatonDb.instance.fetch value
|
163
|
+
src["link"] = item.url if item
|
164
|
+
src
|
165
|
+
rescue RelatonBib::RequestError => e
|
166
|
+
warn e.message
|
167
|
+
src
|
168
|
+
end
|
169
|
+
else
|
170
|
+
value
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
151
174
|
end
|
data/tc211-termbase.gemspec
CHANGED
@@ -24,8 +24,11 @@ Gem::Specification.new do |spec|
|
|
24
24
|
|
25
25
|
spec.add_runtime_dependency "iso-639"
|
26
26
|
spec.add_runtime_dependency "creek"
|
27
|
+
spec.add_runtime_dependency "relaton", "~>0.4.0"
|
27
28
|
|
28
|
-
spec.add_development_dependency "bundler", "~> 1
|
29
|
+
spec.add_development_dependency "bundler", "~> 2.0.1"
|
30
|
+
spec.add_development_dependency "debase"
|
29
31
|
spec.add_development_dependency "rake", "~> 10.0"
|
30
32
|
spec.add_development_dependency "rspec", "~> 3.0"
|
33
|
+
spec.add_development_dependency "ruby-debug-ide"
|
31
34
|
end
|