tc211-termbase 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +3 -0
- data/Gemfile.lock +59 -7
- data/README.adoc +10 -4
- data/db/iso/iso_1087_1_2000,_3.4.16,_modified_/342/200/224_the_note_1_to_entry_has_been_added..xml +72 -0
- data/db/iso/iso_1087_1_2000,_3.4.9.xml +72 -0
- data/db/iso/iso_19101_1_2014,_4.1.1.xml +62 -0
- data/db/iso/iso_19101_1_2014,_4.1.2.xml +62 -0
- data/db/iso/iso_19105.xml +96 -0
- data/db/iso/iso_19105_2000.xml +55 -0
- data/db/iso/iso_19116.xml +97 -0
- data/db/iso/iso_19116_2004.xml +56 -0
- data/db/iso/iso_19117_2012,_4.1.xml +60 -0
- data/db/iso/iso_3534_1.xml +112 -0
- data/db/iso/iso_3534_1_1993.xml +71 -0
- data/db/iso/iso_iec_19501.xml +105 -0
- data/db/iso/iso_iec_19501_2005_(adapted_from.xml +60 -0
- data/db/iso/iso_iec_2382_17_1999.xml +77 -0
- data/db/version +1 -0
- data/lib/tc211/termbase/relaton_db.rb +21 -0
- data/lib/tc211/termbase/terms_section.rb +149 -126
- data/lib/tc211/termbase/version.rb +1 -1
- data/tc211-termbase.gemspec +4 -1
- data/vcr_cassettes/terms.yml +491 -0
- metadata +65 -5
data/db/version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.4.1
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "singleton"
|
2
|
+
require "relaton"
|
3
|
+
|
4
|
+
module Tc211
|
5
|
+
module Termbase
|
6
|
+
# Relaton cache singleton.
#
# Wraps one shared Relaton::Db handle so every lookup made through
# RelatonDb.instance reuses the same object.
class RelatonDb
  include Singleton

  # Creates the underlying Relaton::Db, passing "db" as its first argument
  # and nil as its second.
  # NOTE(review): "db" is a relative path, so it presumably resolves against
  # the current working directory -- confirm this is intended.
  def initialize
    @db = Relaton::Db.new("db", nil)
  end

  # Looks up a bibliographic item by its reference.
  #
  # @param code [String] reference
  # @return [RelatonIso::IsoBibliographicItem]
  def fetch(code)
    @db.fetch(code)
  end
end
|
20
|
+
end
|
21
|
+
end
|
@@ -1,151 +1,174 @@
|
|
1
1
|
require_relative "sheet_section"
|
2
2
|
require_relative "term"
|
3
|
+
require_relative "relaton_db"
|
3
4
|
|
4
5
|
module Tc211::Termbase
|
5
6
|
|
6
|
-
class TermsSection < SheetSection
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
7
|
+
# The terms section of a sheet: rows 0-1 are mapping rows (row 0 is used to
# recognise the section), row 2 is the header row naming each column, and
# every following row describes one term.
class TermsSection < SheetSection
  attr_accessor :header_row
  # Only the writer is generated; the reader is the memoized #structure below.
  attr_writer :structure

  # Expected content of the first row, keyed by column. Each value lists the
  # cell contents accepted for that column.
  TERM_HEADER_ROW_MATCH = {
    "A" => ["ISO 19135 Field\nRE_RegisterItem.itemIdentifier"],
    "B" => ["ISO 19135 Field\nRE_RegisterItem.name"],
    "C" => ["ISO 19135 Field\nRE_RegisterItem.\nalternativeExpression"],
    "D" => ["Country_Code"],
    # ... We don't need to match all the cells
  }.freeze

  # Maps header titles to attribute names. Keys are used as regular
  # expression sources anchored at the start of the whitespace-normalised
  # header cell, so "." matches any single character. When several keys
  # match, the longest key wins. A nil value means the column is ignored.
  TERM_BODY_COLUMN_MAP = {
    "Term_ID" => "id",
    "Term" => "term",
    "Term .OPERATING LANGUAGE." => "term",
    # In the English sheet, column is named "Term Abbreviation"
    "Term Abbreviation" => "abbrev",
    # In other sheets, column named "Term_Abbreviation"
    "Term_Abbreviation .OPERATING LANGUAGE." => "abbrev",
    "Country code" => "country-code",
    "Definition" => "definition",
    "Term .OPERATING LANGUAGE - ALTERNATIVE CHARACTER SET." => "alt",
    "Term in English" => nil,
    # Must be one of 'notValid' 'valid' 'superseded' 'retired'
    "Entry Status" => "entry-status",
    # Must be one of 'preferred' 'admitted' 'deprecated'
    # NOTE(review): the key spelling presumably mirrors the sheet header --
    # confirm before "correcting" it.
    "Term Clasification" => "classification",
    # Must be one of <empty field> 'Under Review in Source Document'
    "Review Indicator" => "review-indicator",
    "Authoritative Source" => "authoritative-source",
    # Must be one of the following codes: 'identical' = 1 'restyled' = 2
    # 'context added' = 3 'generalisation' = 4 'specialisation' = 5
    # 'unspecified' = 6
    "Similarity to Authoritative Source" => "authoritative-source-similarity",
    "Lineage Source" => "lineage-source",
    # Same codes as "Similarity to Authoritative Source"
    "Similarity to Lineage Source" => "lineage-source-similarity",
    "Term Synonyms" => "synonyms",
    "Date Accepted" => "date-accepted", # yyyy-mm-dd
    "Date Amended" => "date-amended", # yyyy-mm-dd
    "Review Date" => "review-date", # yyyy-mm-dd
    "Review Status" => "review-status", # one of 'pending' 'tentative' 'final'
    "Review Type" => "review-type", # one of 'supersession', 'retirement'
    "Review Decision" => "review-decision", # one of 'withdrawn', 'accepted' 'notAccepted'
    "Review Decision Date" => "review-decision-date", # yyyy-mm-dd
    "Review Decision Event" => "review-decision-event",
    "Review Decision Notes" => "review-decision-notes",
    "Example_1" => "example-1",
    "Note_1" => "note-1",
    "Example_2" => "example-2",
    "Note_2" => "note-2",
    "Example_3" => "example-3",
    "Note_3" => "note-3",
    "Example_4" => "example-4",
    "Note_4" => "note-4",
    "Example_5" => "example-5",
    "Note_5" => "note-5",
    "Example_6" => "example-6",
    "Note_6" => "note-6",
    "Example_7" => "example-7",
    "Note_7" => "note-7",
    "Example_8" => "example-8",
    "Note_8" => "note-8",
    # Must be one of the following codes: 'release1' = 1
    # 'release1_retired' = -1 'release2' = 2 'release2_retired' = -2 etc.
    "Glossary Release" => "release"
  }.freeze

  # @param rows [Array<Hash>] section rows, each mapping a column to a cell
  # @param options [Hash] :language_code is read and removed from this hash
  # @raise [StandardError] if the first row is not a terms-section header
  def initialize(rows, options={})
    super
    unless self.class.match_header(@rows[0])
      raise StandardError, "Does not match TermsSection header!"
    end

    @mapping_rows = @rows[0..1]
    @header_row = @rows[2]
    @body_rows = @rows[3..-1]
    # delete (not a plain read) is kept on purpose: it matches the original
    # behaviour of removing the key from the hash that was handed to super.
    @language_code = options.delete(:language_code)
  end

  # Column => attribute-name mapping derived from the header row (memoized).
  # Columns whose title matches no known header, or matches a header mapped
  # to nil (e.g. "Term in English"), are left out.
  #
  # @return [Hash{String => String}]
  def structure
    @structure ||= @header_row.each_with_object({}) do |(column, title), mapping|
      # convert whitespace to a single space; to_s keeps an empty (nil)
      # header cell from raising -- it simply matches nothing
      normalized_title = title.to_s.gsub(/\s+/, " ")
      attribute = attribute_for(normalized_title)

      mapping[column] = attribute if column && attribute
    end
  end

  # Tells whether +row+ is the first row of a terms section. Only columns
  # listed in TERM_HEADER_ROW_MATCH are checked; columns absent from the row
  # are not required.
  #
  # @param row [Hash] column => cell content
  # @return [Boolean]
  def self.match_header(row)
    row.all? do |column, content|
      accepted = TERM_HEADER_ROW_MATCH[column]
      accepted.nil? || accepted.include?(content)
    end
  end

  # Converts one body row into an attribute hash, skipping nil cells.
  #
  # @param row [Hash] column => cell content
  # @return [Hash, nil] attribute name => value, or nil for an empty row
  def parse_row(row)
    return nil if row.empty?

    structure.each_with_object({}) do |(column, attribute), attributes|
      cell = row[column]
      next if cell.nil?

      attributes[attribute] = fetch_attribute(cell, attribute)
    end
  end

  # Builds (once) a Term for every non-empty body row.
  #
  # @return [Array<Term>]
  def terms
    # parse_row returns nil for an empty row; drop those instead of calling
    # merge on nil.
    @terms ||= @body_rows.map { |row| parse_row(row) }.compact.map do |attributes|
      Term.new(attributes.merge("language_code" => @language_code))
    end
  end

  # @return [Hash] { "terms" => [term hashes] }
  def to_hash
    {
      "terms" => terms.map(&:to_hash)
    }
  end

  private

  # Finds the attribute name for a whitespace-normalised header title: the
  # value of the longest TERM_BODY_COLUMN_MAP key matching the start of it.
  #
  # @param title [String]
  # @return [String, nil] nil when nothing matches or the column is ignored
  def attribute_for(title)
    _header, attribute = TERM_BODY_COLUMN_MAP
      .select { |header, _attribute| title.match?(Regexp.new("^#{header}")) }
      .max_by { |header, _attribute| header.length }
    attribute
  end

  # Post-processes a cell value. An "authoritative-source" cell becomes a
  # hash holding the reference and, when the lookup yields an item, its URL;
  # every other cell is returned unchanged.
  #
  # @param value [String]
  # @param key [String]
  # @return [Hash, Object] Hash for "authoritative-source", else +value+
  def fetch_attribute(value, key)
    return value unless key == "authoritative-source"

    source = { "ref" => value }
    begin
      item = RelatonDb.instance.fetch(value)
      source["link"] = item.url if item
    rescue RelatonBib::RequestError => e
      # Best effort: a failed lookup is reported and the bare reference kept.
      warn e.message
    end
    source
  end
end
|
151
174
|
end
|
data/tc211-termbase.gemspec
CHANGED
@@ -24,8 +24,11 @@ Gem::Specification.new do |spec|
|
|
24
24
|
|
25
25
|
spec.add_runtime_dependency "iso-639"
|
26
26
|
spec.add_runtime_dependency "creek"
|
27
|
+
spec.add_runtime_dependency "relaton", "~>0.4.0"
|
27
28
|
|
28
|
-
spec.add_development_dependency "bundler", "~> 1
|
29
|
+
spec.add_development_dependency "bundler", "~> 2.0.1"
|
30
|
+
spec.add_development_dependency "debase"
|
29
31
|
spec.add_development_dependency "rake", "~> 10.0"
|
30
32
|
spec.add_development_dependency "rspec", "~> 3.0"
|
33
|
+
spec.add_development_dependency "ruby-debug-ide"
|
31
34
|
end
|