tc211-termbase 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/db/version ADDED
@@ -0,0 +1 @@
1
+ 0.4.1
@@ -0,0 +1,21 @@
1
+ require "singleton"
2
+ require "relaton"
3
+
4
+ module Tc211
5
+ module Termbase
6
+ # Relaton cache singleton.
7
+ class RelatonDb
8
+ include Singleton
9
+
10
+ def initialize
11
+ @db = Relaton::Db.new "db", nil
12
+ end
13
+
14
+ # @param code [String] reference
15
+ # @return [RelatonIso::IsoBibliographicItem]
16
+ def fetch(code)
17
+ @db.fetch code
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,151 +1,174 @@
1
1
  require_relative "sheet_section"
2
2
  require_relative "term"
3
+ require_relative "relaton_db"
3
4
 
4
5
  module Tc211::Termbase
5
6
 
6
- class TermsSection < SheetSection
7
- attr_accessor :structure
8
- attr_accessor :header_row
9
-
10
- TERM_HEADER_ROW_MATCH = {
11
- "A" => ["ISO 19135 Field\nRE_RegisterItem.itemIdentifier"],
12
- "B" => ["ISO 19135 Field\nRE_RegisterItem.name"],
13
- "C" => ["ISO 19135 Field\nRE_RegisterItem.\nalternativeExpression"],
14
- "D" => ["Country_Code"],
15
- # ... We don't need to match all the cells
16
- }
17
-
18
- TERM_BODY_COLUMN_MAP = {
19
- "Term_ID" => "id",
20
- "Term" => "term",
21
- "Term .OPERATING LANGUAGE." => "term",
22
- # In the English sheet, column is named "Term Abbreviation"
23
- "Term Abbreviation" => "abbrev",
24
- # In other sheets, column named "Term_Abbreviation"
25
- "Term_Abbreviation .OPERATING LANGUAGE." => "abbrev",
26
- "Country code" => "country-code",
27
- "Definition" => "definition",
28
- "Term .OPERATING LANGUAGE - ALTERNATIVE CHARACTER SET." => "alt",
29
- "Term in English" => nil,
30
- "Entry Status" => "entry-status",
31
- ## Must be one of 'notValid' 'valid' 'superseded' 'retired'
32
- "Term Clasification" => "classification",
33
- ## Must be one of the following 'preferred' 'admitted' 'deprecated'
34
- "Review Indicator" => "review-indicator",
35
- ## Must be one of the following <empty field> 'Under Review in Source Document'",
36
- "Authoritative Source" => "authoritative-source",
37
- "Similarity to Authoritative Source" => "authoritative-source-similarity",
38
- ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
39
- "Lineage Source" => "lineage-source",
40
- "Similarity to Lineage Source" => "lineage-source-similarity",
41
- ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
42
- "Term Synonyms" => "synonyms",
43
- "Date Accepted" => "date-accepted", # yyyy-mm-dd,
44
- "Date Amended" => "date-amended", # yyyy-mm-dd,
45
- "Review Date" => "review-date", # yyyy-mm-dd,
46
- "Review Status" => "review-status", ## Must be one of 'pending' 'tentative' 'final'",
47
- "Review Type" => "review-type", ## Must be one of 'supersession', 'retirement'",
48
- "Review Decision" => "review-decision", ## Must be one of 'withdrawn', 'accepted' 'notAccepted'",
49
- "Review Decision Date" => "review-decision-date", # yyyy-mm-dd
50
- "Review Decision Event" => "review-decision-event",
51
- "Review Decision Notes" => "review-decision-notes",
52
- "Example_1" => "example-1",
53
- "Note_1" => "note-1",
54
- "Example_2" => "example-2",
55
- "Note_2" => "note-2",
56
- "Example_3" => "example-3",
57
- "Note_3" => "note-3",
58
- "Example_4" => "example-4",
59
- "Note_4" => "note-4",
60
- "Example_5" => "example-5",
61
- "Note_5" => "note-5",
62
- "Example_6" => "example-6",
63
- "Note_6" => "note-6",
64
- "Example_7" => "example-7",
65
- "Note_7" => "note-7",
66
- "Example_8" => "example-8",
67
- "Note_8" => "note-8",
68
- "Glossary Release" => "release"
69
- ## Must be one of the following codes 'release1' = 1 'release1_retired' = -1 'release2' = 2 'release2_retired' = -2 etc "
70
- }
71
-
72
- def initialize(rows, options={})
73
- super
74
- raise StandardError.new("Does not match TermsSection header!") unless self.class.match_header(@rows[0])
75
- @mapping_rows = @rows[0..1]
76
- @header_row = @rows[2]
77
- @body_rows = @rows[3..-1]
78
- @language_code = options.delete(:language_code)
79
- self
80
- end
7
+ class TermsSection < SheetSection
8
+ attr_accessor :structure
9
+ attr_accessor :header_row
10
+
11
+ TERM_HEADER_ROW_MATCH = {
12
+ "A" => ["ISO 19135 Field\nRE_RegisterItem.itemIdentifier"],
13
+ "B" => ["ISO 19135 Field\nRE_RegisterItem.name"],
14
+ "C" => ["ISO 19135 Field\nRE_RegisterItem.\nalternativeExpression"],
15
+ "D" => ["Country_Code"],
16
+ # ... We don't need to match all the cells
17
+ }
18
+
19
+ TERM_BODY_COLUMN_MAP = {
20
+ "Term_ID" => "id",
21
+ "Term" => "term",
22
+ "Term .OPERATING LANGUAGE." => "term",
23
+ # In the English sheet, column is named "Term Abbreviation"
24
+ "Term Abbreviation" => "abbrev",
25
+ # In other sheets, column named "Term_Abbreviation"
26
+ "Term_Abbreviation .OPERATING LANGUAGE." => "abbrev",
27
+ "Country code" => "country-code",
28
+ "Definition" => "definition",
29
+ "Term .OPERATING LANGUAGE - ALTERNATIVE CHARACTER SET." => "alt",
30
+ "Term in English" => nil,
31
+ "Entry Status" => "entry-status",
32
+ ## Must be one of 'notValid' 'valid' 'superseded' 'retired'
33
+ "Term Clasification" => "classification",
34
+ ## Must be one of the following 'preferred' 'admitted' 'deprecated'
35
+ "Review Indicator" => "review-indicator",
36
+ ## Must be one of the following <empty field> 'Under Review in Source Document'",
37
+ "Authoritative Source" => "authoritative-source",
38
+ "Similarity to Authoritative Source" => "authoritative-source-similarity",
39
+ ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
40
+ "Lineage Source" => "lineage-source",
41
+ "Similarity to Lineage Source" => "lineage-source-similarity",
42
+ ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
43
+ "Term Synonyms" => "synonyms",
44
+ "Date Accepted" => "date-accepted", # yyyy-mm-dd,
45
+ "Date Amended" => "date-amended", # yyyy-mm-dd,
46
+ "Review Date" => "review-date", # yyyy-mm-dd,
47
+ "Review Status" => "review-status", ## Must be one of 'pending' 'tentative' 'final'",
48
+ "Review Type" => "review-type", ## Must be one of 'supersession', 'retirement'",
49
+ "Review Decision" => "review-decision", ## Must be one of 'withdrawn', 'accepted' 'notAccepted'",
50
+ "Review Decision Date" => "review-decision-date", # yyyy-mm-dd
51
+ "Review Decision Event" => "review-decision-event",
52
+ "Review Decision Notes" => "review-decision-notes",
53
+ "Example_1" => "example-1",
54
+ "Note_1" => "note-1",
55
+ "Example_2" => "example-2",
56
+ "Note_2" => "note-2",
57
+ "Example_3" => "example-3",
58
+ "Note_3" => "note-3",
59
+ "Example_4" => "example-4",
60
+ "Note_4" => "note-4",
61
+ "Example_5" => "example-5",
62
+ "Note_5" => "note-5",
63
+ "Example_6" => "example-6",
64
+ "Note_6" => "note-6",
65
+ "Example_7" => "example-7",
66
+ "Note_7" => "note-7",
67
+ "Example_8" => "example-8",
68
+ "Note_8" => "note-8",
69
+ "Glossary Release" => "release"
70
+ ## Must be one of the following codes 'release1' = 1 'release1_retired' = -1 'release2' = 2 'release2_retired' = -2 etc "
71
+ }
72
+
73
+ def initialize(rows, options={})
74
+ super
75
+ raise StandardError.new("Does not match TermsSection header!") unless self.class.match_header(@rows[0])
76
+ @mapping_rows = @rows[0..1]
77
+ @header_row = @rows[2]
78
+ @body_rows = @rows[3..-1]
79
+ @language_code = options.delete(:language_code)
80
+ self
81
+ end
82
+
83
+ def structure
84
+ @structure ||= @header_row.inject({}) do |acc, (key, value)|
85
+ # puts "#{key}, #{value}, #{GLOSSARY_HEADER_TITLES[value]}"
81
86
 
82
- def structure
83
- @structure ||= @header_row.inject({}) do |acc, (key, value)|
84
- # puts "#{key}, #{value}, #{GLOSSARY_HEADER_TITLES[value]}"
87
+ # convert whitespace to a single space
88
+ cleaned_value = value.gsub(/\s+/, ' ')
85
89
 
86
- # convert whitespace to a single space
87
- cleaned_value = value.gsub(/\s+/, ' ')
90
+ matches = TERM_BODY_COLUMN_MAP.map do |key, value|
91
+ # puts "key #{key}, value #{value}"
92
+ if cleaned_value[Regexp.new("^#{key}")]
93
+ [key, value]
94
+ end
95
+ end.compact
88
96
 
89
- matches = TERM_BODY_COLUMN_MAP.map do |key, value|
90
- # puts "key #{key}, value #{value}"
91
- if cleaned_value[Regexp.new("^#{key}")]
92
- [key, value]
97
+ discard, longest_match_key = matches.max_by do |(a, b)|
98
+ a.length
93
99
  end
94
- end.compact
95
100
 
96
- discard, longest_match_key = matches.max_by do |(a, b)|
97
- a.length
98
- end
101
+ # Here we need to skip "Term in English"
102
+ if key && longest_match_key
103
+ acc.merge!({ key => longest_match_key })
104
+ else
105
+ acc
106
+ end
99
107
 
100
- # Here we need to skip "Term in English"
101
- if key && longest_match_key
102
- acc.merge!({ key => longest_match_key })
103
- else
104
- acc
105
108
  end
106
-
107
109
  end
108
- end
109
110
 
110
- def self.match_header(row)
111
- # puts "row #{row}"
112
- row.inject(true) do |acc, (key, value)|
113
- # puts "#{key}, #{value}"
114
- if TERM_HEADER_ROW_MATCH[key]
115
- acc && TERM_HEADER_ROW_MATCH[key].include?(value)
116
- else
117
- acc
111
+ def self.match_header(row)
112
+ # puts "row #{row}"
113
+ row.inject(true) do |acc, (key, value)|
114
+ # puts "#{key}, #{value}"
115
+ if TERM_HEADER_ROW_MATCH[key]
116
+ acc && TERM_HEADER_ROW_MATCH[key].include?(value)
117
+ else
118
+ acc
119
+ end
118
120
  end
119
121
  end
120
- end
121
122
 
122
- def parse_row(row)
123
- return nil if row.empty?
124
- attributes = {}
123
+ def parse_row(row)
124
+ return nil if row.empty?
125
125
 
126
- structure.each_pair do |key, value|
127
- # puts "#{key}, #{value}, #{row[key]}"
128
- attribute_key = value
129
- attribute_value = row[key]
130
- next if attribute_value.nil?
131
- attributes[attribute_key] = attribute_value
132
- end
126
+ attributes = {}
133
127
 
134
- attributes
135
- end
128
+ structure.each_pair do |key, value|
129
+ # puts "#{key}, #{value}, #{row[key]}"
130
+ attribute_key = value
131
+ next if row[key].nil?
132
+
133
+ attribute_value = fetch_attribute row[key], attribute_key
134
+ attributes[attribute_key] = attribute_value
135
+ end
136
136
 
137
- def terms
138
- @terms ||= @body_rows.map do |row|
139
- Term.new(parse_row(row).merge("language_code" => @language_code))
137
+ attributes
140
138
  end
141
- end
142
139
 
143
- def to_hash
144
- {
145
- "terms" => terms.map(&:to_hash)
146
- }
147
- end
140
+ def terms
141
+ @terms ||= @body_rows.map do |row|
142
+ Term.new(parse_row(row).merge("language_code" => @language_code))
143
+ end
144
+ end
148
145
 
149
- end
146
+ def to_hash
147
+ {
148
+ "terms" => terms.map(&:to_hash)
149
+ }
150
+ end
150
151
 
152
+ private
153
+
154
+ # @param value [String]
155
+ # @param key [String]
156
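+ # @return [Hash, String] Hash with "ref" (and "link" when found) for "authoritative-source"; otherwise the value unchanged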
157
+ def fetch_attribute(value, key)
158
+ case key
159
+ when "authoritative-source"
160
+ begin
161
+ src = { "ref" => value }
162
+ item = RelatonDb.instance.fetch value
163
+ src["link"] = item.url if item
164
+ src
165
+ rescue RelatonBib::RequestError => e
166
+ warn e.message
167
+ src
168
+ end
169
+ else
170
+ value
171
+ end
172
+ end
173
+ end
151
174
  end
@@ -1,5 +1,5 @@
1
1
  module Tc211
2
2
  module Termbase
3
- VERSION = "0.1.1"
3
+ VERSION = "0.1.2"
4
4
  end
5
5
  end
@@ -24,8 +24,11 @@ Gem::Specification.new do |spec|
24
24
 
25
25
  spec.add_runtime_dependency "iso-639"
26
26
  spec.add_runtime_dependency "creek"
27
+ spec.add_runtime_dependency "relaton", "~>0.4.0"
27
28
 
28
- spec.add_development_dependency "bundler", "~> 1.17"
29
+ spec.add_development_dependency "bundler", "~> 2.0.1"
30
+ spec.add_development_dependency "debase"
29
31
  spec.add_development_dependency "rake", "~> 10.0"
30
32
  spec.add_development_dependency "rspec", "~> 3.0"
33
+ spec.add_development_dependency "ruby-debug-ide"
31
34
  end