tc211-termbase 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/db/version ADDED
@@ -0,0 +1 @@
1
+ 0.4.1
@@ -0,0 +1,21 @@
1
+ require "singleton"
2
+ require "relaton"
3
+
4
+ module Tc211
5
+ module Termbase
6
+ # Relaton cache singleton.
7
+ class RelatonDb
8
+ include Singleton
9
+
10
+ def initialize
11
+ @db = Relaton::Db.new "db", nil
12
+ end
13
+
14
+ # @param code [String] reference
15
+ # @return [RelatonIso::IsoBibliographicItem]
16
+ def fetch(code)
17
+ @db.fetch code
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,151 +1,174 @@
1
1
  require_relative "sheet_section"
2
2
  require_relative "term"
3
+ require_relative "relaton_db"
3
4
 
4
5
  module Tc211::Termbase
5
6
 
6
- class TermsSection < SheetSection
7
- attr_accessor :structure
8
- attr_accessor :header_row
9
-
10
- TERM_HEADER_ROW_MATCH = {
11
- "A" => ["ISO 19135 Field\nRE_RegisterItem.itemIdentifier"],
12
- "B" => ["ISO 19135 Field\nRE_RegisterItem.name"],
13
- "C" => ["ISO 19135 Field\nRE_RegisterItem.\nalternativeExpression"],
14
- "D" => ["Country_Code"],
15
- # ... We don't need to match all the cells
16
- }
17
-
18
- TERM_BODY_COLUMN_MAP = {
19
- "Term_ID" => "id",
20
- "Term" => "term",
21
- "Term .OPERATING LANGUAGE." => "term",
22
- # In the English sheet, column is named "Term Abbreviation"
23
- "Term Abbreviation" => "abbrev",
24
- # In other sheets, column named "Term_Abbreviation"
25
- "Term_Abbreviation .OPERATING LANGUAGE." => "abbrev",
26
- "Country code" => "country-code",
27
- "Definition" => "definition",
28
- "Term .OPERATING LANGUAGE - ALTERNATIVE CHARACTER SET." => "alt",
29
- "Term in English" => nil,
30
- "Entry Status" => "entry-status",
31
- ## Must be one of 'notValid' 'valid' 'superseded' 'retired'
32
- "Term Clasification" => "classification",
33
- ## Must be one of the following 'preferred' 'admitted' 'deprecated'
34
- "Review Indicator" => "review-indicator",
35
- ## Must be one of the following <empty field> 'Under Review in Source Document'",
36
- "Authoritative Source" => "authoritative-source",
37
- "Similarity to Authoritative Source" => "authoritative-source-similarity",
38
- ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
39
- "Lineage Source" => "lineage-source",
40
- "Similarity to Lineage Source" => "lineage-source-similarity",
41
- ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
42
- "Term Synonyms" => "synonyms",
43
- "Date Accepted" => "date-accepted", # yyyy-mm-dd,
44
- "Date Amended" => "date-amended", # yyyy-mm-dd,
45
- "Review Date" => "review-date", # yyyy-mm-dd,
46
- "Review Status" => "review-status", ## Must be one of 'pending' 'tentative' 'final'",
47
- "Review Type" => "review-type", ## Must be one of 'supersession', 'retirement'",
48
- "Review Decision" => "review-decision", ## Must be one of 'withdrawn', 'accepted' 'notAccepted'",
49
- "Review Decision Date" => "review-decision-date", # yyyy-mm-dd
50
- "Review Decision Event" => "review-decision-event",
51
- "Review Decision Notes" => "review-decision-notes",
52
- "Example_1" => "example-1",
53
- "Note_1" => "note-1",
54
- "Example_2" => "example-2",
55
- "Note_2" => "note-2",
56
- "Example_3" => "example-3",
57
- "Note_3" => "note-3",
58
- "Example_4" => "example-4",
59
- "Note_4" => "note-4",
60
- "Example_5" => "example-5",
61
- "Note_5" => "note-5",
62
- "Example_6" => "example-6",
63
- "Note_6" => "note-6",
64
- "Example_7" => "example-7",
65
- "Note_7" => "note-7",
66
- "Example_8" => "example-8",
67
- "Note_8" => "note-8",
68
- "Glossary Release" => "release"
69
- ## Must be one of the following codes 'release1' = 1 'release1_retired' = -1 'release2' = 2 'release2_retired' = -2 etc "
70
- }
71
-
72
- def initialize(rows, options={})
73
- super
74
- raise StandardError.new("Does not match TermsSection header!") unless self.class.match_header(@rows[0])
75
- @mapping_rows = @rows[0..1]
76
- @header_row = @rows[2]
77
- @body_rows = @rows[3..-1]
78
- @language_code = options.delete(:language_code)
79
- self
80
- end
7
+ class TermsSection < SheetSection
8
+ attr_accessor :structure
9
+ attr_accessor :header_row
10
+
11
+ TERM_HEADER_ROW_MATCH = {
12
+ "A" => ["ISO 19135 Field\nRE_RegisterItem.itemIdentifier"],
13
+ "B" => ["ISO 19135 Field\nRE_RegisterItem.name"],
14
+ "C" => ["ISO 19135 Field\nRE_RegisterItem.\nalternativeExpression"],
15
+ "D" => ["Country_Code"],
16
+ # ... We don't need to match all the cells
17
+ }
18
+
19
+ TERM_BODY_COLUMN_MAP = {
20
+ "Term_ID" => "id",
21
+ "Term" => "term",
22
+ "Term .OPERATING LANGUAGE." => "term",
23
+ # In the English sheet, column is named "Term Abbreviation"
24
+ "Term Abbreviation" => "abbrev",
25
+ # In other sheets, column named "Term_Abbreviation"
26
+ "Term_Abbreviation .OPERATING LANGUAGE." => "abbrev",
27
+ "Country code" => "country-code",
28
+ "Definition" => "definition",
29
+ "Term .OPERATING LANGUAGE - ALTERNATIVE CHARACTER SET." => "alt",
30
+ "Term in English" => nil,
31
+ "Entry Status" => "entry-status",
32
+ ## Must be one of 'notValid' 'valid' 'superseded' 'retired'
33
+ "Term Clasification" => "classification",
34
+ ## Must be one of the following 'preferred' 'admitted' 'deprecated'
35
+ "Review Indicator" => "review-indicator",
36
+ ## Must be one of the following <empty field> 'Under Review in Source Document'",
37
+ "Authoritative Source" => "authoritative-source",
38
+ "Similarity to Authoritative Source" => "authoritative-source-similarity",
39
+ ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
40
+ "Lineage Source" => "lineage-source",
41
+ "Similarity to Lineage Source" => "lineage-source-similarity",
42
+ ## Must be one of the following codes: 'identical' = 1 'restyled' = 2 'context added' = 3 'generalisation' = 4 'specialisation' = 5 'unspecified' = 6",
43
+ "Term Synonyms" => "synonyms",
44
+ "Date Accepted" => "date-accepted", # yyyy-mm-dd,
45
+ "Date Amended" => "date-amended", # yyyy-mm-dd,
46
+ "Review Date" => "review-date", # yyyy-mm-dd,
47
+ "Review Status" => "review-status", ## Must be one of 'pending' 'tentative' 'final'",
48
+ "Review Type" => "review-type", ## Must be one of 'supersession', 'retirement'",
49
+ "Review Decision" => "review-decision", ## Must be one of 'withdrawn', 'accepted' 'notAccepted'",
50
+ "Review Decision Date" => "review-decision-date", # yyyy-mm-dd
51
+ "Review Decision Event" => "review-decision-event",
52
+ "Review Decision Notes" => "review-decision-notes",
53
+ "Example_1" => "example-1",
54
+ "Note_1" => "note-1",
55
+ "Example_2" => "example-2",
56
+ "Note_2" => "note-2",
57
+ "Example_3" => "example-3",
58
+ "Note_3" => "note-3",
59
+ "Example_4" => "example-4",
60
+ "Note_4" => "note-4",
61
+ "Example_5" => "example-5",
62
+ "Note_5" => "note-5",
63
+ "Example_6" => "example-6",
64
+ "Note_6" => "note-6",
65
+ "Example_7" => "example-7",
66
+ "Note_7" => "note-7",
67
+ "Example_8" => "example-8",
68
+ "Note_8" => "note-8",
69
+ "Glossary Release" => "release"
70
+ ## Must be one of the following codes 'release1' = 1 'release1_retired' = -1 'release2' = 2 'release2_retired' = -2 etc "
71
+ }
72
+
73
+ def initialize(rows, options={})
74
+ super
75
+ raise StandardError.new("Does not match TermsSection header!") unless self.class.match_header(@rows[0])
76
+ @mapping_rows = @rows[0..1]
77
+ @header_row = @rows[2]
78
+ @body_rows = @rows[3..-1]
79
+ @language_code = options.delete(:language_code)
80
+ self
81
+ end
82
+
83
+ def structure
84
+ @structure ||= @header_row.inject({}) do |acc, (key, value)|
85
+ # puts "#{key}, #{value}, #{GLOSSARY_HEADER_TITLES[value]}"
81
86
 
82
- def structure
83
- @structure ||= @header_row.inject({}) do |acc, (key, value)|
84
- # puts "#{key}, #{value}, #{GLOSSARY_HEADER_TITLES[value]}"
87
+ # convert whitespace to a single space
88
+ cleaned_value = value.gsub(/\s+/, ' ')
85
89
 
86
- # convert whitespace to a single space
87
- cleaned_value = value.gsub(/\s+/, ' ')
90
+ matches = TERM_BODY_COLUMN_MAP.map do |key, value|
91
+ # puts "key #{key}, value #{value}"
92
+ if cleaned_value[Regexp.new("^#{key}")]
93
+ [key, value]
94
+ end
95
+ end.compact
88
96
 
89
- matches = TERM_BODY_COLUMN_MAP.map do |key, value|
90
- # puts "key #{key}, value #{value}"
91
- if cleaned_value[Regexp.new("^#{key}")]
92
- [key, value]
97
+ discard, longest_match_key = matches.max_by do |(a, b)|
98
+ a.length
93
99
  end
94
- end.compact
95
100
 
96
- discard, longest_match_key = matches.max_by do |(a, b)|
97
- a.length
98
- end
101
+ # Here we need to skip "Term in English"
102
+ if key && longest_match_key
103
+ acc.merge!({ key => longest_match_key })
104
+ else
105
+ acc
106
+ end
99
107
 
100
- # Here we need to skip "Term in English"
101
- if key && longest_match_key
102
- acc.merge!({ key => longest_match_key })
103
- else
104
- acc
105
108
  end
106
-
107
109
  end
108
- end
109
110
 
110
- def self.match_header(row)
111
- # puts "row #{row}"
112
- row.inject(true) do |acc, (key, value)|
113
- # puts "#{key}, #{value}"
114
- if TERM_HEADER_ROW_MATCH[key]
115
- acc && TERM_HEADER_ROW_MATCH[key].include?(value)
116
- else
117
- acc
111
+ def self.match_header(row)
112
+ # puts "row #{row}"
113
+ row.inject(true) do |acc, (key, value)|
114
+ # puts "#{key}, #{value}"
115
+ if TERM_HEADER_ROW_MATCH[key]
116
+ acc && TERM_HEADER_ROW_MATCH[key].include?(value)
117
+ else
118
+ acc
119
+ end
118
120
  end
119
121
  end
120
- end
121
122
 
122
- def parse_row(row)
123
- return nil if row.empty?
124
- attributes = {}
123
+ def parse_row(row)
124
+ return nil if row.empty?
125
125
 
126
- structure.each_pair do |key, value|
127
- # puts "#{key}, #{value}, #{row[key]}"
128
- attribute_key = value
129
- attribute_value = row[key]
130
- next if attribute_value.nil?
131
- attributes[attribute_key] = attribute_value
132
- end
126
+ attributes = {}
133
127
 
134
- attributes
135
- end
128
+ structure.each_pair do |key, value|
129
+ # puts "#{key}, #{value}, #{row[key]}"
130
+ attribute_key = value
131
+ next if row[key].nil?
132
+
133
+ attribute_value = fetch_attribute row[key], attribute_key
134
+ attributes[attribute_key] = attribute_value
135
+ end
136
136
 
137
- def terms
138
- @terms ||= @body_rows.map do |row|
139
- Term.new(parse_row(row).merge("language_code" => @language_code))
137
+ attributes
140
138
  end
141
- end
142
139
 
143
- def to_hash
144
- {
145
- "terms" => terms.map(&:to_hash)
146
- }
147
- end
140
+ def terms
141
+ @terms ||= @body_rows.map do |row|
142
+ Term.new(parse_row(row).merge("language_code" => @language_code))
143
+ end
144
+ end
148
145
 
149
- end
146
+ def to_hash
147
+ {
148
+ "terms" => terms.map(&:to_hash)
149
+ }
150
+ end
150
151
 
152
+ private
153
+
154
+ # @param value [String]
155
+ # @param key [String]
156
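+ # @return [Hash, String] a Hash with "ref" (and "link" when an item is found) for "authoritative-source"; otherwise the value unchanged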
157
+ def fetch_attribute(value, key)
158
+ case key
159
+ when "authoritative-source"
160
+ begin
161
+ src = { "ref" => value }
162
+ item = RelatonDb.instance.fetch value
163
+ src["link"] = item.url if item
164
+ src
165
+ rescue RelatonBib::RequestError => e
166
+ warn e.message
167
+ src
168
+ end
169
+ else
170
+ value
171
+ end
172
+ end
173
+ end
151
174
  end
@@ -1,5 +1,5 @@
1
1
  module Tc211
2
2
  module Termbase
3
- VERSION = "0.1.1"
3
+ VERSION = "0.1.2"
4
4
  end
5
5
  end
@@ -24,8 +24,11 @@ Gem::Specification.new do |spec|
24
24
 
25
25
  spec.add_runtime_dependency "iso-639"
26
26
  spec.add_runtime_dependency "creek"
27
+ spec.add_runtime_dependency "relaton", "~>0.4.0"
27
28
 
28
- spec.add_development_dependency "bundler", "~> 1.17"
29
+ spec.add_development_dependency "bundler", "~> 2.0.1"
30
+ spec.add_development_dependency "debase"
29
31
  spec.add_development_dependency "rake", "~> 10.0"
30
32
  spec.add_development_dependency "rspec", "~> 3.0"
33
+ spec.add_development_dependency "ruby-debug-ide"
31
34
  end