bcl 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/bcl/MasterTaxonomy.rb +188 -59
- data/lib/bcl/current_taxonomy.json +0 -0
- data/lib/bcl/current_taxonomy.xml +2913 -416
- metadata +4 -4
data/lib/bcl/MasterTaxonomy.rb
CHANGED
@@ -38,13 +38,19 @@ module BCL
|
|
38
38
|
TagStruct = Struct.new(:level_hierarchy, :name, :description, :parent_tag, :child_tags, :terms)
|
39
39
|
|
40
40
|
# each TermStruct represents a row in the master taxonomy
|
41
|
-
TermStruct = Struct.new(:first_level, :second_level, :third_level, :level_hierarchy, :name, :description
|
41
|
+
TermStruct = Struct.new(:first_level, :second_level, :third_level, :level_hierarchy, :name, :description,
|
42
|
+
:abbr, :data_type, :enums, :ip_written, :ip_symbol, :ip_mask, :si_written, :si_symbol, :si_mask, :allow_multiple, :row, :tp_include, :tp_required, :tp_use_in_search, :tp_use_in_facets, :tp_hide_from_data_users, :tp_third_party_testing, :tp_additional_web_dev_info, :tp_additional_data_user_info, :tp_additional_data_submitter_info)
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
|
42
47
|
|
43
48
|
# class for parsing, validating, and querying the master taxonomy document
|
44
49
|
class MasterTaxonomy
|
45
50
|
|
46
51
|
# parse the master taxonomy document
|
47
|
-
def initialize(xlsx_path = nil)
|
52
|
+
def initialize(xlsx_path = nil, sort_alpha = false)
|
53
|
+
@sort_alphabetical = sort_alpha
|
48
54
|
|
49
55
|
# hash of level_taxonomy to tag
|
50
56
|
@tag_hash = Hash.new
|
@@ -89,13 +95,14 @@ class MasterTaxonomy
|
|
89
95
|
path = current_taxonomy_path
|
90
96
|
end
|
91
97
|
puts "Saving current taxonomy to #{path}"
|
98
|
+
# this is not really JSON... it is Ruby's Marshal serialization format
|
92
99
|
File.open(path, 'w') do |file|
|
93
100
|
Marshal.dump(@tag_hash, file)
|
94
101
|
end
|
95
102
|
end
|
96
103
|
|
97
104
|
# write taxonomy to xml
|
98
|
-
def write_xml(path)
|
105
|
+
def write_xml(path, output_type = 'tpex')
|
99
106
|
|
100
107
|
root_tag = @tag_hash[""]
|
101
108
|
|
@@ -110,7 +117,7 @@ class MasterTaxonomy
|
|
110
117
|
#setup the xml file
|
111
118
|
xml.instruct!(:xml, :version=>"1.0", :encoding=>"UTF-8")
|
112
119
|
xml.schema("xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance") {
|
113
|
-
write_tag_to_xml(root_tag, xml)
|
120
|
+
write_tag_to_xml(root_tag, 0, xml, output_type)
|
114
121
|
}
|
115
122
|
end
|
116
123
|
|
@@ -122,20 +129,28 @@ class MasterTaxonomy
|
|
122
129
|
def get_terms(tag)
|
123
130
|
|
124
131
|
terms = tag.terms
|
125
|
-
|
132
|
+
|
126
133
|
parent_tag = tag.parent_tag
|
127
134
|
while not parent_tag.nil?
|
128
|
-
|
135
|
+
terms.concat(parent_tag.terms)
|
129
136
|
parent_tag = parent_tag.parent_tag
|
130
137
|
end
|
131
|
-
|
132
|
-
|
138
|
+
|
139
|
+
|
140
|
+
#sort the terms as they come out
|
141
|
+
result = terms.uniq
|
142
|
+
if !@sort_alphabetical
|
143
|
+
result = result.sort {|x, y| x.row <=> y.row}
|
144
|
+
else
|
145
|
+
result = result.sort {|x, y| x.name <=> y.name}
|
146
|
+
end
|
147
|
+
|
148
|
+
return result
|
133
149
|
end
|
134
150
|
|
135
151
|
# check that the given component conforms to the master taxonomy
|
136
152
|
def check_component(component)
|
137
153
|
valid = true
|
138
|
-
|
139
154
|
tag = nil
|
140
155
|
|
141
156
|
# see if we can find the component's tag in the taxonomy
|
@@ -208,6 +223,8 @@ class MasterTaxonomy
|
|
208
223
|
# add root tag
|
209
224
|
root_terms = []
|
210
225
|
root_terms << TermStruct.new("", "", "", "", "OpenStudio Type", "Type of OpenStudio Object")
|
226
|
+
root_terms[0].row = 0
|
227
|
+
#root_terms << TermStruct.new()
|
211
228
|
root_tag = TagStruct.new("", "root", "Root of the taxonomy", nil, [], root_terms)
|
212
229
|
@tag_hash[""] = root_tag
|
213
230
|
|
@@ -234,39 +251,89 @@ class MasterTaxonomy
|
|
234
251
|
|
235
252
|
|
236
253
|
def validate_terms_header(terms_worksheet)
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
254
|
+
test_arr = []
|
255
|
+
test_arr << "First Level"
|
256
|
+
test_arr << "Second Level"
|
257
|
+
test_arr << "Third Level"
|
258
|
+
test_arr << "Level Hierarchy"
|
259
|
+
test_arr << "Term"
|
260
|
+
test_arr << "Abbr"
|
261
|
+
test_arr << "Description"
|
262
|
+
test_arr << "Data Type"
|
263
|
+
test_arr << "Allow Multiple"
|
264
|
+
test_arr << "Enumerations"
|
265
|
+
test_arr << "IP Units Written Out"
|
266
|
+
test_arr << "IP Units Symbol"
|
267
|
+
test_arr << "IP Display Mask"
|
268
|
+
test_arr << "SI Units Written Out"
|
269
|
+
test_arr << "SI Units Symbol"
|
270
|
+
test_arr << "SI Display Mask"
|
271
|
+
test_arr << "Unit Conversion"
|
272
|
+
test_arr << "Default"
|
273
|
+
test_arr << "Min"
|
274
|
+
test_arr << "Max"
|
275
|
+
test_arr << "Source"
|
276
|
+
test_arr << "Review State"
|
277
|
+
test_arr << "General Comments"
|
278
|
+
test_arr << "Requested By / Project"
|
279
|
+
test_arr << "Include in TPE"
|
280
|
+
test_arr << "Required for Adding a New Product"
|
281
|
+
test_arr << "Use in Search Results"
|
282
|
+
test_arr << "Use in Search Facets"
|
283
|
+
test_arr << "Show/Hide Data from Data Users"
|
284
|
+
test_arr << "Additional Instructions for Web Developers"
|
285
|
+
test_arr << "Related Third Party Testing Standards"
|
286
|
+
test_arr << "Additional Guidance to Data Submitters"
|
287
|
+
test_arr << "Additional Guidance to Data Users"
|
288
|
+
|
289
|
+
|
290
|
+
parse = true
|
291
|
+
col = 1
|
292
|
+
while parse
|
293
|
+
if terms_worksheet.Columns(col).Rows(2).Value.nil? || col > test_arr.size
|
294
|
+
parse = false
|
295
|
+
else
|
296
|
+
if not terms_worksheet.Columns(col).Rows(2).Value == test_arr[col-1]
|
297
|
+
raise "Header does not match: #{col}: '#{terms_worksheet.Columns(col).Rows(2).Value} <> #{test_arr[col-1]}'"
|
298
|
+
end
|
299
|
+
end
|
300
|
+
col += 1
|
301
|
+
end
|
256
302
|
end
|
257
303
|
|
258
304
|
def parse_term(terms_worksheet, row)
|
259
305
|
|
260
306
|
term = TermStruct.new
|
261
|
-
|
262
|
-
term.
|
263
|
-
term.
|
264
|
-
term.
|
265
|
-
term.
|
266
|
-
|
267
|
-
term.
|
268
|
-
|
269
|
-
|
307
|
+
term.row = row
|
308
|
+
term.first_level = terms_worksheet.Columns(1).Rows(row).Value
|
309
|
+
term.second_level = terms_worksheet.Columns(2).Rows(row).Value
|
310
|
+
term.third_level = terms_worksheet.Columns(3).Rows(row).Value
|
311
|
+
term.level_hierarchy = terms_worksheet.Columns(4).Rows(row).Value
|
312
|
+
term.name = terms_worksheet.Columns(5).Rows(row).Value
|
313
|
+
term.abbr = terms_worksheet.Columns(6).Rows(row).Value
|
314
|
+
term.description = terms_worksheet.Columns(7).Rows(row).Value
|
315
|
+
term.data_type = terms_worksheet.Columns(8).Rows(row).Value
|
316
|
+
term.allow_multiple = terms_worksheet.Columns(9).Rows(row).Value
|
317
|
+
term.enums = terms_worksheet.Columns(10).Rows(row).Value
|
318
|
+
term.ip_written = terms_worksheet.Columns(11).Rows(row).Value
|
319
|
+
term.ip_symbol = terms_worksheet.Columns(12).Rows(row).Value
|
320
|
+
term.ip_mask = terms_worksheet.Columns(13).Rows(row).Value
|
321
|
+
term.si_written = terms_worksheet.Columns(14).Rows(row).Value
|
322
|
+
term.si_symbol = terms_worksheet.Columns(15).Rows(row).Value
|
323
|
+
term.si_mask = terms_worksheet.Columns(16).Rows(row).Value
|
324
|
+
|
325
|
+
#custom TPex Columns
|
326
|
+
term.tp_include = terms_worksheet.Columns(25).Rows(row).Value
|
327
|
+
term.tp_required = terms_worksheet.Columns(26).Rows(row).Value
|
328
|
+
term.tp_use_in_search = terms_worksheet.Columns(27).Rows(row).Value
|
329
|
+
term.tp_use_in_facets = terms_worksheet.Columns(28).Rows(row).Value
|
330
|
+
term.tp_hide_from_data_users = terms_worksheet.Columns(29).Rows(row).Value
|
331
|
+
term.tp_third_party_testing = terms_worksheet.Columns(30).Rows(row).Value
|
332
|
+
term.tp_additional_web_dev_info = terms_worksheet.Columns(31).Rows(row).Value
|
333
|
+
term.tp_additional_data_user_info = terms_worksheet.Columns(32).Rows(row).Value
|
334
|
+
term.tp_additional_data_submitter_info = terms_worksheet.Columns(33).Rows(row).Value
|
335
|
+
|
336
|
+
# trigger to quit parsing the Excel doc
|
270
337
|
if term.first_level.nil? or term.first_level.empty?
|
271
338
|
return nil
|
272
339
|
end
|
@@ -327,17 +394,21 @@ class MasterTaxonomy
|
|
327
394
|
end
|
328
395
|
|
329
396
|
def sort_tag(tag)
|
330
|
-
tag.terms = tag.terms.sort {|x, y| x.
|
331
|
-
tag.child_tags = tag.child_tags.sort {|x, y| x.
|
397
|
+
#tag.terms = tag.terms.sort {|x, y| x.level_hierarchy <=> y.level_hierarchy}
|
398
|
+
tag.child_tags = tag.child_tags.sort {|x, y| x.level_hierarchy <=> y.level_hierarchy}
|
332
399
|
tag.child_tags.each {|child_tag| sort_tag(child_tag) }
|
400
|
+
|
401
|
+
#tag.terms = tag.terms.sort {|x, y| x.name <=> y.name}
|
402
|
+
#tag.child_tags = tag.child_tags.sort {|x, y| x.name <=> y.name}
|
403
|
+
#tag.child_tags.each {|child_tag| sort_tag(child_tag) }
|
333
404
|
end
|
334
405
|
|
335
406
|
def check_tag(tag)
|
336
407
|
|
337
408
|
if tag.description.nil? or tag.description.empty?
|
338
|
-
puts "tag '#{tag.level_hierarchy}' has no description"
|
409
|
+
#puts "tag '#{tag.level_hierarchy}' has no description"
|
339
410
|
end
|
340
|
-
|
411
|
+
|
341
412
|
tag.terms.each {|term| check_term(term) }
|
342
413
|
tag.child_tags.each {|child_tag| check_tag(child_tag) }
|
343
414
|
end
|
@@ -372,34 +443,92 @@ class MasterTaxonomy
|
|
372
443
|
valid = false
|
373
444
|
end
|
374
445
|
|
375
|
-
|
446
|
+
if !term.data_type.nil?
|
447
|
+
valid_types = ["double", "integer", "enum", "file", "string"]
|
448
|
+
if (term.data_type.downcase != term.data_type) || !valid_types.include?(term.data_type)
|
449
|
+
puts "[ERROR] Term '#{term.name}' does not have a valid data type with '#{term.data_type}'"
|
450
|
+
end
|
451
|
+
|
452
|
+
if term.data_type.downcase == "enum"
|
453
|
+
if term.enums.nil? || term.enums == "" || term.enums.downcase == "no enum found"
|
454
|
+
puts "[ERROR] Term '#{term.name}' does not have valid enumerations"
|
455
|
+
end
|
456
|
+
end
|
457
|
+
end
|
376
458
|
|
377
459
|
return valid
|
378
460
|
end
|
379
461
|
|
380
462
|
def check_term(term)
|
381
463
|
if term.description.nil? or term.description.empty?
|
382
|
-
puts "term '#{term.level_hierarchy}.#{term.name}' has no description"
|
464
|
+
#puts "term '#{term.level_hierarchy}.#{term.name}' has no description"
|
383
465
|
end
|
384
466
|
end
|
385
467
|
|
468
|
+
# write term to xml
|
469
|
+
def write_terms_to_xml(tag, xml, output_type)
|
470
|
+
terms = get_terms(tag)
|
471
|
+
if terms.size > 0
|
472
|
+
terms.each do |term|
|
473
|
+
xml.term {
|
474
|
+
xml.name term.name
|
475
|
+
xml.abbr term.abbr if !term.abbr.nil?
|
476
|
+
xml.description term.description if !term.description.nil?
|
477
|
+
xml.data_type term.data_type if !term.data_type.nil?
|
478
|
+
xml.allow_multiple term.allow_multiple if !term.allow_multiple.nil?
|
479
|
+
|
480
|
+
if !term.enums.nil? && term.enums != ""
|
481
|
+
xml.enumerations {
|
482
|
+
out = term.enums.split("|")
|
483
|
+
out.sort! if @sort_alphabetical
|
484
|
+
out.each do |enum|
|
485
|
+
xml.enumeration enum
|
486
|
+
end
|
487
|
+
}
|
488
|
+
end
|
489
|
+
xml.ip_written term.ip_written if !term.ip_written.nil?
|
490
|
+
xml.ip_symbol term.ip_symbol if !term.ip_symbol.nil?
|
491
|
+
xml.ip_mask term.ip_mask if !term.ip_mask.nil?
|
492
|
+
xml.si_written term.si_written if !term.si_written.nil?
|
493
|
+
xml.si_symbol term.si_symbol if !term.si_symbol.nil?
|
494
|
+
xml.si_mask term.si_mask if !term.si_mask.nil?
|
495
|
+
xml.row term.row if !term.row.nil?
|
496
|
+
|
497
|
+
if output_type == 'tpex'
|
498
|
+
xml.tp_include term.tp_include if !term.tp_include.nil?
|
499
|
+
xml.tp_required term.tp_required if !term.tp_required.nil?
|
500
|
+
xml.tp_use_in_search term.tp_use_in_search if !term.tp_use_in_search.nil?
|
501
|
+
xml.tp_use_in_facets term.tp_use_in_facets if !term.tp_use_in_facets.nil?
|
502
|
+
xml.tp_hide_from_data_users term.tp_hide_from_data_users if !term.tp_hide_from_data_users.nil?
|
503
|
+
xml.tp_third_party_testing term.tp_third_party_testing if !term.tp_third_party_testing.nil?
|
504
|
+
xml.tp_additional_web_dev_info term.tp_additional_web_dev_info if !term.tp_additional_web_dev_info.nil?
|
505
|
+
xml.tp_additional_data_user_info term.tp_additional_data_user_info if !term.tp_additional_data_user_info.nil?
|
506
|
+
xml.tp_additional_data_submitter_info term.tp_additional_data_submitter_info if !term.tp_additional_data_submitter_info.nil?
|
507
|
+
end
|
508
|
+
}
|
509
|
+
end
|
510
|
+
end
|
511
|
+
end
|
512
|
+
|
386
513
|
# write a tag to xml
|
387
|
-
def write_tag_to_xml(tag, xml)
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
514
|
+
def write_tag_to_xml(tag, level, xml, output_type)
|
515
|
+
level_string = "level_#{level}"
|
516
|
+
xml.tag!(level_string) {
|
517
|
+
s_temp = tag.name
|
518
|
+
xml.name s_temp
|
519
|
+
|
520
|
+
level += 1
|
521
|
+
|
522
|
+
if tag.child_tags.size == 0
|
523
|
+
write_terms_to_xml(tag, xml, output_type)
|
524
|
+
end
|
525
|
+
|
526
|
+
child_tags = tag.child_tags
|
527
|
+
child_tags.each do |child_tag|
|
528
|
+
write_tag_to_xml(child_tag, level, xml, output_type)
|
529
|
+
end
|
530
|
+
|
531
|
+
}
|
403
532
|
end
|
404
533
|
|
405
534
|
end
|
Binary file
|