bcl 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bcl/MasterTaxonomy.rb +188 -59
- data/lib/bcl/current_taxonomy.json +0 -0
- data/lib/bcl/current_taxonomy.xml +2913 -416
- metadata +4 -4
data/lib/bcl/MasterTaxonomy.rb
CHANGED
@@ -38,13 +38,19 @@ module BCL
|
|
38
38
|
TagStruct = Struct.new(:level_hierarchy, :name, :description, :parent_tag, :child_tags, :terms)
|
39
39
|
|
40
40
|
# each TermStruct represents a row in the master taxonomy
|
41
|
-
TermStruct = Struct.new(:first_level, :second_level, :third_level, :level_hierarchy, :name, :description
|
41
|
+
# One record per term row of the master taxonomy. Members are positional, so
# their order here must not change.
TermStruct = Struct.new(
  # placement of the term in the taxonomy and its descriptive text
  :first_level, :second_level, :third_level, :level_hierarchy,
  :name, :description, :abbr,
  # value typing
  :data_type, :enums,
  # IP and SI unit presentation
  :ip_written, :ip_symbol, :ip_mask,
  :si_written, :si_symbol, :si_mask,
  :allow_multiple,
  # row the term was assigned when it was created
  :row,
  # tp_* members: presumably the custom TPex columns -- confirm against parse_term
  :tp_include, :tp_required, :tp_use_in_search, :tp_use_in_facets,
  :tp_hide_from_data_users, :tp_third_party_testing,
  :tp_additional_web_dev_info, :tp_additional_data_user_info,
  :tp_additional_data_submitter_info
)
|
43
|
+
|
44
|
+
|
45
|
+
|
46
|
+
|
42
47
|
|
43
48
|
# class for parsing, validating, and querying the master taxonomy document
|
44
49
|
class MasterTaxonomy
|
45
50
|
|
46
51
|
# parse the master taxonomy document
|
47
|
-
def initialize(xlsx_path = nil)
|
52
|
+
def initialize(xlsx_path = nil, sort_alpha = false)
|
53
|
+
@sort_alphabetical = sort_alpha
|
48
54
|
|
49
55
|
# hash of level_taxonomy to tag
|
50
56
|
@tag_hash = Hash.new
|
@@ -89,13 +95,14 @@ class MasterTaxonomy
|
|
89
95
|
path = current_taxonomy_path
|
90
96
|
end
|
91
97
|
puts "Saving current taxonomy to #{path}"
|
98
|
+
# NOTE: this is not really JSON; the file holds Ruby's Marshal serialization of the tag hash
|
92
99
|
File.open(path, 'w') do |file|
|
93
100
|
Marshal.dump(@tag_hash, file)
|
94
101
|
end
|
95
102
|
end
|
96
103
|
|
97
104
|
# write taxonomy to xml
|
98
|
-
def write_xml(path)
|
105
|
+
def write_xml(path, output_type = 'tpex')
|
99
106
|
|
100
107
|
root_tag = @tag_hash[""]
|
101
108
|
|
@@ -110,7 +117,7 @@ class MasterTaxonomy
|
|
110
117
|
#setup the xml file
|
111
118
|
xml.instruct!(:xml, :version=>"1.0", :encoding=>"UTF-8")
|
112
119
|
xml.schema("xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance") {
|
113
|
-
write_tag_to_xml(root_tag, xml)
|
120
|
+
write_tag_to_xml(root_tag, 0, xml, output_type)
|
114
121
|
}
|
115
122
|
end
|
116
123
|
|
@@ -122,20 +129,28 @@ class MasterTaxonomy
|
|
122
129
|
# Collect the terms that apply to a tag: its own terms plus those of every
# ancestor reached through parent_tag.
#
# @param tag [TagStruct] tag whose effective terms are wanted
# @return [Array<TermStruct>] de-duplicated terms, ordered by name when
#   @sort_alphabetical is set and by row otherwise
def get_terms(tag)
  # Work on a copy: concatenating onto tag.terms itself would permanently
  # append the ancestors' terms to the tag, growing the list on every call.
  terms = tag.terms.dup

  parent_tag = tag.parent_tag
  until parent_tag.nil?
    terms.concat(parent_tag.terms)
    parent_tag = parent_tag.parent_tag
  end

  # sort the terms as they come out
  unique_terms = terms.uniq
  if @sort_alphabetical
    unique_terms.sort { |x, y| x.name <=> y.name }
  else
    unique_terms.sort { |x, y| x.row <=> y.row }
  end
end
|
134
150
|
|
135
151
|
# check that the given component conforms to the master taxonomy
|
136
152
|
def check_component(component)
|
137
153
|
valid = true
|
138
|
-
|
139
154
|
tag = nil
|
140
155
|
|
141
156
|
# see if we can find the component's tag in the taxonomy
|
@@ -208,6 +223,8 @@ class MasterTaxonomy
|
|
208
223
|
# add root tag
|
209
224
|
root_terms = []
|
210
225
|
root_terms << TermStruct.new("", "", "", "", "OpenStudio Type", "Type of OpenStudio Object")
|
226
|
+
root_terms[0].row = 0
|
227
|
+
#root_terms << TermStruct.new()
|
211
228
|
root_tag = TagStruct.new("", "root", "Root of the taxonomy", nil, [], root_terms)
|
212
229
|
@tag_hash[""] = root_tag
|
213
230
|
|
@@ -234,39 +251,89 @@ class MasterTaxonomy
|
|
234
251
|
|
235
252
|
|
236
253
|
# Check that row 2 of the terms worksheet carries the expected column headers.
#
# Columns are compared left to right starting at column 1. Checking stops at
# the first empty header cell, or once every expected header has been compared.
#
# @param terms_worksheet worksheet-like object answering
#   Columns(col).Rows(row).Value (presumably an Excel OLE worksheet -- confirm)
# @return [nil]
# @raise [RuntimeError] when a header cell differs from the expected text
def validate_terms_header(terms_worksheet)
  expected_headers = [
    "First Level",
    "Second Level",
    "Third Level",
    "Level Hierarchy",
    "Term",
    "Abbr",
    "Description",
    "Data Type",
    "Allow Multiple",
    "Enumerations",
    "IP Units Written Out",
    "IP Units Symbol",
    "IP Display Mask",
    "SI Units Written Out",
    "SI Units Symbol",
    "SI Display Mask",
    "Unit Conversion",
    "Default",
    "Min",
    "Max",
    "Source",
    "Review State",
    "General Comments",
    "Requested By / Project",
    "Include in TPE",
    "Required for Adding a New Product",
    "Use in Search Results",
    "Use in Search Facets",
    "Show/Hide Data from Data Users",
    "Additional Instructions for Web Developers",
    "Related Third Party Testing Standards",
    "Additional Guidance to Data Submitters",
    "Additional Guidance to Data Users"
  ]

  col = 1
  loop do
    header = terms_worksheet.Columns(col).Rows(2).Value
    # stop at the first blank header or when the expected list is exhausted
    break if header.nil? || col > expected_headers.size

    wanted = expected_headers[col - 1]
    unless header == wanted
      raise "Header does not match: #{col}: '#{header} <> #{wanted}'"
    end

    col += 1
  end
end
|
257
303
|
|
258
304
|
def parse_term(terms_worksheet, row)
|
259
305
|
|
260
306
|
term = TermStruct.new
|
261
|
-
|
262
|
-
term.
|
263
|
-
term.
|
264
|
-
term.
|
265
|
-
term.
|
266
|
-
|
267
|
-
term.
|
268
|
-
|
269
|
-
|
307
|
+
term.row = row
|
308
|
+
term.first_level = terms_worksheet.Columns(1).Rows(row).Value
|
309
|
+
term.second_level = terms_worksheet.Columns(2).Rows(row).Value
|
310
|
+
term.third_level = terms_worksheet.Columns(3).Rows(row).Value
|
311
|
+
term.level_hierarchy = terms_worksheet.Columns(4).Rows(row).Value
|
312
|
+
term.name = terms_worksheet.Columns(5).Rows(row).Value
|
313
|
+
term.abbr = terms_worksheet.Columns(6).Rows(row).Value
|
314
|
+
term.description = terms_worksheet.Columns(7).Rows(row).Value
|
315
|
+
term.data_type = terms_worksheet.Columns(8).Rows(row).Value
|
316
|
+
term.allow_multiple = terms_worksheet.Columns(9).Rows(row).Value
|
317
|
+
term.enums = terms_worksheet.Columns(10).Rows(row).Value
|
318
|
+
term.ip_written = terms_worksheet.Columns(11).Rows(row).Value
|
319
|
+
term.ip_symbol = terms_worksheet.Columns(12).Rows(row).Value
|
320
|
+
term.ip_mask = terms_worksheet.Columns(13).Rows(row).Value
|
321
|
+
term.si_written = terms_worksheet.Columns(14).Rows(row).Value
|
322
|
+
term.si_symbol = terms_worksheet.Columns(15).Rows(row).Value
|
323
|
+
term.si_mask = terms_worksheet.Columns(16).Rows(row).Value
|
324
|
+
|
325
|
+
#custom TPex Columns
|
326
|
+
term.tp_include = terms_worksheet.Columns(25).Rows(row).Value
|
327
|
+
term.tp_required = terms_worksheet.Columns(26).Rows(row).Value
|
328
|
+
term.tp_use_in_search = terms_worksheet.Columns(27).Rows(row).Value
|
329
|
+
term.tp_use_in_facets = terms_worksheet.Columns(28).Rows(row).Value
|
330
|
+
term.tp_hide_from_data_users = terms_worksheet.Columns(29).Rows(row).Value
|
331
|
+
term.tp_third_party_testing = terms_worksheet.Columns(30).Rows(row).Value
|
332
|
+
term.tp_additional_web_dev_info = terms_worksheet.Columns(31).Rows(row).Value
|
333
|
+
term.tp_additional_data_user_info = terms_worksheet.Columns(32).Rows(row).Value
|
334
|
+
term.tp_additional_data_submitter_info = terms_worksheet.Columns(33).Rows(row).Value
|
335
|
+
|
336
|
+
# trigger to quit parsing the Excel document
|
270
337
|
if term.first_level.nil? or term.first_level.empty?
|
271
338
|
return nil
|
272
339
|
end
|
@@ -327,17 +394,21 @@ class MasterTaxonomy
|
|
327
394
|
end
|
328
395
|
|
329
396
|
# Recursively order a tag's children by their level_hierarchy.
#
# Only child tags are reordered; each tag's terms keep their existing order.
#
# @param tag [TagStruct] root of the subtree to sort; its child_tags member
#   is reassigned to the sorted array
# @return [Array<TagStruct>] the tag's sorted child tags
def sort_tag(tag)
  ordered_children = tag.child_tags.sort do |left, right|
    left.level_hierarchy <=> right.level_hierarchy
  end
  tag.child_tags = ordered_children

  tag.child_tags.each do |child|
    sort_tag(child)
  end
end
|
334
405
|
|
335
406
|
# Walk a tag and all of its descendants, running check_term on every term.
#
# The tag's description is still examined, but the message for a missing
# description is currently switched off, so nothing is printed for the tag.
#
# @param tag [TagStruct] root of the subtree to check
# @return [Array<TagStruct>] the tag's child tags
def check_tag(tag)
  # evaluated for parity with the disabled report below
  _description_missing = tag.description.nil? || tag.description.empty?
  # puts "tag '#{tag.level_hierarchy}' has no description"

  tag.terms.each do |term|
    check_term(term)
  end

  tag.child_tags.each do |child|
    check_tag(child)
  end
end
|
@@ -372,34 +443,92 @@ class MasterTaxonomy
|
|
372
443
|
valid = false
|
373
444
|
end
|
374
445
|
|
375
|
-
|
446
|
+
if !term.data_type.nil?
|
447
|
+
valid_types = ["double", "integer", "enum", "file", "string"]
|
448
|
+
if (term.data_type.downcase != term.data_type) || !valid_types.include?(term.data_type)
|
449
|
+
puts "[ERROR] Term '#{term.name}' does not have a valid data type with '#{term.data_type}'"
|
450
|
+
end
|
451
|
+
|
452
|
+
if term.data_type.downcase == "enum"
|
453
|
+
if term.enums.nil? || term.enums == "" || term.enums.downcase == "no enum found"
|
454
|
+
puts "[ERROR] Term '#{term.name}' does not have valid enumerations"
|
455
|
+
end
|
456
|
+
end
|
457
|
+
end
|
376
458
|
|
377
459
|
return valid
|
378
460
|
end
|
379
461
|
|
380
462
|
# Inspect a single term's description.
#
# The message for a missing or empty description is currently switched off,
# so this method has no visible effect.
#
# @param term [TermStruct] term to inspect
# @return [nil]
def check_term(term)
  # evaluated for parity with the disabled report below
  _description_missing = term.description.nil? || term.description.empty?
  # puts "term '#{term.level_hierarchy}.#{term.name}' has no description"
  nil
end
|
385
467
|
|
468
|
+
# Write every term that applies to a tag (as returned by get_terms) as a
# <term> element.
#
# Members that are nil are omitted. Enumerations are split on "|" and sorted
# only when @sort_alphabetical is set. The tp_* members are written only when
# output_type is 'tpex'.
#
# @param tag [TagStruct] tag whose terms are written
# @param xml XML builder receiving the elements
# @param output_type [String] 'tpex' adds the tp_* elements
# @return [Array<TermStruct>, nil] the terms written, or nil when there are none
def write_terms_to_xml(tag, xml, output_type)
  leading_fields = [:abbr, :description, :data_type, :allow_multiple]
  unit_fields = [:ip_written, :ip_symbol, :ip_mask,
                 :si_written, :si_symbol, :si_mask, :row]
  tpex_fields = [:tp_include, :tp_required, :tp_use_in_search, :tp_use_in_facets,
                 :tp_hide_from_data_users, :tp_third_party_testing,
                 :tp_additional_web_dev_info, :tp_additional_data_user_info,
                 :tp_additional_data_submitter_info]

  terms = get_terms(tag)
  return nil if terms.empty?

  terms.each do |term|
    # emit one element named after the member, skipping nil values
    emit = lambda do |field|
      value = term.send(field)
      xml.tag!(field, value) unless value.nil?
    end

    xml.term do
      xml.name term.name
      leading_fields.each(&emit)

      unless term.enums.nil? || term.enums == ""
        xml.enumerations do
          choices = term.enums.split("|")
          choices = choices.sort if @sort_alphabetical
          choices.each { |choice| xml.enumeration choice }
        end
      end

      unit_fields.each(&emit)
      tpex_fields.each(&emit) if output_type == 'tpex'
    end
  end
end
|
512
|
+
|
386
513
|
# write a tag to xml
|
387
|
-
def write_tag_to_xml(tag, xml)
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
514
|
+
# Write a tag and its descendants as nested <level_N> elements.
#
# Each element holds the tag's name. Terms are written only for tags that
# have no children; child tags are written one level deeper.
#
# @param tag [TagStruct] tag to write
# @param level [Integer] depth used to build the element name "level_N"
# @param xml XML builder receiving the elements
# @param output_type [String] passed through to write_terms_to_xml
def write_tag_to_xml(tag, level, xml, output_type)
  xml.tag!("level_#{level}") do
    xml.name tag.name

    children = tag.child_tags
    write_terms_to_xml(tag, xml, output_type) if children.empty?

    child_level = level + 1
    children.each do |child|
      write_tag_to_xml(child, child_level, xml, output_type)
    end
  end
end
|
404
533
|
|
405
534
|
end
|
Binary file
|