bcl 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,552 +1,533 @@
1
- ######################################################################
2
- # Copyright (c) 2008-2013, Alliance for Sustainable Energy.
3
- # All rights reserved.
4
- #
5
- # This library is free software; you can redistribute it and/or
6
- # modify it under the terms of the GNU Lesser General Public
7
- # License as published by the Free Software Foundation; either
8
- # version 2.1 of the License, or (at your option) any later version.
9
- #
10
- # This library is distributed in the hope that it will be useful,
11
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
- # Lesser General Public License for more details.
14
- #
15
- # You should have received a copy of the GNU Lesser General Public
16
- # License along with this library; if not, write to the Free Software
17
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
- ######################################################################
19
-
20
- $have_win32ole = false
21
-
22
- if RUBY_PLATFORM =~ /mswin|mingw|cygwin/
23
- begin
24
- # apparently this is not a gem
25
- require 'win32ole'
26
- mod = WIN32OLE
27
- $have_win32ole = true
28
- rescue NameError
29
- # do not have win32ole
30
- end
31
- end
32
-
33
- module BCL
34
-
35
- # each TagStruct represents a node in the taxonomy tree
36
- TagStruct = Struct.new(:level_hierarchy, :name, :description, :parent_tag, :child_tags, :terms)
37
-
38
- # each TermStruct represents a row in the master taxonomy
39
- TermStruct = Struct.new(:first_level, :second_level, :third_level, :level_hierarchy, :name, :description,
40
- :abbr, :data_type, :enums, :ip_written, :ip_symbol, :ip_mask, :si_written, :si_symbol, :si_mask,
41
- :unit_conversion, :default_val, :min_val, :max_val, :allow_multiple, :row, :tp_include,
42
- :tp_required, :tp_use_in_search, :tp_use_in_facets, :tp_show_data_to_data_users, :tp_third_party_testing,
43
- :tp_additional_web_dev_info, :tp_additional_data_user_info, :tp_additional_data_submitter_info)
44
-
45
-
46
- # class for parsing, validating, and querying the master taxonomy document
47
- class MasterTaxonomy
48
-
49
- # parse the master taxonomy document
50
- def initialize(xlsx_path = nil, sort_alpha = false)
51
- @sort_alphabetical = sort_alpha
52
-
53
- # hash of level_taxonomy to tag
54
- @tag_hash = Hash.new
55
-
56
- if xlsx_path.nil?
57
- # load from the current taxonomy
58
- path = current_taxonomy_path
59
- puts "Loading current taxonomy from #{path}"
60
- File.open(path, 'r') do |file|
61
- @tag_hash = Marshal.load(file)
62
- end
63
- else
64
- xlsx_path = Pathname.new(xlsx_path).realpath.to_s
65
- puts "Loading taxonomy file #{xlsx_path}"
66
-
67
- # WINDOWS ONLY SECTION BECAUSE THIS USES WIN32OLE
68
- if $have_win32ole
69
- begin
70
- excel = WIN32OLE::new('Excel.Application')
71
- xlsx = excel.Workbooks.Open(xlsx_path)
72
- terms_worksheet = xlsx.Worksheets("Terms")
73
- parse_terms(terms_worksheet)
74
- ensure
75
- # not really saving just pretending so don't get prompted on quit
76
- xlsx.saved = true
77
- excel.Quit
78
- WIN32OLE.ole_free(excel)
79
- excel.ole_free
80
- xlsx=nil
81
- excel=nil
82
- GC.start
83
- end
84
- else # if $have_win32ole
85
- puts "MasterTaxonomy class requires 'win32ole' to parse master taxonomy document."
86
- puts "MasterTaxonomy may also be stored and loaded from JSON if your platform does not support win32ole."
87
- end # if $have_win32ole
88
- end
89
- end
90
-
91
- # save the current taxonomy
92
- def save_as_current_taxonomy(path = nil)
93
- if not path
94
- path = current_taxonomy_path
95
- end
96
- puts "Saving current taxonomy to #{path}"
97
- # this is really not JSON... it is a persisted format of ruby
98
- File.open(path, 'w') do |file|
99
- Marshal.dump(@tag_hash, file)
100
- end
101
- end
102
-
103
- # write taxonomy to xml
104
- def write_xml(path, output_type = 'tpex')
105
-
106
- root_tag = @tag_hash[""]
107
-
108
- if root_tag.nil?
109
- puts "Cannot find root tag"
110
- return false
111
- end
112
-
113
- File.open(path, 'w') do |file|
114
- xml = Builder::XmlMarkup.new(:target => file, :indent=>2)
115
-
116
- #setup the xml file
117
- xml.instruct!(:xml, :version=>"1.0", :encoding=>"UTF-8")
118
- xml.schema("xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance") {
119
- write_tag_to_xml(root_tag, 0, xml, output_type)
120
- }
121
- end
122
-
123
- end
124
-
125
- # get all terms for a given tag
126
- # this includes terms that are inherited from parent levels
127
- # e.g. master_taxonomy.get_terms("Space Use.Lighting.Lamp Ballast")
128
- def get_terms(tag)
129
-
130
- terms = tag.terms
131
-
132
- parent_tag = tag.parent_tag
133
- while not parent_tag.nil?
134
- terms.concat(parent_tag.terms)
135
- parent_tag = parent_tag.parent_tag
136
- end
137
-
138
-
139
- #sort the terms as they come out
140
- result = terms.uniq
141
- if !@sort_alphabetical
142
- result = result.sort {|x, y| x.row <=> y.row}
143
- else
144
- result = result.sort {|x, y| x.name <=> y.name}
145
- end
146
-
147
- return result
148
- end
149
-
150
- # check that the given component is conforms with the master taxonomy
151
- def check_component(component)
152
- valid = true
153
- tag = nil
154
-
155
- # see if we can find the component's tag in the taxonomy
156
- tags = component.tags
157
- if tags.empty?
158
- puts "[Check Component ERROR] Component does not have any tags"
159
- valid = false
160
- elsif tags.size > 1
161
- puts "[Check Component ERROR] Component has multiple tags"
162
- valid = false
163
- else
164
- tag = @tag_hash[tags[0].descriptor]
165
- if not tag
166
- puts "[Check Component ERROR] Cannot find #{tags[0].descriptor} in the master taxonomy"
167
- valid = false
168
- end
169
- end
170
-
171
- if not tag
172
- return false
173
- end
174
-
175
- terms = get_terms(tag)
176
-
177
- # todo: check for all required attributes
178
- terms.each do |term|
179
- #if term.required
180
- # make sure we find attribute
181
- #end
182
- end
183
-
184
- # check that all attributes are allowed
185
- component.attributes.each do |attribute|
186
-
187
- term = nil
188
- terms.each do |t|
189
- if t.name == attribute.name
190
- term = t
191
- break
192
- end
193
- end
194
-
195
- if not term
196
- puts "[Check Component ERROR] Cannot find term for #{attribute.name} in #{tag.level_hierarchy}"
197
- valid = false
198
- next
199
- end
200
-
201
- # todo: validate value, datatype, units
202
-
203
- end
204
-
205
- return valid
206
- end
207
-
208
- private
209
-
210
- def current_taxonomy_path
211
- return File.dirname(__FILE__) + "/current_taxonomy.json"
212
- end
213
-
214
- def parse_terms(terms_worksheet)
215
-
216
- # check header
217
- header_error = validate_terms_header(terms_worksheet)
218
- if header_error
219
- raise "Header Error on Terms Worksheet"
220
- end
221
-
222
- # add root tag
223
- root_terms = []
224
- root_terms << TermStruct.new("", "", "", "", "OpenStudio Type", "Type of OpenStudio Object")
225
- root_terms[0].row = 0
226
- #root_terms << TermStruct.new()
227
- root_tag = TagStruct.new("", "root", "Root of the taxonomy", nil, [], root_terms)
228
- @tag_hash[""] = root_tag
229
-
230
- ### puts "**** tag hash: #{@tag_hash}"
231
-
232
- # find number of rows by parsing until hit empty value in first column
233
- row_num = 3
234
- while true do
235
- term = parse_term(terms_worksheet, row_num)
236
- if term.nil?
237
- break
238
- end
239
-
240
- add_term(term)
241
-
242
- row_num += 1
243
- end
244
-
245
- # sort the tag tree
246
- sort_tag(root_tag)
247
-
248
- # check the tag tree
249
- check_tag(root_tag)
250
-
251
- end
252
-
253
-
254
- def validate_terms_header(terms_worksheet)
255
- test_arr = []
256
- test_arr << {"name"=>"First Level", "strict"=>true}
257
- test_arr << {"name"=>"Second Level", "strict"=>true}
258
- test_arr << {"name"=>"Third Level", "strict"=>true}
259
- test_arr << {"name"=>"Level Hierarchy", "strict"=>true}
260
- test_arr << {"name"=>"Term", "strict"=>true}
261
- test_arr << {"name"=>"Abbr", "strict"=>true}
262
- test_arr << {"name"=>"Description", "strict"=>true}
263
- test_arr << {"name"=>"Data Type", "strict"=>true}
264
- test_arr << {"name"=>"Allow Multiple", "strict"=>true}
265
- test_arr << {"name"=>"Enumerations", "strict"=>true}
266
- test_arr << {"name"=>"IP Units Written Out", "strict"=>true}
267
- test_arr << {"name"=>"IP Units Symbol", "strict"=>true}
268
- test_arr << {"name"=>"IP Display Mask", "strict"=>true}
269
- test_arr << {"name"=>"SI Units Written Out", "strict"=>true}
270
- test_arr << {"name"=>"SI Units Symbol", "strict"=>true}
271
- test_arr << {"name"=>"SI Display Mask", "strict"=>true}
272
- test_arr << {"name"=>"Unit Conversion", "strict"=>true}
273
- test_arr << {"name"=>"Default", "strict"=>true}
274
- test_arr << {"name"=>"Min", "strict"=>true}
275
- test_arr << {"name"=>"Max", "strict"=>true}
276
- test_arr << {"name"=>"Source", "strict"=>true}
277
- test_arr << {"name"=>"Review State", "strict"=>true}
278
- test_arr << {"name"=>"General Comments", "strict"=>true}
279
- test_arr << {"name"=>"Requested By / Project", "strict"=>true}
280
- test_arr << {"name"=>"Include in TPE", "strict"=>false}
281
- test_arr << {"name"=>"Required for Adding a New Product", "strict"=>false}
282
- test_arr << {"name"=>"Use as a Column Header in Search Results", "strict"=>false}
283
- test_arr << {"name"=>"Allow Users to Filter with this Facet", "strict"=>false}
284
- test_arr << {"name"=>"Show Data to Data Users", "strict"=>false}
285
- test_arr << {"name"=>"Additional Instructions for Web Developers", "strict"=>false}
286
- test_arr << {"name"=>"Related Third Party Testing Standards", "strict"=>false}
287
- test_arr << {"name"=>"Additional Guidance to Data Submitters", "strict"=>false}
288
- test_arr << {"name"=>"Additional Guidance to Data Users", "strict"=>false}
289
-
290
-
291
- parse = true
292
- col = 1
293
- while parse
294
- if terms_worksheet.Columns(col).Rows(2).Value.nil? || col > test_arr.size
295
- parse = false
296
- else
297
- if not terms_worksheet.Columns(col).Rows(2).Value == test_arr[col-1]["name"]
298
- if test_arr[col-1]["strict"]
299
- raise "[ERROR] Header does not match: #{col}: '#{terms_worksheet.Columns(col).Rows(2).Value} <> #{test_arr[col-1]["name"]}'"
300
- else
301
- puts "[WARNING] Header does not match: #{col}: '#{terms_worksheet.Columns(col).Rows(2).Value} <> #{test_arr[col-1]["name"]}'"
302
- end
303
- end
304
- end
305
- col += 1
306
- end
307
- end
308
-
309
- def parse_term(terms_worksheet, row)
310
-
311
- term = TermStruct.new
312
- term.row = row
313
- term.first_level = terms_worksheet.Columns(1).Rows(row).Value
314
- term.second_level = terms_worksheet.Columns(2).Rows(row).Value
315
- term.third_level = terms_worksheet.Columns(3).Rows(row).Value
316
- term.level_hierarchy = terms_worksheet.Columns(4).Rows(row).Value
317
- term.name = terms_worksheet.Columns(5).Rows(row).Value
318
- term.abbr = terms_worksheet.Columns(6).Rows(row).Value
319
- term.description = terms_worksheet.Columns(7).Rows(row).Value
320
- term.data_type = terms_worksheet.Columns(8).Rows(row).Value
321
- term.allow_multiple = terms_worksheet.Columns(9).Rows(row).Value
322
- term.enums = terms_worksheet.Columns(10).Rows(row).Value
323
- term.ip_written = terms_worksheet.Columns(11).Rows(row).Value
324
- term.ip_symbol = terms_worksheet.Columns(12).Rows(row).Value
325
- term.ip_mask = terms_worksheet.Columns(13).Rows(row).Value
326
- term.si_written = terms_worksheet.Columns(14).Rows(row).Value
327
- term.si_symbol = terms_worksheet.Columns(15).Rows(row).Value
328
- term.si_mask = terms_worksheet.Columns(16).Rows(row).Value
329
- term.unit_conversion = terms_worksheet.Columns(17).Rows(row).Value
330
- term.default_val = terms_worksheet.Columns(18).Rows(row).Value
331
- term.min_val = terms_worksheet.Columns(19).Rows(row).Value
332
- term.max_val = terms_worksheet.Columns(20).Rows(row).Value
333
-
334
- #custom TPex Columns
335
- term.tp_include = terms_worksheet.Columns(25).Rows(row).Value
336
- term.tp_required = terms_worksheet.Columns(26).Rows(row).Value
337
- term.tp_use_in_search = terms_worksheet.Columns(27).Rows(row).Value
338
- term.tp_use_in_facets = terms_worksheet.Columns(28).Rows(row).Value
339
- term.tp_show_data_to_data_users = terms_worksheet.Columns(29).Rows(row).Value
340
- term.tp_additional_web_dev_info = terms_worksheet.Columns(30).Rows(row).Value
341
- term.tp_third_party_testing = terms_worksheet.Columns(31).Rows(row).Value
342
- term.tp_additional_data_submitter_info = terms_worksheet.Columns(32).Rows(row).Value
343
- term.tp_additional_data_user_info = terms_worksheet.Columns(33).Rows(row).Value
344
-
345
- # trigger to quit parsing the xcel doc
346
- if term.first_level.nil? or term.first_level.empty?
347
- return nil
348
- end
349
-
350
- return term
351
- end
352
-
353
- def add_term(term)
354
-
355
- level_hierarchy = term.level_hierarchy
356
-
357
- # create the tag
358
- tag = @tag_hash[level_hierarchy]
359
-
360
- if tag.nil?
361
- tag = create_tag(level_hierarchy, term.description)
362
- end
363
-
364
- if term.name.nil? or term.name.strip.empty?
365
- # this row is really about the tag
366
- tag.description = term.description
367
-
368
- else
369
- # this row is about a term
370
- if not validate_term(term)
371
- return nil
372
- end
373
-
374
- tag.terms = [] if tag.terms.nil?
375
- tag.terms << term
376
- end
377
- end
378
-
379
- def create_tag(level_hierarchy, tag_description="")
380
-
381
- #puts "create_tag called for #{level_hierarchy}"
382
-
383
- parts = level_hierarchy.split('.')
384
-
385
- name = parts[-1]
386
- parent_level = parts[0..-2].join('.')
387
-
388
- parent_tag = @tag_hash[parent_level]
389
- if parent_tag.nil?
390
- parent_tag = create_tag(parent_level)
391
- end
392
-
393
- description = tag_description
394
- child_tags = []
395
- terms = []
396
- tag = TagStruct.new(level_hierarchy, name, description, parent_tag, child_tags, terms)
397
-
398
- parent_tag.child_tags << tag
399
-
400
- @tag_hash[level_hierarchy] = tag
401
-
402
- return tag
403
- end
404
-
405
- def sort_tag(tag)
406
- #tag.terms = tag.terms.sort {|x, y| x.level_hierarchy <=> y.level_hierarchy}
407
- tag.child_tags = tag.child_tags.sort {|x, y| x.level_hierarchy <=> y.level_hierarchy}
408
- tag.child_tags.each {|child_tag| sort_tag(child_tag) }
409
-
410
- #tag.terms = tag.terms.sort {|x, y| x.name <=> y.name}
411
- #tag.child_tags = tag.child_tags.sort {|x, y| x.name <=> y.name}
412
- #tag.child_tags.each {|child_tag| sort_tag(child_tag) }
413
- end
414
-
415
- def check_tag(tag)
416
-
417
- if tag.description.nil? or tag.description.empty?
418
- puts "[check_tag] tag '#{tag.level_hierarchy}' has no description"
419
- end
420
-
421
- tag.terms.each {|term| check_term(term) }
422
- tag.child_tags.each {|child_tag| check_tag(child_tag) }
423
- end
424
-
425
- def validate_term(term)
426
- valid = true
427
-
428
- parts = term.level_hierarchy.split('.')
429
-
430
- if parts.empty?
431
- puts "Hierarchy parts empty, #{term.level_hierarchy}"
432
- valid = false
433
- end
434
-
435
- if parts.size >= 1 and not term.first_level == parts[0]
436
- puts "First level '#{term.first_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
437
- valid = false
438
- end
439
-
440
- if parts.size >= 2 and not term.second_level == parts[1]
441
- puts "Second level '#{term.second_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
442
- valid = false
443
- end
444
-
445
- if parts.size >= 3 and not term.third_level == parts[2]
446
- puts "Third level '#{term.third_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
447
- valid = false
448
- end
449
-
450
- if parts.size > 3
451
- puts "Hierarchy cannot have more than 3 parts '#{term.level_hierarchy}', skipping term"
452
- valid = false
453
- end
454
-
455
- if !term.data_type.nil?
456
- valid_types = ["double", "integer", "enum", "file", "string", "autocomplete"]
457
- if (term.data_type.downcase != term.data_type) || !valid_types.include?(term.data_type)
458
- puts "[ERROR] Term '#{term.name}' does not have a valid data type with '#{term.data_type}'"
459
- end
460
-
461
- if term.data_type.downcase == "enum"
462
- if term.enums.nil? || term.enums == "" || term.enums.downcase == "no enum found"
463
- puts "[ERROR] Term '#{term.name}' does not have valid enumerations"
464
- end
465
- end
466
- end
467
-
468
- return valid
469
- end
470
-
471
- def check_term(term)
472
- if term.description.nil? or term.description.empty?
473
- #puts "[check_term] term '#{term.level_hierarchy}.#{term.name}' has no description"
474
- end
475
- end
476
-
477
- # write term to xml
478
- def write_terms_to_xml(tag, xml, output_type)
479
- terms = get_terms(tag)
480
- if terms.size > 0
481
- terms.each do |term|
482
- xml.term {
483
- xml.name term.name
484
- xml.abbr term.abbr if !term.abbr.nil?
485
- xml.description term.description if !term.description.nil?
486
- xml.data_type term.data_type if !term.data_type.nil?
487
- xml.allow_multiple term.allow_multiple if !term.allow_multiple.nil?
488
-
489
- if !term.enums.nil? && term.enums != ""
490
- xml.enumerations {
491
- out = term.enums.split("|")
492
- out.sort! if @sort_alphabetical
493
- out.each do |enum|
494
- xml.enumeration enum
495
- end
496
- }
497
- end
498
- xml.ip_written term.ip_written if !term.ip_written.nil?
499
- xml.ip_symbol term.ip_symbol if !term.ip_symbol.nil?
500
- xml.ip_mask term.ip_mask if !term.ip_mask.nil?
501
- xml.si_written term.si_written if !term.si_written.nil?
502
- xml.si_symbol term.si_symbol if !term.si_symbol.nil?
503
- xml.si_mask term.si_mask if !term.si_mask.nil?
504
- xml.row term.row if !term.row.nil?
505
- xml.unit_conversion term.unit_conversion if !term.unit_conversion.nil?
506
- xml.default_val term.default_val if !term.default_val.nil?
507
- xml.min_val term.min_val if !term.min_val.nil?
508
- xml.max_val term.max_val if !term.max_val.nil?
509
-
510
- if output_type == 'tpex'
511
- xml.tp_include term.tp_include if !term.tp_include.nil?
512
- xml.tp_required term.tp_required if !term.tp_required.nil?
513
- xml.tp_use_in_search term.tp_use_in_search if !term.tp_use_in_search.nil?
514
- xml.tp_use_in_facets term.tp_use_in_facets if !term.tp_use_in_facets.nil?
515
- xml.tp_show_data_to_data_users term.tp_show_data_to_data_users if !term.tp_show_data_to_data_users.nil?
516
- xml.tp_third_party_testing term.tp_third_party_testing if !term.tp_third_party_testing.nil?
517
- xml.tp_additional_web_dev_info term.tp_additional_web_dev_info if !term.tp_additional_web_dev_info.nil?
518
- xml.tp_additional_data_user_info term.tp_additional_data_user_info if !term.tp_additional_data_user_info.nil?
519
- xml.tp_additional_data_submitter_info term.tp_additional_data_submitter_info if !term.tp_additional_data_submitter_info.nil?
520
- end
521
- }
522
- end
523
- end
524
- end
525
-
526
- # write a tag to xml
527
- def write_tag_to_xml(tag, level, xml, output_type)
528
- level_string = "level_#{level}"
529
- xml.tag!(level_string) {
530
- s_temp = tag.name
531
- xml.name s_temp
532
- xml.description tag.description
533
-
534
- level += 1
535
-
536
- if tag.child_tags.size == 0
537
- write_terms_to_xml(tag, xml, output_type)
538
- end
539
-
540
- child_tags = tag.child_tags
541
- child_tags.each do |child_tag|
542
- write_tag_to_xml(child_tag, level, xml, output_type)
543
- end
544
-
545
- }
546
- end
547
-
548
- end
549
-
550
- end # module BCL
551
-
552
-
1
+ ######################################################################
2
+ # Copyright (c) 2008-2014, Alliance for Sustainable Energy.
3
+ # All rights reserved.
4
+ #
5
+ # This library is free software; you can redistribute it and/or
6
+ # modify it under the terms of the GNU Lesser General Public
7
+ # License as published by the Free Software Foundation; either
8
+ # version 2.1 of the License, or (at your option) any later version.
9
+ #
10
+ # This library is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ # Lesser General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU Lesser General Public
16
+ # License along with this library; if not, write to the Free Software
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ######################################################################
19
+
20
+ $have_win32ole = false
21
+
22
+ if RUBY_PLATFORM =~ /mswin|mingw|cygwin/
23
+ begin
24
+ # apparently this is not a gem
25
+ require 'win32ole'
26
+ mod = WIN32OLE
27
+ $have_win32ole = true
28
+ rescue NameError
29
+ # do not have win32ole
30
+ end
31
+ end
32
+
33
+ module BCL
34
+ # each TagStruct represents a node in the taxonomy tree
35
+ TagStruct = Struct.new(:level_hierarchy, :name, :description, :parent_tag, :child_tags, :terms)
36
+
37
+ # each TermStruct represents a row in the master taxonomy
38
+ TermStruct = Struct.new(:first_level, :second_level, :third_level, :level_hierarchy, :name, :description,
39
+ :abbr, :data_type, :enums, :ip_written, :ip_symbol, :ip_mask, :si_written, :si_symbol, :si_mask,
40
+ :unit_conversion, :default_val, :min_val, :max_val, :allow_multiple, :row, :tp_include,
41
+ :tp_required, :tp_use_in_search, :tp_use_in_facets, :tp_show_data_to_data_users, :tp_third_party_testing,
42
+ :tp_additional_web_dev_info, :tp_additional_data_user_info, :tp_additional_data_submitter_info)
43
+
44
+ # class for parsing, validating, and querying the master taxonomy document
45
+ class MasterTaxonomy
46
+ # parse the master taxonomy document
47
+ def initialize(xlsx_path = nil, sort_alpha = false)
48
+ @sort_alphabetical = sort_alpha
49
+
50
+ # hash of level_taxonomy to tag
51
+ @tag_hash = {}
52
+
53
+ if xlsx_path.nil?
54
+ # load from the current taxonomy
55
+ path = current_taxonomy_path
56
+ puts "Loading current taxonomy from #{path}"
57
+ File.open(path, 'r') do |file|
58
+ @tag_hash = Marshal.load(file)
59
+ end
60
+ else
61
+ xlsx_path = Pathname.new(xlsx_path).realpath.to_s
62
+ puts "Loading taxonomy file #{xlsx_path}"
63
+
64
+ # WINDOWS ONLY SECTION BECAUSE THIS USES WIN32OLE
65
+ if $have_win32ole
66
+ begin
67
+ excel = WIN32OLE.new('Excel.Application')
68
+ xlsx = excel.Workbooks.Open(xlsx_path)
69
+ terms_worksheet = xlsx.Worksheets('Terms')
70
+ parse_terms(terms_worksheet)
71
+ ensure
72
+ # not really saving just pretending so don't get prompted on quit
73
+ xlsx.saved = true
74
+ excel.Quit
75
+ WIN32OLE.ole_free(excel)
76
+ excel.ole_free
77
+ xlsx = nil
78
+ excel = nil
79
+ GC.start
80
+ end
81
+ else # if $have_win32ole
82
+ puts "MasterTaxonomy class requires 'win32ole' to parse master taxonomy document."
83
+ puts 'MasterTaxonomy may also be stored and loaded from JSON if your platform does not support win32ole.'
84
+ end # if $have_win32ole
85
+ end
86
+ end
87
+
88
+ # save the current taxonomy
89
+ def save_as_current_taxonomy(path = nil)
90
+ unless path
91
+ path = current_taxonomy_path
92
+ end
93
+ puts "Saving current taxonomy to #{path}"
94
+ # this is really not JSON... it is a persisted format of ruby
95
+ File.open(path, 'w') do |file|
96
+ Marshal.dump(@tag_hash, file)
97
+ end
98
+ end
99
+
100
+ # write taxonomy to xml
101
+ def write_xml(path, output_type = 'tpex')
102
+ root_tag = @tag_hash['']
103
+
104
+ if root_tag.nil?
105
+ puts 'Cannot find root tag'
106
+ return false
107
+ end
108
+
109
+ File.open(path, 'w') do |file|
110
+ xml = Builder::XmlMarkup.new(target: file, indent: 2)
111
+
112
+ # setup the xml file
113
+ xml.instruct!(:xml, version: '1.0', encoding: 'UTF-8')
114
+ xml.schema('xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance') {
115
+ write_tag_to_xml(root_tag, 0, xml, output_type)
116
+ }
117
+ end
118
+ end
119
+
120
+ # get all terms for a given tag
121
+ # this includes terms that are inherited from parent levels
122
+ # e.g. master_taxonomy.get_terms("Space Use.Lighting.Lamp Ballast")
123
+ def get_terms(tag)
124
+ terms = tag.terms
125
+
126
+ parent_tag = tag.parent_tag
127
+ until parent_tag.nil?
128
+ terms.concat(parent_tag.terms)
129
+ parent_tag = parent_tag.parent_tag
130
+ end
131
+
132
+ # sort the terms as they come out
133
+ result = terms.uniq
134
+ if !@sort_alphabetical
135
+ result = result.sort { |x, y| x.row <=> y.row }
136
+ else
137
+ result = result.sort { |x, y| x.name <=> y.name }
138
+ end
139
+
140
+ result
141
+ end
142
+
143
+ # check that the given component is conforms with the master taxonomy
144
+ def check_component(component)
145
+ valid = true
146
+ tag = nil
147
+
148
+ # see if we can find the component's tag in the taxonomy
149
+ tags = component.tags
150
+ if tags.empty?
151
+ puts '[Check Component ERROR] Component does not have any tags'
152
+ valid = false
153
+ elsif tags.size > 1
154
+ puts '[Check Component ERROR] Component has multiple tags'
155
+ valid = false
156
+ else
157
+ tag = @tag_hash[tags[0].descriptor]
158
+ unless tag
159
+ puts "[Check Component ERROR] Cannot find #{tags[0].descriptor} in the master taxonomy"
160
+ valid = false
161
+ end
162
+ end
163
+
164
+ unless tag
165
+ return false
166
+ end
167
+
168
+ terms = get_terms(tag)
169
+
170
+ # todo: check for all required attributes
171
+ terms.each do |_term|
172
+ # if term.required
173
+ # make sure we find attribute
174
+ # end
175
+ end
176
+
177
+ # check that all attributes are allowed
178
+ component.attributes.each do |attribute|
179
+
180
+ term = nil
181
+ terms.each do |t|
182
+ if t.name == attribute.name
183
+ term = t
184
+ break
185
+ end
186
+ end
187
+
188
+ unless term
189
+ puts "[Check Component ERROR] Cannot find term for #{attribute.name} in #{tag.level_hierarchy}"
190
+ valid = false
191
+ next
192
+ end
193
+
194
+ # todo: validate value, datatype, units
195
+
196
+ end
197
+
198
+ valid
199
+ end
200
+
201
+ private
202
+
203
+ def current_taxonomy_path
204
+ File.dirname(__FILE__) + '/current_taxonomy.json'
205
+ end
206
+
207
+ def parse_terms(terms_worksheet)
208
+ # check header
209
+ header_error = validate_terms_header(terms_worksheet)
210
+ if header_error
211
+ fail 'Header Error on Terms Worksheet'
212
+ end
213
+
214
+ # add root tag
215
+ root_terms = []
216
+ root_terms << TermStruct.new('', '', '', '', 'OpenStudio Type', 'Type of OpenStudio Object')
217
+ root_terms[0].row = 0
218
+ # root_terms << TermStruct.new()
219
+ root_tag = TagStruct.new('', 'root', 'Root of the taxonomy', nil, [], root_terms)
220
+ @tag_hash[''] = root_tag
221
+
222
+ ### puts "**** tag hash: #{@tag_hash}"
223
+
224
+ # find number of rows by parsing until hit empty value in first column
225
+ row_num = 3
226
+ while true
227
+ term = parse_term(terms_worksheet, row_num)
228
+ if term.nil?
229
+ break
230
+ end
231
+
232
+ add_term(term)
233
+
234
+ row_num += 1
235
+ end
236
+
237
+ # sort the tag tree
238
+ sort_tag(root_tag)
239
+
240
+ # check the tag tree
241
+ check_tag(root_tag)
242
+ end
243
+
244
+ def validate_terms_header(terms_worksheet)
245
+ test_arr = []
246
+ test_arr << { 'name' => 'First Level', 'strict' => true }
247
+ test_arr << { 'name' => 'Second Level', 'strict' => true }
248
+ test_arr << { 'name' => 'Third Level', 'strict' => true }
249
+ test_arr << { 'name' => 'Level Hierarchy', 'strict' => true }
250
+ test_arr << { 'name' => 'Term', 'strict' => true }
251
+ test_arr << { 'name' => 'Abbr', 'strict' => true }
252
+ test_arr << { 'name' => 'Description', 'strict' => true }
253
+ test_arr << { 'name' => 'Data Type', 'strict' => true }
254
+ test_arr << { 'name' => 'Allow Multiple', 'strict' => true }
255
+ test_arr << { 'name' => 'Enumerations', 'strict' => true }
256
+ test_arr << { 'name' => 'IP Units Written Out', 'strict' => true }
257
+ test_arr << { 'name' => 'IP Units Symbol', 'strict' => true }
258
+ test_arr << { 'name' => 'IP Display Mask', 'strict' => true }
259
+ test_arr << { 'name' => 'SI Units Written Out', 'strict' => true }
260
+ test_arr << { 'name' => 'SI Units Symbol', 'strict' => true }
261
+ test_arr << { 'name' => 'SI Display Mask', 'strict' => true }
262
+ test_arr << { 'name' => 'Unit Conversion', 'strict' => true }
263
+ test_arr << { 'name' => 'Default', 'strict' => true }
264
+ test_arr << { 'name' => 'Min', 'strict' => true }
265
+ test_arr << { 'name' => 'Max', 'strict' => true }
266
+ test_arr << { 'name' => 'Source', 'strict' => true }
267
+ test_arr << { 'name' => 'Review State', 'strict' => true }
268
+ test_arr << { 'name' => 'General Comments', 'strict' => true }
269
+ test_arr << { 'name' => 'Requested By / Project', 'strict' => true }
270
+ test_arr << { 'name' => 'Include in TPE', 'strict' => false }
271
+ test_arr << { 'name' => 'Required for Adding a New Product', 'strict' => false }
272
+ test_arr << { 'name' => 'Use as a Column Header in Search Results', 'strict' => false }
273
+ test_arr << { 'name' => 'Allow Users to Filter with this Facet', 'strict' => false }
274
+ test_arr << { 'name' => 'Show Data to Data Users', 'strict' => false }
275
+ test_arr << { 'name' => 'Additional Instructions for Web Developers', 'strict' => false }
276
+ test_arr << { 'name' => 'Related Third Party Testing Standards', 'strict' => false }
277
+ test_arr << { 'name' => 'Additional Guidance to Data Submitters', 'strict' => false }
278
+ test_arr << { 'name' => 'Additional Guidance to Data Users', 'strict' => false }
279
+
280
+ parse = true
281
+ col = 1
282
+ while parse
283
+ if terms_worksheet.Columns(col).Rows(2).Value.nil? || col > test_arr.size
284
+ parse = false
285
+ else
286
+ unless terms_worksheet.Columns(col).Rows(2).Value == test_arr[col - 1]['name']
287
+ if test_arr[col - 1]['strict']
288
+ fail "[ERROR] Header does not match: #{col}: '#{terms_worksheet.Columns(col).Rows(2).Value} <> #{test_arr[col - 1]['name']}'"
289
+ else
290
+ puts "[WARNING] Header does not match: #{col}: '#{terms_worksheet.Columns(col).Rows(2).Value} <> #{test_arr[col - 1]['name']}'"
291
+ end
292
+ end
293
+ end
294
+ col += 1
295
+ end
296
+ end
297
+
298
# Reads one row of the terms worksheet into a TermStruct.
#
# terms_worksheet - worksheet object answering Columns(col).Rows(row).Value
# row             - row number to read; also stored on the term as `row`
#
# Returns the populated TermStruct, or nil when the first-level cell of the
# row is nil or an empty string (the trigger to stop parsing the Excel doc).
def parse_term(terms_worksheet, row)
  read_cell = lambda { |col| terms_worksheet.Columns(col).Rows(row).Value }

  # Check the stop trigger before touching any other cell, so the terminating
  # row costs a single worksheet read. to_s keeps a non-String cell value
  # (e.g. a number) from raising NoMethodError on empty?.
  first_level = read_cell.call(1)
  return nil if first_level.nil? || first_level.to_s.empty?

  # TermStruct member => worksheet column
  column_for = {
    second_level: 2,
    third_level: 3,
    level_hierarchy: 4,
    name: 5,
    abbr: 6,
    description: 7,
    data_type: 8,
    allow_multiple: 9,
    enums: 10,
    ip_written: 11,
    ip_symbol: 12,
    ip_mask: 13,
    si_written: 14,
    si_symbol: 15,
    si_mask: 16,
    unit_conversion: 17,
    default_val: 18,
    min_val: 19,
    max_val: 20,
    # custom TPex columns
    tp_include: 25,
    tp_required: 26,
    tp_use_in_search: 27,
    tp_use_in_facets: 28,
    tp_show_data_to_data_users: 29,
    tp_additional_web_dev_info: 30,
    tp_third_party_testing: 31,
    tp_additional_data_submitter_info: 32,
    tp_additional_data_user_info: 33
  }

  term = TermStruct.new
  term.row = row
  term.first_level = first_level
  column_for.each { |member, col| term[member] = read_cell.call(col) }
  term
end
340
+
341
# Files a parsed spreadsheet row under the tag named by its level hierarchy,
# creating that tag (via create_tag) when @tag_hash does not hold it yet.
#
# A row whose name is nil or blank describes the tag itself: its description
# is copied onto the tag. Any other row is a term and is appended to the
# tag's terms once validate_term accepts it.
#
# Returns nil when the term fails validation.
def add_term(term)
  hierarchy = term.level_hierarchy

  tag = @tag_hash[hierarchy]
  tag = create_tag(hierarchy, term.description) if tag.nil?

  # this row is really about the tag
  describes_tag = term.name.nil? || term.name.strip.empty?
  return (tag.description = term.description) if describes_tag

  # this row is about a term
  return nil unless validate_term(term)

  tag.terms = [] if tag.terms.nil?
  tag.terms << term
end
365
+
366
# Builds the TagStruct for a dot-separated level hierarchy and registers it
# in @tag_hash, recursively creating any ancestor tag that is not registered
# yet (ancestors created this way get no description argument).
#
# level_hierarchy - dot-separated path such as 'A.B.C'; the last segment
#                   becomes the tag name, the rest is the parent's key
# tag_description - description for the new tag (defaults to '')
#
# Returns the new tag, already appended to its parent's child_tags.
def create_tag(level_hierarchy, tag_description = '')
  *ancestor_names, name = level_hierarchy.split('.')
  parent_level = ancestor_names.join('.')

  parent_tag = @tag_hash[parent_level]
  parent_tag = create_tag(parent_level) if parent_tag.nil?

  tag = TagStruct.new(level_hierarchy, name, tag_description, parent_tag, [], [])
  parent_tag.child_tags << tag
  @tag_hash[level_hierarchy] = tag

  tag
end
390
+
391
# Recursively orders the child tags of +tag+ by their level_hierarchy.
# Terms are not reordered here.
#
# Returns the (newly sorted) child_tags array of +tag+.
def sort_tag(tag)
  tag.child_tags = tag.child_tags.sort_by(&:level_hierarchy)
  tag.child_tags.each { |child_tag| sort_tag(child_tag) }
end
400
+
401
# Recursively reports tags that have no description and runs check_term on
# every term attached to each tag.
#
# A message is printed for each tag whose description is nil or empty.
# Returns the child_tags array of +tag+.
def check_tag(tag)
  if tag.description.nil? || tag.description.empty?
    puts "[check_tag] tag '#{tag.level_hierarchy}' has no description"
  end

  # add_term guards against a tag whose terms is nil, so tolerate it here too
  # instead of raising NoMethodError on nil.
  (tag.terms || []).each { |term| check_term(term) }
  tag.child_tags.each { |child_tag| check_tag(child_tag) }
end
409
+
410
# Checks a term row for internal consistency, printing a message for each
# problem found.
#
# The term is invalid (false is returned) when its level hierarchy is empty,
# has more than three dot-separated parts, or when the first/second/third
# level columns disagree with the matching part of the hierarchy.
#
# Data type and enumeration problems are reported as [ERROR] lines but do not
# change the return value.
#
# Returns true when the term may be kept, false when it should be skipped.
def validate_term(term)
  valid = true

  hierarchy = term.level_hierarchy
  # to_s so a missing hierarchy is reported as empty rather than raising
  parts = hierarchy.to_s.split('.')

  if parts.empty?
    puts "Hierarchy parts empty, #{hierarchy}"
    valid = false
  end

  # Each level column must equal the hierarchy part at the same position.
  # (Written as a plain comparison: `!a == b` negates `a` first and so never
  # detects a mismatch.)
  level_columns = [
    ['First', term.first_level],
    ['Second', term.second_level],
    ['Third', term.third_level]
  ]
  level_columns.each_with_index do |(label, value), index|
    next if index >= parts.size || value == parts[index]

    puts "#{label} level '#{value}' does not match level hierarchy '#{hierarchy}', skipping term"
    valid = false
  end

  if parts.size > 3
    puts "Hierarchy cannot have more than 3 parts '#{hierarchy}', skipping term"
    valid = false
  end

  data_type = term.data_type
  unless data_type.nil?
    # every accepted type is lowercase, so membership alone also rejects
    # mixed-case spellings
    valid_types = %w(double integer enum file string autocomplete)
    unless valid_types.include?(data_type)
      puts "[ERROR] Term '#{term.name}' does not have a valid data type with '#{data_type}'"
    end

    if data_type.to_s.downcase == 'enum'
      enums = term.enums.to_s
      if enums.empty? || enums.downcase == 'no enum found'
        puts "[ERROR] Term '#{term.name}' does not have valid enumerations"
      end
    end
  end

  valid
end
455
+
456
# Inspects a term for a missing (nil or empty) description. The warning that
# would report it is currently commented out, so nothing is printed and the
# method always returns nil.
def check_term(term)
  description = term.description
  return nil unless description.nil? || description.empty?

  # puts "[check_term] term '#{term.level_hierarchy}.#{term.name}' has no description"
  nil
end
461
+
462
+ # write term to xml
463
# Writes one <term> element per term returned by get_terms(tag).
#
# tag         - tag whose terms are written
# xml         - XML builder receiving the elements
# output_type - when equal to 'tpex', the tp_* fields are written as well
#
# The name is always written; every other field is written only when it is
# not nil. Enumerations are split on '|' (and sorted when @sort_alphabetical
# is set) and written between allow_multiple and ip_written.
def write_terms_to_xml(tag, xml, output_type)
  # Field lists are in output order.
  fields_before_enums = [:abbr, :description, :data_type, :allow_multiple]
  fields_after_enums = [:ip_written, :ip_symbol, :ip_mask, :si_written, :si_symbol, :si_mask, :row,
                        :unit_conversion, :default_val, :min_val, :max_val]
  tpex_fields = [:tp_include, :tp_required, :tp_use_in_search, :tp_use_in_facets,
                 :tp_show_data_to_data_users, :tp_third_party_testing, :tp_additional_web_dev_info,
                 :tp_additional_data_user_info, :tp_additional_data_submitter_info]

  # emits <field>value</field> for each listed field that is set on the term
  write_fields = lambda do |term, fields|
    fields.each do |field|
      value = term.public_send(field)
      xml.tag!(field, value) unless value.nil?
    end
  end

  get_terms(tag).each do |term|
    xml.term do
      xml.name term.name
      write_fields.call(term, fields_before_enums)

      unless term.enums.nil? || term.enums == ''
        xml.enumerations do
          enum_values = term.enums.split('|')
          enum_values.sort! if @sort_alphabetical
          enum_values.each { |enum_value| xml.enumeration enum_value }
        end
      end

      write_fields.call(term, fields_after_enums)
      write_fields.call(term, tpex_fields) if output_type == 'tpex'
    end
  end
end
510
+
511
+ # write a tag to xml
512
# Writes +tag+ as a <level_N> element holding its name and description, then
# recurses into its child tags one level deeper.
#
# tag         - tag to write
# level       - nesting depth used to build the element name ("level_#{level}")
# xml         - XML builder receiving the elements
# output_type - passed through to write_terms_to_xml
#
# Terms are written only for tags that have no child tags.
def write_tag_to_xml(tag, level, xml, output_type)
  xml.tag!("level_#{level}") do
    xml.name tag.name
    xml.description tag.description

    children = tag.child_tags
    write_terms_to_xml(tag, xml, output_type) if children.empty?

    children.each do |child_tag|
      write_tag_to_xml(child_tag, level + 1, xml, output_type)
    end
  end
end
532
+ end
533
+ end # module BCL