bcl 0.5.3 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,552 +1,533 @@
1
- ######################################################################
2
- # Copyright (c) 2008-2013, Alliance for Sustainable Energy.
3
- # All rights reserved.
4
- #
5
- # This library is free software; you can redistribute it and/or
6
- # modify it under the terms of the GNU Lesser General Public
7
- # License as published by the Free Software Foundation; either
8
- # version 2.1 of the License, or (at your option) any later version.
9
- #
10
- # This library is distributed in the hope that it will be useful,
11
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
- # Lesser General Public License for more details.
14
- #
15
- # You should have received a copy of the GNU Lesser General Public
16
- # License along with this library; if not, write to the Free Software
17
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
- ######################################################################
19
-
20
- $have_win32ole = false
21
-
22
- if RUBY_PLATFORM =~ /mswin|mingw|cygwin/
23
- begin
24
- # apparently this is not a gem
25
- require 'win32ole'
26
- mod = WIN32OLE
27
- $have_win32ole = true
28
- rescue NameError
29
- # do not have win32ole
30
- end
31
- end
32
-
33
- module BCL
34
-
35
- # each TagStruct represents a node in the taxonomy tree
36
- TagStruct = Struct.new(:level_hierarchy, :name, :description, :parent_tag, :child_tags, :terms)
37
-
38
- # each TermStruct represents a row in the master taxonomy
39
- TermStruct = Struct.new(:first_level, :second_level, :third_level, :level_hierarchy, :name, :description,
40
- :abbr, :data_type, :enums, :ip_written, :ip_symbol, :ip_mask, :si_written, :si_symbol, :si_mask,
41
- :unit_conversion, :default_val, :min_val, :max_val, :allow_multiple, :row, :tp_include,
42
- :tp_required, :tp_use_in_search, :tp_use_in_facets, :tp_show_data_to_data_users, :tp_third_party_testing,
43
- :tp_additional_web_dev_info, :tp_additional_data_user_info, :tp_additional_data_submitter_info)
44
-
45
-
46
- # class for parsing, validating, and querying the master taxonomy document
47
- class MasterTaxonomy
48
-
49
- # parse the master taxonomy document
50
- def initialize(xlsx_path = nil, sort_alpha = false)
51
- @sort_alphabetical = sort_alpha
52
-
53
- # hash of level_taxonomy to tag
54
- @tag_hash = Hash.new
55
-
56
- if xlsx_path.nil?
57
- # load from the current taxonomy
58
- path = current_taxonomy_path
59
- puts "Loading current taxonomy from #{path}"
60
- File.open(path, 'r') do |file|
61
- @tag_hash = Marshal.load(file)
62
- end
63
- else
64
- xlsx_path = Pathname.new(xlsx_path).realpath.to_s
65
- puts "Loading taxonomy file #{xlsx_path}"
66
-
67
- # WINDOWS ONLY SECTION BECAUSE THIS USES WIN32OLE
68
- if $have_win32ole
69
- begin
70
- excel = WIN32OLE::new('Excel.Application')
71
- xlsx = excel.Workbooks.Open(xlsx_path)
72
- terms_worksheet = xlsx.Worksheets("Terms")
73
- parse_terms(terms_worksheet)
74
- ensure
75
- # not really saving just pretending so don't get prompted on quit
76
- xlsx.saved = true
77
- excel.Quit
78
- WIN32OLE.ole_free(excel)
79
- excel.ole_free
80
- xlsx=nil
81
- excel=nil
82
- GC.start
83
- end
84
- else # if $have_win32ole
85
- puts "MasterTaxonomy class requires 'win32ole' to parse master taxonomy document."
86
- puts "MasterTaxonomy may also be stored and loaded from JSON if your platform does not support win32ole."
87
- end # if $have_win32ole
88
- end
89
- end
90
-
91
- # save the current taxonomy
92
- def save_as_current_taxonomy(path = nil)
93
- if not path
94
- path = current_taxonomy_path
95
- end
96
- puts "Saving current taxonomy to #{path}"
97
- # this is really not JSON... it is a persisted format of ruby
98
- File.open(path, 'w') do |file|
99
- Marshal.dump(@tag_hash, file)
100
- end
101
- end
102
-
103
- # write taxonomy to xml
104
- def write_xml(path, output_type = 'tpex')
105
-
106
- root_tag = @tag_hash[""]
107
-
108
- if root_tag.nil?
109
- puts "Cannot find root tag"
110
- return false
111
- end
112
-
113
- File.open(path, 'w') do |file|
114
- xml = Builder::XmlMarkup.new(:target => file, :indent=>2)
115
-
116
- #setup the xml file
117
- xml.instruct!(:xml, :version=>"1.0", :encoding=>"UTF-8")
118
- xml.schema("xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance") {
119
- write_tag_to_xml(root_tag, 0, xml, output_type)
120
- }
121
- end
122
-
123
- end
124
-
125
- # get all terms for a given tag
126
- # this includes terms that are inherited from parent levels
127
- # e.g. master_taxonomy.get_terms("Space Use.Lighting.Lamp Ballast")
128
- def get_terms(tag)
129
-
130
- terms = tag.terms
131
-
132
- parent_tag = tag.parent_tag
133
- while not parent_tag.nil?
134
- terms.concat(parent_tag.terms)
135
- parent_tag = parent_tag.parent_tag
136
- end
137
-
138
-
139
- #sort the terms as they come out
140
- result = terms.uniq
141
- if !@sort_alphabetical
142
- result = result.sort {|x, y| x.row <=> y.row}
143
- else
144
- result = result.sort {|x, y| x.name <=> y.name}
145
- end
146
-
147
- return result
148
- end
149
-
150
- # check that the given component is conforms with the master taxonomy
151
- def check_component(component)
152
- valid = true
153
- tag = nil
154
-
155
- # see if we can find the component's tag in the taxonomy
156
- tags = component.tags
157
- if tags.empty?
158
- puts "[Check Component ERROR] Component does not have any tags"
159
- valid = false
160
- elsif tags.size > 1
161
- puts "[Check Component ERROR] Component has multiple tags"
162
- valid = false
163
- else
164
- tag = @tag_hash[tags[0].descriptor]
165
- if not tag
166
- puts "[Check Component ERROR] Cannot find #{tags[0].descriptor} in the master taxonomy"
167
- valid = false
168
- end
169
- end
170
-
171
- if not tag
172
- return false
173
- end
174
-
175
- terms = get_terms(tag)
176
-
177
- # todo: check for all required attributes
178
- terms.each do |term|
179
- #if term.required
180
- # make sure we find attribute
181
- #end
182
- end
183
-
184
- # check that all attributes are allowed
185
- component.attributes.each do |attribute|
186
-
187
- term = nil
188
- terms.each do |t|
189
- if t.name == attribute.name
190
- term = t
191
- break
192
- end
193
- end
194
-
195
- if not term
196
- puts "[Check Component ERROR] Cannot find term for #{attribute.name} in #{tag.level_hierarchy}"
197
- valid = false
198
- next
199
- end
200
-
201
- # todo: validate value, datatype, units
202
-
203
- end
204
-
205
- return valid
206
- end
207
-
208
- private
209
-
210
- def current_taxonomy_path
211
- return File.dirname(__FILE__) + "/current_taxonomy.json"
212
- end
213
-
214
- def parse_terms(terms_worksheet)
215
-
216
- # check header
217
- header_error = validate_terms_header(terms_worksheet)
218
- if header_error
219
- raise "Header Error on Terms Worksheet"
220
- end
221
-
222
- # add root tag
223
- root_terms = []
224
- root_terms << TermStruct.new("", "", "", "", "OpenStudio Type", "Type of OpenStudio Object")
225
- root_terms[0].row = 0
226
- #root_terms << TermStruct.new()
227
- root_tag = TagStruct.new("", "root", "Root of the taxonomy", nil, [], root_terms)
228
- @tag_hash[""] = root_tag
229
-
230
- ### puts "**** tag hash: #{@tag_hash}"
231
-
232
- # find number of rows by parsing until hit empty value in first column
233
- row_num = 3
234
- while true do
235
- term = parse_term(terms_worksheet, row_num)
236
- if term.nil?
237
- break
238
- end
239
-
240
- add_term(term)
241
-
242
- row_num += 1
243
- end
244
-
245
- # sort the tag tree
246
- sort_tag(root_tag)
247
-
248
- # check the tag tree
249
- check_tag(root_tag)
250
-
251
- end
252
-
253
-
254
- def validate_terms_header(terms_worksheet)
255
- test_arr = []
256
- test_arr << {"name"=>"First Level", "strict"=>true}
257
- test_arr << {"name"=>"Second Level", "strict"=>true}
258
- test_arr << {"name"=>"Third Level", "strict"=>true}
259
- test_arr << {"name"=>"Level Hierarchy", "strict"=>true}
260
- test_arr << {"name"=>"Term", "strict"=>true}
261
- test_arr << {"name"=>"Abbr", "strict"=>true}
262
- test_arr << {"name"=>"Description", "strict"=>true}
263
- test_arr << {"name"=>"Data Type", "strict"=>true}
264
- test_arr << {"name"=>"Allow Multiple", "strict"=>true}
265
- test_arr << {"name"=>"Enumerations", "strict"=>true}
266
- test_arr << {"name"=>"IP Units Written Out", "strict"=>true}
267
- test_arr << {"name"=>"IP Units Symbol", "strict"=>true}
268
- test_arr << {"name"=>"IP Display Mask", "strict"=>true}
269
- test_arr << {"name"=>"SI Units Written Out", "strict"=>true}
270
- test_arr << {"name"=>"SI Units Symbol", "strict"=>true}
271
- test_arr << {"name"=>"SI Display Mask", "strict"=>true}
272
- test_arr << {"name"=>"Unit Conversion", "strict"=>true}
273
- test_arr << {"name"=>"Default", "strict"=>true}
274
- test_arr << {"name"=>"Min", "strict"=>true}
275
- test_arr << {"name"=>"Max", "strict"=>true}
276
- test_arr << {"name"=>"Source", "strict"=>true}
277
- test_arr << {"name"=>"Review State", "strict"=>true}
278
- test_arr << {"name"=>"General Comments", "strict"=>true}
279
- test_arr << {"name"=>"Requested By / Project", "strict"=>true}
280
- test_arr << {"name"=>"Include in TPE", "strict"=>false}
281
- test_arr << {"name"=>"Required for Adding a New Product", "strict"=>false}
282
- test_arr << {"name"=>"Use as a Column Header in Search Results", "strict"=>false}
283
- test_arr << {"name"=>"Allow Users to Filter with this Facet", "strict"=>false}
284
- test_arr << {"name"=>"Show Data to Data Users", "strict"=>false}
285
- test_arr << {"name"=>"Additional Instructions for Web Developers", "strict"=>false}
286
- test_arr << {"name"=>"Related Third Party Testing Standards", "strict"=>false}
287
- test_arr << {"name"=>"Additional Guidance to Data Submitters", "strict"=>false}
288
- test_arr << {"name"=>"Additional Guidance to Data Users", "strict"=>false}
289
-
290
-
291
- parse = true
292
- col = 1
293
- while parse
294
- if terms_worksheet.Columns(col).Rows(2).Value.nil? || col > test_arr.size
295
- parse = false
296
- else
297
- if not terms_worksheet.Columns(col).Rows(2).Value == test_arr[col-1]["name"]
298
- if test_arr[col-1]["strict"]
299
- raise "[ERROR] Header does not match: #{col}: '#{terms_worksheet.Columns(col).Rows(2).Value} <> #{test_arr[col-1]["name"]}'"
300
- else
301
- puts "[WARNING] Header does not match: #{col}: '#{terms_worksheet.Columns(col).Rows(2).Value} <> #{test_arr[col-1]["name"]}'"
302
- end
303
- end
304
- end
305
- col += 1
306
- end
307
- end
308
-
309
- def parse_term(terms_worksheet, row)
310
-
311
- term = TermStruct.new
312
- term.row = row
313
- term.first_level = terms_worksheet.Columns(1).Rows(row).Value
314
- term.second_level = terms_worksheet.Columns(2).Rows(row).Value
315
- term.third_level = terms_worksheet.Columns(3).Rows(row).Value
316
- term.level_hierarchy = terms_worksheet.Columns(4).Rows(row).Value
317
- term.name = terms_worksheet.Columns(5).Rows(row).Value
318
- term.abbr = terms_worksheet.Columns(6).Rows(row).Value
319
- term.description = terms_worksheet.Columns(7).Rows(row).Value
320
- term.data_type = terms_worksheet.Columns(8).Rows(row).Value
321
- term.allow_multiple = terms_worksheet.Columns(9).Rows(row).Value
322
- term.enums = terms_worksheet.Columns(10).Rows(row).Value
323
- term.ip_written = terms_worksheet.Columns(11).Rows(row).Value
324
- term.ip_symbol = terms_worksheet.Columns(12).Rows(row).Value
325
- term.ip_mask = terms_worksheet.Columns(13).Rows(row).Value
326
- term.si_written = terms_worksheet.Columns(14).Rows(row).Value
327
- term.si_symbol = terms_worksheet.Columns(15).Rows(row).Value
328
- term.si_mask = terms_worksheet.Columns(16).Rows(row).Value
329
- term.unit_conversion = terms_worksheet.Columns(17).Rows(row).Value
330
- term.default_val = terms_worksheet.Columns(18).Rows(row).Value
331
- term.min_val = terms_worksheet.Columns(19).Rows(row).Value
332
- term.max_val = terms_worksheet.Columns(20).Rows(row).Value
333
-
334
- #custom TPex Columns
335
- term.tp_include = terms_worksheet.Columns(25).Rows(row).Value
336
- term.tp_required = terms_worksheet.Columns(26).Rows(row).Value
337
- term.tp_use_in_search = terms_worksheet.Columns(27).Rows(row).Value
338
- term.tp_use_in_facets = terms_worksheet.Columns(28).Rows(row).Value
339
- term.tp_show_data_to_data_users = terms_worksheet.Columns(29).Rows(row).Value
340
- term.tp_additional_web_dev_info = terms_worksheet.Columns(30).Rows(row).Value
341
- term.tp_third_party_testing = terms_worksheet.Columns(31).Rows(row).Value
342
- term.tp_additional_data_submitter_info = terms_worksheet.Columns(32).Rows(row).Value
343
- term.tp_additional_data_user_info = terms_worksheet.Columns(33).Rows(row).Value
344
-
345
- # trigger to quit parsing the xcel doc
346
- if term.first_level.nil? or term.first_level.empty?
347
- return nil
348
- end
349
-
350
- return term
351
- end
352
-
353
- def add_term(term)
354
-
355
- level_hierarchy = term.level_hierarchy
356
-
357
- # create the tag
358
- tag = @tag_hash[level_hierarchy]
359
-
360
- if tag.nil?
361
- tag = create_tag(level_hierarchy, term.description)
362
- end
363
-
364
- if term.name.nil? or term.name.strip.empty?
365
- # this row is really about the tag
366
- tag.description = term.description
367
-
368
- else
369
- # this row is about a term
370
- if not validate_term(term)
371
- return nil
372
- end
373
-
374
- tag.terms = [] if tag.terms.nil?
375
- tag.terms << term
376
- end
377
- end
378
-
379
- def create_tag(level_hierarchy, tag_description="")
380
-
381
- #puts "create_tag called for #{level_hierarchy}"
382
-
383
- parts = level_hierarchy.split('.')
384
-
385
- name = parts[-1]
386
- parent_level = parts[0..-2].join('.')
387
-
388
- parent_tag = @tag_hash[parent_level]
389
- if parent_tag.nil?
390
- parent_tag = create_tag(parent_level)
391
- end
392
-
393
- description = tag_description
394
- child_tags = []
395
- terms = []
396
- tag = TagStruct.new(level_hierarchy, name, description, parent_tag, child_tags, terms)
397
-
398
- parent_tag.child_tags << tag
399
-
400
- @tag_hash[level_hierarchy] = tag
401
-
402
- return tag
403
- end
404
-
405
- def sort_tag(tag)
406
- #tag.terms = tag.terms.sort {|x, y| x.level_hierarchy <=> y.level_hierarchy}
407
- tag.child_tags = tag.child_tags.sort {|x, y| x.level_hierarchy <=> y.level_hierarchy}
408
- tag.child_tags.each {|child_tag| sort_tag(child_tag) }
409
-
410
- #tag.terms = tag.terms.sort {|x, y| x.name <=> y.name}
411
- #tag.child_tags = tag.child_tags.sort {|x, y| x.name <=> y.name}
412
- #tag.child_tags.each {|child_tag| sort_tag(child_tag) }
413
- end
414
-
415
- def check_tag(tag)
416
-
417
- if tag.description.nil? or tag.description.empty?
418
- puts "[check_tag] tag '#{tag.level_hierarchy}' has no description"
419
- end
420
-
421
- tag.terms.each {|term| check_term(term) }
422
- tag.child_tags.each {|child_tag| check_tag(child_tag) }
423
- end
424
-
425
- def validate_term(term)
426
- valid = true
427
-
428
- parts = term.level_hierarchy.split('.')
429
-
430
- if parts.empty?
431
- puts "Hierarchy parts empty, #{term.level_hierarchy}"
432
- valid = false
433
- end
434
-
435
- if parts.size >= 1 and not term.first_level == parts[0]
436
- puts "First level '#{term.first_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
437
- valid = false
438
- end
439
-
440
- if parts.size >= 2 and not term.second_level == parts[1]
441
- puts "Second level '#{term.second_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
442
- valid = false
443
- end
444
-
445
- if parts.size >= 3 and not term.third_level == parts[2]
446
- puts "Third level '#{term.third_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
447
- valid = false
448
- end
449
-
450
- if parts.size > 3
451
- puts "Hierarchy cannot have more than 3 parts '#{term.level_hierarchy}', skipping term"
452
- valid = false
453
- end
454
-
455
- if !term.data_type.nil?
456
- valid_types = ["double", "integer", "enum", "file", "string", "autocomplete"]
457
- if (term.data_type.downcase != term.data_type) || !valid_types.include?(term.data_type)
458
- puts "[ERROR] Term '#{term.name}' does not have a valid data type with '#{term.data_type}'"
459
- end
460
-
461
- if term.data_type.downcase == "enum"
462
- if term.enums.nil? || term.enums == "" || term.enums.downcase == "no enum found"
463
- puts "[ERROR] Term '#{term.name}' does not have valid enumerations"
464
- end
465
- end
466
- end
467
-
468
- return valid
469
- end
470
-
471
- def check_term(term)
472
- if term.description.nil? or term.description.empty?
473
- #puts "[check_term] term '#{term.level_hierarchy}.#{term.name}' has no description"
474
- end
475
- end
476
-
477
- # write term to xml
478
- def write_terms_to_xml(tag, xml, output_type)
479
- terms = get_terms(tag)
480
- if terms.size > 0
481
- terms.each do |term|
482
- xml.term {
483
- xml.name term.name
484
- xml.abbr term.abbr if !term.abbr.nil?
485
- xml.description term.description if !term.description.nil?
486
- xml.data_type term.data_type if !term.data_type.nil?
487
- xml.allow_multiple term.allow_multiple if !term.allow_multiple.nil?
488
-
489
- if !term.enums.nil? && term.enums != ""
490
- xml.enumerations {
491
- out = term.enums.split("|")
492
- out.sort! if @sort_alphabetical
493
- out.each do |enum|
494
- xml.enumeration enum
495
- end
496
- }
497
- end
498
- xml.ip_written term.ip_written if !term.ip_written.nil?
499
- xml.ip_symbol term.ip_symbol if !term.ip_symbol.nil?
500
- xml.ip_mask term.ip_mask if !term.ip_mask.nil?
501
- xml.si_written term.si_written if !term.si_written.nil?
502
- xml.si_symbol term.si_symbol if !term.si_symbol.nil?
503
- xml.si_mask term.si_mask if !term.si_mask.nil?
504
- xml.row term.row if !term.row.nil?
505
- xml.unit_conversion term.unit_conversion if !term.unit_conversion.nil?
506
- xml.default_val term.default_val if !term.default_val.nil?
507
- xml.min_val term.min_val if !term.min_val.nil?
508
- xml.max_val term.max_val if !term.max_val.nil?
509
-
510
- if output_type == 'tpex'
511
- xml.tp_include term.tp_include if !term.tp_include.nil?
512
- xml.tp_required term.tp_required if !term.tp_required.nil?
513
- xml.tp_use_in_search term.tp_use_in_search if !term.tp_use_in_search.nil?
514
- xml.tp_use_in_facets term.tp_use_in_facets if !term.tp_use_in_facets.nil?
515
- xml.tp_show_data_to_data_users term.tp_show_data_to_data_users if !term.tp_show_data_to_data_users.nil?
516
- xml.tp_third_party_testing term.tp_third_party_testing if !term.tp_third_party_testing.nil?
517
- xml.tp_additional_web_dev_info term.tp_additional_web_dev_info if !term.tp_additional_web_dev_info.nil?
518
- xml.tp_additional_data_user_info term.tp_additional_data_user_info if !term.tp_additional_data_user_info.nil?
519
- xml.tp_additional_data_submitter_info term.tp_additional_data_submitter_info if !term.tp_additional_data_submitter_info.nil?
520
- end
521
- }
522
- end
523
- end
524
- end
525
-
526
- # write a tag to xml
527
- def write_tag_to_xml(tag, level, xml, output_type)
528
- level_string = "level_#{level}"
529
- xml.tag!(level_string) {
530
- s_temp = tag.name
531
- xml.name s_temp
532
- xml.description tag.description
533
-
534
- level += 1
535
-
536
- if tag.child_tags.size == 0
537
- write_terms_to_xml(tag, xml, output_type)
538
- end
539
-
540
- child_tags = tag.child_tags
541
- child_tags.each do |child_tag|
542
- write_tag_to_xml(child_tag, level, xml, output_type)
543
- end
544
-
545
- }
546
- end
547
-
548
- end
549
-
550
- end # module BCL
551
-
552
-
1
+ ######################################################################
2
+ # Copyright (c) 2008-2014, Alliance for Sustainable Energy.
3
+ # All rights reserved.
4
+ #
5
+ # This library is free software; you can redistribute it and/or
6
+ # modify it under the terms of the GNU Lesser General Public
7
+ # License as published by the Free Software Foundation; either
8
+ # version 2.1 of the License, or (at your option) any later version.
9
+ #
10
+ # This library is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ # Lesser General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU Lesser General Public
16
+ # License along with this library; if not, write to the Free Software
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ######################################################################
19
+
20
+ $have_win32ole = false
21
+
22
+ if RUBY_PLATFORM =~ /mswin|mingw|cygwin/
23
+ begin
24
+ # apparently this is not a gem
25
+ require 'win32ole'
26
+ mod = WIN32OLE
27
+ $have_win32ole = true
28
+ rescue NameError
29
+ # do not have win32ole
30
+ end
31
+ end
32
+
33
+ module BCL
34
+ # each TagStruct represents a node in the taxonomy tree
35
+ TagStruct = Struct.new(:level_hierarchy, :name, :description, :parent_tag, :child_tags, :terms)
36
+
37
+ # each TermStruct represents a row in the master taxonomy
38
+ TermStruct = Struct.new(:first_level, :second_level, :third_level, :level_hierarchy, :name, :description,
39
+ :abbr, :data_type, :enums, :ip_written, :ip_symbol, :ip_mask, :si_written, :si_symbol, :si_mask,
40
+ :unit_conversion, :default_val, :min_val, :max_val, :allow_multiple, :row, :tp_include,
41
+ :tp_required, :tp_use_in_search, :tp_use_in_facets, :tp_show_data_to_data_users, :tp_third_party_testing,
42
+ :tp_additional_web_dev_info, :tp_additional_data_user_info, :tp_additional_data_submitter_info)
43
+
44
+ # class for parsing, validating, and querying the master taxonomy document
45
+ class MasterTaxonomy
46
+ # parse the master taxonomy document
47
+ def initialize(xlsx_path = nil, sort_alpha = false)
48
+ @sort_alphabetical = sort_alpha
49
+
50
+ # hash of level_taxonomy to tag
51
+ @tag_hash = {}
52
+
53
+ if xlsx_path.nil?
54
+ # load from the current taxonomy
55
+ path = current_taxonomy_path
56
+ puts "Loading current taxonomy from #{path}"
57
+ File.open(path, 'r') do |file|
58
+ @tag_hash = Marshal.load(file)
59
+ end
60
+ else
61
+ xlsx_path = Pathname.new(xlsx_path).realpath.to_s
62
+ puts "Loading taxonomy file #{xlsx_path}"
63
+
64
+ # WINDOWS ONLY SECTION BECAUSE THIS USES WIN32OLE
65
+ if $have_win32ole
66
+ begin
67
+ excel = WIN32OLE.new('Excel.Application')
68
+ xlsx = excel.Workbooks.Open(xlsx_path)
69
+ terms_worksheet = xlsx.Worksheets('Terms')
70
+ parse_terms(terms_worksheet)
71
+ ensure
72
+ # not really saving just pretending so don't get prompted on quit
73
+ xlsx.saved = true
74
+ excel.Quit
75
+ WIN32OLE.ole_free(excel)
76
+ excel.ole_free
77
+ xlsx = nil
78
+ excel = nil
79
+ GC.start
80
+ end
81
+ else # if $have_win32ole
82
+ puts "MasterTaxonomy class requires 'win32ole' to parse master taxonomy document."
83
+ puts 'MasterTaxonomy may also be stored and loaded from JSON if your platform does not support win32ole.'
84
+ end # if $have_win32ole
85
+ end
86
+ end
87
+
88
+ # save the current taxonomy
89
+ def save_as_current_taxonomy(path = nil)
90
+ unless path
91
+ path = current_taxonomy_path
92
+ end
93
+ puts "Saving current taxonomy to #{path}"
94
+ # this is really not JSON... it is a persisted format of ruby
95
+ File.open(path, 'w') do |file|
96
+ Marshal.dump(@tag_hash, file)
97
+ end
98
+ end
99
+
100
+ # write taxonomy to xml
101
+ def write_xml(path, output_type = 'tpex')
102
+ root_tag = @tag_hash['']
103
+
104
+ if root_tag.nil?
105
+ puts 'Cannot find root tag'
106
+ return false
107
+ end
108
+
109
+ File.open(path, 'w') do |file|
110
+ xml = Builder::XmlMarkup.new(target: file, indent: 2)
111
+
112
+ # setup the xml file
113
+ xml.instruct!(:xml, version: '1.0', encoding: 'UTF-8')
114
+ xml.schema('xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance') {
115
+ write_tag_to_xml(root_tag, 0, xml, output_type)
116
+ }
117
+ end
118
+ end
119
+
120
+ # get all terms for a given tag
121
+ # this includes terms that are inherited from parent levels
122
+ # e.g. master_taxonomy.get_terms("Space Use.Lighting.Lamp Ballast")
123
+ def get_terms(tag)
124
+ terms = tag.terms
125
+
126
+ parent_tag = tag.parent_tag
127
+ until parent_tag.nil?
128
+ terms.concat(parent_tag.terms)
129
+ parent_tag = parent_tag.parent_tag
130
+ end
131
+
132
+ # sort the terms as they come out
133
+ result = terms.uniq
134
+ if !@sort_alphabetical
135
+ result = result.sort { |x, y| x.row <=> y.row }
136
+ else
137
+ result = result.sort { |x, y| x.name <=> y.name }
138
+ end
139
+
140
+ result
141
+ end
142
+
143
+ # check that the given component is conforms with the master taxonomy
144
+ def check_component(component)
145
+ valid = true
146
+ tag = nil
147
+
148
+ # see if we can find the component's tag in the taxonomy
149
+ tags = component.tags
150
+ if tags.empty?
151
+ puts '[Check Component ERROR] Component does not have any tags'
152
+ valid = false
153
+ elsif tags.size > 1
154
+ puts '[Check Component ERROR] Component has multiple tags'
155
+ valid = false
156
+ else
157
+ tag = @tag_hash[tags[0].descriptor]
158
+ unless tag
159
+ puts "[Check Component ERROR] Cannot find #{tags[0].descriptor} in the master taxonomy"
160
+ valid = false
161
+ end
162
+ end
163
+
164
+ unless tag
165
+ return false
166
+ end
167
+
168
+ terms = get_terms(tag)
169
+
170
+ # todo: check for all required attributes
171
+ terms.each do |_term|
172
+ # if term.required
173
+ # make sure we find attribute
174
+ # end
175
+ end
176
+
177
+ # check that all attributes are allowed
178
+ component.attributes.each do |attribute|
179
+
180
+ term = nil
181
+ terms.each do |t|
182
+ if t.name == attribute.name
183
+ term = t
184
+ break
185
+ end
186
+ end
187
+
188
+ unless term
189
+ puts "[Check Component ERROR] Cannot find term for #{attribute.name} in #{tag.level_hierarchy}"
190
+ valid = false
191
+ next
192
+ end
193
+
194
+ # todo: validate value, datatype, units
195
+
196
+ end
197
+
198
+ valid
199
+ end
200
+
201
+ private
202
+
203
+ def current_taxonomy_path
204
+ File.dirname(__FILE__) + '/current_taxonomy.json'
205
+ end
206
+
207
+ def parse_terms(terms_worksheet)
208
+ # check header
209
+ header_error = validate_terms_header(terms_worksheet)
210
+ if header_error
211
+ fail 'Header Error on Terms Worksheet'
212
+ end
213
+
214
+ # add root tag
215
+ root_terms = []
216
+ root_terms << TermStruct.new('', '', '', '', 'OpenStudio Type', 'Type of OpenStudio Object')
217
+ root_terms[0].row = 0
218
+ # root_terms << TermStruct.new()
219
+ root_tag = TagStruct.new('', 'root', 'Root of the taxonomy', nil, [], root_terms)
220
+ @tag_hash[''] = root_tag
221
+
222
+ ### puts "**** tag hash: #{@tag_hash}"
223
+
224
+ # find number of rows by parsing until hit empty value in first column
225
+ row_num = 3
226
+ while true
227
+ term = parse_term(terms_worksheet, row_num)
228
+ if term.nil?
229
+ break
230
+ end
231
+
232
+ add_term(term)
233
+
234
+ row_num += 1
235
+ end
236
+
237
+ # sort the tag tree
238
+ sort_tag(root_tag)
239
+
240
+ # check the tag tree
241
+ check_tag(root_tag)
242
+ end
243
+
244
+ def validate_terms_header(terms_worksheet)
245
+ test_arr = []
246
+ test_arr << { 'name' => 'First Level', 'strict' => true }
247
+ test_arr << { 'name' => 'Second Level', 'strict' => true }
248
+ test_arr << { 'name' => 'Third Level', 'strict' => true }
249
+ test_arr << { 'name' => 'Level Hierarchy', 'strict' => true }
250
+ test_arr << { 'name' => 'Term', 'strict' => true }
251
+ test_arr << { 'name' => 'Abbr', 'strict' => true }
252
+ test_arr << { 'name' => 'Description', 'strict' => true }
253
+ test_arr << { 'name' => 'Data Type', 'strict' => true }
254
+ test_arr << { 'name' => 'Allow Multiple', 'strict' => true }
255
+ test_arr << { 'name' => 'Enumerations', 'strict' => true }
256
+ test_arr << { 'name' => 'IP Units Written Out', 'strict' => true }
257
+ test_arr << { 'name' => 'IP Units Symbol', 'strict' => true }
258
+ test_arr << { 'name' => 'IP Display Mask', 'strict' => true }
259
+ test_arr << { 'name' => 'SI Units Written Out', 'strict' => true }
260
+ test_arr << { 'name' => 'SI Units Symbol', 'strict' => true }
261
+ test_arr << { 'name' => 'SI Display Mask', 'strict' => true }
262
+ test_arr << { 'name' => 'Unit Conversion', 'strict' => true }
263
+ test_arr << { 'name' => 'Default', 'strict' => true }
264
+ test_arr << { 'name' => 'Min', 'strict' => true }
265
+ test_arr << { 'name' => 'Max', 'strict' => true }
266
+ test_arr << { 'name' => 'Source', 'strict' => true }
267
+ test_arr << { 'name' => 'Review State', 'strict' => true }
268
+ test_arr << { 'name' => 'General Comments', 'strict' => true }
269
+ test_arr << { 'name' => 'Requested By / Project', 'strict' => true }
270
+ test_arr << { 'name' => 'Include in TPE', 'strict' => false }
271
+ test_arr << { 'name' => 'Required for Adding a New Product', 'strict' => false }
272
+ test_arr << { 'name' => 'Use as a Column Header in Search Results', 'strict' => false }
273
+ test_arr << { 'name' => 'Allow Users to Filter with this Facet', 'strict' => false }
274
+ test_arr << { 'name' => 'Show Data to Data Users', 'strict' => false }
275
+ test_arr << { 'name' => 'Additional Instructions for Web Developers', 'strict' => false }
276
+ test_arr << { 'name' => 'Related Third Party Testing Standards', 'strict' => false }
277
+ test_arr << { 'name' => 'Additional Guidance to Data Submitters', 'strict' => false }
278
+ test_arr << { 'name' => 'Additional Guidance to Data Users', 'strict' => false }
279
+
280
+ parse = true
281
+ col = 1
282
+ while parse
283
+ if terms_worksheet.Columns(col).Rows(2).Value.nil? || col > test_arr.size
284
+ parse = false
285
+ else
286
+ unless terms_worksheet.Columns(col).Rows(2).Value == test_arr[col - 1]['name']
287
+ if test_arr[col - 1]['strict']
288
+ fail "[ERROR] Header does not match: #{col}: '#{terms_worksheet.Columns(col).Rows(2).Value} <> #{test_arr[col - 1]['name']}'"
289
+ else
290
+ puts "[WARNING] Header does not match: #{col}: '#{terms_worksheet.Columns(col).Rows(2).Value} <> #{test_arr[col - 1]['name']}'"
291
+ end
292
+ end
293
+ end
294
+ col += 1
295
+ end
296
+ end
297
+
298
# Builds a TermStruct from one row of the terms worksheet.
#
# Each cell is read through the worksheet's Columns(n).Rows(row).Value chain.
# Columns 1-20 hold the core taxonomy fields and columns 25-33 hold the custom
# TPex fields; columns 21-24 are not read by this method.
#
# @param terms_worksheet [Object] worksheet object answering
#   Columns(n).Rows(r).Value (presumably a WIN32OLE worksheet -- confirm
#   against the caller)
# @param row [Integer] row to read; also recorded on the returned term
# @return [TermStruct, nil] the populated term, or nil when the first-level
#   cell is nil or empty, which is the trigger to stop parsing the document
def parse_term(terms_worksheet, row)
  # Worksheet column number => TermStruct member, listed in reading order.
  column_fields = {
    1 => :first_level,
    2 => :second_level,
    3 => :third_level,
    4 => :level_hierarchy,
    5 => :name,
    6 => :abbr,
    7 => :description,
    8 => :data_type,
    9 => :allow_multiple,
    10 => :enums,
    11 => :ip_written,
    12 => :ip_symbol,
    13 => :ip_mask,
    14 => :si_written,
    15 => :si_symbol,
    16 => :si_mask,
    17 => :unit_conversion,
    18 => :default_val,
    19 => :min_val,
    20 => :max_val,
    # custom TPex columns
    25 => :tp_include,
    26 => :tp_required,
    27 => :tp_use_in_search,
    28 => :tp_use_in_facets,
    29 => :tp_show_data_to_data_users,
    30 => :tp_additional_web_dev_info,
    31 => :tp_third_party_testing,
    32 => :tp_additional_data_submitter_info,
    33 => :tp_additional_data_user_info
  }

  term = TermStruct.new
  term.row = row
  column_fields.each do |column, field|
    term[field] = terms_worksheet.Columns(column).Rows(row).Value
  end

  # A blank first-level cell marks the end of the data rows.
  first_level = term.first_level
  return nil if first_level.nil? || first_level.empty?

  term
end
340
+
341
# Files one parsed worksheet row into the tag tree.
#
# The tag for the row's level hierarchy is looked up in @tag_hash and created
# (using the row's description) when it does not exist yet. A row without a
# name describes the tag itself and only updates the tag's description; a row
# with a name is validated and appended to the tag's terms.
#
# @param term [TermStruct] the parsed row
# @return [Object, nil] the description for a tag row, the tag's terms array
#   for an accepted term, or nil when validate_term rejects the term
def add_term(term)
  hierarchy = term.level_hierarchy
  tag = @tag_hash[hierarchy] || create_tag(hierarchy, term.description)

  if term.name.nil? || term.name.strip.empty?
    # this row is really about the tag
    tag.description = term.description
  else
    # this row is about a term
    return nil unless validate_term(term)

    tag.terms ||= []
    tag.terms << term
  end
end
365
+
366
# Creates the tag for a dotted level hierarchy, creating any missing
# ancestors first, and registers it in @tag_hash.
#
# The last dot-separated segment becomes the tag name; the remaining segments
# form the parent's hierarchy key. The new tag is appended to its parent's
# child_tags. Ancestors created implicitly get the default empty description.
#
# NOTE(review): for a single-segment hierarchy the parent key is '' and the
# recursion only stops because @tag_hash already holds an entry for '' --
# presumably a root tag registered elsewhere; confirm against the initializer.
#
# @param level_hierarchy [String] dotted hierarchy, e.g. 'A.B.C'
# @param tag_description [String] description stored on the new tag
# @return [TagStruct] the newly created tag
def create_tag(level_hierarchy, tag_description = '')
  segments = level_hierarchy.split('.')
  parent_level = segments[0...-1].join('.')

  parent_tag = @tag_hash[parent_level] || create_tag(parent_level)

  tag = TagStruct.new(level_hierarchy, segments.last, tag_description, parent_tag, [], [])
  parent_tag.child_tags << tag
  @tag_hash[level_hierarchy] = tag

  tag
end
390
+
391
# Recursively orders a tag's children by their level hierarchy string.
#
# The children array is replaced by a sorted copy, then each child is sorted
# in turn. Terms are left in their existing order (sorting terms, or sorting
# by name instead of hierarchy, is intentionally not done here).
#
# @param tag [TagStruct] root of the subtree to sort
# @return [Array<TagStruct>] the tag's sorted child_tags
def sort_tag(tag)
  tag.child_tags = tag.child_tags.sort_by(&:level_hierarchy)
  tag.child_tags.each { |child| sort_tag(child) }
end
400
+
401
# Walks a tag subtree and reports tags that lack a description.
#
# A warning line is printed for the tag when its description is nil or empty;
# every term of the tag is then passed to check_term and every child tag is
# checked recursively.
#
# @param tag [TagStruct] root of the subtree to check
# @return [Array<TagStruct>] the tag's child_tags
def check_tag(tag)
  description = tag.description
  puts "[check_tag] tag '#{tag.level_hierarchy}' has no description" if description.nil? || description.empty?

  tag.terms.each { |term| check_term(term) }
  tag.child_tags.each { |child_tag| check_tag(child_tag) }
end
409
+
410
# Checks that a term row is internally consistent.
#
# The dotted level hierarchy is split into parts and compared against the
# row's first/second/third level cells. A term is invalid when the hierarchy
# is empty, has more than three parts, or any present part differs from the
# corresponding level cell. Each failure prints a message.
#
# Data-type and enumeration problems are reported with an [ERROR] line but do
# not make the term invalid.
#
# Fix: the level comparisons used `!term.first_level == parts[0]`, which Ruby
# parses as `(!term.first_level) == parts[0]` and is therefore always false,
# so mismatched levels were never detected. They are now compared with `!=`.
#
# @param term [TermStruct] the row to validate
# @return [Boolean] true when the hierarchy checks pass, false otherwise
def validate_term(term)
  valid = true

  # to_s keeps a missing hierarchy on the "parts empty" path instead of raising
  parts = term.level_hierarchy.to_s.split('.')

  if parts.empty?
    puts "Hierarchy parts empty, #{term.level_hierarchy}"
    valid = false
  end

  levels = [
    ['First', term.first_level],
    ['Second', term.second_level],
    ['Third', term.third_level]
  ]
  levels.each_with_index do |(label, value), index|
    # only compare levels that the hierarchy actually contains
    next if index >= parts.size || value == parts[index]

    puts "#{label} level '#{value}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
    valid = false
  end

  if parts.size > 3
    puts "Hierarchy cannot have more than 3 parts '#{term.level_hierarchy}', skipping term"
    valid = false
  end

  unless term.data_type.nil?
    # every accepted type is lowercase, so membership alone also rejects mixed case
    valid_types = %w(double integer enum file string autocomplete)
    unless valid_types.include?(term.data_type)
      puts "[ERROR] Term '#{term.name}' does not have a valid data type with '#{term.data_type}'"
    end

    if term.data_type.downcase == 'enum'
      if term.enums.nil? || term.enums == '' || term.enums.downcase == 'no enum found'
        puts "[ERROR] Term '#{term.name}' does not have valid enumerations"
      end
    end
  end

  valid
end
455
+
456
# Inspection hook for a single term.
#
# The missing-description condition is evaluated, but the warning it guarded
# is disabled, so the method currently prints nothing.
#
# @param term [TermStruct] the term to inspect
# @return [nil]
def check_term(term)
  description = term.description
  return unless description.nil? || description.empty?

  # puts "[check_term] term '#{term.level_hierarchy}.#{term.name}' has no description"
  nil
end
461
+
462
# Writes every term of a tag as a <term> element.
#
# Terms are obtained through get_terms(tag). Each term always gets a <name>
# element; every other element is emitted only when the corresponding value
# is not nil. Enumerations are split on '|' (and sorted when
# @sort_alphabetical is set) into one <enumeration> element each. The TPex
# elements are written only when output_type is 'tpex'.
#
# @param tag [TagStruct] tag whose terms are written
# @param xml [Object] XML builder responding to tag! and dynamic element
#   methods (presumably Builder::XmlMarkup -- confirm against the caller)
# @param output_type [String] 'tpex' enables the TPex-specific elements
# @return [Object, nil] the terms collection, or nil when there are no terms
def write_terms_to_xml(tag, xml, output_type)
  terms = get_terms(tag)
  return if terms.empty?

  # Optional elements, in output order; each is skipped when its value is nil.
  leading_fields = [:abbr, :description, :data_type, :allow_multiple]
  unit_fields = [:ip_written, :ip_symbol, :ip_mask, :si_written, :si_symbol, :si_mask,
                 :row, :unit_conversion, :default_val, :min_val, :max_val]
  tpex_fields = [:tp_include, :tp_required, :tp_use_in_search, :tp_use_in_facets,
                 :tp_show_data_to_data_users, :tp_third_party_testing, :tp_additional_web_dev_info,
                 :tp_additional_data_user_info, :tp_additional_data_submitter_info]

  terms.each do |term|
    xml.term do
      xml.name term.name

      leading_fields.each do |field|
        value = term.public_send(field)
        xml.tag!(field, value) unless value.nil?
      end

      unless term.enums.nil? || term.enums == ''
        xml.enumerations do
          values = term.enums.split('|')
          values.sort! if @sort_alphabetical
          values.each { |value| xml.enumeration value }
        end
      end

      trailing_fields = output_type == 'tpex' ? unit_fields + tpex_fields : unit_fields
      trailing_fields.each do |field|
        value = term.public_send(field)
        xml.tag!(field, value) unless value.nil?
      end
    end
  end
end
510
+
511
# Writes a tag and its whole subtree as nested <level_N> elements.
#
# Each element carries the tag's name and description. Terms are written
# (through write_terms_to_xml) only for tags that have no child tags; child
# tags are written recursively one level deeper.
#
# @param tag [TagStruct] tag to serialize
# @param level [Integer] depth used in the element name ("level_#{level}")
# @param xml [Object] XML builder responding to tag! and dynamic element
#   methods (presumably Builder::XmlMarkup -- confirm against the caller)
# @param output_type [String] forwarded unchanged to write_terms_to_xml
# @return [Object] whatever xml.tag! returns
def write_tag_to_xml(tag, level, xml, output_type)
  xml.tag!("level_#{level}") do
    xml.name tag.name
    xml.description tag.description

    write_terms_to_xml(tag, xml, output_type) if tag.child_tags.empty?

    tag.child_tags.each do |child_tag|
      write_tag_to_xml(child_tag, level + 1, xml, output_type)
    end
  end
end
532
+ end
533
+ end # module BCL