bcl 0.2.3 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,561 @@
1
+ ######################################################################
2
+ # Copyright (c) 2008-2013, Alliance for Sustainable Energy.
3
+ # All rights reserved.
4
+ #
5
+ # This library is free software; you can redistribute it and/or
6
+ # modify it under the terms of the GNU Lesser General Public
7
+ # License as published by the Free Software Foundation; either
8
+ # version 2.1 of the License, or (at your option) any later version.
9
+ #
10
+ # This library is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ # Lesser General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU Lesser General Public
16
+ # License along with this library; if not, write to the Free Software
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ######################################################################
19
+
20
+ require 'rubygems'
21
+ require 'pathname'
22
+ require 'fileutils'
23
+ require 'builder' #gem install builder (creates xml files)
24
+ require 'rbconfig'
25
+
26
# Global flag: true only when the host looks like Windows and the WIN32OLE
# constant is available after requiring 'win32ole'.
$have_win32ole = false

if RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/
  begin
    # apparently this is not a gem
    require 'win32ole'
    $have_win32ole = true if defined?(WIN32OLE)
  rescue LoadError, NameError
    # do not have win32ole: a failed require raises LoadError (not NameError),
    # so both are rescued and the flag simply stays false
  end
end
38
+
39
+ module BCL
40
+
41
+ # each TagStruct represents a node in the taxonomy tree
42
TagStruct = Struct.new(
  :level_hierarchy, # dotted path of the tag; also the key used in the tag hash
  :name,
  :description,
  :parent_tag,
  :child_tags,
  :terms
)

# each TermStruct represents a row in the master taxonomy
# (member order matters: instances are also built positionally)
TermStruct = Struct.new(
  # hierarchy columns
  :first_level, :second_level, :third_level, :level_hierarchy,
  # term identity
  :name, :description, :abbr, :data_type, :enums,
  # IP / SI unit information
  :ip_written, :ip_symbol, :ip_mask,
  :si_written, :si_symbol, :si_mask,
  :unit_conversion,
  # value constraints
  :default_val, :min_val, :max_val, :allow_multiple,
  # worksheet row the term was read from
  :row,
  # custom TPex columns
  :tp_include, :tp_required, :tp_use_in_search, :tp_use_in_facets,
  :tp_show_data_to_data_users, :tp_third_party_testing,
  :tp_additional_web_dev_info, :tp_additional_data_user_info,
  :tp_additional_data_submitter_info
)
47
+
48
+
49
+ # class for parsing, validating, and querying the master taxonomy document
50
+ class MasterTaxonomy
51
+
52
+ # parse the master taxonomy document
53
# Build the taxonomy, either from the persisted snapshot or from an Excel workbook.
#
# xlsx_path:: path to the master taxonomy workbook; when nil the snapshot at
#             current_taxonomy_path is loaded instead
# sort_alpha:: when true, terms (and exported enumerations) are sorted by name;
#              otherwise terms are sorted by worksheet row
#
# Parsing a workbook needs WIN32OLE (Excel automation). Without it only two
# messages are printed and the taxonomy stays empty.
def initialize(xlsx_path = nil, sort_alpha = false)
  @sort_alphabetical = sort_alpha

  # hash of level_hierarchy to tag
  @tag_hash = Hash.new

  if xlsx_path.nil?
    # load from the current taxonomy
    path = current_taxonomy_path
    puts "Loading current taxonomy from #{path}"
    # The snapshot is Marshal output, i.e. binary: open in binary mode so that
    # Windows does not translate line endings or stop at an EOF byte.
    # NOTE(review): Marshal.load must only ever be pointed at trusted files.
    File.open(path, 'rb') do |file|
      @tag_hash = Marshal.load(file)
    end
  else
    xlsx_path = Pathname.new(xlsx_path).realpath.to_s
    puts "Loading taxonomy file #{xlsx_path}"

    # WINDOWS ONLY SECTION BECAUSE THIS USES WIN32OLE
    if $have_win32ole
      excel = nil
      xlsx = nil
      begin
        excel = WIN32OLE::new('Excel.Application')
        xlsx = excel.Workbooks.Open(xlsx_path)
        terms_worksheet = xlsx.Worksheets("Terms")
        parse_terms(terms_worksheet)
      ensure
        # Guard each step: if Excel or the workbook never opened, touching nil
        # here would raise and hide the original error.
        # not really saving just pretending so don't get prompted on quit
        xlsx.saved = true if xlsx
        if excel
          excel.Quit
          WIN32OLE.ole_free(excel)
          excel.ole_free
        end
        xlsx = nil
        excel = nil
        GC.start
      end
    else # if $have_win32ole
      puts "MasterTaxonomy class requires 'win32ole' to parse master taxonomy document."
      puts "MasterTaxonomy may also be stored and loaded from JSON if your platform does not support win32ole."
    end # if $have_win32ole
  end
end
93
+
94
+ # save the current taxonomy
95
# Persist the tag hash with Marshal.
#
# path:: destination file; defaults to current_taxonomy_path when nil
#
# Returns whatever the File.open block returns (the result of Marshal.dump).
def save_as_current_taxonomy(path = nil)
  path ||= current_taxonomy_path
  puts "Saving current taxonomy to #{path}"
  # this is really not JSON... it is a persisted format of ruby.
  # Marshal output is binary, so write in binary mode; text mode would let
  # Windows rewrite newline bytes and corrupt the dump.
  File.open(path, 'wb') do |file|
    Marshal.dump(@tag_hash, file)
  end
end
105
+
106
+ # write taxonomy to xml
107
# Export the whole tag tree as XML.
#
# path:: file to write
# output_type:: forwarded to write_tag_to_xml; 'tpex' is the default
#
# Returns false (after printing a message) when the tag hash has no root
# entry under the "" key; otherwise returns the value of the File.open block.
def write_xml(path, output_type = 'tpex')
  top_tag = @tag_hash[""]

  if top_tag.nil?
    puts "Cannot find root tag"
    return false
  end

  File.open(path, 'w') do |out|
    builder = Builder::XmlMarkup.new(:target => out, :indent => 2)

    # XML declaration, then a single <schema> element wrapping the tree
    builder.instruct!(:xml, :version => "1.0", :encoding => "UTF-8")
    builder.schema("xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance") do
      write_tag_to_xml(top_tag, 0, builder, output_type)
    end
  end
end
127
+
128
+ # get all terms for a given tag
129
+ # this includes terms that are inherited from parent levels
130
+ # e.g. master_taxonomy.get_terms(tag), where tag is the tag object for "Space Use.Lighting.Lamp Ballast"
131
# Collect the terms of +tag+ together with the terms of every ancestor tag.
#
# tag:: an object responding to +terms+ and +parent_tag+ (a tag of the taxonomy)
#
# Returns a new, de-duplicated array sorted by term name when the taxonomy
# was created with sort_alpha, otherwise by worksheet row.
def get_terms(tag)
  # Work on a copy: concatenating into tag.terms itself would permanently
  # add the inherited terms to the tag every time it is queried.
  terms = (tag.terms || []).dup

  parent_tag = tag.parent_tag
  until parent_tag.nil?
    terms.concat(parent_tag.terms || [])
    parent_tag = parent_tag.parent_tag
  end

  # sort the terms as they come out
  result = terms.uniq
  if @sort_alphabetical
    result.sort { |x, y| x.name <=> y.name }
  else
    result.sort { |x, y| x.row <=> y.row }
  end
end
152
+
153
+ # check that the given component conforms to the master taxonomy
154
# Validate a component against the taxonomy.
#
# component:: object responding to +tags+ (each with a +descriptor+) and
#             +attributes+ (each with a +name+)
#
# Returns false when the component does not carry exactly one tag known to
# the taxonomy, or when any attribute has no matching term; true otherwise.
# Every problem found is reported with puts.
def check_component(component)
  matched_tag = nil

  # the component must carry exactly one tag, and it must be in the taxonomy
  component_tags = component.tags
  if component_tags.empty?
    puts "[Check Component ERROR] Component does not have any tags"
  elsif component_tags.size > 1
    puts "[Check Component ERROR] Component has multiple tags"
  else
    matched_tag = @tag_hash[component_tags[0].descriptor]
    unless matched_tag
      puts "[Check Component ERROR] Cannot find #{component_tags[0].descriptor} in the master taxonomy"
    end
  end

  return false unless matched_tag

  allowed_terms = get_terms(matched_tag)

  # todo: check for all required attributes

  # check that all attributes are allowed
  all_allowed = true
  component.attributes.each do |attribute|
    next if allowed_terms.any? { |candidate| candidate.name == attribute.name }

    puts "[Check Component ERROR] Cannot find term for #{attribute.name} in #{matched_tag.level_hierarchy}"
    all_allowed = false

    # todo: validate value, datatype, units
  end

  all_allowed
end
210
+
211
+ private
212
+
213
# Location of the persisted taxonomy snapshot: a file named
# current_taxonomy.json in the same directory as this source file.
def current_taxonomy_path
  "#{File.dirname(__FILE__)}/current_taxonomy.json"
end
216
+
217
# Read the "Terms" worksheet into the tag hash.
#
# Steps: validate the header, register the root tag under the "" key with a
# single "OpenStudio Type" term, read rows starting at row 3 until parse_term
# returns nil, then sort and check the resulting tag tree.
#
# Raises RuntimeError when validate_terms_header reports a problem.
def parse_terms(terms_worksheet)
  # check header
  raise "Header Error on Terms Worksheet" if validate_terms_header(terms_worksheet)

  # add root tag
  type_term = TermStruct.new("", "", "", "", "OpenStudio Type", "Type of OpenStudio Object")
  type_term.row = 0
  root_tag = TagStruct.new("", "root", "Root of the taxonomy", nil, [], [type_term])
  @tag_hash[""] = root_tag

  # data rows start at 3; parse_term signals the end of the data with nil
  row_num = 3
  until (term = parse_term(terms_worksheet, row_num)).nil?
    add_term(term)
    row_num += 1
  end

  # sort the tag tree
  sort_tag(root_tag)

  # check the tag tree
  check_tag(root_tag)
end
255
+
256
+
257
# Compare the header cells in row 2 of the Terms worksheet with the expected
# column names, left to right, stopping at the first empty header cell.
#
# terms_worksheet:: worksheet object answering Columns(col).Rows(row).Value
#
# A mismatch in a strict column raises RuntimeError; a mismatch in one of the
# trailing (TPex) columns only prints a warning.
# Returns nil when no strict mismatch was found.
def validate_terms_header(terms_worksheet)
  strict_names = [
    "First Level", "Second Level", "Third Level", "Level Hierarchy",
    "Term", "Abbr", "Description", "Data Type", "Allow Multiple", "Enumerations",
    "IP Units Written Out", "IP Units Symbol", "IP Display Mask",
    "SI Units Written Out", "SI Units Symbol", "SI Display Mask",
    "Unit Conversion", "Default", "Min", "Max",
    "Source", "Review State", "General Comments", "Requested By / Project"
  ]
  lenient_names = [
    "Include in TPE",
    "Required for Adding a New Product",
    "Use as a Column Header in Search Results",
    "Allow Users to Filter with this Facet",
    "Show Data to Data Users",
    "Additional Instructions for Web Developers",
    "Related Third Party Testing Standards",
    "Additional Guidance to Data Submitters",
    "Additional Guidance to Data Users"
  ]

  test_arr = strict_names.map { |name| {"name" => name, "strict" => true} } +
             lenient_names.map { |name| {"name" => name, "strict" => false} }

  test_arr.each_with_index do |expected, index|
    col = index + 1 # worksheet columns are addressed from 1
    actual = terms_worksheet.Columns(col).Rows(2).Value # read the cell once
    break if actual.nil?
    next if actual == expected["name"]

    if expected["strict"]
      raise "[ERROR] Header does not match: #{col}: '#{actual} <> #{expected["name"]}'"
    else
      puts "[WARNING] Header does not match: #{col}: '#{actual} <> #{expected["name"]}'"
    end
  end

  nil
end
317
+
318
# Read one worksheet row into a TermStruct.
#
# terms_worksheet:: worksheet object answering Columns(col).Rows(row).Value
# row:: row number to read; also stored on the term
#
# Returns the populated term, or nil when the first-level cell is nil or
# empty (the signal to stop parsing the Excel document).
def parse_term(terms_worksheet, row)
  # fields stored in worksheet columns 1..20, in column order
  leading_fields = [
    :first_level, :second_level, :third_level, :level_hierarchy, :name,
    :abbr, :description, :data_type, :allow_multiple, :enums,
    :ip_written, :ip_symbol, :ip_mask, :si_written, :si_symbol, :si_mask,
    :unit_conversion, :default_val, :min_val, :max_val
  ]
  # custom TPex fields stored in worksheet columns 25..33, in column order
  tpex_fields = [
    :tp_include, :tp_required, :tp_use_in_search, :tp_use_in_facets,
    :tp_show_data_to_data_users, :tp_additional_web_dev_info,
    :tp_third_party_testing, :tp_additional_data_submitter_info,
    :tp_additional_data_user_info
  ]

  term = TermStruct.new
  term.row = row

  leading_fields.each_with_index do |field, offset|
    term[field] = terms_worksheet.Columns(1 + offset).Rows(row).Value
  end
  tpex_fields.each_with_index do |field, offset|
    term[field] = terms_worksheet.Columns(25 + offset).Rows(row).Value
  end

  first = term.first_level
  return nil if first.nil? || first.empty?

  term
end
361
+
362
# File a parsed worksheet row under its tag, creating the tag when needed.
#
# A row without a term name describes the tag itself and only updates the
# tag description. Any other row is validated first; rows failing
# validate_term are dropped (nil is returned), valid ones are appended to the
# tag's term list.
def add_term(term)
  hierarchy = term.level_hierarchy

  # look the tag up, creating it on first sight
  tag = @tag_hash[hierarchy]
  tag = create_tag(hierarchy, term.description) if tag.nil?

  term_name = term.name
  if term_name.nil? || term_name.strip.empty?
    # this row is really about the tag
    tag.description = term.description
  elsif validate_term(term)
    # this row is about a term
    tag.terms = [] if tag.terms.nil?
    tag.terms << term
  else
    nil
  end
end
387
+
388
# Create the tag for a dotted level hierarchy and register it in the tag hash.
#
# level_hierarchy:: dotted path such as "A.B.C"; the last segment is the tag name
# tag_description:: description stored on the new tag (ancestors created on
#                   the way get the default, an empty string)
#
# Missing ancestors are created recursively. The new tag is appended to its
# parent's child_tags and returned.
# NOTE(review): relies on an entry for the "" key already being in the tag
# hash to end the recursion - confirm callers always add the root first.
def create_tag(level_hierarchy, tag_description="")
  segments = level_hierarchy.split('.')
  parent_key = segments[0..-2].join('.')

  parent = @tag_hash[parent_key]
  parent = create_tag(parent_key) if parent.nil?

  new_tag = TagStruct.new(level_hierarchy, segments[-1], tag_description, parent, [], [])
  parent.child_tags << new_tag
  @tag_hash[level_hierarchy] = new_tag

  new_tag
end
413
+
414
# Recursively order the children of +tag+ by their level hierarchy string.
# Only child tags are reordered; the terms of a tag are left as they are.
def sort_tag(tag)
  ordered = tag.child_tags.sort do |left, right|
    left.level_hierarchy <=> right.level_hierarchy
  end
  tag.child_tags = ordered

  ordered.each do |child|
    sort_tag(child)
  end
end
423
+
424
# Walk the tag tree depth-first: report a tag that has no description, run
# check_term on each of its terms, then descend into its child tags.
def check_tag(tag)
  description = tag.description
  if description.nil? || description.empty?
    puts "[check_tag] tag '#{tag.level_hierarchy}' has no description"
  end

  tag.terms.each do |term|
    check_term(term)
  end

  tag.child_tags.each do |child|
    check_tag(child)
  end
end
433
+
434
# Check a term row for internal consistency.
#
# The first/second/third level columns must match the corresponding segments
# of the dotted level hierarchy, which must have between 1 and 3 segments.
# Each violation is printed and makes the result false.
#
# Data type and enumeration problems are printed as errors as well, but they
# do not change the returned value.
def validate_term(term)
  hierarchy = term.level_hierarchy
  parts = hierarchy.split('.')
  valid = true

  if parts.empty?
    puts "Hierarchy parts empty, #{hierarchy}"
    valid = false
  end

  # compare each level column with the hierarchy segment at the same position
  level_columns = [
    ["First", term.first_level],
    ["Second", term.second_level],
    ["Third", term.third_level]
  ]
  level_columns.each_with_index do |(label, value), position|
    next unless parts.size > position
    next if value == parts[position]

    puts "#{label} level '#{value}' does not match level hierarchy '#{hierarchy}', skipping term"
    valid = false
  end

  if parts.size > 3
    puts "Hierarchy cannot have more than 3 parts '#{hierarchy}', skipping term"
    valid = false
  end

  data_type = term.data_type
  unless data_type.nil?
    lowered = data_type.downcase
    known_types = ["double", "integer", "enum", "file", "string", "autocomplete"]

    # must be all lower case and one of the known type names
    unless lowered == data_type && known_types.include?(data_type)
      puts "[ERROR] Term '#{term.name}' does not have a valid data type with '#{data_type}'"
    end

    if lowered == "enum"
      enums = term.enums
      if enums.nil? || enums == "" || enums.downcase == "no enum found"
        puts "[ERROR] Term '#{term.name}' does not have valid enumerations"
      end
    end
  end

  valid
end
479
+
480
# Placeholder check for a single term. The description is inspected, but the
# report for a missing description is disabled, so nothing is printed and
# nil is always returned.
def check_term(term)
  description = term.description
  return nil unless description.nil? || description.empty?

  # disabled report:
  #puts "[check_term] term '#{term.level_hierarchy}.#{term.name}' has no description"
  nil
end
485
+
486
+ # write term to xml
487
# Emit one <term> element for every term that applies to +tag+ (its own terms
# plus inherited ones, as returned by get_terms).
#
# tag:: tag whose terms are written
# xml:: markup builder; each call xml.<element>(value) emits one child element
# output_type:: the custom tp_* fields are written only when this is 'tpex'
#
# The name is always written; every other field is written only when it is
# not nil. Enumerations are split on "|" and sorted when the taxonomy was
# created with sort_alpha.
def write_terms_to_xml(tag, xml, output_type)
  # element names double as term field names; the order is the output order
  fields_before_enums = [:abbr, :description, :data_type, :allow_multiple]
  fields_after_enums = [
    :ip_written, :ip_symbol, :ip_mask, :si_written, :si_symbol, :si_mask,
    :row, :unit_conversion, :default_val, :min_val, :max_val
  ]
  tpex_fields = [
    :tp_include, :tp_required, :tp_use_in_search, :tp_use_in_facets,
    :tp_show_data_to_data_users, :tp_third_party_testing,
    :tp_additional_web_dev_info, :tp_additional_data_user_info,
    :tp_additional_data_submitter_info
  ]

  # writes <field>value</field> for each non-nil field of the term
  emit_present = lambda do |term, fields|
    fields.each do |field|
      value = term[field]
      xml.__send__(field, value) unless value.nil?
    end
  end

  terms = get_terms(tag)
  if terms.size > 0
    terms.each do |term|
      xml.term do
        xml.name term.name
        emit_present.call(term, fields_before_enums)

        enum_text = term.enums
        unless enum_text.nil? || enum_text == ""
          xml.enumerations do
            choices = enum_text.split("|")
            choices.sort! if @sort_alphabetical
            choices.each { |choice| xml.enumeration choice }
          end
        end

        emit_present.call(term, fields_after_enums)
        emit_present.call(term, tpex_fields) if output_type == 'tpex'
      end
    end
  end
end
534
+
535
+ # write a tag to xml
536
# Write +tag+ as a <level_N> element holding its name and description,
# followed by either its terms (when it has no child tags) or its child tags,
# which are written recursively one level deeper.
#
# tag:: tag to write
# level:: depth of the tag, used for the element name ("level_0" for the root)
# xml:: markup builder
# output_type:: forwarded to write_terms_to_xml
def write_tag_to_xml(tag, level, xml, output_type)
  xml.tag!("level_#{level}") do
    xml.name tag.name
    xml.description tag.description

    children = tag.child_tags

    # terms are written for leaf tags only
    write_terms_to_xml(tag, xml, output_type) if children.size == 0

    children.each do |child|
      write_tag_to_xml(child, level + 1, xml, output_type)
    end
  end
end
556
+
557
+ end
558
+
559
+ end # module BCL
560
+
561
+