bcl 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/bcl.rb +6 -0
- data/lib/bcl/ComponentSpreadsheet.rb +292 -0
- data/lib/bcl/ComponentXml.rb +451 -0
- data/lib/bcl/GatherComponents.rb +94 -0
- data/lib/bcl/MasterTaxonomy.rb +409 -0
- data/lib/bcl/MongoToComponent.rb +116 -0
- data/lib/bcl/TarBall.rb +51 -0
- data/lib/bcl/current_taxonomy.json +0 -0
- data/lib/bcl/current_taxonomy.xml +451 -0
- metadata +145 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
######################################################################
|
2
|
+
# Copyright (c) 2008-2010, Alliance for Sustainable Energy.
|
3
|
+
# All rights reserved.
|
4
|
+
#
|
5
|
+
# This library is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU Lesser General Public
|
7
|
+
# License as published by the Free Software Foundation; either
|
8
|
+
# version 2.1 of the License, or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This library is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
# Lesser General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU Lesser General Public
|
16
|
+
# License along with this library; if not, write to the Free Software
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
18
|
+
######################################################################
|
19
|
+
|
20
|
+
require 'rubygems'
|
21
|
+
require 'pathname'
|
22
|
+
require 'fileutils'
|
23
|
+
require 'enumerator'
|
24
|
+
|
25
|
+
require 'bcl/TarBall'
|
26
|
+
|
27
|
+
module BCL
  module_function

  # Gathers every component tarball found under +component_dir+ into a single
  # "0gather" subdirectory and then packages them into batch tarballs of at
  # most 1000 components each (named "0_<batch>_components.tar.gz").
  #
  # The process working directory is changed while the method runs (the
  # +tarball+ helper is called with bare file names from inside the gather
  # directory) and is always restored afterwards, even when an error is raised.
  #
  # component_dir:: path of the directory tree that contains the *.tar.gz files
  #
  # Returns the array of report lines that is also printed to stdout.
  # Raises whatever Dir.chdir / FileUtils / +tarball+ raise (e.g. when
  # +component_dir+ does not exist).
  def gather_components(component_dir)
    gather_dir = "0gather" # named so it will be at top of directory list
    gather_dest_base = "components.tar.gz"

    # an array to hold reporting info about the batches
    gather_components_report = []

    # the block form of chdir restores the starting directory on any exit path
    Dir.chdir(component_dir) do
      # create the gather directory up front so that an empty component
      # directory still produces a (zero component) report instead of an error
      FileUtils.mkdir_p(gather_dir)

      # delete old gather files first, otherwise the batch archives from a
      # previous run would be packaged into the new batches
      stale_batch = /\A0_\d+_#{Regexp.escape(gather_dest_base)}\z/
      Dir.entries(gather_dir).grep(stale_batch).each do |name|
        File.delete(File.join(gather_dir, name))
      end

      # copy all the components' tar.gz files into a single directory
      Pathname.glob("./**/*.tar.gz").each do |targz|
        source = targz.to_s
        destination = "./#{gather_dir}/#{File.basename(source)}"

        # the recursive glob also finds files gathered by an earlier run;
        # deleting the destination and then copying it onto itself would fail
        next if File.expand_path(source) == File.expand_path(destination)

        puts "copying #{source} to #{destination}"
        File.delete(destination) if File.exist?(destination)
        FileUtils.cp(source, destination)
      end

      # go into that directory so the batch tarballs hold bare file names
      Dir.chdir(gather_dir) do
        # sorted so the batch contents do not depend on directory order
        targzs = Pathname.glob("*.tar.gz").sort

        # report the total number of components in the directory
        gather_components_report << "Total components = #{targzs.length}"

        # package all the tar.gzs in the directory into a few master tar.gz
        # files of 1000 components or less
        targzs.each_slice(1000).each_with_index do |batch, batch_num|
          gather_components_report << " batch #{batch_num} contains #{batch.length} components"

          paths = batch.map { |targz| File.basename(targz.to_s) }

          # prefix moves the batch tarball to the top of the directory list
          gather_dest = "0_#{batch_num}_#{gather_dest_base}"

          # tar up the batch
          tarball(gather_dest, paths)
        end
      end
    end

    # report out
    puts gather_components_report
    gather_components_report
  end

end # module BCL
|
@@ -0,0 +1,409 @@
|
|
1
|
+
######################################################################
|
2
|
+
# Copyright (c) 2008-2010, Alliance for Sustainable Energy.
|
3
|
+
# All rights reserved.
|
4
|
+
#
|
5
|
+
# This library is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU Lesser General Public
|
7
|
+
# License as published by the Free Software Foundation; either
|
8
|
+
# version 2.1 of the License, or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This library is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
# Lesser General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU Lesser General Public
|
16
|
+
# License along with this library; if not, write to the Free Software
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
18
|
+
######################################################################
|
19
|
+
|
20
|
+
require 'rubygems'
|
21
|
+
require 'pathname'
|
22
|
+
require 'fileutils'
|
23
|
+
require 'builder' #gem install builder (creates xml files)
|
24
|
+
|
25
|
+
# Detect whether the WIN32OLE library (used to drive Excel) is available.
# $have_win32ole is true only when the library loads and defines WIN32OLE.
$have_win32ole = false
begin
  # apparently this is not a gem
  require 'win32ole'
  $have_win32ole = true if defined?(WIN32OLE)
rescue LoadError, NameError
  # do not have win32ole; a failed require raises LoadError, which is a
  # ScriptError and therefore is not caught by rescuing NameError alone
end
|
34
|
+
|
35
|
+
module BCL

  # each TagStruct represents a node in the taxonomy tree
  TagStruct = Struct.new(:level_hierarchy, :name, :description, :parent_tag, :child_tags, :terms)

  # each TermStruct represents a row in the master taxonomy
  TermStruct = Struct.new(:first_level, :second_level, :third_level, :level_hierarchy, :name, :description)

  # class for parsing, validating, and querying the master taxonomy document
  class MasterTaxonomy

    # Builds the taxonomy.
    #
    # xlsx_path:: path to the master taxonomy workbook. When nil, the
    #             taxonomy previously written by #save_as_current_taxonomy is
    #             loaded from the file next to this source file instead.
    #
    # Parsing a workbook drives Excel through WIN32OLE; when that library is
    # not available a message is printed and the taxonomy is left empty.
    # Raises RuntimeError when the "Terms" worksheet header is not as expected.
    def initialize(xlsx_path = nil)

      # hash of level_hierarchy to tag
      @tag_hash = {}

      if xlsx_path.nil?
        # load from the current taxonomy
        path = current_taxonomy_path
        puts "Loading current taxonomy from #{path}"
        # NOTE(review): Marshal.load can instantiate arbitrary objects; this is
        # only safe while the file is the trusted one written by
        # save_as_current_taxonomy.
        File.open(path, 'r') do |file|
          @tag_hash = Marshal.load(file)
        end
      else
        xlsx_path = Pathname.new(xlsx_path).realpath.to_s

        # WINDOWS ONLY SECTION BECAUSE THIS USES WIN32OLE
        if $have_win32ole
          # declared before the begin so the ensure clause can tell which
          # objects were actually created
          excel = nil
          xlsx = nil
          begin
            excel = WIN32OLE::new('Excel.Application')
            xlsx = excel.Workbooks.Open(xlsx_path)
            terms_worksheet = xlsx.Worksheets("Terms")
            parse_terms(terms_worksheet)
          ensure
            # not really saving just pretending so don't get prompted on quit
            xlsx.saved = true if xlsx
            if excel
              excel.Quit
              WIN32OLE.ole_free(excel)
              excel.ole_free
            end
            xlsx = nil
            excel = nil
            GC.start
          end
        else # if $have_win32ole
          puts "MasterTaxonomy class requires 'win32ole' to parse master taxonomy document."
          puts "MasterTaxonomy may also be stored and loaded from JSON if your platform does not support win32ole."
        end # if $have_win32ole
      end
    end

    # Saves the current taxonomy (a Marshal dump of the tag hash).
    #
    # path:: destination file; defaults to the file that #initialize loads
    #        when it is called without a workbook path.
    def save_as_current_taxonomy(path = nil)
      path ||= current_taxonomy_path
      puts "Saving current taxonomy to #{path}"
      File.open(path, 'w') do |file|
        Marshal.dump(@tag_hash, file)
      end
    end

    # Writes the taxonomy tree to an xml file at +path+.
    #
    # Returns false (after printing a message) when the taxonomy has no root
    # tag; otherwise returns the value of the File.open block.
    def write_xml(path)
      root_tag = @tag_hash[""]

      if root_tag.nil?
        puts "Cannot find root tag"
        return false
      end

      File.open(path, 'w') do |file|
        xml = Builder::XmlMarkup.new(:target => file, :indent => 2)

        # setup the xml file
        xml.instruct!(:xml, :version => "1.0", :encoding => "UTF-8")
        xml.schema("xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance") {
          write_tag_to_xml(root_tag, xml)
        }
      end
    end

    # Gets all terms for a given tag (a TagStruct), including the terms that
    # are inherited from parent levels. The tag's own terms, then its
    # parent's, then its grandparent's (and so on) are collected; that list
    # is reversed and de-duplicated, so the outermost ancestor's terms come
    # first.
    #
    # The tag itself is not modified.
    def get_terms(tag)
      # work on a copy: concat mutates its receiver, and appending to the
      # tag's own array would permanently add the ancestors' terms to the tag
      terms = tag.terms.dup

      parent_tag = tag.parent_tag
      until parent_tag.nil?
        terms.concat(parent_tag.terms)
        parent_tag = parent_tag.parent_tag
      end

      terms.reverse.uniq
    end

    # Checks that the given component conforms with the master taxonomy.
    #
    # The component must respond to +tags+ (each with a +descriptor+) and
    # +attributes+ (each with a +name+). It must carry exactly one tag, that
    # tag must exist in the taxonomy, and every attribute name must match a
    # term available to the tag. Problems are printed to stdout.
    #
    # Returns true when the component is valid, false otherwise.
    def check_component(component)
      valid = true
      tag = nil

      # see if we can find the component's tag in the taxonomy
      tags = component.tags
      if tags.empty?
        puts "[Check Component ERROR] Component does not have any tags"
      elsif tags.size > 1
        puts "[Check Component ERROR] Component has multiple tags"
      else
        tag = @tag_hash[tags[0].descriptor]
        puts "[Check Component ERROR] Cannot find #{tags[0].descriptor} in the master taxonomy" unless tag
      end

      # nothing more can be checked without a tag
      return false unless tag

      terms = get_terms(tag)

      # todo: check for all required attributes

      # check that all attributes are allowed
      component.attributes.each do |attribute|
        next if terms.any? { |t| t.name == attribute.name }

        puts "[Check Component ERROR] Cannot find term for #{attribute.name} in #{tag.level_hierarchy}"
        valid = false

        # todo: validate value, datatype, units
      end

      valid
    end

    private

    # Path of the file used to store the current taxonomy.
    def current_taxonomy_path
      File.dirname(__FILE__) + "/current_taxonomy.json"
    end

    # Parses the "Terms" worksheet into @tag_hash, then sorts and checks the
    # resulting tag tree. Raises RuntimeError on an unexpected header row.
    def parse_terms(terms_worksheet)

      # check header
      if validate_terms_header(terms_worksheet)
        raise "Header Error on Terms Worksheet"
      end

      # add root tag
      root_terms = [TermStruct.new("", "", "", "", "OpenStudio Type", "Type of OpenStudio Object")]
      root_tag = TagStruct.new("", "root", "Root of the taxonomy", nil, [], root_terms)
      @tag_hash[""] = root_tag

      # data starts on row 3; parse until hit empty value in first column
      row_num = 3
      loop do
        term = parse_term(terms_worksheet, row_num)
        break if term.nil?

        add_term(term)
        row_num += 1
      end

      # sort the tag tree
      sort_tag(root_tag)

      # check the tag tree
      check_tag(root_tag)
    end

    # Compares row 2 of the worksheet against the expected column titles.
    # Returns true when at least one title is wrong (i.e. there IS an error).
    def validate_terms_header(terms_worksheet)
      expected_titles = [
        ["A2", "First Level"],
        ["B2", "Second Level"],
        ["C2", "Third Level"],
        ["D2", "Level Hierarchy"],
        ["E2", "Term"],
        ["F2", "Abbr."],
        ["G2", "Description"]
      ]

      mismatched = expected_titles.reject do |cell, title|
        terms_worksheet.Range(cell).Value == title
      end

      !mismatched.empty?
    end

    # Reads one worksheet row into a TermStruct.
    # Returns nil when the first column is empty, which marks the end of data.
    def parse_term(terms_worksheet, row)
      term = TermStruct.new
      term.first_level = terms_worksheet.Range("A#{row}").Value
      term.second_level = terms_worksheet.Range("B#{row}").Value
      term.third_level = terms_worksheet.Range("C#{row}").Value
      term.level_hierarchy = terms_worksheet.Range("D#{row}").Value
      term.name = terms_worksheet.Range("E#{row}").Value
      #term.abbr = terms_worksheet.Range("F#{row}").Value
      term.description = terms_worksheet.Range("G#{row}").Value

      # trigger to quit parsing the Excel doc; to_s covers nil and also cell
      # values that are not Strings and so have no empty? method
      return nil if term.first_level.to_s.empty?

      term
    end

    # Adds a parsed row to the taxonomy, creating its tag when necessary.
    # A row without a term name describes the tag itself; any other row is a
    # term, which is skipped (nil is returned) when it fails validation.
    def add_term(term)
      level_hierarchy = term.level_hierarchy

      # create the tag
      tag = @tag_hash[level_hierarchy] || create_tag(level_hierarchy)

      if term.name.to_s.strip.empty?
        # this row is really about the tag
        tag.description = term.description
      else
        # this row is about a term
        return nil unless validate_term(term)

        tag.terms = [] if tag.terms.nil?
        tag.terms << term
      end
    end

    # Creates the tag for +level_hierarchy+ (a dot separated path), creating
    # any missing parent tags recursively, and registers it in @tag_hash.
    # Returns the new tag.
    def create_tag(level_hierarchy)
      parts = level_hierarchy.split('.')

      name = parts[-1]
      # a single level hierarchy yields "", which is the key of the root tag
      parent_level = parts[0..-2].join('.')

      parent_tag = @tag_hash[parent_level] || create_tag(parent_level)

      tag = TagStruct.new(level_hierarchy, name, "", parent_tag, [], [])
      parent_tag.child_tags << tag
      @tag_hash[level_hierarchy] = tag

      tag
    end

    # Sorts the terms and child tags of +tag+ by name, recursively.
    def sort_tag(tag)
      tag.terms = tag.terms.sort { |x, y| x.name <=> y.name }
      tag.child_tags = tag.child_tags.sort { |x, y| x.name <=> y.name }
      tag.child_tags.each { |child_tag| sort_tag(child_tag) }
    end

    # Prints a message for every tag and term below +tag+ (inclusive) that
    # has no description.
    def check_tag(tag)
      if tag.description.nil? || tag.description.empty?
        puts "tag '#{tag.level_hierarchy}' has no description"
      end

      tag.terms.each { |term| check_term(term) }
      tag.child_tags.each { |child_tag| check_tag(child_tag) }
    end

    # Checks that the level columns of a term agree with its level hierarchy
    # and that the hierarchy has between one and three parts. Every problem
    # found is printed. Returns true when the term is valid.
    def validate_term(term)
      valid = true

      parts = term.level_hierarchy.split('.')

      if parts.empty?
        puts "Hierarchy parts empty, #{term.level_hierarchy}"
        valid = false
      end

      if parts.size >= 1 && term.first_level != parts[0]
        puts "First level '#{term.first_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
        valid = false
      end

      if parts.size >= 2 && term.second_level != parts[1]
        puts "Second level '#{term.second_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
        valid = false
      end

      if parts.size >= 3 && term.third_level != parts[2]
        puts "Third level '#{term.third_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
        valid = false
      end

      if parts.size > 3
        puts "Hierarchy cannot have more than 3 parts '#{term.level_hierarchy}', skipping term"
        valid = false
      end

      # todo: check description, data type, enumerations, units, source, author

      valid
    end

    # Prints a message when +term+ has no description.
    def check_term(term)
      if term.description.nil? || term.description.empty?
        puts "term '#{term.level_hierarchy}.#{term.name}' has no description"
      end
    end

    # Writes +tag+, all of its terms (including inherited ones) and,
    # recursively, its child tags to the xml builder.
    def write_tag_to_xml(tag, xml)
      xml.tag(:name => "#{tag.name}") {
        # all terms, ordered by inheritance
        get_terms(tag).each do |term|
          xml.term(:name => "#{term.name}")
        end

        tag.child_tags.each do |child_tag|
          write_tag_to_xml(child_tag, xml)
        end
      }
    end

  end

end # module BCL
|
408
|
+
|
409
|
+
|