bcl 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bcl.rb +6 -0
- data/lib/bcl/ComponentSpreadsheet.rb +292 -0
- data/lib/bcl/ComponentXml.rb +451 -0
- data/lib/bcl/GatherComponents.rb +94 -0
- data/lib/bcl/MasterTaxonomy.rb +409 -0
- data/lib/bcl/MongoToComponent.rb +116 -0
- data/lib/bcl/TarBall.rb +51 -0
- data/lib/bcl/current_taxonomy.json +0 -0
- data/lib/bcl/current_taxonomy.xml +451 -0
- metadata +145 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
######################################################################
|
2
|
+
# Copyright (c) 2008-2010, Alliance for Sustainable Energy.
|
3
|
+
# All rights reserved.
|
4
|
+
#
|
5
|
+
# This library is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU Lesser General Public
|
7
|
+
# License as published by the Free Software Foundation; either
|
8
|
+
# version 2.1 of the License, or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This library is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
# Lesser General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU Lesser General Public
|
16
|
+
# License along with this library; if not, write to the Free Software
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
18
|
+
######################################################################
|
19
|
+
|
20
|
+
require 'rubygems'
|
21
|
+
require 'pathname'
|
22
|
+
require 'fileutils'
|
23
|
+
require 'enumerator'
|
24
|
+
|
25
|
+
require 'bcl/TarBall'
|
26
|
+
|
27
|
+
module BCL
  module_function

  # Collects every component tarball found below +component_dir+ into the
  # sub-directory "0gather" and packages the collected files into one or more
  # batch tarballs named "0_<batch number>_components.tar.gz".
  #
  # component_dir - directory that is searched recursively for "*.tar.gz" files
  # batch_size    - maximum number of component tarballs per batch tarball
  #                 (optional, defaults to 1000)
  #
  # The batch tarballs are written by +tarball+ (loaded from bcl/TarBall), which
  # is called with the working directory set to "0gather" and file names that
  # are relative to it.
  #
  # Returns the array of report lines that is also printed to stdout.
  # The working directory of the process is restored before returning, even
  # when an error is raised part way through.
  #
  # NOTE: components are copied by base name only, so two components with the
  # same file name in different sub-directories overwrite each other.
  def gather_components(component_dir, batch_size = 1000)
    #store the starting directory
    current_dir = Dir.pwd

    #an array to hold reporting info about the batches
    gather_components_report = []

    gather_dir = "./0gather" #named so it will be at top of directory list
    gather_dest_base = "components.tar.gz"

    begin
      #go to the directory containing the components
      Dir.chdir(component_dir)

      #create the gather directory up front so a tree without components still works
      FileUtils.mkdir_p(gather_dir)
      gather_dir_prefix = File.expand_path(gather_dir) + "/"

      #copy all the components' tar.gz files into a single directory
      Pathname.glob("./**/*.tar.gz").each do |targz|
        source = targz.to_s
        next unless File.file?(source)
        #files already sitting in the gather directory come from a previous run;
        #copying them onto themselves would delete them first and then fail
        next if File.expand_path(source).start_with?(gather_dir_prefix)

        destination = "#{gather_dir}/#{File.basename(source)}"
        puts "copying #{source} to #{destination}"
        File.delete(destination) if File.exist?(destination)
        FileUtils.cp(source, destination)
      end

      #go into that directory
      Dir.chdir(gather_dir)

      #delete old gather files first so they do not end up inside the new batches
      stale_batch = /\A0_\d+_#{Regexp.escape(gather_dest_base)}\z/
      Dir.glob("0_*_#{gather_dest_base}").each do |old_batch|
        File.delete(old_batch) if old_batch =~ stale_batch
      end

      #get a list of all the tar.gz files in the new directory (sorted for repeatable batches)
      targzs = Pathname.glob("*.tar.gz").sort

      #report the total number of components in the directory
      gather_components_report << "Total components = #{targzs.length}"

      #package all the tar.gzs in the directory into a few master tar.gz files of batch_size components or less
      targzs.each_slice(batch_size).each_with_index do |batch, batch_num|
        gather_components_report << " batch #{batch_num} contains #{batch.length} components"

        #put all the paths in the batch into an array
        paths = batch.map { |targz| File.basename(targz.to_s) }

        #path where the batch tarball is going
        gather_dest = "0_#{batch_num}_#{gather_dest_base}" #prefix to move to top of directory

        #tar up the batch
        tarball(gather_dest, paths)
      end

      #report out
      puts gather_components_report
    ensure
      #change back to the directory where we started
      Dir.chdir(current_dir)
    end

    gather_components_report
  end

end # module BCL
|
@@ -0,0 +1,409 @@
|
|
1
|
+
######################################################################
|
2
|
+
# Copyright (c) 2008-2010, Alliance for Sustainable Energy.
|
3
|
+
# All rights reserved.
|
4
|
+
#
|
5
|
+
# This library is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU Lesser General Public
|
7
|
+
# License as published by the Free Software Foundation; either
|
8
|
+
# version 2.1 of the License, or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This library is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
# Lesser General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU Lesser General Public
|
16
|
+
# License along with this library; if not, write to the Free Software
|
17
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
18
|
+
######################################################################
|
19
|
+
|
20
|
+
require 'rubygems'
|
21
|
+
require 'pathname'
|
22
|
+
require 'fileutils'
|
23
|
+
require 'builder' #gem install builder (creates xml files)
|
24
|
+
|
25
|
+
# $have_win32ole records whether the win32ole library (needed to drive Excel)
# could be loaded on this platform.
$have_win32ole = false
begin
  # apparently this is not a gem
  require 'win32ole'
  $have_win32ole = true if defined?(WIN32OLE)
rescue LoadError, NameError
  # do not have win32ole; a missing library raises LoadError, which the
  # original "rescue NameError" did not catch
end
|
34
|
+
|
35
|
+
module BCL

  # each TagStruct represents a node in the taxonomy tree
  #   level_hierarchy - dot separated path of the node ("" for the root tag)
  #   name            - last part of level_hierarchy
  #   description     - description text of the tag
  #   parent_tag      - parent TagStruct (nil for the root tag)
  #   child_tags      - array of child TagStructs
  #   terms           - array of TermStructs defined directly on this tag
  TagStruct = Struct.new(:level_hierarchy, :name, :description, :parent_tag, :child_tags, :terms)

  # each TermStruct represents a row in the master taxonomy
  TermStruct = Struct.new(:first_level, :second_level, :third_level, :level_hierarchy, :name, :description)

  # class for parsing, validating, and querying the master taxonomy document
  class MasterTaxonomy

    # cell address => expected text of the header row on the "Terms" worksheet
    TERMS_HEADER = [
      ["A2", "First Level"],
      ["B2", "Second Level"],
      ["C2", "Third Level"],
      ["D2", "Level Hierarchy"],
      ["E2", "Term"],
      ["F2", "Abbr."],
      ["G2", "Description"]
    ].freeze

    # parse the master taxonomy document
    #
    # xlsx_path - path to the master taxonomy workbook; when nil the previously
    #             saved current taxonomy is loaded instead
    #
    # Parsing the workbook needs win32ole; without it only a message is printed
    # and the taxonomy stays empty.
    def initialize(xlsx_path = nil)

      # hash of level_taxonomy to tag
      @tag_hash = Hash.new

      if xlsx_path.nil?
        # load from the current taxonomy
        path = current_taxonomy_path
        puts "Loading current taxonomy from #{path}"
        # The file holds Marshal data (despite its .json name), so it has to be
        # read in binary mode.
        # NOTE(security): Marshal.load must only ever see trusted files.
        File.open(path, 'rb') do |file|
          @tag_hash = Marshal.load(file)
        end
      else
        xlsx_path = Pathname.new(xlsx_path).realpath.to_s

        # WINDOWS ONLY SECTION BECAUSE THIS USES WIN32OLE
        if $have_win32ole
          load_from_excel(xlsx_path)
        else # if $have_win32ole
          puts "MasterTaxonomy class requires 'win32ole' to parse master taxonomy document."
          puts "MasterTaxonomy may also be stored and loaded from JSON if your platform does not support win32ole."
        end # if $have_win32ole
      end
    end

    # save the current taxonomy
    #
    # path - destination file; defaults to the current taxonomy file that
    #        MasterTaxonomy.new loads when called without arguments
    def save_as_current_taxonomy(path = nil)
      path ||= current_taxonomy_path
      puts "Saving current taxonomy to #{path}"
      # binary mode: Marshal output is not text
      File.open(path, 'wb') do |file|
        Marshal.dump(@tag_hash, file)
      end
    end

    # write taxonomy to xml
    #
    # path - destination file
    #
    # Returns false (after printing a message) when the taxonomy has no root tag.
    def write_xml(path)
      root_tag = @tag_hash[""]

      if root_tag.nil?
        puts "Cannot find root tag"
        return false
      end

      File.open(path, 'w') do |file|
        xml = Builder::XmlMarkup.new(:target => file, :indent => 2)

        #setup the xml file
        xml.instruct!(:xml, :version => "1.0", :encoding => "UTF-8")
        xml.schema("xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance") {
          write_tag_to_xml(root_tag, xml)
        }
      end
    end

    # get all terms for a given tag
    # this includes terms that are inherited from parent levels
    # e.g. master_taxonomy.get_terms("Space Use.Lighting.Lamp Ballast")
    #
    # tag - a TagStruct, or the level hierarchy string of a tag
    #
    # Returns the terms ordered from the root tag down to the given tag, without
    # duplicates; an empty array when a level hierarchy string is not known.
    def get_terms(tag)
      tag = @tag_hash[tag] if tag.is_a?(String)
      return [] if tag.nil?

      # work on a copy: concat on tag.terms itself would permanently add the
      # inherited terms to the stored tag on every call
      terms = (tag.terms || []).dup

      parent_tag = tag.parent_tag
      until parent_tag.nil?
        terms.concat(parent_tag.terms || [])
        parent_tag = parent_tag.parent_tag
      end

      terms.reverse.uniq
    end

    # check that the given component conforms with the master taxonomy
    #
    # component - object responding to tags (each with a descriptor) and
    #             attributes (each with a name)
    #
    # Returns true when the component has exactly one tag, that tag is part of
    # the taxonomy and every attribute is named after a term of the tag;
    # problems are printed and make the result false.
    def check_component(component)
      # see if we can find the component's tag in the taxonomy
      tags = component.tags
      if tags.empty?
        puts "[Check Component ERROR] Component does not have any tags"
        return false
      end

      if tags.size > 1
        puts "[Check Component ERROR] Component has multiple tags"
        return false
      end

      tag = @tag_hash[tags[0].descriptor]
      unless tag
        puts "[Check Component ERROR] Cannot find #{tags[0].descriptor} in the master taxonomy"
        return false
      end

      terms = get_terms(tag)

      # todo: check for all required attributes

      # check that all attributes are allowed
      valid = true
      component.attributes.each do |attribute|
        term = terms.find { |t| t.name == attribute.name }

        unless term
          puts "[Check Component ERROR] Cannot find term for #{attribute.name} in #{tag.level_hierarchy}"
          valid = false
          next
        end

        # todo: validate value, datatype, units
      end

      valid
    end

    private

    # path of the file the current taxonomy is stored in, next to this source file
    def current_taxonomy_path
      File.dirname(__FILE__) + "/current_taxonomy.json"
    end

    # open the workbook with Excel and parse its "Terms" worksheet
    def load_from_excel(xlsx_path)
      excel = nil
      xlsx = nil
      begin
        excel = WIN32OLE::new('Excel.Application')
        xlsx = excel.Workbooks.Open(xlsx_path)
        terms_worksheet = xlsx.Worksheets("Terms")
        parse_terms(terms_worksheet)
      ensure
        # excel or xlsx are still nil when starting Excel or opening the
        # workbook failed; guard so the cleanup does not hide that error

        # not really saving just pretending so don't get prompted on quit
        xlsx.saved = true unless xlsx.nil?
        unless excel.nil?
          excel.Quit
          WIN32OLE.ole_free(excel)
          excel.ole_free
        end
        xlsx = nil
        excel = nil
        GC.start
      end
    end

    # build the tag tree from the rows of the "Terms" worksheet
    # raises a RuntimeError when the header row is not the expected one
    def parse_terms(terms_worksheet)

      # check header
      raise "Header Error on Terms Worksheet" if validate_terms_header(terms_worksheet)

      # add root tag
      root_terms = [TermStruct.new("", "", "", "", "OpenStudio Type", "Type of OpenStudio Object")]
      root_tag = TagStruct.new("", "root", "Root of the taxonomy", nil, [], root_terms)
      @tag_hash[""] = root_tag

      # find number of rows by parsing until hit empty value in first column
      row_num = 3
      loop do
        term = parse_term(terms_worksheet, row_num)
        break if term.nil?

        add_term(term)
        row_num += 1
      end

      # sort the tag tree
      sort_tag(root_tag)

      # check the tag tree
      check_tag(root_tag)
    end

    # returns true when a header cell of the worksheet differs from TERMS_HEADER
    def validate_terms_header(terms_worksheet)
      TERMS_HEADER.any? do |cell, expected|
        terms_worksheet.Range(cell).Value != expected
      end
    end

    # read one worksheet row into a TermStruct
    # returns nil when the first column of the row is empty (end of the data)
    def parse_term(terms_worksheet, row)
      term = TermStruct.new
      term.first_level = terms_worksheet.Range("A#{row}").Value
      term.second_level = terms_worksheet.Range("B#{row}").Value
      term.third_level = terms_worksheet.Range("C#{row}").Value
      term.level_hierarchy = terms_worksheet.Range("D#{row}").Value
      term.name = terms_worksheet.Range("E#{row}").Value
      #term.abbr = terms_worksheet.Range("F#{row}").Value
      term.description = terms_worksheet.Range("G#{row}").Value

      # trigger to quit parsing the xcel doc
      # (to_s because a cell may hold a number, which has no empty?)
      return nil if term.first_level.nil? || term.first_level.to_s.empty?

      term
    end

    # add a parsed row to the tag tree, creating its tag when necessary
    # a row without a term name describes the tag itself
    def add_term(term)
      level_hierarchy = term.level_hierarchy

      if level_hierarchy.nil?
        puts "Level hierarchy missing for term '#{term.name}', skipping term"
        return nil
      end

      # create the tag
      tag = @tag_hash[level_hierarchy] || create_tag(level_hierarchy)

      if term.name.nil? || term.name.strip.empty?
        # this row is really about the tag
        tag.description = term.description
      else
        # this row is about a term
        return nil unless validate_term(term)

        tag.terms = [] if tag.terms.nil?
        tag.terms << term
      end
    end

    # create the tag for level_hierarchy (and, recursively, any missing parent
    # tags), register it in @tag_hash and return it
    def create_tag(level_hierarchy)
      parts = level_hierarchy.split('.')

      name = parts[-1]
      parent_level = parts[0..-2].join('.')

      parent_tag = @tag_hash[parent_level] || create_tag(parent_level)

      tag = TagStruct.new(level_hierarchy, name, "", parent_tag, [], [])
      parent_tag.child_tags << tag
      @tag_hash[level_hierarchy] = tag

      tag
    end

    # sort the terms and child tags of tag by name, recursively
    def sort_tag(tag)
      tag.terms = tag.terms.sort_by { |term| term.name }
      tag.child_tags = tag.child_tags.sort_by { |child_tag| child_tag.name }
      tag.child_tags.each { |child_tag| sort_tag(child_tag) }
    end

    # report tags and terms without a description, recursively
    def check_tag(tag)
      if tag.description.nil? || tag.description.empty?
        puts "tag '#{tag.level_hierarchy}' has no description"
      end

      tag.terms.each { |term| check_term(term) }
      tag.child_tags.each { |child_tag| check_tag(child_tag) }
    end

    # check that the level columns of a term agree with its level hierarchy
    # returns false (after printing the reasons) when they do not
    def validate_term(term)
      valid = true

      parts = term.level_hierarchy.split('.')

      if parts.empty?
        puts "Hierarchy parts empty, #{term.level_hierarchy}"
        valid = false
      end

      if parts.size >= 1 && term.first_level != parts[0]
        puts "First level '#{term.first_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
        valid = false
      end

      if parts.size >= 2 && term.second_level != parts[1]
        puts "Second level '#{term.second_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
        valid = false
      end

      if parts.size >= 3 && term.third_level != parts[2]
        puts "Third level '#{term.third_level}' does not match level hierarchy '#{term.level_hierarchy}', skipping term"
        valid = false
      end

      if parts.size > 3
        puts "Hierarchy cannot have more than 3 parts '#{term.level_hierarchy}', skipping term"
        valid = false
      end

      # todo: check description, data type, enumerations, units, source, author

      valid
    end

    # report a term without a description
    def check_term(term)
      if term.description.nil? || term.description.empty?
        puts "term '#{term.level_hierarchy}.#{term.name}' has no description"
      end
    end

    # write a tag to xml: its terms (own and inherited) followed by its child tags
    def write_tag_to_xml(tag, xml)
      xml.tag(:name => "#{tag.name}") {
        # all terms, ordered by inheritance
        get_terms(tag).each do |term|
          xml.term(:name => "#{term.name}")
        end

        tag.child_tags.each do |child_tag|
          write_tag_to_xml(child_tag, xml)
        end
      }
    end

  end

end # module BCL
|
408
|
+
|
409
|
+
|