bcl 0.5.3 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,309 +1,295 @@
1
- ######################################################################
2
- # Copyright (c) 2008-2013, Alliance for Sustainable Energy.
3
- # All rights reserved.
4
- #
5
- # This library is free software; you can redistribute it and/or
6
- # modify it under the terms of the GNU Lesser General Public
7
- # License as published by the Free Software Foundation; either
8
- # version 2.1 of the License, or (at your option) any later version.
9
- #
10
- # This library is distributed in the hope that it will be useful,
11
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
- # Lesser General Public License for more details.
14
- #
15
- # You should have received a copy of the GNU Lesser General Public
16
- # License along with this library; if not, write to the Free Software
17
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
- ######################################################################
19
-
20
- # Converts a custom Excel spreadsheet format to BCL components for upload
21
- # Format of the Excel spreadsheet is documented in /doc/ComponentSpreadsheet.docx
22
-
23
- if RUBY_PLATFORM =~ /mswin|mingw|cygwin/
24
- begin
25
- # apparently this is not a gem (todo: need to remove and replace with roo)
26
- require 'win32ole'
27
- mod = WIN32OLE
28
- $have_win32ole = true
29
- rescue NameError
30
- # do not have win32ole
31
- end
32
- end
33
-
34
- module BCL
35
-
36
- WorksheetStruct = Struct.new(:name, :components)
37
- HeaderStruct = Struct.new(:name, :children)
38
- ComponentStruct = Struct.new(:row, :name, :uid, :version_id, :headers, :values)
39
-
40
- class ComponentSpreadsheet
41
-
42
- public
43
-
44
- # WINDOWS ONLY SECTION BECAUSE THIS USES WIN32OLE
45
- if $have_win32ole
46
-
47
- #initialize with Excel spreadsheet to read
48
- def initialize(xlsx_path, worksheet_names =["all"])
49
-
50
- @xlsx_path = Pathname.new(xlsx_path).realpath.to_s
51
- @worksheets = []
52
-
53
- begin
54
-
55
- excel = WIN32OLE::new('Excel.Application')
56
-
57
- xlsx = excel.Workbooks.Open(@xlsx_path)
58
-
59
- #by default, operate on all worksheets
60
- if worksheet_names == ["all"]
61
- xlsx.Worksheets.each do |xlsx_worksheet|
62
- parse_xlsx_worksheet(xlsx_worksheet)
63
- end
64
- else #if specific worksheets are specified, operate on them
65
- worksheet_names.each do |worksheet_name|
66
- parse_xlsx_worksheet(xlsx.Worksheets(worksheet_name))
67
- end
68
- end
69
-
70
- #save spreadsheet if changes have been made
71
- if xlsx.saved == true
72
- #puts "[ComponentSpreadsheet] Spreadsheet unchanged; not saving"
73
- else
74
- xlsx.Save
75
- puts "[ComponentSpreadsheet] Spreadsheet changes saved"
76
- end
77
-
78
- ensure
79
-
80
- excel.Quit
81
- WIN32OLE.ole_free(excel)
82
- excel.ole_free
83
- xlsx=nil
84
- excel=nil
85
- GC.start
86
-
87
- end
88
-
89
- end
90
-
91
- else # if $have_win32ole
92
-
93
- # parse the master taxonomy document
94
- def initialize(xlsx_path)
95
- puts "ComponentSpreadsheet class requires 'win32ole' to parse the component spreadsheet."
96
- puts "ComponentSpreadsheet may also be stored and loaded from JSON if your platform does not support win32ole."
97
- end
98
-
99
- end # if $have_win32ole
100
-
101
- def save(save_path, chunk_size = 1000, delete_old_gather = false)
102
-
103
- # load master taxonomy to validate components
104
- taxonomy = BCL::MasterTaxonomy.new
105
-
106
- #FileUtils.rm_rf(save_path) if File.exists?(save_path) and File.directory?(save_path)
107
-
108
- @worksheets.each do |worksheet|
109
- worksheet.components.each do |component|
110
-
111
- component_xml = Component.new("#{save_path}/components")
112
- component_xml.name = component.name
113
- component_xml.uid = component.uid
114
- component_xml.comp_version_id = component.version_id
115
-
116
- # this tag is how we know where this goes in the taxonomy
117
- component_xml.add_tag(worksheet.name)
118
-
119
- values = component.values[0]
120
- component.headers.each do |header|
121
-
122
- if /description/i.match(header.name)
123
-
124
- name = values.delete_at(0)
125
- uid = values.delete_at(0)
126
- version_id = values.delete_at(0)
127
- description = values.delete_at(0)
128
- fidelity_level = values.delete_at(0).to_int
129
- # name, uid, and version_id already processed
130
- component_xml.description = description
131
- component_xml.fidelity_level = fidelity_level
132
-
133
- elsif /provenance/i.match(header.name)
134
-
135
- author = values.delete_at(0)
136
- datetime = values.delete_at(0)
137
- if datetime.nil?
138
- #puts "[ComponentSpreadsheet] WARNING missing the date in the datetime column in the spreadsheet - assuming today"
139
- datetime = DateTime.new
140
- else
141
- datetime = DateTime.parse(datetime)
142
- end
143
-
144
- comment = values.delete_at(0)
145
- component_xml.add_provenance(author.to_s, datetime.to_s, comment.to_s)
146
-
147
- elsif /tag/i.match(header.name)
148
-
149
- value = values.delete_at(0)
150
- component_xml.add_tag(value)
151
-
152
- elsif /attribute/i.match(header.name)
153
-
154
- value = values.delete_at(0)
155
- name = header.children[0]
156
- units = ""
157
- if match_data = /(.*)\((.*)\)/.match(name)
158
- name = match_data[1].strip
159
- units = match_data[2].strip
160
- end
161
- component_xml.add_attribute(name, value, units)
162
-
163
- elsif /source/i.match(header.name)
164
-
165
- manufacturer = values.delete_at(0)
166
- model = values.delete_at(0)
167
- serial_no = values.delete_at(0)
168
- year = values.delete_at(0)
169
- url = values.delete_at(0)
170
- component_xml.source_manufacturer = manufacturer
171
- component_xml.source_model = model
172
- component_xml.source_serial_no = serial_no
173
- component_xml.source_year = year
174
- component_xml.source_url = url
175
-
176
- elsif /file/i.match(header.name)
177
-
178
- software_program = values.delete_at(0)
179
- version = values.delete_at(0)
180
- filename = values.delete_at(0)
181
- filetype = values.delete_at(0)
182
- filepath = values.delete_at(0)
183
- #not all components(rows) have all files; skip if filename "" or nil
184
- next if filename == "" or filename == nil
185
- #skip the file if it doesn't exist at the specified location
186
- if not File.exists?(filepath)
187
- puts "[ComponentSpreadsheet] ERROR #{filepath} -> File does not exist, will not be included in component xml"
188
- next #go to the next file
189
- end
190
- component_xml.add_file(software_program, version, filepath, filename, filetype)
191
-
192
- else
193
- raise "Unknown section #{header.name}"
194
-
195
- end
196
-
197
- end
198
-
199
- taxonomy.check_component(component_xml)
200
-
201
- component_xml.save_tar_gz(false)
202
-
203
- end
204
-
205
- end
206
-
207
- BCL.gather_components(save_path, chunk_size, delete_old_gather)
208
-
209
- end
210
-
211
- private
212
-
213
- def parse_xlsx_worksheet(xlsx_worksheet)
214
-
215
- worksheet = WorksheetStruct.new
216
- worksheet.name = xlsx_worksheet.Range("A1").Value
217
- worksheet.components = []
218
- puts "[ComponentSpreadsheet] Starting parsing components of type #{worksheet.name}"
219
-
220
- # find number of rows, first column should be name, should not be empty
221
- num_rows = 1
222
- while true do
223
- test = xlsx_worksheet.Range("A#{num_rows}").Value
224
- if test.nil? or test.empty?
225
- num_rows -= 1
226
- break
227
- end
228
- num_rows += 1
229
- end
230
-
231
- # scan number of columns
232
- headers = []
233
- header = nil
234
- max_col = nil
235
- xlsx_worksheet.Columns.each do |col|
236
- value1 = col.Rows("1").Value
237
- value2 = col.Rows("2").Value
238
-
239
- if not value1.nil? and not value1.empty?
240
- if not header.nil?
241
- headers << header
242
- end
243
- header = HeaderStruct.new
244
- header.name = value1
245
- header.children = []
246
- end
247
-
248
- if not value2.nil? and not value2.empty?
249
- if not header.nil?
250
- header.children << value2
251
- end
252
- end
253
-
254
- if (value1.nil? or value1.empty?) and (value2.nil? or value2.empty?)
255
- break
256
- end
257
-
258
- matchdata = /^\$(.+):/.match(col.Address)
259
- max_col = matchdata[1]
260
- end
261
-
262
- if not header.nil?
263
- headers << header
264
- end
265
-
266
- if not headers.empty?
267
- headers[0].name = "description"
268
- end
269
-
270
- puts " Found #{num_rows - 2} components"
271
-
272
- components = []
273
- for i in 3..num_rows do
274
- component = ComponentStruct.new
275
- component.row = i
276
-
277
- # get name
278
- component.name = xlsx_worksheet.Range("A#{i}").value
279
-
280
- # get uid, if empty set it
281
- component.uid = xlsx_worksheet.Range("B#{i}").value
282
- if component.uid.nil? or component.uid.empty?
283
- component.uid = UUID.new.generate
284
- puts "#{component.name} uid missing; creating new one"
285
- xlsx_worksheet.Range("B#{i}").value = component.uid
286
- end
287
-
288
- # get version_id, if empty set it
289
- component.version_id = xlsx_worksheet.Range("C#{i}").value
290
- if component.version_id.nil? or component.version_id.empty?
291
- component.version_id = UUID.new.generate
292
- puts "#{component.name} version id missing; creating new one"
293
- xlsx_worksheet.Range("C#{i}").value = component.version_id
294
- end
295
-
296
- component.headers = headers
297
- component.values = xlsx_worksheet.Range("A#{i}:#{max_col}#{i}").value
298
- worksheet.components << component
299
- end
300
-
301
- @worksheets << worksheet
302
-
303
- puts "[ComponentSpreadsheet] Finished parsing components of type #{worksheet.name}"
304
-
305
- end
306
-
307
- end
308
-
309
- end # module BCL
1
+ ######################################################################
2
+ # Copyright (c) 2008-2014, Alliance for Sustainable Energy.
3
+ # All rights reserved.
4
+ #
5
+ # This library is free software; you can redistribute it and/or
6
+ # modify it under the terms of the GNU Lesser General Public
7
+ # License as published by the Free Software Foundation; either
8
+ # version 2.1 of the License, or (at your option) any later version.
9
+ #
10
+ # This library is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ # Lesser General Public License for more details.
14
+ #
15
+ # You should have received a copy of the GNU Lesser General Public
16
+ # License along with this library; if not, write to the Free Software
17
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
+ ######################################################################
19
+
20
+ # Converts a custom Excel spreadsheet format to BCL components for upload
21
+ # Format of the Excel spreadsheet is documented in /doc/ComponentSpreadsheet.docx
22
+
23
+ if RUBY_PLATFORM =~ /mswin|mingw|cygwin/
24
+ begin
25
+ # win32ole is apparently not distributed as a gem (TODO: remove it and replace with roo)
26
+ require 'win32ole'
27
+ mod = WIN32OLE
28
+ $have_win32ole = true
29
+ rescue NameError
30
+ # do not have win32ole
31
+ end
32
+ end
33
+
34
+ module BCL
35
+ class ComponentSpreadsheet
36
+ public
37
+
38
+ # WINDOWS ONLY SECTION BECAUSE THIS USES WIN32OLE
39
+ if $have_win32ole
40
+
41
+ # initialize with Excel spreadsheet to read
42
+ def initialize(xlsx_path, worksheet_names = ['all'])
43
+ @xlsx_path = Pathname.new(xlsx_path).realpath.to_s
44
+ @worksheets = []
45
+
46
+ begin
47
+
48
+ excel = WIN32OLE.new('Excel.Application')
49
+
50
+ xlsx = excel.Workbooks.Open(@xlsx_path)
51
+
52
+ # by default, operate on all worksheets
53
+ if worksheet_names == ['all']
54
+ xlsx.Worksheets.each do |xlsx_worksheet|
55
+ parse_xlsx_worksheet(xlsx_worksheet)
56
+ end
57
+ else # if specific worksheets are specified, operate on them
58
+ worksheet_names.each do |worksheet_name|
59
+ parse_xlsx_worksheet(xlsx.Worksheets(worksheet_name))
60
+ end
61
+ end
62
+
63
+ # save spreadsheet if changes have been made
64
+ if xlsx.saved == true
65
+ # puts "[ComponentSpreadsheet] Spreadsheet unchanged; not saving"
66
+ else
67
+ xlsx.Save
68
+ puts '[ComponentSpreadsheet] Spreadsheet changes saved'
69
+ end
70
+
71
+ ensure
72
+
73
+ excel.Quit
74
+ WIN32OLE.ole_free(excel)
75
+ excel.ole_free
76
+ xlsx = nil
77
+ excel = nil
78
+ GC.start
79
+
80
+ end
81
+ end
82
+
83
+ else # if $have_win32ole
84
+
85
+ # fallback constructor for platforms without win32ole: only reports that the spreadsheet cannot be parsed
86
+ def initialize(_xlsx_path)
87
+ puts "ComponentSpreadsheet class requires 'win32ole' to parse the component spreadsheet."
88
+ puts 'ComponentSpreadsheet may also be stored and loaded from JSON if your platform does not support win32ole.'
89
+ end
90
+
91
+ end # if $have_win32ole
92
+
93
+ def save(save_path, chunk_size = 1000, delete_old_gather = false)
94
+ # load master taxonomy to validate components
95
+ taxonomy = BCL::MasterTaxonomy.new
96
+
97
+ # FileUtils.rm_rf(save_path) if File.exists?(save_path) and File.directory?(save_path)
98
+
99
+ @worksheets.each do |worksheet|
100
+ worksheet.components.each do |component|
101
+
102
+ component_xml = Component.new("#{save_path}/components")
103
+ component_xml.name = component.name
104
+ component_xml.uid = component.uid
105
+ component_xml.comp_version_id = component.version_id
106
+
107
+ # this tag is how we know where this goes in the taxonomy
108
+ component_xml.add_tag(worksheet.name)
109
+
110
+ values = component.values[0]
111
+ component.headers.each do |header|
112
+
113
+ if /description/i.match(header.name)
114
+
115
+ name = values.delete_at(0)
116
+ uid = values.delete_at(0)
117
+ version_id = values.delete_at(0)
118
+ description = values.delete_at(0)
119
+ fidelity_level = values.delete_at(0).to_int
120
+ # name, uid, and version_id already processed
121
+ component_xml.description = description
122
+ component_xml.fidelity_level = fidelity_level
123
+
124
+ elsif /provenance/i.match(header.name)
125
+
126
+ author = values.delete_at(0)
127
+ datetime = values.delete_at(0)
128
+ if datetime.nil?
129
+ # puts "[ComponentSpreadsheet] WARNING missing the date in the datetime column in the spreadsheet - assuming today" -- NOTE(review): DateTime.new below is not today's date; DateTime.now would be - confirm intent
130
+ datetime = DateTime.new
131
+ else
132
+ datetime = DateTime.parse(datetime)
133
+ end
134
+
135
+ comment = values.delete_at(0)
136
+ component_xml.add_provenance(author.to_s, datetime.to_s, comment.to_s)
137
+
138
+ elsif /tag/i.match(header.name)
139
+
140
+ value = values.delete_at(0)
141
+ component_xml.add_tag(value)
142
+
143
+ elsif /attribute/i.match(header.name)
144
+
145
+ value = values.delete_at(0)
146
+ name = header.children[0]
147
+ units = ''
148
+ if match_data = /(.*)\((.*)\)/.match(name)
149
+ name = match_data[1].strip
150
+ units = match_data[2].strip
151
+ end
152
+ component_xml.add_attribute(name, value, units)
153
+
154
+ elsif /source/i.match(header.name)
155
+
156
+ manufacturer = values.delete_at(0)
157
+ model = values.delete_at(0)
158
+ serial_no = values.delete_at(0)
159
+ year = values.delete_at(0)
160
+ url = values.delete_at(0)
161
+ component_xml.source_manufacturer = manufacturer
162
+ component_xml.source_model = model
163
+ component_xml.source_serial_no = serial_no
164
+ component_xml.source_year = year
165
+ component_xml.source_url = url
166
+
167
+ elsif /file/i.match(header.name)
168
+
169
+ software_program = values.delete_at(0)
170
+ version = values.delete_at(0)
171
+ filename = values.delete_at(0)
172
+ filetype = values.delete_at(0)
173
+ filepath = values.delete_at(0)
174
+ # not all components (rows) have all files; skip if filename is "" or nil
175
+ next if filename == '' or filename.nil?
176
+ # skip the file if it doesn't exist at the specified location
177
+ unless File.exist?(filepath)
178
+ puts "[ComponentSpreadsheet] ERROR #{filepath} -> File does not exist, will not be included in component xml"
179
+ next # go to the next file
180
+ end
181
+ component_xml.add_file(software_program, version, filepath, filename, filetype)
182
+
183
+ else
184
+ fail "Unknown section #{header.name}"
185
+
186
+ end
187
+
188
+ end
189
+
190
+ taxonomy.check_component(component_xml)
191
+
192
+ component_xml.save_tar_gz(false)
193
+
194
+ end
195
+
196
+ end
197
+
198
+ BCL.gather_components(save_path, chunk_size, delete_old_gather)
199
+ end
200
+
201
+ private
202
+
203
+ def parse_xlsx_worksheet(xlsx_worksheet)
204
+ worksheet = WorksheetStruct.new
205
+ worksheet.name = xlsx_worksheet.Range('A1').Value
206
+ worksheet.components = []
207
+ puts "[ComponentSpreadsheet] Starting parsing components of type #{worksheet.name}"
208
+
209
+ # count the rows: column A should hold the name, so the first empty cell in column A ends the data
210
+ num_rows = 1
211
+ while true
212
+ test = xlsx_worksheet.Range("A#{num_rows}").Value
213
+ if test.nil? or test.empty?
214
+ num_rows -= 1
215
+ break
216
+ end
217
+ num_rows += 1
218
+ end
219
+
220
+ # scan the columns to collect the headers and find the last used column
221
+ headers = []
222
+ header = nil
223
+ max_col = nil
224
+ xlsx_worksheet.Columns.each do |col|
225
+ value1 = col.Rows('1').Value
226
+ value2 = col.Rows('2').Value
227
+
228
+ if not value1.nil? and not value1.empty?
229
+ unless header.nil?
230
+ headers << header
231
+ end
232
+ header = HeaderStruct.new
233
+ header.name = value1
234
+ header.children = []
235
+ end
236
+
237
+ if not value2.nil? and not value2.empty?
238
+ unless header.nil?
239
+ header.children << value2
240
+ end
241
+ end
242
+
243
+ if (value1.nil? or value1.empty?) and (value2.nil? or value2.empty?)
244
+ break
245
+ end
246
+
247
+ matchdata = /^\$(.+):/.match(col.Address)
248
+ max_col = matchdata[1]
249
+ end
250
+
251
+ unless header.nil?
252
+ headers << header
253
+ end
254
+
255
+ unless headers.empty?
256
+ headers[0].name = 'description'
257
+ end
258
+
259
+ puts " Found #{num_rows - 2} components"
260
+
261
+ components = []
262
+ for i in 3..num_rows do
263
+ component = ComponentStruct.new
264
+ component.row = i
265
+
266
+ # get name
267
+ component.name = xlsx_worksheet.Range("A#{i}").value
268
+
269
+ # get uid; if empty, generate one and write it back to the worksheet
270
+ component.uid = xlsx_worksheet.Range("B#{i}").value
271
+ if component.uid.nil? or component.uid.empty?
272
+ component.uid = UUID.new.generate
273
+ puts "#{component.name} uid missing; creating new one"
274
+ xlsx_worksheet.Range("B#{i}").value = component.uid
275
+ end
276
+
277
+ # get version_id; if empty, generate one and write it back to the worksheet
278
+ component.version_id = xlsx_worksheet.Range("C#{i}").value
279
+ if component.version_id.nil? or component.version_id.empty?
280
+ component.version_id = UUID.new.generate
281
+ puts "#{component.name} version id missing; creating new one"
282
+ xlsx_worksheet.Range("C#{i}").value = component.version_id
283
+ end
284
+
285
+ component.headers = headers
286
+ component.values = xlsx_worksheet.Range("A#{i}:#{max_col}#{i}").value
287
+ worksheet.components << component
288
+ end
289
+
290
+ @worksheets << worksheet
291
+
292
+ puts "[ComponentSpreadsheet] Finished parsing components of type #{worksheet.name}"
293
+ end
294
+ end
295
+ end # module BCL