twb 1.0.5 → 1.9.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,508 +0,0 @@
1
- # calculatedfieldsanalyzer.rb - this Ruby script Copyright 2017 Christopher Gerrard
2
- #
3
- # This program is free software: you can redistribute it and/or modify
4
- # it under the terms of the GNU General Public License as published by
5
- # the Free Software Foundation, either version 3 of the License, or
6
- # (at your option) any later version.
7
- #
8
- # This program is distributed in the hope that it will be useful,
9
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
- # GNU General Public License for more details.
12
- #
13
- # You should have received a copy of the GNU General Public License
14
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
-
16
- require 'nokogiri'
17
- require 'twb'
18
- require 'set'
19
- require 'csv'
20
- require 'logger'
21
-
22
- module Twb
23
- module Analysis
24
-
25
- class CalculatedFieldsAnalyzer
26
-
27
# Running totals of CSV records written so far, readable by callers.
attr_reader :calculatedFieldsCount, :formulaFieldsCount

# File the analyzer's debug trace is logged to (see initialize / emit).
@@ttlogfile = 'CalculatedFieldsAnalyzer.ttlog'
# Graphviz 'dot' executable invoked by renderDot.
# NOTE(review): hard-coded, Windows-specific location.
@@gvDotLocation = 'C:\\tech\\graphviz\\Graphviz2.38\\bin\\dot.exe'
# Suffix inserted into the names of generated .dot and image files.
@@processName = '.CalculatedFields'

# Output CSV with one record per calculated field that has a formula.
@@calcFieldsCSVFileName = 'TwbCalculatedFields.csv'
# Column order must match the row assembled in processTWB.
@@calcFieldsCSVFileHeader = ['Record #',
                             'Workbook', 'Workbook Dir',
                             'Data Source', 'Data Source Caption', 'Data Source Name (tech)',
                             'Field Name', 'Field Caption', 'Field Name (tech)',
                             'Data Source + Field Name (tech)',
                             'Data Type', 'Role', 'Type',
                             'Class',
                             'Scope Isolation',
                             'Formula Length',
                             'Formula Code',
                             'Formula',
                             'Formula Comments',
                             'Formula LOD?'
                            ]

# Output CSV with one record per field referenced by a calculated field's formula.
@@formFieldsCSVFileName = 'TwbFormulaFields.csv'
# Column order must match the row assembled in processTWB.
@@formFieldsCSVFileHeader = ['Rec #',
                             'Workbook', 'Workbook Dir',
                             'Data Source',
                             'Field - Calculated',
                             'Data Source - Formula (tech)',
                             'Data Source - Formula',
                             'Field - Formula (tech)',
                             'Field - Formula',
                             'Data Source + Field - Calculated',
                             'Table'
                            ]

# NOTE(review): these are assigned in the class body, so they belong to the
# class object itself; they are not the same variables as the
# @techUINames / @fieldTables referenced inside instance methods.
@techUINames = {}
@fieldTables = {}


# Opening lines written to every generated .dot file (see initDot);
# the graph is closed again by closeDot.
@@dotHeader = <<DOTHEADER
digraph g {
graph [rankdir="LR" splines=line];
node [shape="box" width="2"];

DOTHEADER
72
-
73
# Prepares an analyzer: opens the log file and both CSV outputs (writing
# their header rows), zeroes the counters and creates the per-instance
# lookup state used while workbooks are processed.
def initialize
  #-- Logging setup --
  @logger = Logger.new(@@ttlogfile)
  @logger.level = Logger::DEBUG
  #-- CSV files setup --
  @calcFieldsCSVFile = CSV.open(@@calcFieldsCSVFileName, 'w')
  @calcFieldsCSVFile << @@calcFieldsCSVFileHeader
  # --
  @formFieldsCSVFile = CSV.open(@@formFieldsCSVFileName, 'w')
  @formFieldsCSVFile << @@formFieldsCSVFileHeader
  #-- Counters setup --
  @twbCount = 0
  @calculatedFieldsCount = 0
  @formulaFieldsCount = 0
  #-- Lookup tables --
  # These must be created here: instance variables assigned in the class
  # body are not visible to instance methods such as loadFieldTables.
  @techUINames = {}
  @fieldTables = {}
  # Only membership tests and merges are performed on this collection, so a
  # plain Set suffices (SortedSet is not shipped with 'set' on Ruby 3.0+).
  @referencedFields = Set.new
  # --
  @localEmit = false
  emit "\n\nLogging activity to: #{File.basename(@@ttlogfile)}"
  @imageFiles = []
end
94
-
95
# Loads the field -> table lookup from a CSV file into @fieldTables.
#
# Each CSV record is read positionally: column 0 is the lookup key,
# column 1 is stored under 'table' and column 2 under 'dbFieldName'.
#
# dataSource - accepted for interface compatibility; not used here.
# fieldsCSV  - path of the CSV to read; defaults to the location that was
#              previously hard-coded.
def loadFieldTables dataSource, fieldsCSV = 'C:\Professional\Clients\Incapsulate\Internal Project Monitoring\Project Portfolio v2\Salesforce Fields.csv'
  # Guard against the map never having been created for this instance.
  @fieldTables ||= {}
  emit "FIELD TABLES"
  @records = CSV.read(fieldsCSV)
  @records.each do |rec|
    emit "-- #{rec}"
    @fieldTables[rec[0]] = { 'table' => rec[1], 'dbFieldName' => rec[2] }
  end
  emit "=========="
  emit @fieldTables
  emit "=========="
  emit "FIELD TABLES"
end
110
-
111
# Analyzes the calculated fields of one workbook.
#
# For every data source except the Parameters data source, each calculated
# field that has a formula is written to the calculated-fields CSV, and each
# field its formula references is written to the formula-fields CSV. Graph
# nodes/edges describing data source -> field -> referenced field (-> table,
# -> remote data source) relationships are collected and handed to mapTwb
# and graphEdges once all data sources have been handled.
#
# twbWithDir - path of the workbook file.
#
# Returns @imageFiles, or nil when the workbook is the analysis workbook
# itself, which is skipped.
def processTWB twbWithDir
  twb = File.basename(twbWithDir)
  @twb = Twb::Workbook.new twbWithDir
  emit "- Workbook: #{twbWithDir}"
  emit " version: #{@twb.version}"
  # Skip the workbook that presents this tool's own output. The name has to
  # be passed to end_with?; comparing its bare result to the name never matched.
  return if twbWithDir.end_with?("Tableau Calculated Fields Analyses.twb")
  twbDir = File.dirname(File.expand_path(twbWithDir))
  edges = Set.new
  # -- processing
  dss = @twb.datasources
  twbRootFields = Set.new
  dss.each do |ds|
    emit "Datasource: '#{ds.uiname}' -> #{ds.Parameters?}"
    next if ds.Parameters? # don't process the Parameters data source
                           # it requires special handling, has different XML structure
    #-- For tracking unreferenced (root) calculated fields = calculatedFields - referencedFields
    # Plain Sets, sorted explicitly wherever order matters, so that no
    # SortedSet (absent from 'set' on Ruby 3.0+) is needed.
    calculatedFields = Set.new
    referencedFields = Set.new
    #--
    dsTechName = ds.name
    dsCaption = ds.caption
    dsName = ds.uiname
    dsID = dsTechName + ':::' + dsName
    emit "\n\n "
    emit "======================================================"
    emit "======================================================"
    emit "======= DATA SOURCE: #{ds.uiname} ====== "
    emit "======================================================"
    emit "======================================================\n\n "
    dsGraphNode = Twb::Util::Graphnode.new(name: dsName, id: dsID, type: :TwbDataConnection, properties: {workbook: twbWithDir})
    emit "\t dsgnode: #{dsGraphNode}"
    # NOTE(review): the result is not used below; the call is kept in case it
    # has side effects inside the data source - confirm and remove if not.
    fieldUINames = ds.fieldUINames
    calculationNodes = ds.calculatedFields
    emit "calculationNodes : nil? '#{calculationNodes.nil?}'"
    calculationNodes.each do |calcNode|
      calculation = Twb::FieldCalculation.new(calcNode, ds)
      emit "HANDLING CALCULATION NODE:"
      emit calcNode.attributes
      #-- field names --
      # The field's own attributes live on the calculation node's parent.
      fldCaption = calcNode.xpath('../@caption').text
      fldTechName = calcNode.xpath('../@name').text.gsub(/^\[/,'').gsub(/\]$/,'')
      # Prefer the caption; fall back to the technical name when there is none.
      fldName = fldCaption == '' ? fldTechName : fldCaption
      emit "\t Field : #{fldName}"
      emit "\t Formula : #{calcNode.attribute('formula')}"
      dataType = calcNode.xpath('../@datatype').text
      role = calcNode.xpath('../@role').text
      type = calcNode.xpath('../@type').text
      fieldID = fldTechName+'::'+dsName
      calculatedFields.add fieldID
      srcGraphNode = Twb::Util::Graphnode.new(name: fldName, id: fieldID, type: :CalculatedField, properties: {:DataSource => dsName})
      dsFieldEdge = Twb::Util::Graphedge.new(from: dsGraphNode, to: srcGraphNode, relationship: 'contains')
      edges.add dsFieldEdge
      # Calculations without a formula only contribute the 'contains' edge.
      next unless calcNode.has_attribute?('formula')
      formulaText = calcNode.attribute('formula').text
      emit "\t Formula: #{formulaText}"
      #-- field attributes --
      emit "\t srfnode: #{srcGraphNode} "
      emit "\t dsfedge: #{dsFieldEdge} "
      emit " "
      emit "\tFIELD cap: #{fldCaption} "
      emit "\t name: #{fldTechName} "
      emit "\t uiname: #{fldName} "
      emit "\t------------------------------------------------------------"
      #-- calculation --
      # Flatten the formula onto one line, marking line breaks with ' ## '.
      formulaFlat = formulaText.gsub(/\r\n/, ' ## ').gsub(/\n/, ' ## ').gsub(/[ ]+/,' ')
      formulaFlatFlat = formulaFlat.upcase
      formulaLOD = formulaFlatFlat.include?('{FIXED') || formulaFlatFlat.include?('{INCLUDE') || formulaFlatFlat.include?('{EXCLUDE')
      formulaLength = formulaText.length
      emit "\tFORMULA TEXT: #{formulaText} "
      emit "\t FLAT: #{formulaFlat}"
      emit "\t------------------------------------------------------------"
      comments = calculation.comments
      calcClass = calculation.class
      scopeIsolation = calcNode.xpath('./@scope-isolation').text
      # -- resolved fields: {internal field name => datasource}
      # -- datasource is only present for fields located in other data sources
      resolvedFields = calculation.resolvedFields
      # prepare UI formula, replacing technical field names with their UI forms
      uiFormula = formulaFlat.gsub(' XX ',' ')
      resolvedFields.each do |rf|
        emit "\tRESOLVED FLD: #{rf.inspect}"
        calcFieldName = rf[:field]
        if rf[:source].nil?
          calcFieldRef = "[%s]" % [ calcFieldName ]
          dispFieldRef = "[%s]" % [ ds.fieldUIName(calcFieldName) ]
        else
          remoteDS = @twb.datasource(rf[:source])
          remoteDSName = remoteDS.uiname
          remoteDSFld = remoteDS.fieldUIName(calcFieldName)
          calcFieldRef = "[%s].[%s]" % [ rf[:source], calcFieldName ]
          dispFieldRef = "[%s].[%s]" % [ remoteDSName, remoteDSFld ]
        end
        emit "\tcalcFieldRef: #{calcFieldRef}"
        emit "\tdispFieldRef: #{dispFieldRef}"
        uiFormula = uiFormula.gsub(calcFieldRef, dispFieldRef)
      end
      emit "\t FLAT: #{formulaFlat}"
      emit "\t Resolved: #{uiFormula}\n\t--"
      @calcFieldsCSVFile << [
        @calculatedFieldsCount += 1,
        twb, twbDir,
        dsName, dsCaption, dsTechName,
        fldName, fldCaption, fldTechName,
        dsTechName + '::' + fldTechName,
        dataType, role, type,
        calcClass, scopeIsolation,
        formulaLength, formulaFlat, uiFormula,
        comments,
        formulaLOD
      ]
      resolvedFields.each do |rf|
        emit "\t\t res field : #{rf[:field]} "
        emit "\t\t res source: #{rf[:source]}"
        calcFieldName = rf[:field]
        calcDataSource = rf[:source]
        localDataSource = rf[:source].nil? # if there isn't a rf[:source] value
                                           # the field is from this data source
                                           # else the field is from an alien data source (in the same workbook)
        refDataSource = localDataSource ? ds : @twb.datasource(calcDataSource)
        dispFieldName = refDataSource.fieldUIName(calcFieldName)
        calcFieldTable = refDataSource.fieldTable(calcFieldName)
        emit "\t\t calc field : #{dispFieldName} nil?<#{dispFieldName.nil?}>"
        emit "\t\t data source: #{refDataSource.uiname}"
        emit "\t\t table: #{calcFieldTable} nil?<#{calcFieldTable.nil?}>"
        properties = {'DataSource' => dsName, 'DataSourceReference' => 'local'}
        if dispFieldName.nil?
          dispFieldName = "<#{calcFieldName}>::<#{calcDataSource}> UNDEFINED"
          properties['status'] = 'UNDEFINED'
        end
        calcFieldID = "#{calcFieldName}::#{refDataSource.uiname}"
        unless localDataSource
          calcFieldID = "#{calcFieldName}:LDS:#{ds.uiname}:RDS:#{refDataSource.uiname}"
          properties['DataSourceReference'] = 'remote'
        end
        # NOTE(review): this replaces the table looked up via refDataSource
        # above with a lookup in the current data source - confirm intended.
        calcFieldTable = ds.fieldTable(calcFieldName)
        # A referenced field with no table is taken to be another calculated field.
        calcFieldType = calcFieldTable.nil? ? :CalculatedField : :DatabaseField
        calcFieldNode = Twb::Util::Graphnode.new(name: dispFieldName, id: calcFieldID, type: calcFieldType, properties: properties)
        fieldFieldEdge = Twb::Util::Graphedge.new(from: srcGraphNode, to: calcFieldNode, relationship: 'references')
        edges.add fieldFieldEdge
        referencedFields.add calcFieldID
        emit "\t\t calcFieldNode: #{calcFieldNode}"
        emit "\t\t graphEdge: #{fieldFieldEdge}"
        # Node a remote-data-source edge (if any) is attached to: the table
        # when the field has one, otherwise the field itself.
        fldToDsNode = calcFieldNode
        unless calcFieldTable.nil?
          tableID = calcFieldTable + ':::' + ds.uiname
          tableName = "-[#{calcFieldTable}]-"
          tableNode = Twb::Util::Graphnode.new(name: tableName, id: tableID, type: :DBTable, properties: properties)
          fieldFieldEdge = Twb::Util::Graphedge.new(from: calcFieldNode, to: tableNode, relationship: 'is a field in')
          edges.add fieldFieldEdge
          fldToDsNode = tableNode
        end
        unless localDataSource
          alienDSNode = Twb::Util::Graphnode.new( name: '==>' + refDataSource.uiname,
                                                  id: "#{ds.uiname}::::=>#{refDataSource.uiname}",
                                                  type: :DBTable,
                                                  properties: {'Home Source' => dsName, 'Remote Source' => refDataSource.uiname}
                                                )
          fieldFieldEdge = Twb::Util::Graphedge.new(from: fldToDsNode, to: alienDSNode, relationship: 'In Remote Data Source')
          edges.add fieldFieldEdge
        end
        # NOTE(review): the last column ('Table' in the header) receives the
        # literal text 'fieldTable' rather than a table name - confirm intended.
        @formFieldsCSVFile << [ @formulaFieldsCount+=1,
                                twb,
                                twbDir,
                                dsName,
                                fldName,
                                refDataSource.name,
                                refDataSource.uiname,
                                calcFieldName,
                                dispFieldName,
                                dsName + '::' + dispFieldName,
                                'fieldTable'
                              ]
      end
    end # calculationNodes.each
    # Sorted so that logging and root-field ranking keep a stable order.
    dsRootFields = (calculatedFields - referencedFields).sort
    @referencedFields.merge referencedFields
    #--
    emit "--\nCalculated Fields\n-----------------"
    calculatedFields.sort.each { |f| emit f }
    emit "--\nReferenced Fields\n-----------------"
    referencedFields.sort.each { |f| emit f }
    emit "--\nDS Root Fields\n-----------------"
    dsRootFields.each { |f| emit f }
    emit "--"
    # --
    twbRootFields.merge dsRootFields
  end # dss.each
  @twbCount += 1
  mapTwb twb, edges, twbRootFields
  graphEdges twb, edges
  emit "#######################"
  return @imageFiles
end
311
-
312
-
313
# Writes the Graphviz .dot description of a workbook's calculated-field
# graph and renders it as PDF, PNG and SVG.
#
# twb        - workbook name; used to name the generated files.
# edges      - collection of graph edges; each must respond to from, to and
#              dot, and its endpoints to type, id and dotLabel.
# rootFields - ids of calculated fields no other field references; they are
#              ranked together in the graph.
def mapTwb twb, edges, rootFields
  dotFile = initDot twb
  dotFileName = File.basename dotFile
  dotFile.puts "\n // subgraph cluster_1 {"
  dotFile.puts " // color= grey;"
  dotFile.puts ""
  emit "\n========================\nLoading Edges\n========================\n From DC? Referenced? Edge \n %s %s %s" % ['--------', '-----------', '-'*45]
  # Leave out an edge from a Data Connection to a Calculated Field that is
  # also referenced by another calculated field; the remaining edges are
  # de-duplicated and sorted by their dot text so each is written once, in
  # a stable order (no SortedSet needed, which Ruby 3.0+ no longer ships).
  drawnEdges = edges.reject do |e|
    e.from.type == :TwbDataConnection && @referencedFields.include?(e.to.id)
  end
  edgesAsStrings = drawnEdges.map(&:dot).uniq.sort
  emit "------------------------\n "
  edgesAsStrings.each do |es|
    dotFile.puts " #{es}"
    emit " #{es}"
  end
  emit "========================\n "
  dotFile.puts ""
  dotFile.puts " // }"
  dotFile.puts "\n\n // 4--------------------------------------------------------------------"
  # One label declaration per distinct node, from every edge (including the
  # edges that were not drawn above).
  nodeLabels = edges.flat_map { |e| [e.from.dotLabel, e.to.dotLabel] }.uniq.sort
  nodeLabels.each { |label| dotFile.puts label }
  dotFile.puts "\n\n // 5--------------------------------------------------------------------"
  emitTypes( edges, dotFile )
  rankRootFields( dotFile, rootFields )
  closeDot( dotFile, twb )
  renderDot(twb,dotFileName,'pdf')
  renderDot(twb,dotFileName,'png')
  renderDot(twb,dotFileName,'svg')
  emitEdges edges
end
359
-
360
-
361
# Writes the graph as Cypher statements to "<twb>.cypher" and records the
# file's base name in @imageFiles.
#
# twb   - workbook name; the output file is twb + '.cypher'.
# edges - collection of edges; each edge and each of its endpoints
#         (from / to) must respond to cypherCreate.
#
# Returns @imageFiles.
def graphEdges twb, edges
  cypherFileName = twb + '.cypher'
  # A Set, so a node shared by several edges is only created once.
  cypherCode = Set.new
  edges.each do |edge|
    cypherCode.add edge.from.cypherCreate
    cypherCode.add edge.to.cypherCreate
    cypherCode.add edge.cypherCreate
  end
  # Block form guarantees the file is closed even if writing fails.
  File.open(cypherFileName, 'w') do |graphFile|
    cypherCode.each { |cc| graphFile.puts cc }
    graphFile.puts "\nreturn *"
  end
  @imageFiles << File.basename(cypherFileName)
end
377
-
378
# Logs a two-column (type, node) listing of both endpoints of every edge,
# with a blank separator after each edge.
#
# edges - collection of edges whose endpoints respond to type.
def emitEdges edges
  row = " %-15s %s"
  emit format(row, 'type', 'Edge')
  emit format(row, '-'*15, '-'*35)
  edges.each do |edge|
    [edge.from, edge.to].each do |endpoint|
      emit format(row, endpoint.type, endpoint)
    end
    emit "\n "
  end
end
387
-
388
# Groups the endpoints of all edges by node type and writes a rank=same
# group to the dot file for every type except :CalculatedField.
#
# edges   - collection of edges whose endpoints respond to type.
# dotFile - open, writable dot file.
def emitTypes edges, dotFile
  dotFile.puts "\n\n // 2--------------------------------------------------------------------"
  nodesByType = {}
  edges.each do |edge|
    emit " EDGE :: #{edge}"
    [edge.from, edge.to].each { |endpoint| loadNodeType nodesByType, endpoint }
  end
  nodesByType.each do |type, nodes|
    emit "+++++++++ typedNodes of '#{type}'' "
    nodes.each { |node| emit " -n- #{node}" }
    # Calculated fields are deliberately not forced onto a common rank.
    next if type == :CalculatedField
    rankSame(dotFile, type, nodes)
  end
end
405
-
406
# Files a node under its type in the given hash of Sets, creating the Set
# for a type the first time it is seen.
#
# set  - Hash mapping node type => Set of nodes (despite the name).
# node - object responding to type.
#
# Returns the Set the node was added to.
def loadNodeType set, node
  kind = node.type
  bucket = set.key?(kind) ? set[kind] : (set[kind] = Set.new)
  bucket.add node
end
411
-
412
# Writes a {rank=same ...} group listing the quoted ids of the given nodes,
# preceded by a comment line naming the node type.
#
# dotFile - object responding to puts.
# type    - node type, used only in the comment line.
# nodes   - collection of nodes responding to id.
def rankSame dotFile, type, nodes
  dotFile.puts "\n // '#{type}' --------------------------------------------------------------------"
  dotFile.puts "\n {rank=same "
  quotedIds = nodes.map { |node| %( "#{node.id}") }
  quotedIds.each { |quotedId| dotFile.puts quotedId }
  dotFile.puts " }"
end
421
-
422
# Writes a {rank=same ...} group containing the quoted ids of the
# unreferenced (root) calculated fields.
#
# dotFile      - object responding to puts.
# dsRootFields - collection of root field ids.
def rankRootFields dotFile, dsRootFields
  dotFile.puts "\n // Unreferenced (root) Calculated Fields -----------------------------------------"
  dotFile.puts "\n {rank=same "
  quotedIds = dsRootFields.map { |rootId| %( "#{rootId}") }
  quotedIds.each { |quotedId| dotFile.puts quotedId }
  dotFile.puts " }"
end
430
-
431
-
432
# Writes a cluster containing one quoted node per distinct endpoint type
# found in the edges; writes nothing when there are no edges.
#
# dotFile - object responding to puts.
# edges   - collection of edges whose endpoints respond to type.
def labelTypes dotFile, edges
  typeLabels = edges.each_with_object(Set.new) do |edge, labels|
    labels << %("#{edge.from.type}") << %("#{edge.to.type}")
  end
  return if typeLabels.empty?
  dotFile.puts "\n // 3--------------------------------------------------------------------"
  dotFile.puts ' subgraph cluster_0 {'
  dotFile.puts ' color=white;'
  dotFile.puts ' node [shape="box3d" style="filled" ];'
  typeLabels.each { |label| dotFile.puts " #{label}" }
  dotFile.puts ' }'
end
449
-
450
-
451
# Logs a message at debug level and, when local is truthy, echoes it to
# standard output. Every line is prefixed with @emitPrefix.
#
# local - whether to also print; defaults to @localEmit.
# stuff - a String, which is logged one line at a time, or any other
#         object, which is logged once via string interpolation.
def emit(local=@localEmit, stuff)
  entries = stuff.is_a?(String) ? stuff.split(/\n/) : [stuff]
  entries.each do |entry|
    text = "#{@emitPrefix}#{entry}"
    @logger.debug text
    puts text if local
  end
end
464
-
465
-
466
# Creates "<twb><process name>.dot", writes the shared dot header to it and
# returns the still-open file; closeDot finishes and closes it.
#
# twb - workbook name used as the file name stem.
def initDot twb
  File.open("#{twb}#{@@processName}.dot",'w').tap do |dotFile|
    dotFile.puts @@dotHeader
  end
end
471
-
472
# Finishes a dot file: writes an invisible cluster holding a title node
# (workbook name plus the current time), closes the graph, and closes the file.
#
# dotFile - open dot file; must respond to puts and close.
# twb     - workbook name shown in the title node.
def closeDot dotFile, twb
  titleNode = " \"Tableau Tools\\nCalculated Fields Map\\nWorkbook '#{twb}'\\n#{Time.new.ctime}\" [penwidth=0]"
  footer = [
    ' ',
    '// -------------------------------------------------------------',
    ' ',
    ' subgraph cluster_1 {',
    ' style=invis;',
    ' node [border=blue];',
    ' ',
    ' "" [style=invis]',
    titleNode,
    ' ',
    ' }',
    ' ',
    '}'
  ]
  footer.each { |line| dotFile.puts line }
  dotFile.close
end
491
-
492
-
493
# Runs Graphviz on a dot file to produce an image in the requested format
# and records the image's file name in @imageFiles.
#
# twb    - workbook name; stem of the image file name.
# dot    - name of the dot file to render.
# format - Graphviz output format, e.g. 'pdf', 'png' or 'svg'.
#
# Returns the image file name. The exit status of the Graphviz call is not
# inspected.
def renderDot twb, dot, format
  emit "Rendering DOT file\n - #{twb}\n - #{dot}\n - #{format}"
  typeFlag = '-T' + format
  imageFile = twb + @@processName + 'Graph.' + format
  outputFlag = '-o' + imageFile
  command = [@@gvDotLocation, typeFlag, outputFlag, dot]
  emit "system #{command.join(' ')}"
  # Arguments are passed separately, so no shell quoting is involved.
  system(*command)
  @imageFiles << imageFile
  imageFile
end
503
-
504
-
505
- end # class
506
-
507
- end # module Analysis
508
- end # module Twb