twb 1.0.5 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,508 +0,0 @@
1
- # calculatedfieldsanalyzer.rb - this Ruby script Copyright 2017 Christopher Gerrard
2
- #
3
- # This program is free software: you can redistribute it and/or modify
4
- # it under the terms of the GNU General Public License as published by
5
- # the Free Software Foundation, either version 3 of the License, or
6
- # (at your option) any later version.
7
- #
8
- # This program is distributed in the hope that it will be useful,
9
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
- # GNU General Public License for more details.
12
- #
13
- # You should have received a copy of the GNU General Public License
14
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
-
16
- require 'nokogiri'
17
- require 'twb'
18
- require 'set'
19
- require 'csv'
20
- require 'logger'
21
-
22
- module Twb
23
- module Analysis
24
-
25
- class CalculatedFieldsAnalyzer
26
-
27
- attr_reader :calculatedFieldsCount, :formulaFieldsCount
28
-
29
- @@ttlogfile = 'CalculatedFieldsAnalyzer.ttlog'
30
- @@gvDotLocation = 'C:\\tech\\graphviz\\Graphviz2.38\\bin\\dot.exe'
31
- @@processName = '.CalculatedFields'
32
-
33
- @@calcFieldsCSVFileName = 'TwbCalculatedFields.csv'
34
- @@calcFieldsCSVFileHeader = ['Record #',
35
- 'Workbook', 'Workbook Dir',
36
- 'Data Source', 'Data Source Caption', 'Data Source Name (tech)',
37
- 'Field Name', 'Field Caption', 'Field Name (tech)',
38
- 'Data Source + Field Name (tech)',
39
- 'Data Type', 'Role', 'Type',
40
- 'Class',
41
- 'Scope Isolation',
42
- 'Formula Length',
43
- 'Formula Code',
44
- 'Formula',
45
- 'Formula Comments',
46
- 'Formula LOD?'
47
- ]
48
-
49
- @@formFieldsCSVFileName = 'TwbFormulaFields.csv'
50
- @@formFieldsCSVFileHeader = ['Rec #',
51
- 'Workbook', 'Workbook Dir',
52
- 'Data Source',
53
- 'Field - Calculated',
54
- 'Data Source - Formula (tech)',
55
- 'Data Source - Formula',
56
- 'Field - Formula (tech)',
57
- 'Field - Formula',
58
- 'Data Source + Field - Calculated',
59
- 'Table'
60
- ]
61
-
62
- @techUINames = {}
63
- @fieldTables = {}
64
-
65
-
66
- @@dotHeader = <<DOTHEADER
67
- digraph g {
68
- graph [rankdir="LR" splines=line];
69
- node [shape="box" width="2"];
70
-
71
- DOTHEADER
72
-
73
- def initialize
74
- #-- Logging setup --
75
- @logger = Logger.new(@@ttlogfile)
76
- @logger.level = Logger::DEBUG
77
- #-- CSV files setup --
78
- @calcFieldsCSVFile = CSV.open(@@calcFieldsCSVFileName,'w')
79
- @calcFieldsCSVFile << @@calcFieldsCSVFileHeader
80
- # --
81
- @formFieldsCSVFile = CSV.open(@@formFieldsCSVFileName ,'w')
82
- @formFieldsCSVFile << @@formFieldsCSVFileHeader
83
- #-- Counters setup --
84
- @twbCount = 0
85
- @calculatedFieldsCount = 0
86
- @formulaFieldsCount = 0
87
- # --
88
- @referencedFields = SortedSet.new
89
- # --
90
- @localEmit = false
91
- emit "\n\nLogging activity to: #{File.basename(@@ttlogfile)}"
92
- @imageFiles = []
93
- end
94
-
95
- def loadFieldTables dataSource
96
- emit "FIELD TABLES"
97
- @records = CSV.read('C:\Professional\Clients\Incapsulate\Internal Project Monitoring\Project Portfolio v2\Salesforce Fields.csv')
98
- @records.each do |rec|
99
- emit "-- #{rec}"
100
- m = {}
101
- m['table'] = rec[1]
102
- m['dbFieldName'] = rec[2]
103
- @fieldTables[rec[0]] = m
104
- end
105
- emit "=========="
106
- emit @fieldTables
107
- emit "=========="
108
- emit "FIELD TABLES"
109
- end
110
-
111
- def processTWB twbWithDir
112
- twb = File.basename(twbWithDir)
113
- @twb = Twb::Workbook.new twbWithDir
114
- emit "- Workbook: #{twbWithDir}"
115
- emit " version: #{@twb.version}"
116
- return if twbWithDir.end_with? == "Tableau Calculated Fields Analyses.twb"
117
- twbDir = File.dirname(File.expand_path(twbWithDir))
118
- edges = Set.new
119
- # -- processing
120
- dss = @twb.datasources
121
- twbRootFields = Set.new
122
- dss.each do |ds|
123
- emit "Datasource: '#{ds.uiname}' -> #{ds.Parameters?}"
124
- next if ds.Parameters? # don't process the Parameters data source
125
- # it requires special handling, has different XML structure
126
- #-- For tracking unreferenced (root) calculated fields = calculatedFields - referencedFields
127
- calculatedFields = SortedSet.new
128
- referencedFields = SortedSet.new
129
- #--
130
- dsTechName = ds.name
131
- dsCaption = ds.caption
132
- dsName = ds.uiname
133
- dsID = dsTechName + ':::' + dsName
134
- emit "\n\n "
135
- emit "======================================================"
136
- emit "======================================================"
137
- emit "======= DATA SOURCE: #{ds.uiname} ====== "
138
- emit "======================================================"
139
- emit "======================================================\n\n "
140
- dsGraphNode = Twb::Util::Graphnode.new(name: dsName, id: dsID, type: :TwbDataConnection, properties: {workbook: twbWithDir})
141
- emit "\t dsgnode: #{dsGraphNode}"
142
- fieldUINames = ds.fieldUINames
143
- calculationNodes = ds.calculatedFields
144
- emit "calculationNodes : nil? '#{calculationNodes.nil?}'" # - len '#{calculationNodes.length}'"
145
- calculationNodes.each do |calcNode|
146
- calculation = Twb::FieldCalculation.new(calcNode, ds)
147
- emit "HANDLING CALCULATION NODE:"
148
- emit calcNode.attributes
149
- #-- field names --
150
- fldCaption, = calcNode.xpath('../@caption').text
151
- fldTechName = calcNode.xpath('../@name').text.gsub(/^\[/,'').gsub(/\]$/,'')
152
- fldName = if fldCaption == ''
153
- then fldTechName
154
- else fldCaption
155
- end
156
- emit "\t Field : #{fldName}"
157
- emit "\t Formula : #{calcNode.attribute('formula')}"
158
- dataType = calcNode.xpath('../@datatype').text
159
- role = calcNode.xpath('../@role').text
160
- type = calcNode.xpath('../@type').text
161
- fieldID = fldTechName+'::'+dsName
162
- calculatedFields.add fieldID
163
- srcGraphNode = Twb::Util::Graphnode.new(name: fldName, id: fieldID, type: :CalculatedField, properties: {:DataSource => dsName})
164
- dsFieldEdge = Twb::Util::Graphedge.new(from: dsGraphNode, to: srcGraphNode, relationship: 'contains')
165
- edges.add dsFieldEdge
166
- hasFormula = calcNode.has_attribute?('formula')
167
- if hasFormula
168
- formulaText = calcNode.attribute('formula').text
169
- emit "\t Formula: #{formulaText}"
170
- #-- field attributes --
171
- # fldDispLabel = "#{fldName}\n--\n#{formulaText.gsub('"', "'")}" #fldName + '\n--' + formulaText.to_s
172
- emit "\t srfnode: #{srcGraphNode} "
173
- emit "\t dsfedge: #{dsFieldEdge} "
174
- emit " "
175
- emit "\tFIELD cap: #{fldCaption} "
176
- emit "\t name: #{fldTechName} "
177
- emit "\t uiname: #{fldName} "
178
- emit "\t------------------------------------------------------------"
179
- #-- calculation --
180
- formulaFlat = formulaText.gsub(/\r\n/, ' ## ').gsub(/\n/, ' ## ').gsub(/[ ]+/,' ')
181
- formulaFlatFlat = formulaFlat.upcase
182
- formulaLOD = formulaFlatFlat.include?('{FIXED') || formulaFlatFlat.include?('{INCLUDE') || formulaFlatFlat.include?('{EXCLUDE')
183
- formulaLength = formulaText.length
184
- emit "\tFORMULA TEXT: #{formulaText} "
185
- emit "\t FLAT: #{formulaFlat}"
186
- emit "\t------------------------------------------------------------"
187
- comments = calculation.comments # getComments( formulaText )
188
- calcClass = calculation.class # d.xpath('./@class').text
189
- scopeIsolation = calcNode.xpath('./@scope-isolation').text
190
- # -- resolved fields: {internal field name => datasource}
191
- # -- datasource is only present for fields located in other data sources
192
- resolvedFields = calculation.resolvedFields
193
- # prepare UI formula, replacing technical field names with their UI forms
194
- uiFormula = formulaFlat.gsub(' XX ',' ')
195
- resolvedFields.each do |rf|
196
- emit "\tRESOLVED FLD: #{rf.inspect}"
197
- calcFieldName = rf[:field]
198
- if rf[:source].nil?
199
- calcFieldRef = "[%s]" % [ calcFieldName ]
200
- dispFieldRef = "[%s]" % [ ds.fieldUIName(calcFieldName) ]
201
- else
202
- remoteDS = @twb.datasource(rf[:source])
203
- remoteDSName = remoteDS.uiname
204
- remoteDSFld = remoteDS.fieldUIName(calcFieldName)
205
- calcFieldRef = "[%s].[%s]" % [ rf[:source], calcFieldName ]
206
- dispFieldRef = "[%s].[%s]" % [ remoteDSName, remoteDSFld ]
207
- end
208
- emit "\tcalcFieldRef: #{calcFieldRef}"
209
- emit "\tdispFieldRef: #{dispFieldRef}"
210
- uiFormula = uiFormula.gsub(calcFieldRef, dispFieldRef)
211
- end
212
- emit "\t FLAT: #{formulaFlat}"
213
- emit "\t Resolved: #{uiFormula}\n\t--"
214
- @calcFieldsCSVFile << [
215
- @calculatedFieldsCount += 1,
216
- twb, twbDir,
217
- dsName, dsCaption, dsTechName,
218
- fldName, fldCaption, fldTechName,
219
- dsTechName + '::' + fldTechName,
220
- dataType, role, type,
221
- calcClass, scopeIsolation,
222
- formulaLength, formulaFlat, uiFormula,
223
- comments,
224
- formulaLOD
225
- ]
226
- resolvedFields.each do |rf|
227
- emit "\t\t res field : #{rf[:field]} "
228
- emit "\t\t res source: #{rf[:source]}"
229
- calcFieldName = rf[:field]
230
- calcDataSource = rf[:source]
231
- localDataSource = rf[:source].nil? # if there isn't a rf[:source] value
232
- # the field is from this data source
233
- # else the field is from an alien data source (in the same workbook)
234
- refDataSource = localDataSource ? ds : @twb.datasource(calcDataSource)
235
- dispFieldName = refDataSource.fieldUIName(calcFieldName)
236
- calcFieldTable = refDataSource.fieldTable(calcFieldName)
237
- emit "\t\t calc field : #{dispFieldName} nil?<#{dispFieldName.nil?}>"
238
- emit "\t\t data source: #{refDataSource.uiname}"
239
- emit "\t\t table: #{calcFieldTable} nil?<#{calcFieldTable.nil?}>"
240
- properties = {'DataSource' => dsName, 'DataSourceReference' => 'local'}
241
- if dispFieldName.nil?
242
- dispFieldName = "<#{calcFieldName}>::<#{calcDataSource}> UNDEFINED"
243
- properties['status'] = 'UNDEFINED'
244
- end
245
- calcFieldID = "#{calcFieldName}::#{refDataSource.uiname}"
246
- if !localDataSource
247
- calcFieldID = "#{calcFieldName}:LDS:#{ds.uiname}:RDS:#{refDataSource.uiname}"
248
- properties['DataSourceReference'] = 'remote'
249
- end
250
- calcFieldTable = ds.fieldTable(calcFieldName)
251
- calcFieldType = calcFieldTable.nil? ? :CalculatedField : :DatabaseField
252
- calcFieldNode = Twb::Util::Graphnode.new(name: dispFieldName, id: calcFieldID, type: calcFieldType, properties: properties)
253
- fieldFieldEdge = Twb::Util::Graphedge.new(from: srcGraphNode, to: calcFieldNode, relationship: 'references')
254
- edges.add fieldFieldEdge
255
- referencedFields.add calcFieldID
256
- # @formulaFieldsCount+=1
257
- emit "\t\t calcFieldNode: #{calcFieldNode}"
258
- emit "\t\t graphEdge: #{fieldFieldEdge}"
259
- fldToDsNode = calcFieldNode
260
- if !calcFieldTable.nil?
261
- tableID = calcFieldTable + ':::' + ds.uiname
262
- tableName = "-[#{calcFieldTable}]-"
263
- tableNode = Twb::Util::Graphnode.new(name: tableName, id: tableID, type: :DBTable, properties: properties)
264
- fieldFieldEdge = Twb::Util::Graphedge.new(from: calcFieldNode, to: tableNode, relationship: 'is a field in')
265
- edges.add fieldFieldEdge
266
- fldToDsNode = tableNode
267
- end
268
- if !localDataSource
269
- alienDSNode = Twb::Util::Graphnode.new( name: '==>' + refDataSource.uiname,
270
- id: "#{ds.uiname}::::=>#{refDataSource.uiname}",
271
- type: :DBTable,
272
- properties: {'Home Source' => dsName, 'Remote Source' => refDataSource.uiname}
273
- )
274
- fieldFieldEdge = Twb::Util::Graphedge.new(from: fldToDsNode, to: alienDSNode, relationship: 'In Remote Data Source')
275
- edges.add fieldFieldEdge
276
- end
277
- @formFieldsCSVFile << [ @formulaFieldsCount+=1,
278
- twb,
279
- twbDir,
280
- dsName,
281
- fldName,
282
- refDataSource.name,
283
- refDataSource.uiname,
284
- calcFieldName,
285
- dispFieldName,
286
- dsName + '::' + dispFieldName,
287
- 'fieldTable'
288
- ]
289
- end
290
- end # if hasFormula
291
- end # calculationNodes.each
292
- dsRootFields = calculatedFields - referencedFields
293
- @referencedFields.merge referencedFields
294
- #--
295
- emit "--\nCalculated Fields\n-----------------"
296
- calculatedFields.each { |f| emit f }
297
- emit "--\nReferenced Fields\n-----------------"
298
- referencedFields.each { |f| emit f }
299
- emit "--\nDS Root Fields\n-----------------"
300
- dsRootFields.each { |f| emit f }
301
- emit "--"
302
- # --
303
- twbRootFields.merge dsRootFields
304
- end # dss.each
305
- @twbCount += 1
306
- mapTwb twb, edges, twbRootFields
307
- graphEdges twb, edges
308
- emit "#######################"
309
- return @imageFiles
310
- end
311
-
312
-
313
- def mapTwb twb, edges, rootFields
314
- dotFile = initDot twb
315
- dotFileName = File.basename dotFile
316
- dotFile.puts "\n // subgraph cluster_1 {"
317
- dotFile.puts " // color= grey;"
318
- dotFile.puts ""
319
- edgesAsStrings = SortedSet.new
320
- # this two step process coalesces the edges into a unique set, avoiding duplicating the dot
321
- # file entries, and can be shrunk when graph edges expose the bits necessary for management by Set
322
- emit "\n========================\nLoading Edges\n========================\n From DC? Referenced? Edge \n %s %s %s" % ['--------', '-----------', '-'*45]
323
- edges.each do |e|
324
- # don't want to emit edge which is from a Data Connection to a
325
- # Calculated Field which is also referenced by another calculated field
326
- isFromDC = e.from.type == :TwbDataConnection
327
- isRefField = @referencedFields.include?(e.to.id)
328
- edgesAsStrings.add(e.dot) unless isFromDC && isRefField
329
- end
330
- emit "------------------------\n "
331
- edgesAsStrings.each do |es|
332
- dotFile.puts " #{es}"
333
- emit " #{es}"
334
- end
335
- emit "========================\n "
336
- dotFile.puts ""
337
- dotFile.puts " // }"
338
- dotFile.puts "\n\n // 4--------------------------------------------------------------------"
339
- # "table::JIRA_HARVEST_Correspondence__c::Jira" [label="JIRA_HARVEST_Correspondence__c"]
340
- nodes = SortedSet.new
341
- edges.each do |e|
342
- nodes.add e.from.dotLabel
343
- nodes.add e.to.dotLabel
344
- end
345
- nodes.each do |n|
346
- dotFile.puts n
347
- end
348
- dotFile.puts "\n\n // 5--------------------------------------------------------------------"
349
- emitTypes( edges, dotFile )
350
- rankRootFields( dotFile, rootFields )
351
- closeDot( dotFile, twb )
352
- # renderPng(twb.name,dotFileName)
353
- # renderPdf(twb.name,dotFileName)
354
- renderDot(twb,dotFileName,'pdf')
355
- renderDot(twb,dotFileName,'png')
356
- renderDot(twb,dotFileName,'svg')
357
- emitEdges edges
358
- end
359
-
360
-
361
- def graphEdges twb, edges
362
- graphFile = File.new(twb + '.cypher', 'w')
363
- # graphFile.puts "OKEY DOKE, graphing away"
364
- cypherCode = Set.new
365
- edges.each do |edge|
366
- cypherCode.add edge.from.cypherCreate
367
- cypherCode.add edge.to.cypherCreate
368
- cypherCode.add edge.cypherCreate
369
- end
370
- cypherCode.each do |cc|
371
- graphFile.puts cc
372
- end
373
- graphFile.puts "\nreturn *"
374
- graphFile.close unless graphFile.nil?
375
- @imageFiles << File.basename(graphFile)
376
- end
377
-
378
- def emitEdges edges
379
- emit " %-15s %s" % ['type', 'Edge']
380
- emit " %-15s %s" % ['-'*15, '-'*35]
381
- edges.each do |edge|
382
- emit " %-15s %s" % [edge.from.type, edge.from]
383
- emit " %-15s %s" % [edge.to.type, edge.to]
384
- emit "\n "
385
- end
386
- end
387
-
388
- def emitTypes edges, dotFile
389
- typedNodes = {}
390
- dotFile.puts "\n\n // 2--------------------------------------------------------------------"
391
- edges.each do |edge|
392
- emit " EDGE :: #{edge}"
393
- loadNodeType typedNodes, edge.from
394
- loadNodeType typedNodes, edge.to
395
- end
396
- typedNodes.each do |type, nodes|
397
- emit "+++++++++ typedNodes of '#{type}'' "
398
- nodes.each do |node|
399
- emit " -n- #{node}"
400
- end
401
- rankSame(dotFile, type, nodes) unless type == :CalculatedField
402
- end
403
- # labelTypes dotFile, edges
404
- end
405
-
406
- def loadNodeType set, node
407
- type = node.type
408
- set[type] = Set.new unless set.include? type
409
- set[type].add node
410
- end
411
-
412
- def rankSame dotFile, type, nodes
413
- dotFile.puts "\n // '#{type}' --------------------------------------------------------------------"
414
- dotFile.puts "\n {rank=same "
415
- # dotFile.puts " \"#{type}\" [shape=\"box3d\" style=\"filled\" ]" unless ''.eql? type # [shape=\"box3d\" style=\"filled\" ]\"" unless label.equal? ''
416
- nodes.each do |node|
417
- dotFile.puts " \"#{node.id}\""
418
- end
419
- dotFile.puts " }"
420
- end
421
-
422
- def rankRootFields dotFile, dsRootFields
423
- dotFile.puts "\n // Unreferenced (root) Calculated Fields -----------------------------------------"
424
- dotFile.puts "\n {rank=same "
425
- dsRootFields.each do |rf|
426
- dotFile.puts " \"#{rf}\""
427
- end
428
- dotFile.puts " }"
429
- end
430
-
431
-
432
- def labelTypes dotFile, edges
433
- fromTos = Set.new
434
- edges.each do |edge|
435
- # fromTos.add "\"Alien Data Source\" -> \"Alien Data Source\""
436
- fromTos.add "\"#{edge.from.type}\""
437
- fromTos.add "\"#{edge.to.type}\""
438
- end
439
- return if fromTos.empty?
440
- dotFile.puts "\n // 3--------------------------------------------------------------------"
441
- dotFile.puts ' subgraph cluster_0 {'
442
- dotFile.puts ' color=white;'
443
- dotFile.puts ' node [shape="box3d" style="filled" ];'
444
- fromTos.each do |ft|
445
- dotFile.puts " #{ft}"
446
- end
447
- dotFile.puts ' }'
448
- end
449
-
450
-
451
- def emit(local=@localEmit, stuff)
452
- #puts "\nstuff.class #{stuff.class} :: #{stuff}" if local
453
- if stuff.is_a? String then
454
- lines = stuff.split(/\n/)
455
- lines.each do |line|
456
- @logger.debug "#{@emitPrefix}#{line}"
457
- puts "#{@emitPrefix}#{line}" if local
458
- end
459
- else
460
- @logger.debug "#{@emitPrefix}#{stuff}"
461
- puts "#{@emitPrefix}#{stuff}" if local
462
- end
463
- end
464
-
465
-
466
- def initDot twb
467
- dotFile = File.open("#{twb}#{@@processName}.dot",'w')
468
- dotFile.puts @@dotHeader
469
- return dotFile
470
- end
471
-
472
- def closeDot dotFile, twb
473
- dotFile.puts ' '
474
- dotFile.puts '// -------------------------------------------------------------'
475
- dotFile.puts ' '
476
- dotFile.puts ' subgraph cluster_1 {'
477
- # dotFile.puts ' color=white;'
478
- dotFile.puts ' style=invis;'
479
- # dotFile.puts ' border=0;'
480
- dotFile.puts ' node [border=blue];'
481
- dotFile.puts ' '
482
- dotFile.puts ' "" [style=invis]'
483
- dotFile.puts " \"Tableau Tools\\nCalculated Fields Map\\nWorkbook '#{twb}'\\n#{Time.new.ctime}\" [penwidth=0]"
484
- # dotFile.puts " \"Tableau Tools Workbook Calculated Fields Map\\n#{Time.new.ctime}\" -> \"\" [style=invis]"
485
- dotFile.puts ' '
486
- dotFile.puts ' }'
487
- dotFile.puts ' '
488
- dotFile.puts '}'
489
- dotFile.close
490
- end
491
-
492
-
493
- def renderDot twb, dot, format
494
- emit "Rendering DOT file\n - #{twb}\n - #{dot}\n - #{format}"
495
- imageType = '-T' + format
496
- imageFile = twb + @@processName + 'Graph.' + format
497
- imageParam = '-o' + imageFile
498
- emit "system #{@@gvDotLocation} #{imageType} #{imageParam} #{dot}"
499
- system @@gvDotLocation, imageType, imageParam, dot
500
- @imageFiles << imageFile
501
- return imageFile
502
- end
503
-
504
-
505
- end # class
506
-
507
- end # module Analysis
508
- end # module Twb