aepp 0.5.2.post1.tar.gz → 0.5.2.post2.tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- {aepp-0.5.2.post1/aepp.egg-info → aepp-0.5.2.post2}/PKG-INFO +1 -1
- aepp-0.5.2.post2/aepp/__version__.py +1 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/cli/__main__.py +129 -5
- aepp-0.5.2.post2/aepp/cli/upsfieldsanalyzer.py +271 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2/aepp.egg-info}/PKG-INFO +1 -1
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp.egg-info/SOURCES.txt +2 -1
- aepp-0.5.2.post1/aepp/__version__.py +0 -1
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/LICENSE +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/MANIFEST.in +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/README.md +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/__init__.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/accesscontrol.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/catalog.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/classmanager.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/cli/__init__.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/config.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/configs.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/connector.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/customerprofile.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/dataaccess.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/dataprep.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/datasets.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/datatypemanager.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/deletion.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/destination.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/destinationinstanceservice.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/edge.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/exportDatasetToDataLandingZone.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/fieldgroupmanager.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/flowservice.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/hygiene.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/identity.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/ingestion.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/observability.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/policy.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/privacyservice.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/queryservice.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/sandboxes.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/schema.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/schemamanager.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/segmentation.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/sensei.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/som.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/synchronizer.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/tags.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/utils.py +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp.egg-info/dependency_links.txt +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp.egg-info/entry_points.txt +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp.egg-info/requires.txt +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp.egg-info/top_level.txt +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/pyproject.toml +0 -0
- {aepp-0.5.2.post1 → aepp-0.5.2.post2}/setup.cfg +0 -0
aepp-0.5.2.post2/aepp/__version__.py (new)
@@ -0,0 +1 @@
+__version__ = "0.5.2-2"

{aepp-0.5.2.post1 → aepp-0.5.2.post2}/aepp/cli/__main__.py
@@ -2,6 +2,7 @@ from ast import arg
 from matplotlib.pyplot import table
 import aepp
 from aepp import synchronizer, schema, schemamanager, fieldgroupmanager, datatypemanager, identity, queryservice,catalog,flowservice,sandboxes, segmentation
+from aepp.cli.upsfieldsanalyzer import UpsFieldsAnalyzer
 import argparse, cmd, shlex, json
 from functools import wraps
 from rich.console import Console
@@ -37,6 +38,7 @@ class ServiceShell(cmd.Cmd):
         super().__init__()
         self.config = None
         self.connectInstance = True
+        self.ups_profile_analyzer:UpsFieldsAnalyzer|None = None
         if kwargs.get("config_file") is not None:
             config_path = Path(kwargs.get("config_file"))
             if not config_path.is_absolute():
@@ -69,7 +71,7 @@ class ServiceShell(cmd.Cmd):
         )
         self.prompt = f"{self.config.sandbox}> "
         console.print(Panel(f"Connected to [bold green]{self.sandbox}[/bold green]", style="blue"))
-
+
     def do_createConfigFile(self, arg:Any) -> None:
         """Create a configuration file for future use"""
         parser = argparse.ArgumentParser(prog='createConfigFile', add_help=True)
@@ -169,7 +171,112 @@ class ServiceShell(cmd.Cmd):
             console.print(f"(!) Error: {str(e)}", style="red")
         except SystemExit:
             return
-
+
+    @login_required
+    def do_get_profile_paths_info(self,args:Any)->None:
+        """Get usage information for all Profile paths"""
+        parser = argparse.ArgumentParser(prog='get_profile_paths_info', add_help=True)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            if self.ups_profile_analyzer is None:
+                console.print("Initializing Profile UPS Fields Analyzer. This will take few minutes...", style="blue")
+                self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+            else:
+                if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+                    console.print("Re-initializing Profile UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+                    self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+            console.print("Analyzing all Profile paths information. This will take few minutes...", style="blue")
+            df_analysis:pd.DataFrame = self.ups_profile_analyzer.analyzePaths(output='df')
+            if df_analysis is not None:
+                console.print(df_analysis)
+                df_analysis.to_csv(f"profile_all_paths_info.csv", index=False)
+                console.print(f"Profile all paths information data exported to profile_all_paths_info.csv", style="green")
+            else:
+                console.print("(!) No profile paths information data found.", style="red")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
+    @login_required
+    def do_get_profile_path_info(self, args:Any) -> None:
+        """Get path information on Profile"""
+        parser = argparse.ArgumentParser(prog='get_profile_path_info', add_help=True)
+        parser.add_argument("path", help="Dot notation of the path to analyze in Profile Storage", default=None,type=str)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            if self.ups_profile_analyzer is None:
+                console.print("Initializing Profile UPS Fields Analyzer. This will take few minutes...", style="blue")
+                self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+            else:
+                if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+                    console.print("Re-initializing Profile UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+                    self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+            analysis = self.ups_profile_analyzer.analyzePath(args.path)
+            if analysis:
+                console.print_json(data=analysis)
+                with open(f"profile_path_info_{args.path.replace('/','_')}.json", 'w') as f:
+                    json.dump(analysis, f, indent=4)
+                console.print(f"Profile path information data exported to profile_path_info_{args.path.replace('/','_')}.json", style="green")
+            else:
+                console.print("(!) No profile path information data found.", style="red")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
+    @login_required
+    def do_get_event_paths_info(self,args:Any)->None:
+        """Get information for all Experience Event paths"""
+        parser = argparse.ArgumentParser(prog='get_event_paths_info', add_help=True)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            if self.ups_profile_analyzer is None:
+                console.print("Initializing Event UPS Fields Analyzer. This will take few minutes...", style="blue")
+                self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+            else:
+                if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+                    console.print("Re-initializing Event UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+                    self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+            console.print("Analyzing all Event paths information. This will take few minutes...", style="blue")
+            df_analysis:pd.DataFrame = self.ups_profile_analyzer.analyzePaths(output='df')
+            if df_analysis is not None:
+                console.print(df_analysis)
+                df_analysis.to_csv(f"event_all_paths_info.csv", index=False)
+                console.print(f"Event all paths information data exported to event_all_paths_info.csv", style="green")
+            else:
+                console.print("(!) No event paths information data found.", style="red")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
+    @login_required
+    def do_get_event_path_info(self, args:Any) -> None:
+        """Get path information on Experience Event"""
+        parser = argparse.ArgumentParser(prog='get_event_path_info', add_help=True)
+        parser.add_argument("path", help="Dot notation of the path to analyze in Experience Event Storage", default=None,type=str)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            if self.ups_profile_analyzer is None:
+                console.print("Initializing Event UPS Fields Analyzer. This will take few minutes...", style="blue")
+                self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+            else:
+                if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+                    console.print("Re-initializing Event UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+                    self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+            analysis = self.ups_profile_analyzer.analyzePath(args.path)
+            if analysis:
+                console.print_json(data=analysis)
+                with open(f"event_path_info_{args.path.replace('/','_')}.json", 'w') as f:
+                    json.dump(analysis, f, indent=4)
+                console.print(f"Event path information data exported to event_path_info_{args.path.replace('/','_')}.json", style="green")
+            else:
+                console.print("(!) No event path information data found.", style="red")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
 
     @login_required
     def do_get_schemas(self, args:Any) -> None:
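All four commands share one lazy-initialization pattern: the analyzer is cached on the shell, built on first use, and rebuilt whenever the active sandbox no longer matches the one it was built for. A minimal sketch of that pattern in isolation (`make_analyzer` and `ShellSketch` are hypothetical stand-ins for the `UpsFieldsAnalyzer` constructor and the shell, not part of aepp):

```python
# Sketch of the cache/re-init pattern used by the new do_get_* commands.
# `make_analyzer` is a hypothetical factory standing in for UpsFieldsAnalyzer(config=...).
class ShellSketch:
    def __init__(self, config):
        self.config = config
        self.analyzer = None  # plays the role of self.ups_profile_analyzer

    def get_analyzer(self, make_analyzer):
        if self.analyzer is None:
            # First use: build for the current sandbox (slow, hence the warnings).
            self.analyzer = make_analyzer(self.config)
        elif self.config.sandbox != self.analyzer.sandbox:
            # Sandbox changed since the last build: discard and rebuild.
            self.analyzer = make_analyzer(self.config)
        return self.analyzer
```

One thing worth noting from the diff: the profile and event commands share the single `ups_profile_analyzer` slot, and the rebuild check compares only the sandbox, so alternating between `get_profile_*` and `get_event_*` commands in the same sandbox reuses whichever union analyzer was built first.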
@@ -991,6 +1098,23 @@ class ServiceShell(cmd.Cmd):
             args = parser.parse_args(shlex.split(args))
             aepp_audience = segmentation.Segmentation(config=self.config)
             audiences = aepp_audience.getAudiences()
+            flw = flowservice.FlowService(config=self.config)
+            destinations = flw.getFlows(onlyDestinations=True)
+            segments_shared = []
+            for tmpFlow in destinations:
+                if len(tmpFlow['transformations'])>0:
+                    tmpSegmentShared = tmpFlow['transformations'][0].get('params',{}).get('segmentSelectors',{}).get('selectors',[])
+                    for s in tmpSegmentShared:
+                        s['flowId'] = tmpFlow['id']
+                    segments_shared += tmpSegmentShared
+            segment_shared_dict = {seg.get('value',{}).get('id'):{
+                "exportMode" : seg.get('value',{}).get('exportMode'),
+                "scheduleFrequency": seg.get('value',{}).get("schedule",{}).get('frequency',''),
+                "flowId" : seg["flowId"]
+                } for seg in segments_shared}
+            for aud in audiences:
+                aud['usedInFlow'] = True if segment_shared_dict.get(aud.get("id","N/A"),{}) != {} else False
+                aud['sharedInfo'] = segment_shared_dict.get(aud.get("id","N/A"),{})
             df_audiences = pd.DataFrame(audiences)
             df_audiences.to_csv(f"{self.config.sandbox}_audiences.csv",index=False)
             console.print(f"Audiences exported to {self.config.sandbox}_audiences.csv", style="green")
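The seventeen added lines flatten each destination flow's `segmentSelectors` into a lookup keyed by segment id, then stamp every audience with `usedInFlow` and `sharedInfo`. The same logic run standalone against a hand-made payload (the `flows` and `audiences` literals below are illustrative, not real Flow Service output):

```python
# Sketch: build segment_shared_dict from destination flows, then flag audiences.
flows = [{
    "id": "flow-1",
    "transformations": [{
        "params": {"segmentSelectors": {"selectors": [
            {"value": {"id": "seg-A", "exportMode": "DAILY_FULL_EXPORT",
                       "schedule": {"frequency": "DAILY"}}},
        ]}}
    }],
}]
segments_shared = []
for tmpFlow in flows:
    if len(tmpFlow["transformations"]) > 0:
        selectors = (tmpFlow["transformations"][0].get("params", {})
                     .get("segmentSelectors", {}).get("selectors", []))
        for s in selectors:
            s["flowId"] = tmpFlow["id"]  # remember which flow shares the segment
        segments_shared += selectors
segment_shared_dict = {
    seg.get("value", {}).get("id"): {
        "exportMode": seg.get("value", {}).get("exportMode"),
        "scheduleFrequency": seg.get("value", {}).get("schedule", {}).get("frequency", ""),
        "flowId": seg["flowId"],
    }
    for seg in segments_shared
}
audiences = [{"id": "seg-A"}, {"id": "seg-B"}]
for aud in audiences:
    shared = segment_shared_dict.get(aud.get("id", "N/A"), {})
    aud["usedInFlow"] = shared != {}
    aud["sharedInfo"] = shared
# audiences[0]["usedInFlow"] -> True; audiences[1]["usedInFlow"] -> False
```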
@@ -999,14 +1123,14 @@ class ServiceShell(cmd.Cmd):
             table.add_column("Name", style="magenta")
             table.add_column("Evaluation", style="yellow")
             table.add_column("Total Profiles", style="green")
-            table.add_column("
+            table.add_column("Shared", style="white")
             for aud in audiences:
                 table.add_row(
                     aud.get("id","N/A"),
                     aud.get("name","N/A"),
-                    '[
+                    '[bright_blue]Batch[/bright_blue]' if aud.get("evaluationInfo",{}).get("batch",{}).get('enabled') else '[chartreuse1]Streaming[/chartreuse1]' if aud.get("evaluationInfo",{}).get("continuous",{}).get('enabled') else '[purple]Edge[/purple]' if aud.get("evaluationInfo",{}).get("synchronous",{}).get('enabled') else 'N/A',
                     str(aud.get('metrics',{}).get('data',{}).get('totalProfiles','N/A')),
-
+                    '[green3]True[/green3]' if aud.get("usedInFlow",False) else '[red3]False[/red3]',
                 )
             console.print(table)
         except Exception as e:
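The replacement Evaluation cell is one chained conditional expression; unpacked into a plain function, it is a priority order of batch, then streaming, then edge (the sample audience dict is illustrative):

```python
# Illustrative unpacking of the Evaluation-column conditional from the diff.
def evaluation_label(aud: dict) -> str:
    info = aud.get("evaluationInfo", {})
    if info.get("batch", {}).get("enabled"):
        return "Batch"      # rendered [bright_blue] in the rich table
    if info.get("continuous", {}).get("enabled"):
        return "Streaming"  # rendered [chartreuse1]
    if info.get("synchronous", {}).get("enabled"):
        return "Edge"       # rendered [purple]
    return "N/A"

print(evaluation_label({"evaluationInfo": {"continuous": {"enabled": True}}}))  # Streaming
```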

aepp-0.5.2.post2/aepp/cli/upsfieldsanalyzer.py (new)
@@ -0,0 +1,271 @@
+import aepp
+from aepp import schema,catalog,segmentation,flowservice, ConnectObject,schemamanager
+from typing import Union
+from copy import deepcopy
+from concurrent.futures import ThreadPoolExecutor
+import pandas as pd
+import re
+
+class UpsFieldsAnalyzer:
+    """
+    Class that extract the relationships of the fields for union schemas
+    """
+    loggingEnabled = False
+    logger = None
+
+    def __init__(
+            self,
+            union:str="https://ns.adobe.com/xdm/context/profile__union",
+            config: Union[dict,'ConnectObject'] = aepp.config.config_object,
+            region:str='nld2',
+            **kwargs,
+        ):
+        """
+        Instantiate the data Lineage class.
+        Arguments:
+            config : REQUIRED : Either ConnectObject instance or a config file to connect to the sandbox.
+            union : REQUIRED : The union schema you want to analyze. Default: https://ns.adobe.com/xdm/context/profile__union
+                Possible values:
+                'https://ns.adobe.com/xdm/context/experienceevent__union'
+                'https://ns.adobe.com/experience/journeyOrchestration/stepEvents/journeyStepEvent__union'
+                'https://ns.adobe.com/experience/journeyOrchestration/stepEvents/journeyStepEvent__union'
+                'https://ns.adobe.com/xdm/context/segmentdefinition__union'
+                'https://ns.adobe.com/experience/customerJourneyManagement/ajoEntity__union'
+            region : OPTIONAL : If you are using a different region than the one automatically assigned (default : nld2, possible option: va7,aus5)
+        Additional kwargs will update the header.
+        """
+        if union is None:
+            raise ValueError("Requires the usage of an union schema definition")
+        self.union = union
+        self.classId = self.union.split('__')[0]
+        self.config = config
+        self.region = region
+        self.sandbox = config.sandbox
+        self.schemaAPI = schema.Schema(config=config)
+        self.catalogAPI = catalog.Catalog(config=config)
+        self.segmentationAPI = segmentation.Segmentation(config=config)
+        self.flowAPI = flowservice.FlowService(config=config)
+        self.unionSchema = schemamanager.SchemaManager(union,config=config)
+        df_union = self.unionSchema.to_dataframe(queryPath=True)
+        self.df_union = df_union.set_index('querypath',drop=True)
+        self.__schemaInfo__(config=config)
+        self.__datasetInfo__()
+        self.__audienceInfo__()
+        self.__flowserviceInfoDestinations__()
+        self.__flowserviceInfoSource__()
+        self.__audienceInfo__()
+
+
+    def __schemaInfo__(self,config)->None:
+        """
+        Extract the information of schema.
+        Provide the following attributes:
+        * schemaManagers : dict {$id:schemaManager}
+
+        """
+        schemas = self.schemaAPI.getSchemas(classFilter=self.classId)
+        list_schemaIds = [sch.get('$id') for sch in schemas]
+        none_params = [None for _ in range(len(list_schemaIds))]
+        config_params = [deepcopy(config) for _ in range(len(list_schemaIds))]
+        self.schemaManagers = {}
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            schemaDetails = list(executor.map(schemamanager.SchemaManager, list_schemaIds,none_params,none_params,none_params,none_params,config_params))
+        for sch in schemaDetails:
+            self.schemaManagers[sch.id] = sch
+
+    def __audienceInfo__(self)->None:
+        """
+        Extract the segmentation information
+        Provide the following attributes:
+        * audiences : list of audiences
+        * audiences_definitions : dict { id : {definition, class}}
+        """
+        audiences = self.segmentationAPI.getAudiences()
+        self.audiences_definitions = {
+            seg['id']:{
+                'name':seg.get('name'),
+                'definition':seg,
+                'format' : seg.get('expression',{}).get('format'),
+                'class':[el.get("$ref") for el in seg.get('definedOn',[{}])]
+                }
+            for seg
+            in audiences
+            if self.union in [el.get("$ref") for el in seg.get('definedOn',[{}])]
+        }
+        self.paths_audiences = {path:{} for path in self.df_union['path'].to_list()}
+        for segId in self.audiences_definitions:
+            paths = self.segmentationAPI.extractPaths(self.audiences_definitions[segId].get('definition'))
+            for path in paths:
+                if path in self.paths_audiences.keys():
+                    self.paths_audiences[path][segId] = {
+                        "name": self.audiences_definitions[segId]["name"]
+                    }
+
+    def __datasetInfo__(self):
+        """
+        Extract the dataset information
+        Provide the following attributes:
+        * dict_datasetId_name : dict { id : name }
+        * observableSchemas : dict { id : ObsSchema}
+        * observable_df : dict { id : df }
+        * dataset_schema : dict { id : schema $id }
+        * datasets : list (of dataset ID)
+        """
+        datasets = self.catalogAPI.getDataSets(output='list')
+        enabledDatasets = []
+        self.dict_datasetId_name = {}
+        list_enabled_datasetIds = []
+        for ds in datasets:
+            if 'enabled:true' in ds.get('tags',{}).get('unifiedProfile',[]):
+                enabledDatasets.append(ds)
+                self.dict_datasetId_name[ds['id']] = ds['name']
+                list_enabled_datasetIds.append(ds['id'])
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            observableSchemasList = list(executor.map(self.catalogAPI.getDataSetObservableSchema, list_enabled_datasetIds,[True]*len(list_enabled_datasetIds)))
+        self.observableSchemas = {}
+        self.observable_df = {}
+        self.dataset_schema = {}
+        self.datasets = []
+        for element in observableSchemasList:
+            obs = catalog.ObservableSchemaManager(element)
+            if obs.schemaId is not None:
+                datasetSchema = self.schemaAPI.getSchema(obs.schemaId)
+                if datasetSchema.get('meta:class') == self.classId:
+                    self.datasets.append(obs.datasetId)
+                    self.observableSchemas[element.get('datasetId')] = obs
+                    self.dataset_schema[element.get('datasetId')] = datasetSchema
+                    self.observable_df[element.get('datasetId')] = self.observableSchemas[element.get('datasetId')].to_dataframe()
+
+    def __flowserviceInfoDestinations__(self)->dict:
+        """
+        Build the flow service data for destination
+        Provide the following attributes:
+        * destinationsPath : dict { id : {name:str, paths:list }
+        """
+        selectors = set()
+        destinationFlows = self.flowAPI.getFlows(onlyDestinations=True)
+        self.destinationsPath = {}
+        for destination in destinationFlows:
+            transformations = destination.get('transformations',[{}])
+            if len(transformations) > 0:
+                if transformations[0].get('name') == 'GeneralTransform':
+                    name = destination['name']
+                    transformationParams = destination.get('transformations',[{}])[0].get('params',{})
+                    if 'profileSelectors' in transformationParams.keys():
+                        for selector in transformationParams['profileSelectors'].get('selectors',[]):
+                            selectors.add(selector.get('value',{}).get('path'))
+                    self.destinationsPath[destination['id']]={
+                        'name':name,
+                        "paths":list(selectors)
+                    }
+
+    def __flowserviceInfoSource__(self)->dict:
+        """
+        Build the flow service data for source
+        Provide the following attributes:
+        * destinationsPath : dict { id : {name:str, datasetId:str,schemaRef:str }
+        """
+        sourceFlows = self.flowAPI.getFlows(onlySources=True)
+        self.sourceFlows = {}
+        def getTargetDetails(sourceConnId)->dict:
+            tmp_sourceConnection = self.flowAPI.getTargetConnection(sourceConnId)
+            return tmp_sourceConnection
+        def getFlowSpec(specId)->dict:
+            tmp_sourceSpec = self.flowAPI.getFlowSpec(specId)
+            return tmp_sourceSpec
+        list_targetIds = [source.get('targetConnectionIds')[0] for source in sourceFlows]
+        list_flowSpecIds = [source.get('flowSpec',{}).get('id') for source in sourceFlows if source.get('flowSpec',{}).get('id') is not None]
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            targetconnections = list(executor.map(getTargetDetails, list_targetIds))
+            flowSpecs = list(executor.map(getFlowSpec, list_flowSpecIds))
+        for source in sourceFlows:
+            sourceName = source['name']
+            sourceId = source['id']
+            tmp_sourceTargetId = source.get('targetConnectionIds')[0]
+            tmp_sourceTarget = [item for item in targetconnections if item['id'] == tmp_sourceTargetId][0]
+            params = tmp_sourceTarget.get('params',{})
+            specId = source.get('flowSpec',{}).get('id')
+            frequency = None
+            if specId is not None:
+                tmp_sourceSpec = [item for item in flowSpecs if item['id'] == specId][0]
+                frequency = tmp_sourceSpec.get('attributes',{}).get('frequency')
+            datasetId = params.get('dataSetId',params.get('datasetId'))
+            if datasetId in self.datasets:
+                self.sourceFlows[sourceId] = {
+                    'name' : sourceName,
+                    'datasetId' : datasetId,
+                    'schemaRef' : self.dataset_schema[datasetId],
+                    'frequency':frequency
+                }
+
+
+    def __buildRelationships__(self,path:str)->dict:
+        """
+        Build relationship between a path and the different elements
+        Arguments:
+            path : REQUIRED : the path to analyze
+        """
+        result_dict = {'path':path}
+        if path in self.df_union.index:
+            result_dict['description'] = self.df_union.at[path,'description']
+            result_dict['fieldGroup'] = self.df_union.at[path,'fieldGroup']
+            result_dict['type'] = self.df_union.at[path,'type']
+        result_dict['schemas'] = {}
+        for schemaId in self.schemaManagers:
+            if path in self.schemaManagers[schemaId].to_dataframe()['path'].to_list():
+                result_dict['schemas'][schemaId] = self.schemaManagers[schemaId].title
+        result_dict['datasets'] = {}
+        for dsId in self.datasets:
+            if path in self.observable_df[dsId]['path'].to_list():
+                result_dict['datasets'][dsId] = self.dict_datasetId_name[dsId]
+        result_dict['destinationFlows'] = {}
+        for flowId in self.destinationsPath:
+            if path in self.destinationsPath[flowId]['paths']:
+                result_dict['destinationFlows'][flowId] = self.destinationsPath[flowId]['name']
+        result_dict['sourceFlows'] = {}
+        for sourceId in self.sourceFlows:
+            datasetId = self.sourceFlows[sourceId]['datasetId']
+            if path in self.observable_df[datasetId]['path'].to_list():
+                result_dict['sourceFlows'][sourceId] = {'name':self.sourceFlows[sourceId]['name'],'frequency':self.sourceFlows[sourceId]['frequency']}
+        result_dict['audiences'] = self.paths_audiences[path]
+        return result_dict
+
+    def analyzePaths(self,output:str='df')->Union[list,pd.DataFrame]:
+        """
+        Analyze the paths of your union schema
+        Arguments:
+            output : OPTIONAL : The type of output provided. Default "df", possible: "raw" (list)
+        """
+        list_dictionary = []
+        for path in self.df_union.path.to_list():
+            list_dictionary.append(self.analyzePath(path))
+        if output=='df':
+            df = pd.DataFrame(list_dictionary)
+            return df
+        return list_dictionary
+
+    def analyzePath(self,path:str=None,output:str='dict')->Union[dict,pd.DataFrame]:
+        """
+        Analyze a specific path
+        Arguments:
+            path : REQUIRED : The path to analyze
+            output : OPTIONAL : The type of output provided ('dict' (default) or 'dataframe' )
+        """
+        if path is None:
+            raise ValueError('path must be specified')
+        res = self.__buildRelationships__(path)
+        return res
+
+    def to_dataframe(self,save:bool=False)->pd.DataFrame:
+        """
+        Returns the union schema as dataframe.
+        Arguments:
+            save : OPTIONAL : If the dataframe is to be saved in a file
+        """
+        return self.unionSchema.to_dataframe(save=save)
+
+    def to_dict(self)->dict:
+        """
+        Returns the union schema as dictionary.
+        """
+        return self.unionSchema.to_dict()
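The new module resolves every path of a union schema to the schemas, Profile-enabled datasets, source and destination flows, and audiences that reference it; the constructor does all of the API crawling up front (largely via `ThreadPoolExecutor`), which is why the CLI warns that initialization takes minutes. A hedged usage sketch outside the shell, assuming a standard aepp config file (`myconfig.json` and the XDM path are placeholders):

```python
# Sketch: using UpsFieldsAnalyzer directly rather than through the CLI.
import aepp
from aepp.cli.upsfieldsanalyzer import UpsFieldsAnalyzer

# "myconfig.json" is a placeholder for a real aepp configuration file.
config = aepp.importConfigFile("myconfig.json", connectInstance=True)

# Defaults to the Profile union; pass union=... for the Experience Event union.
analyzer = UpsFieldsAnalyzer(config=config)

# One dot-notation path -> dict of related schemas, datasets, flows, audiences.
info = analyzer.analyzePath("person.name.firstName")

# Every union path as a DataFrame, exported the same way the CLI commands do.
df = analyzer.analyzePaths(output="df")
df.to_csv("profile_all_paths_info.csv", index=False)
```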

aepp-0.5.2.post1/aepp/__version__.py (removed)
@@ -1 +0,0 @@
-__version__ = "0.5.2-1"