aepp 0.5.2__py3-none-any.whl → 0.5.2.post2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aepp/__version__.py +1 -1
- aepp/cli/__main__.py +303 -9
- aepp/cli/upsfieldsanalyzer.py +271 -0
- {aepp-0.5.2.dist-info → aepp-0.5.2.post2.dist-info}/METADATA +1 -1
- {aepp-0.5.2.dist-info → aepp-0.5.2.post2.dist-info}/RECORD +9 -8
- {aepp-0.5.2.dist-info → aepp-0.5.2.post2.dist-info}/WHEEL +0 -0
- {aepp-0.5.2.dist-info → aepp-0.5.2.post2.dist-info}/entry_points.txt +0 -0
- {aepp-0.5.2.dist-info → aepp-0.5.2.post2.dist-info}/licenses/LICENSE +0 -0
- {aepp-0.5.2.dist-info → aepp-0.5.2.post2.dist-info}/top_level.txt +0 -0
aepp/__version__.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.5.2"
+__version__ = "0.5.2-2"
aepp/cli/__main__.py
CHANGED
@@ -1,7 +1,8 @@
 from ast import arg
 from matplotlib.pyplot import table
 import aepp
-from aepp import synchronizer, schema, schemamanager, fieldgroupmanager, datatypemanager, identity, queryservice,catalog,flowservice
+from aepp import synchronizer, schema, schemamanager, fieldgroupmanager, datatypemanager, identity, queryservice,catalog,flowservice,sandboxes, segmentation
+from aepp.cli.upsfieldsanalyzer import UpsFieldsAnalyzer
 import argparse, cmd, shlex, json
 from functools import wraps
 from rich.console import Console
@@ -37,6 +38,7 @@ class ServiceShell(cmd.Cmd):
         super().__init__()
         self.config = None
         self.connectInstance = True
+        self.ups_profile_analyzer:UpsFieldsAnalyzer|None = None
         if kwargs.get("config_file") is not None:
             config_path = Path(kwargs.get("config_file"))
             if not config_path.is_absolute():
@@ -69,7 +71,7 @@ class ServiceShell(cmd.Cmd):
             )
             self.prompt = f"{self.config.sandbox}> "
             console.print(Panel(f"Connected to [bold green]{self.sandbox}[/bold green]", style="blue"))
-
+
     def do_createConfigFile(self, arg:Any) -> None:
         """Create a configuration file for future use"""
         parser = argparse.ArgumentParser(prog='createConfigFile', add_help=True)
@@ -134,6 +136,147 @@ class ServiceShell(cmd.Cmd):
         else:
             console.print(Panel("(!) You must configure the connection first using the 'config' command.", style="red"))

+    @login_required
+    def do_get_sandboxes(self, args:Any) -> None:
+        """List all sandboxes for the current organization"""
+        parser = argparse.ArgumentParser(prog='get_sandboxes', add_help=True)
+        parser.add_argument("-sv", "--save",help="Save sandboxes to CSV file")
+        try:
+            args = parser.parse_args(shlex.split(args))
+            aepp_sandboxes = sandboxes.Sandboxes(config=self.config)
+            sandboxes_list = aepp_sandboxes.getSandboxes()
+            if sandboxes_list:
+                table = Table(title=f"Sandboxes in Org: {self.config.org_id}")
+                table.add_column("Name", style="cyan")
+                table.add_column("Title", style="magenta")
+                table.add_column("Type", style="green")
+                table.add_column("Region", style="yellow")
+                table.add_column("Created", style="medium_violet_red")
+                for sb in sandboxes_list:
+                    table.add_row(
+                        sb.get("name","N/A"),
+                        sb.get("title","N/A"),
+                        sb.get("type","N/A"),
+                        sb.get("region","N/A"),
+                        sb.get("createdDate","N/A"),
+                    )
+                console.print(table)
+                if args.save:
+                    df_sandboxes = pd.DataFrame(sandboxes_list)
+                    df_sandboxes.to_csv(f"sandboxes_{self.config.org_id}.csv", index=False)
+                    console.print(f"Sandboxes exported to sandboxes_{self.config.org_id}.csv", style="green")
+            else:
+                console.print("(!) No sandboxes found.", style="red")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
+    @login_required
+    def do_get_profile_paths_info(self,args:Any)->None:
+        """Get usage information for all Profile paths"""
+        parser = argparse.ArgumentParser(prog='get_profile_paths_info', add_help=True)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            if self.ups_profile_analyzer is None:
+                console.print("Initializing Profile UPS Fields Analyzer. This will take few minutes...", style="blue")
+                self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+            else:
+                if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+                    console.print("Re-initializing Profile UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+                    self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+            console.print("Analyzing all Profile paths information. This will take few minutes...", style="blue")
+            df_analysis:pd.DataFrame = self.ups_profile_analyzer.analyzePaths(output='df')
+            if df_analysis is not None:
+                console.print(df_analysis)
+                df_analysis.to_csv(f"profile_all_paths_info.csv", index=False)
+                console.print(f"Profile all paths information data exported to profile_all_paths_info.csv", style="green")
+            else:
+                console.print("(!) No profile paths information data found.", style="red")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
+    @login_required
+    def do_get_profile_path_info(self, args:Any) -> None:
+        """Get path information on Profile"""
+        parser = argparse.ArgumentParser(prog='get_profile_path_info', add_help=True)
+        parser.add_argument("path", help="Dot notation of the path to analyze in Profile Storage", default=None,type=str)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            if self.ups_profile_analyzer is None:
+                console.print("Initializing Profile UPS Fields Analyzer. This will take few minutes...", style="blue")
+                self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+            else:
+                if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+                    console.print("Re-initializing Profile UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+                    self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+            analysis = self.ups_profile_analyzer.analyzePath(args.path)
+            if analysis:
+                console.print_json(data=analysis)
+                with open(f"profile_path_info_{args.path.replace('/','_')}.json", 'w') as f:
+                    json.dump(analysis, f, indent=4)
+                console.print(f"Profile path information data exported to profile_path_info_{args.path.replace('/','_')}.json", style="green")
+            else:
+                console.print("(!) No profile path information data found.", style="red")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
+    @login_required
+    def do_get_event_paths_info(self,args:Any)->None:
+        """Get information for all Experience Event paths"""
+        parser = argparse.ArgumentParser(prog='get_event_paths_info', add_help=True)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            if self.ups_profile_analyzer is None:
+                console.print("Initializing Event UPS Fields Analyzer. This will take few minutes...", style="blue")
+                self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+            else:
+                if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+                    console.print("Re-initializing Event UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+                    self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+            console.print("Analyzing all Event paths information. This will take few minutes...", style="blue")
+            df_analysis:pd.DataFrame = self.ups_profile_analyzer.analyzePaths(output='df')
+            if df_analysis is not None:
+                console.print(df_analysis)
+                df_analysis.to_csv(f"event_all_paths_info.csv", index=False)
+                console.print(f"Event all paths information data exported to event_all_paths_info.csv", style="green")
+            else:
+                console.print("(!) No event paths information data found.", style="red")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
+    @login_required
+    def do_get_event_path_info(self, args:Any) -> None:
+        """Get path information on Experience Event"""
+        parser = argparse.ArgumentParser(prog='get_event_path_info', add_help=True)
+        parser.add_argument("path", help="Dot notation of the path to analyze in Experience Event Storage", default=None,type=str)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            if self.ups_profile_analyzer is None:
+                console.print("Initializing Event UPS Fields Analyzer. This will take few minutes...", style="blue")
+                self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+            else:
+                if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+                    console.print("Re-initializing Event UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+                    self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+            analysis = self.ups_profile_analyzer.analyzePath(args.path)
+            if analysis:
+                console.print_json(data=analysis)
+                with open(f"event_path_info_{args.path.replace('/','_')}.json", 'w') as f:
+                    json.dump(analysis, f, indent=4)
+                console.print(f"Event path information data exported to event_path_info_{args.path.replace('/','_')}.json", style="green")
+            else:
+                console.print("(!) No event path information data found.", style="red")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return

     @login_required
     def do_get_schemas(self, args:Any) -> None:
@@ -738,13 +881,81 @@ class ServiceShell(cmd.Cmd):
                     ds.get("name","N/A"),
                     datetime.fromtimestamp(ds.get("created",1000)/1000).isoformat().split('T')[0],
                     str(ds.get("dataIngested",False)),
-                    ds.get(
+                    ds.get('classification').get('dataBehavior','N/A'),
+                )
+            console.print(table)
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
+    @login_required
+    def do_get_datasets_tableName(self, args:Any) -> None:
+        parser = argparse.ArgumentParser(prog='get_datasets', add_help=True)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            aepp_cat = catalog.Catalog(config=self.config)
+            datasets = aepp_cat.getDataSets(output='list')
+            table = Table(title=f"Datasets in Sandbox: {self.config.sandbox}")
+            table.add_column("Name", style="white")
+            table.add_column("Table Name", style="cyan",no_wrap=True)
+            table.add_column("Data Type", style="red")
+            for ds in datasets:
+                table.add_row(
+                    ds.get("name","N/A"),
+                    ds.get('tags',{}).get('adobe/pqs/table',["N/A"])[0],
+                    ds.get('classification').get('dataBehavior','N/A'),
                 )
             console.print(table)
         except Exception as e:
             console.print(f"(!) Error: {str(e)}", style="red")
         except SystemExit:
             return
+
+    @login_required
+    def do_get_observable_schema_json(self,args:Any) -> None:
+        """Get the observable schema for a dataset by name or ID"""
+        parser = argparse.ArgumentParser(prog='get_observable_schema', add_help=True)
+        parser.add_argument("dataset", help="Dataset ID or Dataset Name to retrieve observable schema for",type=str)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            aepp_cat = catalog.Catalog(config=self.config)
+            datasets = aepp_cat.getDataSets(output='list')
+            for ds in datasets:
+                if ds.get("name","") == args.dataset or ds.get("id","") == args.dataset:
+                    datasetId = ds.get("id")
+                    schema_json = aepp_cat.getDataSetObservableSchema(datasetId=datasetId,appendDatasetInfo=True)
+                    myObs = catalog.ObservableSchemaManager(schema_json,config=self.config)
+                    data = myObs.to_dict()
+                    with open(f"{args.dataset}_observable_schema.json", 'w') as f:
+                        json.dump(data, f, indent=4)
+                    console.print(f"Saved Observable schema to {args.dataset}_observable_schema.json.", style="green")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
+    @login_required
+    def do_get_observable_schema_csv(self,args:Any) -> None:
+        """Get the observable schema for a dataset by name or ID"""
+        parser = argparse.ArgumentParser(prog='get_observable_schema', add_help=True)
+        parser.add_argument("dataset", help="Dataset ID or Dataset Name to retrieve observable schema for",type=str)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            aepp_cat = catalog.Catalog(config=self.config)
+            datasets = aepp_cat.getDataSets(output='list')
+            for ds in datasets:
+                if ds.get("name","") == args.dataset or ds.get("id","") == args.dataset:
+                    datasetId = ds.get("id")
+                    schema_json = aepp_cat.getDataSetObservableSchema(datasetId=datasetId,appendDatasetInfo=True)
+                    myObs = catalog.ObservableSchemaManager(schema_json,config=self.config)
+                    data = myObs.to_dataframe()
+                    data.to_csv(f"{args.dataset}_observable_schema.csv", index=False)
+                    console.print(f"Saved Observable schema to {args.dataset}_observable_schema.csv.", style="green")
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return

     @login_required
     def do_get_datasets_infos(self, args:Any) -> None:
@@ -754,23 +965,58 @@ class ServiceShell(cmd.Cmd):
             args = parser.parse_args(shlex.split(args))
             aepp_cat = catalog.Catalog(config=self.config)
             datasets = aepp_cat.getDataSets()
+            aepp_cat.data.infos = aepp_cat.data.infos.sort_values(by=['ups_storageSize','datalake_storageSize'], ascending=False)
             aepp_cat.data.infos.to_csv(f"{aepp_cat.sandbox}_datasets_infos.csv",index=False)
             console.print(f"Datasets infos exported to {aepp_cat.sandbox}_datasets_infos.csv", style="green")
             table = Table(title=f"Datasets in Sandbox: {self.config.sandbox}")
             table.add_column("ID", style="white")
             table.add_column("Name", style="white",no_wrap=True)
-            table.add_column("
-            table.add_column("
-            table.add_column("
-            table.add_column("
+            table.add_column("UPS Rows", style="cyan")
+            table.add_column("UPS Storage Size", style="green")
+            table.add_column("Datalake Rows", style="magenta")
+            table.add_column("Datalake Storage Size", style="yellow")
             for _, ds in aepp_cat.data.infos.iterrows():
                 table.add_row(
                     ds.get("id","N/A"),
                     ds.get("name","N/A"),
+                    str(ds.get("ups_rows","N/A")),
+                    str(ds.get("ups_storageSize","N/A")),
                     str(ds.get("datalake_rows","N/A")),
                     str(ds.get("datalake_storageSize","N/A")),
-
-
+                )
+            console.print(table)
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
+    @login_required
+    def do_get_snapshot_datasets(self,args:Any) -> None:
+        """List all snapshot datasets in the current sandbox"""
+        parser = argparse.ArgumentParser(prog='get_snapshot_datasets', add_help=True)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            aepp_cat = catalog.Catalog(config=self.config)
+            datasets = aepp_cat.getProfileSnapshotDatasets(explicitMergePolicy=True)
+            list_ds = []
+            for key, ds in datasets.items():
+                obj = ds
+                obj['id'] = key
+                list_ds.append(obj)
+            df_datasets = pd.DataFrame(list_ds)
+            df_datasets.to_csv(f"{self.config.sandbox}_snapshot_datasets.csv",index=False)
+            console.print(f"Snapshot Datasets exported to {self.config.sandbox}_snapshot_datasets.csv", style="green")
+            table = Table(title=f"Snapshot Datasets in Sandbox: {self.config.sandbox}")
+            table.add_column("ID", style="white")
+            table.add_column("Table Name", style="white")
+            table.add_column("Merge Policy Name", style="yellow")
+            table.add_column("Merge Policy ID", style="green")
+            for ds in list_ds:
+                table.add_row(
+                    ds.get("id","N/A"),
+                    ds.get("tags",{}).get('adobe/pqs/table',["N/A"])[0],
+                    ds.get('mergePolicyName','N/A'),
+                    [el.split(':')[1] for el in ds.get('tags',{}).get('unifiedProfile',[]) if el.startswith('mergePolicyId')][0]
                 )
             console.print(table)
         except Exception as e:
@@ -844,6 +1090,54 @@ class ServiceShell(cmd.Cmd):
         except SystemExit:
             return

+    @login_required
+    def do_get_audiences(self, args:Any) -> None:
+        """List all audiences in the current sandbox"""
+        parser = argparse.ArgumentParser(prog='get_audiences', add_help=True)
+        try:
+            args = parser.parse_args(shlex.split(args))
+            aepp_audience = segmentation.Segmentation(config=self.config)
+            audiences = aepp_audience.getAudiences()
+            flw = flowservice.FlowService(config=self.config)
+            destinations = flw.getFlows(onlyDestinations=True)
+            segments_shared = []
+            for tmpFlow in destinations:
+                if len(tmpFlow['transformations'])>0:
+                    tmpSegmentShared = tmpFlow['transformations'][0].get('params',{}).get('segmentSelectors',{}).get('selectors',[])
+                    for s in tmpSegmentShared:
+                        s['flowId'] = tmpFlow['id']
+                    segments_shared += tmpSegmentShared
+            segment_shared_dict = {seg.get('value',{}).get('id'):{
+                "exportMode" : seg.get('value',{}).get('exportMode'),
+                "scheduleFrequency": seg.get('value',{}).get("schedule",{}).get('frequency',''),
+                "flowId" : seg["flowId"]
+                } for seg in segments_shared}
+            for aud in audiences:
+                aud['usedInFlow'] = True if segment_shared_dict.get(aud.get("id","N/A"),{}) != {} else False
+                aud['sharedInfo'] = segment_shared_dict.get(aud.get("id","N/A"),{})
+            df_audiences = pd.DataFrame(audiences)
+            df_audiences.to_csv(f"{self.config.sandbox}_audiences.csv",index=False)
+            console.print(f"Audiences exported to {self.config.sandbox}_audiences.csv", style="green")
+            table = Table(title=f"Audiences in Sandbox: {self.config.sandbox}")
+            table.add_column("ID", style="cyan")
+            table.add_column("Name", style="magenta")
+            table.add_column("Evaluation", style="yellow")
+            table.add_column("Total Profiles", style="green")
+            table.add_column("Shared", style="white")
+            for aud in audiences:
+                table.add_row(
+                    aud.get("id","N/A"),
+                    aud.get("name","N/A"),
+                    '[bright_blue]Batch[/bright_blue]' if aud.get("evaluationInfo",{}).get("batch",{}).get('enabled') else '[chartreuse1]Streaming[/chartreuse1]' if aud.get("evaluationInfo",{}).get("continuous",{}).get('enabled') else '[purple]Edge[/purple]' if aud.get("evaluationInfo",{}).get("synchronous",{}).get('enabled') else 'N/A',
+                    str(aud.get('metrics',{}).get('data',{}).get('totalProfiles','N/A')),
+                    '[green3]True[/green3]' if aud.get("usedInFlow",False) else '[red3]False[/red3]',
+                )
+            console.print(table)
+        except Exception as e:
+            console.print(f"(!) Error: {str(e)}", style="red")
+        except SystemExit:
+            return
+
     @login_required
     def do_get_flows(self, args:Any) -> None:
         """List flows in the current sandbox based on parameters provided. By default, list all sources and destinations."""
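The hunks above add new ServiceShell commands (get_sandboxes, get_profile_paths_info, get_profile_path_info, get_event_paths_info, get_event_path_info, get_datasets_tableName, get_observable_schema_json, get_observable_schema_csv, get_snapshot_datasets, get_audiences). As a rough sketch of how they could be driven programmatically, assuming a config file that authenticates against the target sandbox (the file name and the example path below are illustrative placeholders, not values shipped in this release):

from aepp.cli.__main__ import ServiceShell

shell = ServiceShell(config_file="config_oauth.json")  # hypothetical config file path
shell.onecmd("get_sandboxes")                          # cmd.Cmd dispatch to do_get_sandboxes
shell.onecmd("get_profile_paths_info")                 # builds a UpsFieldsAnalyzer and exports profile_all_paths_info.csv
shell.onecmd("get_event_path_info commerce.order.priceTotal")  # illustrative dot-notation path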
aepp/cli/upsfieldsanalyzer.py
ADDED
@@ -0,0 +1,271 @@
+import aepp
+from aepp import schema,catalog,segmentation,flowservice, ConnectObject,schemamanager
+from typing import Union
+from copy import deepcopy
+from concurrent.futures import ThreadPoolExecutor
+import pandas as pd
+import re
+
+class UpsFieldsAnalyzer:
+    """
+    Class that extract the relationships of the fields for union schemas
+    """
+    loggingEnabled = False
+    logger = None
+
+    def __init__(
+            self,
+            union:str="https://ns.adobe.com/xdm/context/profile__union",
+            config: Union[dict,'ConnectObject'] = aepp.config.config_object,
+            region:str='nld2',
+            **kwargs,
+        ):
+        """
+        Instantiate the data Lineage class.
+        Arguments:
+            config : REQUIRED : Either ConnectObject instance or a config file to connect to the sandbox.
+            union : REQUIRED : The union schema you want to analyze. Default: https://ns.adobe.com/xdm/context/profile__union
+                Possible values:
+                'https://ns.adobe.com/xdm/context/experienceevent__union'
+                'https://ns.adobe.com/experience/journeyOrchestration/stepEvents/journeyStepEvent__union'
+                'https://ns.adobe.com/experience/journeyOrchestration/stepEvents/journeyStepEvent__union'
+                'https://ns.adobe.com/xdm/context/segmentdefinition__union'
+                'https://ns.adobe.com/experience/customerJourneyManagement/ajoEntity__union'
+            region : OPTIONAL : If you are using a different region than the one automatically assigned (default : nld2, possible option: va7,aus5)
+        Additional kwargs will update the header.
+        """
+        if union is None:
+            raise ValueError("Requires the usage of an union schema definition")
+        self.union = union
+        self.classId = self.union.split('__')[0]
+        self.config = config
+        self.region = region
+        self.sandbox = config.sandbox
+        self.schemaAPI = schema.Schema(config=config)
+        self.catalogAPI = catalog.Catalog(config=config)
+        self.segmentationAPI = segmentation.Segmentation(config=config)
+        self.flowAPI = flowservice.FlowService(config=config)
+        self.unionSchema = schemamanager.SchemaManager(union,config=config)
+        df_union = self.unionSchema.to_dataframe(queryPath=True)
+        self.df_union = df_union.set_index('querypath',drop=True)
+        self.__schemaInfo__(config=config)
+        self.__datasetInfo__()
+        self.__audienceInfo__()
+        self.__flowserviceInfoDestinations__()
+        self.__flowserviceInfoSource__()
+        self.__audienceInfo__()
+
+
+    def __schemaInfo__(self,config)->None:
+        """
+        Extract the information of schema.
+        Provide the following attributes:
+        * schemaManagers : dict {$id:schemaManager}
+
+        """
+        schemas = self.schemaAPI.getSchemas(classFilter=self.classId)
+        list_schemaIds = [sch.get('$id') for sch in schemas]
+        none_params = [None for _ in range(len(list_schemaIds))]
+        config_params = [deepcopy(config) for _ in range(len(list_schemaIds))]
+        self.schemaManagers = {}
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            schemaDetails = list(executor.map(schemamanager.SchemaManager, list_schemaIds,none_params,none_params,none_params,none_params,config_params))
+        for sch in schemaDetails:
+            self.schemaManagers[sch.id] = sch
+
+    def __audienceInfo__(self)->None:
+        """
+        Extract the segmentation information
+        Provide the following attributes:
+        * audiences : list of audiences
+        * audiences_definitions : dict { id : {definition, class}}
+        """
+        audiences = self.segmentationAPI.getAudiences()
+        self.audiences_definitions = {
+            seg['id']:{
+                'name':seg.get('name'),
+                'definition':seg,
+                'format' : seg.get('expression',{}).get('format'),
+                'class':[el.get("$ref") for el in seg.get('definedOn',[{}])]
+                }
+            for seg
+            in audiences
+            if self.union in [el.get("$ref") for el in seg.get('definedOn',[{}])]
+        }
+        self.paths_audiences = {path:{} for path in self.df_union['path'].to_list()}
+        for segId in self.audiences_definitions:
+            paths = self.segmentationAPI.extractPaths(self.audiences_definitions[segId].get('definition'))
+            for path in paths:
+                if path in self.paths_audiences.keys():
+                    self.paths_audiences[path][segId] = {
+                        "name": self.audiences_definitions[segId]["name"]
+                    }
+
+    def __datasetInfo__(self):
+        """
+        Extract the dataset information
+        Provide the following attributes:
+        * dict_datasetId_name : dict { id : name }
+        * observableSchemas : dict { id : ObsSchema}
+        * observable_df : dict { id : df }
+        * dataset_schema : dict { id : schema $id }
+        * datasets : list (of dataset ID)
+        """
+        datasets = self.catalogAPI.getDataSets(output='list')
+        enabledDatasets = []
+        self.dict_datasetId_name = {}
+        list_enabled_datasetIds = []
+        for ds in datasets:
+            if 'enabled:true' in ds.get('tags',{}).get('unifiedProfile',[]):
+                enabledDatasets.append(ds)
+                self.dict_datasetId_name[ds['id']] = ds['name']
+                list_enabled_datasetIds.append(ds['id'])
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            observableSchemasList = list(executor.map(self.catalogAPI.getDataSetObservableSchema, list_enabled_datasetIds,[True]*len(list_enabled_datasetIds)))
+        self.observableSchemas = {}
+        self.observable_df = {}
+        self.dataset_schema = {}
+        self.datasets = []
+        for element in observableSchemasList:
+            obs = catalog.ObservableSchemaManager(element)
+            if obs.schemaId is not None:
+                datasetSchema = self.schemaAPI.getSchema(obs.schemaId)
+                if datasetSchema.get('meta:class') == self.classId:
+                    self.datasets.append(obs.datasetId)
+                    self.observableSchemas[element.get('datasetId')] = obs
+                    self.dataset_schema[element.get('datasetId')] = datasetSchema
+                    self.observable_df[element.get('datasetId')] = self.observableSchemas[element.get('datasetId')].to_dataframe()
+
+    def __flowserviceInfoDestinations__(self)->dict:
+        """
+        Build the flow service data for destination
+        Provide the following attributes:
+        * destinationsPath : dict { id : {name:str, paths:list }
+        """
+        selectors = set()
+        destinationFlows = self.flowAPI.getFlows(onlyDestinations=True)
+        self.destinationsPath = {}
+        for destination in destinationFlows:
+            transformations = destination.get('transformations',[{}])
+            if len(transformations) > 0:
+                if transformations[0].get('name') == 'GeneralTransform':
+                    name = destination['name']
+                    transformationParams = destination.get('transformations',[{}])[0].get('params',{})
+                    if 'profileSelectors' in transformationParams.keys():
+                        for selector in transformationParams['profileSelectors'].get('selectors',[]):
+                            selectors.add(selector.get('value',{}).get('path'))
+                        self.destinationsPath[destination['id']]={
+                            'name':name,
+                            "paths":list(selectors)
+                        }
+
+    def __flowserviceInfoSource__(self)->dict:
+        """
+        Build the flow service data for source
+        Provide the following attributes:
+        * destinationsPath : dict { id : {name:str, datasetId:str,schemaRef:str }
+        """
+        sourceFlows = self.flowAPI.getFlows(onlySources=True)
+        self.sourceFlows = {}
+        def getTargetDetails(sourceConnId)->dict:
+            tmp_sourceConnection = self.flowAPI.getTargetConnection(sourceConnId)
+            return tmp_sourceConnection
+        def getFlowSpec(specId)->dict:
+            tmp_sourceSpec = self.flowAPI.getFlowSpec(specId)
+            return tmp_sourceSpec
+        list_targetIds = [source.get('targetConnectionIds')[0] for source in sourceFlows]
+        list_flowSpecIds = [source.get('flowSpec',{}).get('id') for source in sourceFlows if source.get('flowSpec',{}).get('id') is not None]
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            targetconnections = list(executor.map(getTargetDetails, list_targetIds))
+            flowSpecs = list(executor.map(getFlowSpec, list_flowSpecIds))
+        for source in sourceFlows:
+            sourceName = source['name']
+            sourceId = source['id']
+            tmp_sourceTargetId = source.get('targetConnectionIds')[0]
+            tmp_sourceTarget = [item for item in targetconnections if item['id'] == tmp_sourceTargetId][0]
+            params = tmp_sourceTarget.get('params',{})
+            specId = source.get('flowSpec',{}).get('id')
+            frequency = None
+            if specId is not None:
+                tmp_sourceSpec = [item for item in flowSpecs if item['id'] == specId][0]
+                frequency = tmp_sourceSpec.get('attributes',{}).get('frequency')
+            datasetId = params.get('dataSetId',params.get('datasetId'))
+            if datasetId in self.datasets:
+                self.sourceFlows[sourceId] = {
+                    'name' : sourceName,
+                    'datasetId' : datasetId,
+                    'schemaRef' : self.dataset_schema[datasetId],
+                    'frequency':frequency
+                }
+
+
+    def __buildRelationships__(self,path:str)->dict:
+        """
+        Build relationship between a path and the different elements
+        Arguments:
+            path : REQUIRED : the path to analyze
+        """
+        result_dict = {'path':path}
+        if path in self.df_union.index:
+            result_dict['description'] = self.df_union.at[path,'description']
+            result_dict['fieldGroup'] = self.df_union.at[path,'fieldGroup']
+            result_dict['type'] = self.df_union.at[path,'type']
+        result_dict['schemas'] = {}
+        for schemaId in self.schemaManagers:
+            if path in self.schemaManagers[schemaId].to_dataframe()['path'].to_list():
+                result_dict['schemas'][schemaId] = self.schemaManagers[schemaId].title
+        result_dict['datasets'] = {}
+        for dsId in self.datasets:
+            if path in self.observable_df[dsId]['path'].to_list():
+                result_dict['datasets'][dsId] = self.dict_datasetId_name[dsId]
+        result_dict['destinationFlows'] = {}
+        for flowId in self.destinationsPath:
+            if path in self.destinationsPath[flowId]['paths']:
+                result_dict['destinationFlows'][flowId] = self.destinationsPath[flowId]['name']
+        result_dict['sourceFlows'] = {}
+        for sourceId in self.sourceFlows:
+            datasetId = self.sourceFlows[sourceId]['datasetId']
+            if path in self.observable_df[datasetId]['path'].to_list():
+                result_dict['sourceFlows'][sourceId] = {'name':self.sourceFlows[sourceId]['name'],'frequency':self.sourceFlows[sourceId]['frequency']}
+        result_dict['audiences'] = self.paths_audiences[path]
+        return result_dict
+
+    def analyzePaths(self,output:str='df')->Union[list,pd.DataFrame]:
+        """
+        Analyze the paths of your union schema
+        Arguments:
+            output : OPTIONAL : The type of output provided. Default "df", possible: "raw" (list)
+        """
+        list_dictionary = []
+        for path in self.df_union.path.to_list():
+            list_dictionary.append(self.analyzePath(path))
+        if output=='df':
+            df = pd.DataFrame(list_dictionary)
+            return df
+        return list_dictionary
+
+    def analyzePath(self,path:str=None,output:str='dict')->Union[dict,pd.DataFrame]:
+        """
+        Analyze a specific path
+        Arguments:
+            path : REQUIRED : The path to analyze
+            output : OPTIONAL : The type of output provided ('dict' (default) or 'dataframe' )
+        """
+        if path is None:
+            raise ValueError('path must be specified')
+        res = self.__buildRelationships__(path)
+        return res
+
+    def to_dataframe(self,save:bool=False)->pd.DataFrame:
+        """
+        Returns the union schema as dataframe.
+        Arguments:
+            save : OPTIONAL : If the dataframe is to be saved in a file
+        """
+        return self.unionSchema.to_dataframe(save=save)
+
+    def to_dict(self)->dict:
+        """
+        Returns the union schema as dictionary.
+        """
+        return self.unionSchema.to_dict()
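The new module above can also be used outside the shell. A minimal sketch, assuming a ConnectObject created with aepp.importConfigFile (the config file name and the example path are illustrative placeholders, not part of this release):

import aepp
from aepp.cli.upsfieldsanalyzer import UpsFieldsAnalyzer

config = aepp.importConfigFile("myconfig.json", connectInstance=True)  # assumed helper returning a ConnectObject
analyzer = UpsFieldsAnalyzer(
    union="https://ns.adobe.com/xdm/context/profile__union",  # default union schema from the class signature
    config=config,
)
df_all = analyzer.analyzePaths(output="df")            # one row per union-schema path
info = analyzer.analyzePath("person.name.lastName")    # schemas, datasets, flows and audiences referencing this path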
{aepp-0.5.2.dist-info → aepp-0.5.2.post2.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 aepp/__init__.py,sha256=rsU4OMu3pJIgy8emJAD6lhAfqH0-raZ6GyIMJanNBdM,27912
-aepp/__version__.py,sha256=
+aepp/__version__.py,sha256=Cg8MGKgYvVWLyB1zXrrIwnb1heE7cYSFsuUcUYCJuQk,23
 aepp/accesscontrol.py,sha256=PB3FcrO4bvDjdNxjHx7p_20hp4ahBXewoOSxuTGMXC8,17423
 aepp/catalog.py,sha256=hK9m3SAP0fhgkYqu14Tcfq14qBhw54tLCOF0mH31b1M,68237
 aepp/classmanager.py,sha256=16hx_hptg3PYwmezZCr9dLjvOkNSunih1PK3Q-iPoZY,66099
@@ -35,10 +35,11 @@ aepp/synchronizer.py,sha256=3scwuimQJIBVdEqJ9fVsT1UgmFc9EkH3mpYxUwSoAOE,79363
 aepp/tags.py,sha256=t2qBallTcWR4IOXcDBmrPpqjbSay1z3E2bcRijzVm1s,17641
 aepp/utils.py,sha256=tG-YVXylm38-bynqfp5N_Mzyo7mhlZj-dLo7wLoO4tM,1200
 aepp/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-aepp/cli/__main__.py,sha256=
-aepp
-aepp-0.5.2.dist-info/
-aepp-0.5.2.dist-info/
-aepp-0.5.2.dist-info/
-aepp-0.5.2.dist-info/
-aepp-0.5.2.dist-info/
+aepp/cli/__main__.py,sha256=yQGX-aCz_fDCvzjK0g9XVPjHrfJ4ZeWeVXj9aFq928A,85462
+aepp/cli/upsfieldsanalyzer.py,sha256=GAVBfXN6U8_BfU7doZwcuox71NMwdqQsEpuNgM2Osjc,13124
+aepp-0.5.2.post2.dist-info/licenses/LICENSE,sha256=HjYTlfne3BbS5gNHzNqJ5COCiTQLUdf87QkzRyFbE4Y,10337
+aepp-0.5.2.post2.dist-info/METADATA,sha256=IbeoTe1HUPrINR-0ZAYJJan3Q8wAGH5lxGg1KuR-1sY,5344
+aepp-0.5.2.post2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+aepp-0.5.2.post2.dist-info/entry_points.txt,sha256=e7HAumUTymoUiCuVRzFlcchennUBLcjxvuiimySF98Y,48
+aepp-0.5.2.post2.dist-info/top_level.txt,sha256=dtZJI8SzhWVgZRl68PHKZX_fD6amvDiFR-lqD9FSJvE,5
+aepp-0.5.2.post2.dist-info/RECORD,,
File without changes
File without changes
File without changes
File without changes