aepp 0.5.2__tar.gz → 0.5.2.post2__tar.gz

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (52)
  1. {aepp-0.5.2/aepp.egg-info → aepp-0.5.2.post2}/PKG-INFO +1 -1
  2. aepp-0.5.2.post2/aepp/__version__.py +1 -0
  3. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/cli/__main__.py +303 -9
  4. aepp-0.5.2.post2/aepp/cli/upsfieldsanalyzer.py +271 -0
  5. {aepp-0.5.2 → aepp-0.5.2.post2/aepp.egg-info}/PKG-INFO +1 -1
  6. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/SOURCES.txt +2 -1
  7. aepp-0.5.2/aepp/__version__.py +0 -1
  8. {aepp-0.5.2 → aepp-0.5.2.post2}/LICENSE +0 -0
  9. {aepp-0.5.2 → aepp-0.5.2.post2}/MANIFEST.in +0 -0
  10. {aepp-0.5.2 → aepp-0.5.2.post2}/README.md +0 -0
  11. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/__init__.py +0 -0
  12. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/accesscontrol.py +0 -0
  13. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/catalog.py +0 -0
  14. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/classmanager.py +0 -0
  15. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/cli/__init__.py +0 -0
  16. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/config.py +0 -0
  17. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/configs.py +0 -0
  18. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/connector.py +0 -0
  19. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/customerprofile.py +0 -0
  20. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/dataaccess.py +0 -0
  21. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/dataprep.py +0 -0
  22. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/datasets.py +0 -0
  23. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/datatypemanager.py +0 -0
  24. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/deletion.py +0 -0
  25. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/destination.py +0 -0
  26. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/destinationinstanceservice.py +0 -0
  27. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/edge.py +0 -0
  28. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/exportDatasetToDataLandingZone.py +0 -0
  29. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/fieldgroupmanager.py +0 -0
  30. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/flowservice.py +0 -0
  31. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/hygiene.py +0 -0
  32. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/identity.py +0 -0
  33. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/ingestion.py +0 -0
  34. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/observability.py +0 -0
  35. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/policy.py +0 -0
  36. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/privacyservice.py +0 -0
  37. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/queryservice.py +0 -0
  38. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/sandboxes.py +0 -0
  39. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/schema.py +0 -0
  40. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/schemamanager.py +0 -0
  41. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/segmentation.py +0 -0
  42. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/sensei.py +0 -0
  43. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/som.py +0 -0
  44. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/synchronizer.py +0 -0
  45. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/tags.py +0 -0
  46. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/utils.py +0 -0
  47. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/dependency_links.txt +0 -0
  48. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/entry_points.txt +0 -0
  49. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/requires.txt +0 -0
  50. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/top_level.txt +0 -0
  51. {aepp-0.5.2 → aepp-0.5.2.post2}/pyproject.toml +0 -0
  52. {aepp-0.5.2 → aepp-0.5.2.post2}/setup.cfg +0 -0
{aepp-0.5.2/aepp.egg-info → aepp-0.5.2.post2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: aepp
- Version: 0.5.2
+ Version: 0.5.2.post2
  Summary: Package to manage AEP API endpoint and some helper functions
  Author-email: Julien Piccini <piccini.julien@gmail.com>
  License: Apache-2.0
aepp-0.5.2.post2/aepp/__version__.py
@@ -0,0 +1 @@
+ __version__ = "0.5.2-2"
{aepp-0.5.2 → aepp-0.5.2.post2}/aepp/cli/__main__.py
@@ -1,7 +1,8 @@
  from ast import arg
  from matplotlib.pyplot import table
  import aepp
- from aepp import synchronizer, schema, schemamanager, fieldgroupmanager, datatypemanager, identity, queryservice,catalog,flowservice
+ from aepp import synchronizer, schema, schemamanager, fieldgroupmanager, datatypemanager, identity, queryservice,catalog,flowservice,sandboxes, segmentation
+ from aepp.cli.upsfieldsanalyzer import UpsFieldsAnalyzer
  import argparse, cmd, shlex, json
  from functools import wraps
  from rich.console import Console
@@ -37,6 +38,7 @@ class ServiceShell(cmd.Cmd):
  super().__init__()
  self.config = None
  self.connectInstance = True
+ self.ups_profile_analyzer:UpsFieldsAnalyzer|None = None
  if kwargs.get("config_file") is not None:
  config_path = Path(kwargs.get("config_file"))
  if not config_path.is_absolute():
@@ -69,7 +71,7 @@ class ServiceShell(cmd.Cmd):
  )
  self.prompt = f"{self.config.sandbox}> "
  console.print(Panel(f"Connected to [bold green]{self.sandbox}[/bold green]", style="blue"))
-
+
  def do_createConfigFile(self, arg:Any) -> None:
  """Create a configuration file for future use"""
  parser = argparse.ArgumentParser(prog='createConfigFile', add_help=True)
@@ -134,6 +136,147 @@ class ServiceShell(cmd.Cmd):
  else:
  console.print(Panel("(!) You must configure the connection first using the 'config' command.", style="red"))

+ @login_required
+ def do_get_sandboxes(self, args:Any) -> None:
+ """List all sandboxes for the current organization"""
+ parser = argparse.ArgumentParser(prog='get_sandboxes', add_help=True)
+ parser.add_argument("-sv", "--save",help="Save sandboxes to CSV file")
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_sandboxes = sandboxes.Sandboxes(config=self.config)
+ sandboxes_list = aepp_sandboxes.getSandboxes()
+ if sandboxes_list:
+ table = Table(title=f"Sandboxes in Org: {self.config.org_id}")
+ table.add_column("Name", style="cyan")
+ table.add_column("Title", style="magenta")
+ table.add_column("Type", style="green")
+ table.add_column("Region", style="yellow")
+ table.add_column("Created", style="medium_violet_red")
+ for sb in sandboxes_list:
+ table.add_row(
+ sb.get("name","N/A"),
+ sb.get("title","N/A"),
+ sb.get("type","N/A"),
+ sb.get("region","N/A"),
+ sb.get("createdDate","N/A"),
+ )
+ console.print(table)
+ if args.save:
+ df_sandboxes = pd.DataFrame(sandboxes_list)
+ df_sandboxes.to_csv(f"sandboxes_{self.config.org_id}.csv", index=False)
+ console.print(f"Sandboxes exported to sandboxes_{self.config.org_id}.csv", style="green")
+ else:
+ console.print("(!) No sandboxes found.", style="red")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_profile_paths_info(self,args:Any)->None:
+ """Get usage information for all Profile paths"""
+ parser = argparse.ArgumentParser(prog='get_profile_paths_info', add_help=True)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ if self.ups_profile_analyzer is None:
+ console.print("Initializing Profile UPS Fields Analyzer. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+ else:
+ if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+ console.print("Re-initializing Profile UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+ console.print("Analyzing all Profile paths information. This will take few minutes...", style="blue")
+ df_analysis:pd.DataFrame = self.ups_profile_analyzer.analyzePaths(output='df')
+ if df_analysis is not None:
+ console.print(df_analysis)
+ df_analysis.to_csv(f"profile_all_paths_info.csv", index=False)
+ console.print(f"Profile all paths information data exported to profile_all_paths_info.csv", style="green")
+ else:
+ console.print("(!) No profile paths information data found.", style="red")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_profile_path_info(self, args:Any) -> None:
+ """Get path information on Profile"""
+ parser = argparse.ArgumentParser(prog='get_profile_path_info', add_help=True)
+ parser.add_argument("path", help="Dot notation of the path to analyze in Profile Storage", default=None,type=str)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ if self.ups_profile_analyzer is None:
+ console.print("Initializing Profile UPS Fields Analyzer. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+ else:
+ if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+ console.print("Re-initializing Profile UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+ analysis = self.ups_profile_analyzer.analyzePath(args.path)
+ if analysis:
+ console.print_json(data=analysis)
+ with open(f"profile_path_info_{args.path.replace('/','_')}.json", 'w') as f:
+ json.dump(analysis, f, indent=4)
+ console.print(f"Profile path information data exported to profile_path_info_{args.path.replace('/','_')}.json", style="green")
+ else:
+ console.print("(!) No profile path information data found.", style="red")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_event_paths_info(self,args:Any)->None:
+ """Get information for all Experience Event paths"""
+ parser = argparse.ArgumentParser(prog='get_event_paths_info', add_help=True)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ if self.ups_profile_analyzer is None:
+ console.print("Initializing Event UPS Fields Analyzer. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+ else:
+ if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+ console.print("Re-initializing Event UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+ console.print("Analyzing all Event paths information. This will take few minutes...", style="blue")
+ df_analysis:pd.DataFrame = self.ups_profile_analyzer.analyzePaths(output='df')
+ if df_analysis is not None:
+ console.print(df_analysis)
+ df_analysis.to_csv(f"event_all_paths_info.csv", index=False)
+ console.print(f"Event all paths information data exported to event_all_paths_info.csv", style="green")
+ else:
+ console.print("(!) No event paths information data found.", style="red")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_event_path_info(self, args:Any) -> None:
+ """Get path information on Experience Event"""
+ parser = argparse.ArgumentParser(prog='get_event_path_info', add_help=True)
+ parser.add_argument("path", help="Dot notation of the path to analyze in Experience Event Storage", default=None,type=str)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ if self.ups_profile_analyzer is None:
+ console.print("Initializing Event UPS Fields Analyzer. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+ else:
+ if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+ console.print("Re-initializing Event UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+ analysis = self.ups_profile_analyzer.analyzePath(args.path)
+ if analysis:
+ console.print_json(data=analysis)
+ with open(f"event_path_info_{args.path.replace('/','_')}.json", 'w') as f:
+ json.dump(analysis, f, indent=4)
+ console.print(f"Event path information data exported to event_path_info_{args.path.replace('/','_')}.json", style="green")
+ else:
+ console.print("(!) No event path information data found.", style="red")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return

  @login_required
  def do_get_schemas(self, args:Any) -> None:
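Note: the five commands added above (do_get_sandboxes plus the four Profile/Event path-info commands) share one pattern: lazily build a UpsFieldsAnalyzer, cache it on the shell, and rebuild it only when the active sandbox changes. A condensed sketch of that caching pattern; the helper name get_analyzer and the shell parameter are illustrative, not part of the package:

    from aepp.cli.upsfieldsanalyzer import UpsFieldsAnalyzer

    def get_analyzer(shell, union=None):
        # Reuse the cached analyzer unless it was never built or the shell has
        # since switched to a different sandbox (same check as in the diff above).
        if shell.ups_profile_analyzer is None or shell.ups_profile_analyzer.sandbox != shell.config.sandbox:
            kwargs = {"config": shell.config}
            if union is not None:
                kwargs["union"] = union  # e.g. the experienceevent__union used by the event commands
            shell.ups_profile_analyzer = UpsFieldsAnalyzer(**kwargs)
        return shell.ups_profile_analyzer

As written in the diff, the profile and event commands share the single ups_profile_analyzer attribute and only compare sandboxes, so switching between profile and event analysis within one sandbox reuses whichever analyzer was built first.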
@@ -738,13 +881,81 @@ class ServiceShell(cmd.Cmd):
  ds.get("name","N/A"),
  datetime.fromtimestamp(ds.get("created",1000)/1000).isoformat().split('T')[0],
  str(ds.get("dataIngested",False)),
- ds.get("classification",{}).get("dataBehavior","unknown")
+ ds.get('classification').get('dataBehavior','N/A'),
+ )
+ console.print(table)
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_datasets_tableName(self, args:Any) -> None:
+ parser = argparse.ArgumentParser(prog='get_datasets', add_help=True)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_cat = catalog.Catalog(config=self.config)
+ datasets = aepp_cat.getDataSets(output='list')
+ table = Table(title=f"Datasets in Sandbox: {self.config.sandbox}")
+ table.add_column("Name", style="white")
+ table.add_column("Table Name", style="cyan",no_wrap=True)
+ table.add_column("Data Type", style="red")
+ for ds in datasets:
+ table.add_row(
+ ds.get("name","N/A"),
+ ds.get('tags',{}).get('adobe/pqs/table',["N/A"])[0],
+ ds.get('classification').get('dataBehavior','N/A'),
  )
  console.print(table)
  except Exception as e:
  console.print(f"(!) Error: {str(e)}", style="red")
  except SystemExit:
  return
+
+ @login_required
+ def do_get_observable_schema_json(self,args:Any) -> None:
+ """Get the observable schema for a dataset by name or ID"""
+ parser = argparse.ArgumentParser(prog='get_observable_schema', add_help=True)
+ parser.add_argument("dataset", help="Dataset ID or Dataset Name to retrieve observable schema for",type=str)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_cat = catalog.Catalog(config=self.config)
+ datasets = aepp_cat.getDataSets(output='list')
+ for ds in datasets:
+ if ds.get("name","") == args.dataset or ds.get("id","") == args.dataset:
+ datasetId = ds.get("id")
+ schema_json = aepp_cat.getDataSetObservableSchema(datasetId=datasetId,appendDatasetInfo=True)
+ myObs = catalog.ObservableSchemaManager(schema_json,config=self.config)
+ data = myObs.to_dict()
+ with open(f"{args.dataset}_observable_schema.json", 'w') as f:
+ json.dump(data, f, indent=4)
+ console.print(f"Saved Observable schema to {args.dataset}_observable_schema.json.", style="green")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_observable_schema_csv(self,args:Any) -> None:
+ """Get the observable schema for a dataset by name or ID"""
+ parser = argparse.ArgumentParser(prog='get_observable_schema', add_help=True)
+ parser.add_argument("dataset", help="Dataset ID or Dataset Name to retrieve observable schema for",type=str)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_cat = catalog.Catalog(config=self.config)
+ datasets = aepp_cat.getDataSets(output='list')
+ for ds in datasets:
+ if ds.get("name","") == args.dataset or ds.get("id","") == args.dataset:
+ datasetId = ds.get("id")
+ schema_json = aepp_cat.getDataSetObservableSchema(datasetId=datasetId,appendDatasetInfo=True)
+ myObs = catalog.ObservableSchemaManager(schema_json,config=self.config)
+ data = myObs.to_dataframe()
+ data.to_csv(f"{args.dataset}_observable_schema.csv", index=False)
+ console.print(f"Saved Observable schema to {args.dataset}_observable_schema.csv.", style="green")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return

  @login_required
  def do_get_datasets_infos(self, args:Any) -> None:
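Note: the two do_get_observable_schema_* commands above share the same lookup: match a dataset by name or ID, fetch its observable schema, wrap it in ObservableSchemaManager, and serialize. A minimal sketch of the same calls outside the shell, using only the functions that appear in this diff; my_config and the dataset name are placeholders:

    from aepp import catalog

    aepp_cat = catalog.Catalog(config=my_config)                              # my_config: existing connection config
    datasets = aepp_cat.getDataSets(output='list')
    target = next(ds for ds in datasets if ds.get("name") == "My Dataset")    # or match on ds.get("id")
    schema_json = aepp_cat.getDataSetObservableSchema(datasetId=target["id"], appendDatasetInfo=True)
    obs = catalog.ObservableSchemaManager(schema_json, config=my_config)
    obs.to_dataframe().to_csv("observable_schema.csv", index=False)           # or obs.to_dict() for the JSON variant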
@@ -754,23 +965,58 @@ class ServiceShell(cmd.Cmd):
  args = parser.parse_args(shlex.split(args))
  aepp_cat = catalog.Catalog(config=self.config)
  datasets = aepp_cat.getDataSets()
+ aepp_cat.data.infos = aepp_cat.data.infos.sort_values(by=['ups_storageSize','datalake_storageSize'], ascending=False)
  aepp_cat.data.infos.to_csv(f"{aepp_cat.sandbox}_datasets_infos.csv",index=False)
  console.print(f"Datasets infos exported to {aepp_cat.sandbox}_datasets_infos.csv", style="green")
  table = Table(title=f"Datasets in Sandbox: {self.config.sandbox}")
  table.add_column("ID", style="white")
  table.add_column("Name", style="white",no_wrap=True)
- table.add_column("Datalake_rows", style="blue")
- table.add_column("Datalake_storage", style="blue")
- table.add_column("UPS_rows", style="magenta")
- table.add_column("UPS_storage", style="magenta")
+ table.add_column("UPS Rows", style="cyan")
+ table.add_column("UPS Storage Size", style="green")
+ table.add_column("Datalake Rows", style="magenta")
+ table.add_column("Datalake Storage Size", style="yellow")
  for _, ds in aepp_cat.data.infos.iterrows():
  table.add_row(
  ds.get("id","N/A"),
  ds.get("name","N/A"),
+ str(ds.get("ups_rows","N/A")),
+ str(ds.get("ups_storageSize","N/A")),
  str(ds.get("datalake_rows","N/A")),
  str(ds.get("datalake_storageSize","N/A")),
- str(ds.get("ups_rows","N/A")),
- str(ds.get("ups_storageSize","N/A"))
+ )
+ console.print(table)
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_snapshot_datasets(self,args:Any) -> None:
+ """List all snapshot datasets in the current sandbox"""
+ parser = argparse.ArgumentParser(prog='get_snapshot_datasets', add_help=True)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_cat = catalog.Catalog(config=self.config)
+ datasets = aepp_cat.getProfileSnapshotDatasets(explicitMergePolicy=True)
+ list_ds = []
+ for key, ds in datasets.items():
+ obj = ds
+ obj['id'] = key
+ list_ds.append(obj)
+ df_datasets = pd.DataFrame(list_ds)
+ df_datasets.to_csv(f"{self.config.sandbox}_snapshot_datasets.csv",index=False)
+ console.print(f"Snapshot Datasets exported to {self.config.sandbox}_snapshot_datasets.csv", style="green")
+ table = Table(title=f"Snapshot Datasets in Sandbox: {self.config.sandbox}")
+ table.add_column("ID", style="white")
+ table.add_column("Table Name", style="white")
+ table.add_column("Merge Policy Name", style="yellow")
+ table.add_column("Merge Policy ID", style="green")
+ for ds in list_ds:
+ table.add_row(
+ ds.get("id","N/A"),
+ ds.get("tags",{}).get('adobe/pqs/table',["N/A"])[0],
+ ds.get('mergePolicyName','N/A'),
+ [el.split(':')[1] for el in ds.get('tags',{}).get('unifiedProfile',[]) if el.startswith('mergePolicyId')][0]
  )
  console.print(table)
  except Exception as e:
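Note: the Merge Policy ID column in do_get_snapshot_datasets above is taken with [...][0], which assumes every snapshot dataset carries a mergePolicyId:<id> entry in tags['unifiedProfile']. A defensive variant for ad-hoc use (not how the published code is written) could fall back to "N/A":

    def merge_policy_id(ds: dict) -> str:
        # Return the first mergePolicyId tag value, or "N/A" when the tag is absent.
        tags = ds.get("tags", {}).get("unifiedProfile", [])
        return next((el.split(":", 1)[1] for el in tags if el.startswith("mergePolicyId")), "N/A")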
@@ -844,6 +1090,54 @@ class ServiceShell(cmd.Cmd):
  except SystemExit:
  return

+ @login_required
+ def do_get_audiences(self, args:Any) -> None:
+ """List all audiences in the current sandbox"""
+ parser = argparse.ArgumentParser(prog='get_audiences', add_help=True)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_audience = segmentation.Segmentation(config=self.config)
+ audiences = aepp_audience.getAudiences()
+ flw = flowservice.FlowService(config=self.config)
+ destinations = flw.getFlows(onlyDestinations=True)
+ segments_shared = []
+ for tmpFlow in destinations:
+ if len(tmpFlow['transformations'])>0:
+ tmpSegmentShared = tmpFlow['transformations'][0].get('params',{}).get('segmentSelectors',{}).get('selectors',[])
+ for s in tmpSegmentShared:
+ s['flowId'] = tmpFlow['id']
+ segments_shared += tmpSegmentShared
+ segment_shared_dict = {seg.get('value',{}).get('id'):{
+ "exportMode" : seg.get('value',{}).get('exportMode'),
+ "scheduleFrequency": seg.get('value',{}).get("schedule",{}).get('frequency',''),
+ "flowId" : seg["flowId"]
+ } for seg in segments_shared}
+ for aud in audiences:
+ aud['usedInFlow'] = True if segment_shared_dict.get(aud.get("id","N/A"),{}) != {} else False
+ aud['sharedInfo'] = segment_shared_dict.get(aud.get("id","N/A"),{})
+ df_audiences = pd.DataFrame(audiences)
+ df_audiences.to_csv(f"{self.config.sandbox}_audiences.csv",index=False)
+ console.print(f"Audiences exported to {self.config.sandbox}_audiences.csv", style="green")
+ table = Table(title=f"Audiences in Sandbox: {self.config.sandbox}")
+ table.add_column("ID", style="cyan")
+ table.add_column("Name", style="magenta")
+ table.add_column("Evaluation", style="yellow")
+ table.add_column("Total Profiles", style="green")
+ table.add_column("Shared", style="white")
+ for aud in audiences:
+ table.add_row(
+ aud.get("id","N/A"),
+ aud.get("name","N/A"),
+ '[bright_blue]Batch[/bright_blue]' if aud.get("evaluationInfo",{}).get("batch",{}).get('enabled') else '[chartreuse1]Streaming[/chartreuse1]' if aud.get("evaluationInfo",{}).get("continuous",{}).get('enabled') else '[purple]Edge[/purple]' if aud.get("evaluationInfo",{}).get("synchronous",{}).get('enabled') else 'N/A',
+ str(aud.get('metrics',{}).get('data',{}).get('totalProfiles','N/A')),
+ '[green3]True[/green3]' if aud.get("usedInFlow",False) else '[red3]False[/red3]',
+ )
+ console.print(table)
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
  @login_required
  def do_get_flows(self, args:Any) -> None:
  """List flows in the current sandbox based on parameters provided. By default, list all sources and destinations."""
aepp-0.5.2.post2/aepp/cli/upsfieldsanalyzer.py
@@ -0,0 +1,271 @@
+ import aepp
+ from aepp import schema,catalog,segmentation,flowservice, ConnectObject,schemamanager
+ from typing import Union
+ from copy import deepcopy
+ from concurrent.futures import ThreadPoolExecutor
+ import pandas as pd
+ import re
+
+ class UpsFieldsAnalyzer:
+ """
+ Class that extract the relationships of the fields for union schemas
+ """
+ loggingEnabled = False
+ logger = None
+
+ def __init__(
+ self,
+ union:str="https://ns.adobe.com/xdm/context/profile__union",
+ config: Union[dict,'ConnectObject'] = aepp.config.config_object,
+ region:str='nld2',
+ **kwargs,
+ ):
+ """
+ Instantiate the data Lineage class.
+ Arguments:
+ config : REQUIRED : Either ConnectObject instance or a config file to connect to the sandbox.
+ union : REQUIRED : The union schema you want to analyze. Default: https://ns.adobe.com/xdm/context/profile__union
+ Possible values:
+ 'https://ns.adobe.com/xdm/context/experienceevent__union'
+ 'https://ns.adobe.com/experience/journeyOrchestration/stepEvents/journeyStepEvent__union'
+ 'https://ns.adobe.com/experience/journeyOrchestration/stepEvents/journeyStepEvent__union'
+ 'https://ns.adobe.com/xdm/context/segmentdefinition__union'
+ 'https://ns.adobe.com/experience/customerJourneyManagement/ajoEntity__union'
+ region : OPTIONAL : If you are using a different region than the one automatically assigned (default : nld2, possible option: va7,aus5)
+ Additional kwargs will update the header.
+ """
+ if union is None:
+ raise ValueError("Requires the usage of an union schema definition")
+ self.union = union
+ self.classId = self.union.split('__')[0]
+ self.config = config
+ self.region = region
+ self.sandbox = config.sandbox
+ self.schemaAPI = schema.Schema(config=config)
+ self.catalogAPI = catalog.Catalog(config=config)
+ self.segmentationAPI = segmentation.Segmentation(config=config)
+ self.flowAPI = flowservice.FlowService(config=config)
+ self.unionSchema = schemamanager.SchemaManager(union,config=config)
+ df_union = self.unionSchema.to_dataframe(queryPath=True)
+ self.df_union = df_union.set_index('querypath',drop=True)
+ self.__schemaInfo__(config=config)
+ self.__datasetInfo__()
+ self.__audienceInfo__()
+ self.__flowserviceInfoDestinations__()
+ self.__flowserviceInfoSource__()
+ self.__audienceInfo__()
+
+
+ def __schemaInfo__(self,config)->None:
+ """
+ Extract the information of schema.
+ Provide the following attributes:
+ * schemaManagers : dict {$id:schemaManager}
+
+ """
+ schemas = self.schemaAPI.getSchemas(classFilter=self.classId)
+ list_schemaIds = [sch.get('$id') for sch in schemas]
+ none_params = [None for _ in range(len(list_schemaIds))]
+ config_params = [deepcopy(config) for _ in range(len(list_schemaIds))]
+ self.schemaManagers = {}
+ with ThreadPoolExecutor(max_workers=10) as executor:
+ schemaDetails = list(executor.map(schemamanager.SchemaManager, list_schemaIds,none_params,none_params,none_params,none_params,config_params))
+ for sch in schemaDetails:
+ self.schemaManagers[sch.id] = sch
+
+ def __audienceInfo__(self)->None:
+ """
+ Extract the segmentation information
+ Provide the following attributes:
+ * audiences : list of audiences
+ * audiences_definitions : dict { id : {definition, class}}
+ """
+ audiences = self.segmentationAPI.getAudiences()
+ self.audiences_definitions = {
+ seg['id']:{
+ 'name':seg.get('name'),
+ 'definition':seg,
+ 'format' : seg.get('expression',{}).get('format'),
+ 'class':[el.get("$ref") for el in seg.get('definedOn',[{}])]
+ }
+ for seg
+ in audiences
+ if self.union in [el.get("$ref") for el in seg.get('definedOn',[{}])]
+ }
+ self.paths_audiences = {path:{} for path in self.df_union['path'].to_list()}
+ for segId in self.audiences_definitions:
+ paths = self.segmentationAPI.extractPaths(self.audiences_definitions[segId].get('definition'))
+ for path in paths:
+ if path in self.paths_audiences.keys():
+ self.paths_audiences[path][segId] = {
+ "name": self.audiences_definitions[segId]["name"]
+ }
+
+ def __datasetInfo__(self):
+ """
+ Extract the dataset information
+ Provide the following attributes:
+ * dict_datasetId_name : dict { id : name }
+ * observableSchemas : dict { id : ObsSchema}
+ * observable_df : dict { id : df }
+ * dataset_schema : dict { id : schema $id }
+ * datasets : list (of dataset ID)
+ """
+ datasets = self.catalogAPI.getDataSets(output='list')
+ enabledDatasets = []
+ self.dict_datasetId_name = {}
+ list_enabled_datasetIds = []
+ for ds in datasets:
+ if 'enabled:true' in ds.get('tags',{}).get('unifiedProfile',[]):
+ enabledDatasets.append(ds)
+ self.dict_datasetId_name[ds['id']] = ds['name']
+ list_enabled_datasetIds.append(ds['id'])
+ with ThreadPoolExecutor(max_workers=10) as executor:
+ observableSchemasList = list(executor.map(self.catalogAPI.getDataSetObservableSchema, list_enabled_datasetIds,[True]*len(list_enabled_datasetIds)))
+ self.observableSchemas = {}
+ self.observable_df = {}
+ self.dataset_schema = {}
+ self.datasets = []
+ for element in observableSchemasList:
+ obs = catalog.ObservableSchemaManager(element)
+ if obs.schemaId is not None:
+ datasetSchema = self.schemaAPI.getSchema(obs.schemaId)
+ if datasetSchema.get('meta:class') == self.classId:
+ self.datasets.append(obs.datasetId)
+ self.observableSchemas[element.get('datasetId')] = obs
+ self.dataset_schema[element.get('datasetId')] = datasetSchema
+ self.observable_df[element.get('datasetId')] = self.observableSchemas[element.get('datasetId')].to_dataframe()
+
+ def __flowserviceInfoDestinations__(self)->dict:
+ """
+ Build the flow service data for destination
+ Provide the following attributes:
+ * destinationsPath : dict { id : {name:str, paths:list }
+ """
+ selectors = set()
+ destinationFlows = self.flowAPI.getFlows(onlyDestinations=True)
+ self.destinationsPath = {}
+ for destination in destinationFlows:
+ transformations = destination.get('transformations',[{}])
+ if len(transformations) > 0:
+ if transformations[0].get('name') == 'GeneralTransform':
+ name = destination['name']
+ transformationParams = destination.get('transformations',[{}])[0].get('params',{})
+ if 'profileSelectors' in transformationParams.keys():
+ for selector in transformationParams['profileSelectors'].get('selectors',[]):
+ selectors.add(selector.get('value',{}).get('path'))
+ self.destinationsPath[destination['id']]={
+ 'name':name,
+ "paths":list(selectors)
+ }
+
+ def __flowserviceInfoSource__(self)->dict:
+ """
+ Build the flow service data for source
+ Provide the following attributes:
+ * destinationsPath : dict { id : {name:str, datasetId:str,schemaRef:str }
+ """
+ sourceFlows = self.flowAPI.getFlows(onlySources=True)
+ self.sourceFlows = {}
+ def getTargetDetails(sourceConnId)->dict:
+ tmp_sourceConnection = self.flowAPI.getTargetConnection(sourceConnId)
+ return tmp_sourceConnection
+ def getFlowSpec(specId)->dict:
+ tmp_sourceSpec = self.flowAPI.getFlowSpec(specId)
+ return tmp_sourceSpec
+ list_targetIds = [source.get('targetConnectionIds')[0] for source in sourceFlows]
+ list_flowSpecIds = [source.get('flowSpec',{}).get('id') for source in sourceFlows if source.get('flowSpec',{}).get('id') is not None]
+ with ThreadPoolExecutor(max_workers=10) as executor:
+ targetconnections = list(executor.map(getTargetDetails, list_targetIds))
+ flowSpecs = list(executor.map(getFlowSpec, list_flowSpecIds))
+ for source in sourceFlows:
+ sourceName = source['name']
+ sourceId = source['id']
+ tmp_sourceTargetId = source.get('targetConnectionIds')[0]
+ tmp_sourceTarget = [item for item in targetconnections if item['id'] == tmp_sourceTargetId][0]
+ params = tmp_sourceTarget.get('params',{})
+ specId = source.get('flowSpec',{}).get('id')
+ frequency = None
+ if specId is not None:
+ tmp_sourceSpec = [item for item in flowSpecs if item['id'] == specId][0]
+ frequency = tmp_sourceSpec.get('attributes',{}).get('frequency')
+ datasetId = params.get('dataSetId',params.get('datasetId'))
+ if datasetId in self.datasets:
+ self.sourceFlows[sourceId] = {
+ 'name' : sourceName,
+ 'datasetId' : datasetId,
+ 'schemaRef' : self.dataset_schema[datasetId],
+ 'frequency':frequency
+ }
+
+
+ def __buildRelationships__(self,path:str)->dict:
+ """
+ Build relationship between a path and the different elements
+ Arguments:
+ path : REQUIRED : the path to analyze
+ """
+ result_dict = {'path':path}
+ if path in self.df_union.index:
+ result_dict['description'] = self.df_union.at[path,'description']
+ result_dict['fieldGroup'] = self.df_union.at[path,'fieldGroup']
+ result_dict['type'] = self.df_union.at[path,'type']
+ result_dict['schemas'] = {}
+ for schemaId in self.schemaManagers:
+ if path in self.schemaManagers[schemaId].to_dataframe()['path'].to_list():
+ result_dict['schemas'][schemaId] = self.schemaManagers[schemaId].title
+ result_dict['datasets'] = {}
+ for dsId in self.datasets:
+ if path in self.observable_df[dsId]['path'].to_list():
+ result_dict['datasets'][dsId] = self.dict_datasetId_name[dsId]
+ result_dict['destinationFlows'] = {}
+ for flowId in self.destinationsPath:
+ if path in self.destinationsPath[flowId]['paths']:
+ result_dict['destinationFlows'][flowId] = self.destinationsPath[flowId]['name']
+ result_dict['sourceFlows'] = {}
+ for sourceId in self.sourceFlows:
+ datasetId = self.sourceFlows[sourceId]['datasetId']
+ if path in self.observable_df[datasetId]['path'].to_list():
+ result_dict['sourceFlows'][sourceId] = {'name':self.sourceFlows[sourceId]['name'],'frequency':self.sourceFlows[sourceId]['frequency']}
+ result_dict['audiences'] = self.paths_audiences[path]
+ return result_dict
+
+ def analyzePaths(self,output:str='df')->Union[list,pd.DataFrame]:
+ """
+ Analyze the paths of your union schema
+ Arguments:
+ output : OPTIONAL : The type of output provided. Default "df", possible: "raw" (list)
+ """
+ list_dictionary = []
+ for path in self.df_union.path.to_list():
+ list_dictionary.append(self.analyzePath(path))
+ if output=='df':
+ df = pd.DataFrame(list_dictionary)
+ return df
+ return list_dictionary
+
+ def analyzePath(self,path:str=None,output:str='dict')->Union[dict,pd.DataFrame]:
+ """
+ Analyze a specific path
+ Arguments:
+ path : REQUIRED : The path to analyze
+ output : OPTIONAL : The type of output provided ('dict' (default) or 'dataframe' )
+ """
+ if path is None:
+ raise ValueError('path must be specified')
+ res = self.__buildRelationships__(path)
+ return res
+
+ def to_dataframe(self,save:bool=False)->pd.DataFrame:
+ """
+ Returns the union schema as dataframe.
+ Arguments:
+ save : OPTIONAL : If the dataframe is to be saved in a file
+ """
+ return self.unionSchema.to_dataframe(save=save)
+
+ def to_dict(self)->dict:
+ """
+ Returns the union schema as dictionary.
+ """
+ return self.unionSchema.to_dict()
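Note: the new UpsFieldsAnalyzer class above is what the CLI commands call under the hood, and it can also be used directly. A minimal sketch, assuming my_config is an existing ConnectObject (or config dict) for the target sandbox; the field path is illustrative:

    from aepp.cli.upsfieldsanalyzer import UpsFieldsAnalyzer

    # Profile union (the default); pass the experienceevent__union URL instead for event analysis.
    analyzer = UpsFieldsAnalyzer(config=my_config)

    # One field: a dict covering the path's field group, type, schemas, datasets,
    # destination flows, source flows and audiences.
    info = analyzer.analyzePath("person.name.firstName")

    # Every path in the union schema, as a pandas DataFrame (output='raw' returns a list of dicts).
    df = analyzer.analyzePaths(output='df')

Initialization is the expensive part: the constructor walks schemas, datasets, audiences and flows up front (hence the "This will take few minutes..." messages in the CLI), so it pays to keep one instance per sandbox.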
{aepp-0.5.2 → aepp-0.5.2.post2/aepp.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: aepp
- Version: 0.5.2
+ Version: 0.5.2.post2
  Summary: Package to manage AEP API endpoint and some helper functions
  Author-email: Julien Piccini <piccini.julien@gmail.com>
  License: Apache-2.0
{aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/SOURCES.txt
@@ -45,4 +45,5 @@ aepp.egg-info/entry_points.txt
  aepp.egg-info/requires.txt
  aepp.egg-info/top_level.txt
  aepp/cli/__init__.py
- aepp/cli/__main__.py
+ aepp/cli/__main__.py
+ aepp/cli/upsfieldsanalyzer.py
aepp-0.5.2/aepp/__version__.py
@@ -1 +0,0 @@
- __version__ = "0.5.2"