aepp 0.5.2.post1__py3-none-any.whl → 0.5.2.post3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,271 @@
1
+ import aepp
2
+ from aepp import schema,catalog,segmentation,flowservice, ConnectObject,schemamanager
3
+ from typing import Union
4
+ from copy import deepcopy
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ import pandas as pd
7
+ import re
8
+
9
class UpsFieldsAnalyzer:
    """
    Class that extracts the relationships of the fields for union schemas.

    On instantiation it gathers, for the selected union schema:
        * the schemas built on the class of the union (schemaManagers)
        * the profile-enabled datasets and their observable schemas
        * the audiences defined on the union and the paths they use
        * the destination flows and the paths they export
        * the source flows feeding the retained datasets
    The analyzePath / analyzePaths methods then report where a field is used.
    """
    loggingEnabled = False
    logger = None

    def __init__(
        self,
        union:str="https://ns.adobe.com/xdm/context/profile__union",
        config: Union[dict,'ConnectObject'] = aepp.config.config_object,
        region:str='nld2',
        **kwargs,
    ):
        """
        Instantiate the union schema fields analyzer.
        Arguments:
            union : REQUIRED : The union schema you want to analyze. Default: https://ns.adobe.com/xdm/context/profile__union
                Possible values:
                'https://ns.adobe.com/xdm/context/experienceevent__union'
                'https://ns.adobe.com/experience/journeyOrchestration/stepEvents/journeyStepEvent__union'
                'https://ns.adobe.com/xdm/context/segmentdefinition__union'
                'https://ns.adobe.com/experience/customerJourneyManagement/ajoEntity__union'
            config : OPTIONAL : Either ConnectObject instance or a config dictionary to connect to the sandbox.
            region : OPTIONAL : If you are using a different region than the one automatically assigned (default : nld2, possible option: va7,aus5)
                The value is only stored on the instance (self.region).
        Additional kwargs are accepted for compatibility but are not used by this class.
        Raises:
            ValueError : when union is None.
        """
        if union is None:
            raise ValueError("Requires the usage of an union schema definition")
        self.union = union
        # the class $id is the part of the union $id before the "__union" suffix
        self.classId = self.union.split('__')[0]
        self.config = config
        self.region = region
        # config can be a plain dictionary (the annotated default) or a ConnectObject
        if isinstance(config, dict):
            self.sandbox = config.get('sandbox')
        else:
            self.sandbox = config.sandbox
        self.schemaAPI = schema.Schema(config=config)
        self.catalogAPI = catalog.Catalog(config=config)
        self.segmentationAPI = segmentation.Segmentation(config=config)
        self.flowAPI = flowservice.FlowService(config=config)
        self.unionSchema = schemamanager.SchemaManager(union,config=config)
        df_union = self.unionSchema.to_dataframe(queryPath=True)
        self.df_union = df_union.set_index('querypath',drop=True)
        self.__schemaInfo__(config=config)
        self.__datasetInfo__()
        # audiences only depend on the segmentation API and df_union: load them once
        self.__audienceInfo__()
        self.__flowserviceInfoDestinations__()
        # source flows rely on self.datasets / self.dataset_schema set by __datasetInfo__
        self.__flowserviceInfoSource__()

    def __schemaInfo__(self,config)->None:
        """
        Extract the information of schema.
        Arguments:
            config : REQUIRED : the configuration passed (as a deep copy) to each SchemaManager.
        Provide the following attributes:
            * schemaManagers : dict {$id:schemaManager}
        """
        schemas = self.schemaAPI.getSchemas(classFilter=self.classId)
        list_schemaIds = [sch.get('$id') for sch in schemas]
        nb_schemas = len(list_schemaIds)
        none_params = [None] * nb_schemas
        # each worker gets its own copy of the configuration
        config_params = [deepcopy(config) for _ in range(nb_schemas)]
        self.schemaManagers = {}
        with ThreadPoolExecutor(max_workers=10) as executor:
            # positional call: schema id, four None placeholders, then the config
            schemaDetails = list(executor.map(schemamanager.SchemaManager, list_schemaIds,none_params,none_params,none_params,none_params,config_params))
        for sch in schemaDetails:
            self.schemaManagers[sch.id] = sch

    def __audienceInfo__(self)->None:
        """
        Extract the segmentation information
        Provide the following attributes:
            * audiences_definitions : dict { id : {name, definition, format, class}}
            * paths_audiences : dict { path : { audienceId : {name} } }
        """
        audiences = self.segmentationAPI.getAudiences()
        self.audiences_definitions = {}
        for seg in audiences:
            definedOn = [el.get("$ref") for el in seg.get('definedOn',[{}])]
            # keep only the audiences defined on the union being analyzed
            if self.union in definedOn:
                self.audiences_definitions[seg['id']] = {
                    'name':seg.get('name'),
                    'definition':seg,
                    'format' : seg.get('expression',{}).get('format'),
                    'class':definedOn
                }
        self.paths_audiences = {path:{} for path in self.df_union['path'].to_list()}
        for segId, audience in self.audiences_definitions.items():
            paths = self.segmentationAPI.extractPaths(audience.get('definition'))
            for path in paths:
                # paths that are not part of the union schema are ignored
                if path in self.paths_audiences:
                    self.paths_audiences[path][segId] = {
                        "name": audience["name"]
                    }

    def __datasetInfo__(self)->None:
        """
        Extract the dataset information
        Provide the following attributes:
            * dict_datasetId_name : dict { id : name }
            * observableSchemas : dict { id : ObsSchema}
            * observable_df : dict { id : df }
            * dataset_schema : dict { id : schema definition }
            * datasets : list (of dataset ID)
        """
        datasets = self.catalogAPI.getDataSets(output='list')
        self.dict_datasetId_name = {}
        list_enabled_datasetIds = []
        for ds in datasets:
            # only the datasets tagged as enabled for unified profile are considered
            if 'enabled:true' in ds.get('tags',{}).get('unifiedProfile',[]):
                self.dict_datasetId_name[ds['id']] = ds['name']
                list_enabled_datasetIds.append(ds['id'])
        with ThreadPoolExecutor(max_workers=10) as executor:
            observableSchemasList = list(executor.map(self.catalogAPI.getDataSetObservableSchema, list_enabled_datasetIds,[True]*len(list_enabled_datasetIds)))
        self.observableSchemas = {}
        self.observable_df = {}
        self.dataset_schema = {}
        self.datasets = []
        for element in observableSchemasList:
            obs = catalog.ObservableSchemaManager(element)
            if obs.schemaId is not None:
                datasetSchema = self.schemaAPI.getSchema(obs.schemaId)
                # keep only the datasets whose schema is based on the class of the union
                if datasetSchema.get('meta:class') == self.classId:
                    datasetId = element.get('datasetId')
                    self.datasets.append(obs.datasetId)
                    self.observableSchemas[datasetId] = obs
                    self.dataset_schema[datasetId] = datasetSchema
                    self.observable_df[datasetId] = obs.to_dataframe()

    def __flowserviceInfoDestinations__(self)->None:
        """
        Build the flow service data for destination
        Provide the following attributes:
            * destinationsPath : dict { id : {name:str, paths:list }
        """
        destinationFlows = self.flowAPI.getFlows(onlyDestinations=True)
        self.destinationsPath = {}
        for destination in destinationFlows:
            transformations = destination.get('transformations',[{}])
            if len(transformations) == 0:
                continue
            if transformations[0].get('name') != 'GeneralTransform':
                continue
            transformationParams = transformations[0].get('params',{})
            # one set per destination, so a flow does not inherit the paths of the previous ones
            selectors = set()
            if 'profileSelectors' in transformationParams:
                for selector in transformationParams['profileSelectors'].get('selectors',[]):
                    selectorPath = selector.get('value',{}).get('path')
                    if selectorPath is not None:
                        selectors.add(selectorPath)
            self.destinationsPath[destination['id']]={
                'name':destination['name'],
                "paths":list(selectors)
            }

    def __flowserviceInfoSource__(self)->None:
        """
        Build the flow service data for source
        Provide the following attributes:
            * sourceFlows : dict { id : {name:str, datasetId:str, schemaRef:dict, frequency } }
        Only the flows targeting a dataset listed in self.datasets are kept.
        """
        sourceFlows = self.flowAPI.getFlows(onlySources=True)
        self.sourceFlows = {}

        def firstTargetId(flow:dict):
            """Return the first target connection ID of a flow, or None when there is none."""
            targetIds = flow.get('targetConnectionIds') or []
            return targetIds[0] if len(targetIds) > 0 else None

        # dict.fromkeys removes the duplicates while keeping the order: each ID is requested once
        list_targetIds = list(dict.fromkeys(
            targetId for targetId in (firstTargetId(source) for source in sourceFlows) if targetId is not None
        ))
        list_flowSpecIds = list(dict.fromkeys(
            specId for specId in (source.get('flowSpec',{}).get('id') for source in sourceFlows) if specId is not None
        ))
        with ThreadPoolExecutor(max_workers=10) as executor:
            targetconnections = list(executor.map(self.flowAPI.getTargetConnection, list_targetIds))
            flowSpecs = list(executor.map(self.flowAPI.getFlowSpec, list_flowSpecIds))
        # executor.map keeps the order of its input, so results can be keyed by the requested ID
        targetById = dict(zip(list_targetIds, targetconnections))
        specById = dict(zip(list_flowSpecIds, flowSpecs))
        for source in sourceFlows:
            tmp_sourceTarget = targetById.get(firstTargetId(source))
            if not isinstance(tmp_sourceTarget, dict):
                # no target connection available for that flow: nothing to relate it to
                continue
            params = tmp_sourceTarget.get('params',{})
            specId = source.get('flowSpec',{}).get('id')
            frequency = None
            tmp_sourceSpec = specById.get(specId) if specId is not None else None
            if isinstance(tmp_sourceSpec, dict):
                frequency = tmp_sourceSpec.get('attributes',{}).get('frequency')
            # both spellings of the key are looked up
            datasetId = params.get('dataSetId',params.get('datasetId'))
            if datasetId in self.datasets:
                self.sourceFlows[source['id']] = {
                    'name' : source['name'],
                    'datasetId' : datasetId,
                    'schemaRef' : self.dataset_schema[datasetId],
                    'frequency':frequency
                }

    def __pathIndex__(self)->dict:
        """
        Build the sets of paths available in each schema and in each retained dataset.
        Returns a dictionary:
            * schemas : dict { schema $id : set of paths }
            * datasets : dict { dataset id : set of paths }
        """
        return {
            'schemas' : {
                schemaId : set(manager.to_dataframe()['path'].to_list())
                for schemaId, manager in self.schemaManagers.items()
            },
            'datasets' : {
                dsId : set(self.observable_df[dsId]['path'].to_list())
                for dsId in self.datasets
            }
        }

    def __buildRelationships__(self,path:str,pathIndex:dict=None)->dict:
        """
        Build relationship between a path and the different elements
        Arguments:
            path : REQUIRED : the path to analyze
            pathIndex : OPTIONAL : result of __pathIndex__, to avoid recomputing it when several paths are analyzed.
        Returns a dictionary with the keys:
            path, schemas, datasets, destinationFlows, sourceFlows, audiences
            and description, fieldGroup, type when the path is found in the union schema index.
        """
        if pathIndex is None:
            pathIndex = self.__pathIndex__()
        schemaPaths = pathIndex['schemas']
        datasetPaths = pathIndex['datasets']
        result_dict = {'path':path}
        # df_union is indexed by querypath
        if path in self.df_union.index:
            result_dict['description'] = self.df_union.at[path,'description']
            result_dict['fieldGroup'] = self.df_union.at[path,'fieldGroup']
            result_dict['type'] = self.df_union.at[path,'type']
        result_dict['schemas'] = {
            schemaId : self.schemaManagers[schemaId].title
            for schemaId in self.schemaManagers
            if path in schemaPaths[schemaId]
        }
        result_dict['datasets'] = {
            dsId : self.dict_datasetId_name[dsId]
            for dsId in self.datasets
            if path in datasetPaths[dsId]
        }
        result_dict['destinationFlows'] = {
            flowId : flow['name']
            for flowId, flow in self.destinationsPath.items()
            if path in flow['paths']
        }
        result_dict['sourceFlows'] = {}
        for sourceId, sourceFlow in self.sourceFlows.items():
            # a source flow is related to the path when its target dataset contains the path
            if path in datasetPaths.get(sourceFlow['datasetId'],()):
                result_dict['sourceFlows'][sourceId] = {'name':sourceFlow['name'],'frequency':sourceFlow['frequency']}
        # a path that is not part of the union schema has no audience attached
        result_dict['audiences'] = self.paths_audiences.get(path,{})
        return result_dict

    def analyzePaths(self,output:str='df')->Union[list,pd.DataFrame]:
        """
        Analyze the paths of your union schema
        Arguments:
            output : OPTIONAL : The type of output provided. Default "df", possible: "raw" (list)
        """
        # the path sets are computed once and shared by all the paths
        pathIndex = self.__pathIndex__()
        list_dictionary = [
            self.__buildRelationships__(path,pathIndex)
            for path in self.df_union['path'].to_list()
        ]
        if output=='df':
            df = pd.DataFrame(list_dictionary)
            return df
        return list_dictionary

    def analyzePath(self,path:str=None,output:str='dict')->Union[dict,pd.DataFrame]:
        """
        Analyze a specific path
        Arguments:
            path : REQUIRED : The path to analyze
            output : OPTIONAL : The type of output provided ('dict' (default) or 'dataframe' )
                'df' is accepted as an alias of 'dataframe'; the dataframe contains a single row.
        Raises:
            ValueError : when path is None.
        """
        if path is None:
            raise ValueError('path must be specified')
        res = self.__buildRelationships__(path)
        if output in ('dataframe','df'):
            return pd.DataFrame([res])
        return res

    def to_dataframe(self,save:bool=False)->pd.DataFrame:
        """
        Returns the union schema as dataframe.
        Arguments:
            save : OPTIONAL : If the dataframe is to be saved in a file
        """
        return self.unionSchema.to_dataframe(save=save)

    def to_dict(self)->dict:
        """
        Returns the union schema as dictionary.
        """
        return self.unionSchema.to_dict()
aepp/fieldgroupmanager.py CHANGED
@@ -241,7 +241,7 @@ class FieldGroupManager:
241
241
  if '/datatypes/' in str(self.fieldGroup):
242
242
  dataTypeSearch = f"(https://ns.adobe.com/{self.tenantId[1:]}/datatypes/[0-9a-z]+?)'"
243
243
  dataTypes = re.findall(dataTypeSearch,str(self.fieldGroup.get('definitions')))
244
- for file in self.datatypeFolder.glob('*.json'):
244
+ for file in folder.glob('*.json'):
245
245
  tmp_def = json.load(FileIO(file))
246
246
  if tmp_def.get('$id') in dataTypes or tmp_def.get('meta:altId') in dataTypes:
247
247
  dt_manager = DataTypeManager(tmp_def,localFolder=self.localfolder,sandbox=self.sandbox,tenantId=self.tenantId)
aepp/identity.py CHANGED
@@ -86,8 +86,8 @@ class Identity:
86
86
  self.sandbox = self.connector.config["sandbox"]
87
87
 
88
88
  environment = config["environment"]
89
- base_url = f"https://platform-{region}.adobe.io"
90
-
89
+ #base_url = f"https://platform-{region}.adobe.io"
90
+ base_url = f"https://platform.adobe.io"
91
91
  if environment != "prod":
92
92
  base_url = f"https://platform-{environment}-{region}.adobe.io"
93
93
 
aepp/schema.py CHANGED
@@ -74,8 +74,8 @@ class Schema:
74
74
 
75
75
  def __init__(
76
76
  self,
77
- containerId: str = "tenant",
78
77
  config: Union[dict,ConnectObject] = aepp.config.config_object,
78
+ containerId: str = "tenant",
79
79
  header=aepp.config.header,
80
80
  loggingObject: dict = None,
81
81
  **kwargs,
@@ -83,12 +83,10 @@ class Schema:
83
83
  """
84
84
  Copy the token and header and initiate the object to retrieve schema elements.
85
85
  Arguments:
86
- containerId : OPTIONAL : "tenant"(default) or "global"
87
- loggingObject : OPTIONAL : logging object to log messages.
88
86
  config : OPTIONAL : config object in the config module.
87
+ containerId : OPTIONAL : "tenant"(default) or "global"
89
88
  header : OPTIONAL : header object in the config module.
90
- possible kwargs:
91
- x-sandbox-name : name of the sandbox you want to use (default : "prod").
89
+ loggingObject : OPTIONAL : logging object to log messages.
92
90
  """
93
91
  if loggingObject is not None and sorted(
94
92
  ["level", "stream", "format", "filename", "file"]
aepp/schemamanager.py CHANGED
@@ -167,8 +167,10 @@ class SchemaManager:
167
167
  fgM = FieldGroupManager(fieldGroup=definition,schemaAPI=self.schemaAPI,localFolder=localFolder,tenantId=self.tenantId,sandbox=self.sandbox)
168
168
  self.fieldGroupsManagers[fgM.title] = fgM
169
169
  for clas in self.classIds:
170
+ clsM = None
170
171
  clsM = ClassManager(clas,schemaAPI=self.schemaAPI,localFolder=localFolder,tenantId=self.tenantId,sandbox=self.sandbox)
171
- self.classManagers[clsM.title] = clsM
172
+ if clsM is not None:
173
+ self.classManagers[clsM.title] = clsM
172
174
  elif type(schema) == str:
173
175
  if self.schemaAPI is not None:
174
176
  self.schema = self.schemaAPI.getSchema(schema,full=False,schema_type='xed')
@@ -254,6 +256,7 @@ class SchemaManager:
254
256
  fgM = FieldGroupManager(fieldGroup=definition,schemaAPI=self.schemaAPI,localFolder=localFolder,tenantId=self.tenantId,sandbox=self.sandbox)
255
257
  self.fieldGroupsManagers[fgM.title] = fgM
256
258
  for clas in self.classIds:
259
+ clsM = None
257
260
  if self.localfolder is not None:
258
261
  found = False
259
262
  for folder in self.classFolder:
@@ -269,7 +272,8 @@ class SchemaManager:
269
272
  break
270
273
  elif self.schemaAPI is not None:
271
274
  clsM = ClassManager(clas,schemaAPI=self.schemaAPI,localFolder=localFolder,tenantId=self.tenantId,sandbox=self.sandbox)
272
- self.classManagers[clsM.title] = clsM
275
+ if clsM is not None:
276
+ self.classManagers[clsM.title] = clsM
273
277
  elif schema is None:
274
278
  self.STATE = "NEW"
275
279
  self.classId = schemaClass
@@ -284,8 +288,10 @@ class SchemaManager:
284
288
  ]
285
289
  }
286
290
  for clas in self.classIds:
291
+ clsM = None
287
292
  clsM = ClassManager(clas,schemaAPI=self.schemaAPI,localFolder=localFolder,tenantId=self.tenantId,sandbox=self.sandbox)
288
- self.classManagers[clsM.title] = clsM
293
+ if clsM is not None:
294
+ self.classManagers[clsM.title] = clsM
289
295
  if fieldGroups is not None and type(fieldGroups) == list:
290
296
  if fieldGroups[0] == str:
291
297
  for fgId in fieldGroups:
@@ -322,9 +328,11 @@ class SchemaManager:
322
328
  self.fieldGroupsManagers[fgM.title] = fgM
323
329
  elif fieldGroups[0] == dict:
324
330
  for fg in fieldGroups:
331
+ fgM = None
325
332
  self.fieldGroupIds.append(fg.get('$id'))
326
333
  fgM = FieldGroupManager(fg,schemaAPI=self.schemaAPI, localFolder=localFolder,tenantId=self.tenantId,sandbox=self.sandbox)
327
- self.fieldGroupsManagers[fgM.title] = fgM
334
+ if fgM is not None:
335
+ self.fieldGroupsManagers[fgM.title] = fgM
328
336
  self.fieldGroupTitles= tuple(fg.title for fg in list(self.fieldGroupsManagers.values()))
329
337
  self.fieldGroups = {fg.id:fg.title for fg in list(self.fieldGroupsManagers.values())}
330
338
  self.fieldGroupIds = tuple(fg.id for fg in list(self.fieldGroupsManagers.values()))