aepp 0.5.2__tar.gz → 0.5.2.post2__tar.gz

This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (52)
  1. {aepp-0.5.2/aepp.egg-info → aepp-0.5.2.post2}/PKG-INFO +1 -1
  2. aepp-0.5.2.post2/aepp/__version__.py +1 -0
  3. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/cli/__main__.py +303 -9
  4. aepp-0.5.2.post2/aepp/cli/upsfieldsanalyzer.py +271 -0
  5. {aepp-0.5.2 → aepp-0.5.2.post2/aepp.egg-info}/PKG-INFO +1 -1
  6. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/SOURCES.txt +2 -1
  7. aepp-0.5.2/aepp/__version__.py +0 -1
  8. {aepp-0.5.2 → aepp-0.5.2.post2}/LICENSE +0 -0
  9. {aepp-0.5.2 → aepp-0.5.2.post2}/MANIFEST.in +0 -0
  10. {aepp-0.5.2 → aepp-0.5.2.post2}/README.md +0 -0
  11. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/__init__.py +0 -0
  12. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/accesscontrol.py +0 -0
  13. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/catalog.py +0 -0
  14. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/classmanager.py +0 -0
  15. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/cli/__init__.py +0 -0
  16. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/config.py +0 -0
  17. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/configs.py +0 -0
  18. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/connector.py +0 -0
  19. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/customerprofile.py +0 -0
  20. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/dataaccess.py +0 -0
  21. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/dataprep.py +0 -0
  22. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/datasets.py +0 -0
  23. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/datatypemanager.py +0 -0
  24. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/deletion.py +0 -0
  25. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/destination.py +0 -0
  26. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/destinationinstanceservice.py +0 -0
  27. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/edge.py +0 -0
  28. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/exportDatasetToDataLandingZone.py +0 -0
  29. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/fieldgroupmanager.py +0 -0
  30. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/flowservice.py +0 -0
  31. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/hygiene.py +0 -0
  32. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/identity.py +0 -0
  33. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/ingestion.py +0 -0
  34. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/observability.py +0 -0
  35. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/policy.py +0 -0
  36. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/privacyservice.py +0 -0
  37. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/queryservice.py +0 -0
  38. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/sandboxes.py +0 -0
  39. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/schema.py +0 -0
  40. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/schemamanager.py +0 -0
  41. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/segmentation.py +0 -0
  42. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/sensei.py +0 -0
  43. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/som.py +0 -0
  44. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/synchronizer.py +0 -0
  45. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/tags.py +0 -0
  46. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp/utils.py +0 -0
  47. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/dependency_links.txt +0 -0
  48. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/entry_points.txt +0 -0
  49. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/requires.txt +0 -0
  50. {aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/top_level.txt +0 -0
  51. {aepp-0.5.2 → aepp-0.5.2.post2}/pyproject.toml +0 -0
  52. {aepp-0.5.2 → aepp-0.5.2.post2}/setup.cfg +0 -0
{aepp-0.5.2/aepp.egg-info → aepp-0.5.2.post2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: aepp
- Version: 0.5.2
+ Version: 0.5.2.post2
  Summary: Package to manage AEP API endpoint and some helper functions
  Author-email: Julien Piccini <piccini.julien@gmail.com>
  License: Apache-2.0
aepp-0.5.2.post2/aepp/__version__.py
@@ -0,0 +1 @@
+ __version__ = "0.5.2-2"
{aepp-0.5.2 → aepp-0.5.2.post2}/aepp/cli/__main__.py
@@ -1,7 +1,8 @@
  from ast import arg
  from matplotlib.pyplot import table
  import aepp
- from aepp import synchronizer, schema, schemamanager, fieldgroupmanager, datatypemanager, identity, queryservice,catalog,flowservice
+ from aepp import synchronizer, schema, schemamanager, fieldgroupmanager, datatypemanager, identity, queryservice,catalog,flowservice,sandboxes, segmentation
+ from aepp.cli.upsfieldsanalyzer import UpsFieldsAnalyzer
  import argparse, cmd, shlex, json
  from functools import wraps
  from rich.console import Console
@@ -37,6 +38,7 @@ class ServiceShell(cmd.Cmd):
  super().__init__()
  self.config = None
  self.connectInstance = True
+ self.ups_profile_analyzer:UpsFieldsAnalyzer|None = None
  if kwargs.get("config_file") is not None:
  config_path = Path(kwargs.get("config_file"))
  if not config_path.is_absolute():
@@ -69,7 +71,7 @@ class ServiceShell(cmd.Cmd):
  )
  self.prompt = f"{self.config.sandbox}> "
  console.print(Panel(f"Connected to [bold green]{self.sandbox}[/bold green]", style="blue"))
-
+
  def do_createConfigFile(self, arg:Any) -> None:
  """Create a configuration file for future use"""
  parser = argparse.ArgumentParser(prog='createConfigFile', add_help=True)
@@ -134,6 +136,147 @@ class ServiceShell(cmd.Cmd):
  else:
  console.print(Panel("(!) You must configure the connection first using the 'config' command.", style="red"))

+ @login_required
+ def do_get_sandboxes(self, args:Any) -> None:
+ """List all sandboxes for the current organization"""
+ parser = argparse.ArgumentParser(prog='get_sandboxes', add_help=True)
+ parser.add_argument("-sv", "--save",help="Save sandboxes to CSV file")
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_sandboxes = sandboxes.Sandboxes(config=self.config)
+ sandboxes_list = aepp_sandboxes.getSandboxes()
+ if sandboxes_list:
+ table = Table(title=f"Sandboxes in Org: {self.config.org_id}")
+ table.add_column("Name", style="cyan")
+ table.add_column("Title", style="magenta")
+ table.add_column("Type", style="green")
+ table.add_column("Region", style="yellow")
+ table.add_column("Created", style="medium_violet_red")
+ for sb in sandboxes_list:
+ table.add_row(
+ sb.get("name","N/A"),
+ sb.get("title","N/A"),
+ sb.get("type","N/A"),
+ sb.get("region","N/A"),
+ sb.get("createdDate","N/A"),
+ )
+ console.print(table)
+ if args.save:
+ df_sandboxes = pd.DataFrame(sandboxes_list)
+ df_sandboxes.to_csv(f"sandboxes_{self.config.org_id}.csv", index=False)
+ console.print(f"Sandboxes exported to sandboxes_{self.config.org_id}.csv", style="green")
+ else:
+ console.print("(!) No sandboxes found.", style="red")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_profile_paths_info(self,args:Any)->None:
+ """Get usage information for all Profile paths"""
+ parser = argparse.ArgumentParser(prog='get_profile_paths_info', add_help=True)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ if self.ups_profile_analyzer is None:
+ console.print("Initializing Profile UPS Fields Analyzer. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+ else:
+ if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+ console.print("Re-initializing Profile UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+ console.print("Analyzing all Profile paths information. This will take few minutes...", style="blue")
+ df_analysis:pd.DataFrame = self.ups_profile_analyzer.analyzePaths(output='df')
+ if df_analysis is not None:
+ console.print(df_analysis)
+ df_analysis.to_csv(f"profile_all_paths_info.csv", index=False)
+ console.print(f"Profile all paths information data exported to profile_all_paths_info.csv", style="green")
+ else:
+ console.print("(!) No profile paths information data found.", style="red")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_profile_path_info(self, args:Any) -> None:
+ """Get path information on Profile"""
+ parser = argparse.ArgumentParser(prog='get_profile_path_info', add_help=True)
+ parser.add_argument("path", help="Dot notation of the path to analyze in Profile Storage", default=None,type=str)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ if self.ups_profile_analyzer is None:
+ console.print("Initializing Profile UPS Fields Analyzer. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+ else:
+ if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+ console.print("Re-initializing Profile UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config)
+ analysis = self.ups_profile_analyzer.analyzePath(args.path)
+ if analysis:
+ console.print_json(data=analysis)
+ with open(f"profile_path_info_{args.path.replace('/','_')}.json", 'w') as f:
+ json.dump(analysis, f, indent=4)
+ console.print(f"Profile path information data exported to profile_path_info_{args.path.replace('/','_')}.json", style="green")
+ else:
+ console.print("(!) No profile path information data found.", style="red")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_event_paths_info(self,args:Any)->None:
+ """Get information for all Experience Event paths"""
+ parser = argparse.ArgumentParser(prog='get_event_paths_info', add_help=True)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ if self.ups_profile_analyzer is None:
+ console.print("Initializing Event UPS Fields Analyzer. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+ else:
+ if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+ console.print("Re-initializing Event UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+ console.print("Analyzing all Event paths information. This will take few minutes...", style="blue")
+ df_analysis:pd.DataFrame = self.ups_profile_analyzer.analyzePaths(output='df')
+ if df_analysis is not None:
+ console.print(df_analysis)
+ df_analysis.to_csv(f"event_all_paths_info.csv", index=False)
+ console.print(f"Event all paths information data exported to event_all_paths_info.csv", style="green")
+ else:
+ console.print("(!) No event paths information data found.", style="red")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_event_path_info(self, args:Any) -> None:
+ """Get path information on Experience Event"""
+ parser = argparse.ArgumentParser(prog='get_event_path_info', add_help=True)
+ parser.add_argument("path", help="Dot notation of the path to analyze in Experience Event Storage", default=None,type=str)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ if self.ups_profile_analyzer is None:
+ console.print("Initializing Event UPS Fields Analyzer. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+ else:
+ if self.config.sandbox != self.ups_profile_analyzer.sandbox:
+ console.print("Re-initializing Event UPS Fields Analyzer for the new sandbox. This will take few minutes...", style="blue")
+ self.ups_profile_analyzer = UpsFieldsAnalyzer(config=self.config,union='https://ns.adobe.com/xdm/context/experienceevent__union')
+ analysis = self.ups_profile_analyzer.analyzePath(args.path)
+ if analysis:
+ console.print_json(data=analysis)
+ with open(f"event_path_info_{args.path.replace('/','_')}.json", 'w') as f:
+ json.dump(analysis, f, indent=4)
+ console.print(f"Event path information data exported to event_path_info_{args.path.replace('/','_')}.json", style="green")
+ else:
+ console.print("(!) No event path information data found.", style="red")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return

  @login_required
  def do_get_schemas(self, args:Any) -> None:
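Note: the five commands added above (do_get_sandboxes plus the four Profile/Event path-info commands) share one pattern: lazily build a UpsFieldsAnalyzer, cache it on the shell, and rebuild it only when the active sandbox changes. A condensed sketch of that caching pattern; the helper name get_analyzer and the shell parameter are illustrative, not part of the package:

    from aepp.cli.upsfieldsanalyzer import UpsFieldsAnalyzer

    def get_analyzer(shell, union=None):
        # Reuse the cached analyzer unless it was never built or the shell has
        # since switched to a different sandbox (same check as in the diff above).
        if shell.ups_profile_analyzer is None or shell.ups_profile_analyzer.sandbox != shell.config.sandbox:
            kwargs = {"config": shell.config}
            if union is not None:
                kwargs["union"] = union  # e.g. the experienceevent__union used by the event commands
            shell.ups_profile_analyzer = UpsFieldsAnalyzer(**kwargs)
        return shell.ups_profile_analyzer

As written in the diff, the profile and event commands share the single ups_profile_analyzer attribute and only compare sandboxes, so switching between profile and event analysis within one sandbox reuses whichever analyzer was built first.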
@@ -738,13 +881,81 @@ class ServiceShell(cmd.Cmd):
  ds.get("name","N/A"),
  datetime.fromtimestamp(ds.get("created",1000)/1000).isoformat().split('T')[0],
  str(ds.get("dataIngested",False)),
- ds.get("classification",{}).get("dataBehavior","unknown")
+ ds.get('classification').get('dataBehavior','N/A'),
+ )
+ console.print(table)
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_datasets_tableName(self, args:Any) -> None:
+ parser = argparse.ArgumentParser(prog='get_datasets', add_help=True)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_cat = catalog.Catalog(config=self.config)
+ datasets = aepp_cat.getDataSets(output='list')
+ table = Table(title=f"Datasets in Sandbox: {self.config.sandbox}")
+ table.add_column("Name", style="white")
+ table.add_column("Table Name", style="cyan",no_wrap=True)
+ table.add_column("Data Type", style="red")
+ for ds in datasets:
+ table.add_row(
+ ds.get("name","N/A"),
+ ds.get('tags',{}).get('adobe/pqs/table',["N/A"])[0],
+ ds.get('classification').get('dataBehavior','N/A'),
  )
  console.print(table)
  except Exception as e:
  console.print(f"(!) Error: {str(e)}", style="red")
  except SystemExit:
  return
+
+ @login_required
+ def do_get_observable_schema_json(self,args:Any) -> None:
+ """Get the observable schema for a dataset by name or ID"""
+ parser = argparse.ArgumentParser(prog='get_observable_schema', add_help=True)
+ parser.add_argument("dataset", help="Dataset ID or Dataset Name to retrieve observable schema for",type=str)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_cat = catalog.Catalog(config=self.config)
+ datasets = aepp_cat.getDataSets(output='list')
+ for ds in datasets:
+ if ds.get("name","") == args.dataset or ds.get("id","") == args.dataset:
+ datasetId = ds.get("id")
+ schema_json = aepp_cat.getDataSetObservableSchema(datasetId=datasetId,appendDatasetInfo=True)
+ myObs = catalog.ObservableSchemaManager(schema_json,config=self.config)
+ data = myObs.to_dict()
+ with open(f"{args.dataset}_observable_schema.json", 'w') as f:
+ json.dump(data, f, indent=4)
+ console.print(f"Saved Observable schema to {args.dataset}_observable_schema.json.", style="green")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_observable_schema_csv(self,args:Any) -> None:
+ """Get the observable schema for a dataset by name or ID"""
+ parser = argparse.ArgumentParser(prog='get_observable_schema', add_help=True)
+ parser.add_argument("dataset", help="Dataset ID or Dataset Name to retrieve observable schema for",type=str)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_cat = catalog.Catalog(config=self.config)
+ datasets = aepp_cat.getDataSets(output='list')
+ for ds in datasets:
+ if ds.get("name","") == args.dataset or ds.get("id","") == args.dataset:
+ datasetId = ds.get("id")
+ schema_json = aepp_cat.getDataSetObservableSchema(datasetId=datasetId,appendDatasetInfo=True)
+ myObs = catalog.ObservableSchemaManager(schema_json,config=self.config)
+ data = myObs.to_dataframe()
+ data.to_csv(f"{args.dataset}_observable_schema.csv", index=False)
+ console.print(f"Saved Observable schema to {args.dataset}_observable_schema.csv.", style="green")
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return

  @login_required
  def do_get_datasets_infos(self, args:Any) -> None:
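Note: the two do_get_observable_schema_* commands above share the same lookup: match a dataset by name or ID, fetch its observable schema, wrap it in ObservableSchemaManager, and serialize. A minimal sketch of the same calls outside the shell, using only the functions that appear in this diff; my_config and the dataset name are placeholders:

    from aepp import catalog

    aepp_cat = catalog.Catalog(config=my_config)                              # my_config: existing connection config
    datasets = aepp_cat.getDataSets(output='list')
    target = next(ds for ds in datasets if ds.get("name") == "My Dataset")    # or match on ds.get("id")
    schema_json = aepp_cat.getDataSetObservableSchema(datasetId=target["id"], appendDatasetInfo=True)
    obs = catalog.ObservableSchemaManager(schema_json, config=my_config)
    obs.to_dataframe().to_csv("observable_schema.csv", index=False)           # or obs.to_dict() for the JSON variant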
@@ -754,23 +965,58 @@ class ServiceShell(cmd.Cmd):
  args = parser.parse_args(shlex.split(args))
  aepp_cat = catalog.Catalog(config=self.config)
  datasets = aepp_cat.getDataSets()
+ aepp_cat.data.infos = aepp_cat.data.infos.sort_values(by=['ups_storageSize','datalake_storageSize'], ascending=False)
  aepp_cat.data.infos.to_csv(f"{aepp_cat.sandbox}_datasets_infos.csv",index=False)
  console.print(f"Datasets infos exported to {aepp_cat.sandbox}_datasets_infos.csv", style="green")
  table = Table(title=f"Datasets in Sandbox: {self.config.sandbox}")
  table.add_column("ID", style="white")
  table.add_column("Name", style="white",no_wrap=True)
- table.add_column("Datalake_rows", style="blue")
- table.add_column("Datalake_storage", style="blue")
- table.add_column("UPS_rows", style="magenta")
- table.add_column("UPS_storage", style="magenta")
+ table.add_column("UPS Rows", style="cyan")
+ table.add_column("UPS Storage Size", style="green")
+ table.add_column("Datalake Rows", style="magenta")
+ table.add_column("Datalake Storage Size", style="yellow")
  for _, ds in aepp_cat.data.infos.iterrows():
  table.add_row(
  ds.get("id","N/A"),
  ds.get("name","N/A"),
+ str(ds.get("ups_rows","N/A")),
+ str(ds.get("ups_storageSize","N/A")),
  str(ds.get("datalake_rows","N/A")),
  str(ds.get("datalake_storageSize","N/A")),
- str(ds.get("ups_rows","N/A")),
- str(ds.get("ups_storageSize","N/A"))
+ )
+ console.print(table)
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
+ @login_required
+ def do_get_snapshot_datasets(self,args:Any) -> None:
+ """List all snapshot datasets in the current sandbox"""
+ parser = argparse.ArgumentParser(prog='get_snapshot_datasets', add_help=True)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_cat = catalog.Catalog(config=self.config)
+ datasets = aepp_cat.getProfileSnapshotDatasets(explicitMergePolicy=True)
+ list_ds = []
+ for key, ds in datasets.items():
+ obj = ds
+ obj['id'] = key
+ list_ds.append(obj)
+ df_datasets = pd.DataFrame(list_ds)
+ df_datasets.to_csv(f"{self.config.sandbox}_snapshot_datasets.csv",index=False)
+ console.print(f"Snapshot Datasets exported to {self.config.sandbox}_snapshot_datasets.csv", style="green")
+ table = Table(title=f"Snapshot Datasets in Sandbox: {self.config.sandbox}")
+ table.add_column("ID", style="white")
+ table.add_column("Table Name", style="white")
+ table.add_column("Merge Policy Name", style="yellow")
+ table.add_column("Merge Policy ID", style="green")
+ for ds in list_ds:
+ table.add_row(
+ ds.get("id","N/A"),
+ ds.get("tags",{}).get('adobe/pqs/table',["N/A"])[0],
+ ds.get('mergePolicyName','N/A'),
+ [el.split(':')[1] for el in ds.get('tags',{}).get('unifiedProfile',[]) if el.startswith('mergePolicyId')][0]
  )
  console.print(table)
  except Exception as e:
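Note: the Merge Policy ID column in do_get_snapshot_datasets above is taken with [...][0], which assumes every snapshot dataset carries a mergePolicyId:<id> entry in tags['unifiedProfile']. A defensive variant for ad-hoc use (not how the published code is written) could fall back to "N/A":

    def merge_policy_id(ds: dict) -> str:
        # Return the first mergePolicyId tag value, or "N/A" when the tag is absent.
        tags = ds.get("tags", {}).get("unifiedProfile", [])
        return next((el.split(":", 1)[1] for el in tags if el.startswith("mergePolicyId")), "N/A")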
@@ -844,6 +1090,54 @@ class ServiceShell(cmd.Cmd):
  except SystemExit:
  return

+ @login_required
+ def do_get_audiences(self, args:Any) -> None:
+ """List all audiences in the current sandbox"""
+ parser = argparse.ArgumentParser(prog='get_audiences', add_help=True)
+ try:
+ args = parser.parse_args(shlex.split(args))
+ aepp_audience = segmentation.Segmentation(config=self.config)
+ audiences = aepp_audience.getAudiences()
+ flw = flowservice.FlowService(config=self.config)
+ destinations = flw.getFlows(onlyDestinations=True)
+ segments_shared = []
+ for tmpFlow in destinations:
+ if len(tmpFlow['transformations'])>0:
+ tmpSegmentShared = tmpFlow['transformations'][0].get('params',{}).get('segmentSelectors',{}).get('selectors',[])
+ for s in tmpSegmentShared:
+ s['flowId'] = tmpFlow['id']
+ segments_shared += tmpSegmentShared
+ segment_shared_dict = {seg.get('value',{}).get('id'):{
+ "exportMode" : seg.get('value',{}).get('exportMode'),
+ "scheduleFrequency": seg.get('value',{}).get("schedule",{}).get('frequency',''),
+ "flowId" : seg["flowId"]
+ } for seg in segments_shared}
+ for aud in audiences:
+ aud['usedInFlow'] = True if segment_shared_dict.get(aud.get("id","N/A"),{}) != {} else False
+ aud['sharedInfo'] = segment_shared_dict.get(aud.get("id","N/A"),{})
+ df_audiences = pd.DataFrame(audiences)
+ df_audiences.to_csv(f"{self.config.sandbox}_audiences.csv",index=False)
+ console.print(f"Audiences exported to {self.config.sandbox}_audiences.csv", style="green")
+ table = Table(title=f"Audiences in Sandbox: {self.config.sandbox}")
+ table.add_column("ID", style="cyan")
+ table.add_column("Name", style="magenta")
+ table.add_column("Evaluation", style="yellow")
+ table.add_column("Total Profiles", style="green")
+ table.add_column("Shared", style="white")
+ for aud in audiences:
+ table.add_row(
+ aud.get("id","N/A"),
+ aud.get("name","N/A"),
+ '[bright_blue]Batch[/bright_blue]' if aud.get("evaluationInfo",{}).get("batch",{}).get('enabled') else '[chartreuse1]Streaming[/chartreuse1]' if aud.get("evaluationInfo",{}).get("continuous",{}).get('enabled') else '[purple]Edge[/purple]' if aud.get("evaluationInfo",{}).get("synchronous",{}).get('enabled') else 'N/A',
+ str(aud.get('metrics',{}).get('data',{}).get('totalProfiles','N/A')),
+ '[green3]True[/green3]' if aud.get("usedInFlow",False) else '[red3]False[/red3]',
+ )
+ console.print(table)
+ except Exception as e:
+ console.print(f"(!) Error: {str(e)}", style="red")
+ except SystemExit:
+ return
+
  @login_required
  def do_get_flows(self, args:Any) -> None:
  """List flows in the current sandbox based on parameters provided. By default, list all sources and destinations."""
aepp-0.5.2.post2/aepp/cli/upsfieldsanalyzer.py
@@ -0,0 +1,271 @@
+ import aepp
+ from aepp import schema,catalog,segmentation,flowservice, ConnectObject,schemamanager
+ from typing import Union
+ from copy import deepcopy
+ from concurrent.futures import ThreadPoolExecutor
+ import pandas as pd
+ import re
+
+ class UpsFieldsAnalyzer:
+ """
+ Class that extract the relationships of the fields for union schemas
+ """
+ loggingEnabled = False
+ logger = None
+
+ def __init__(
+ self,
+ union:str="https://ns.adobe.com/xdm/context/profile__union",
+ config: Union[dict,'ConnectObject'] = aepp.config.config_object,
+ region:str='nld2',
+ **kwargs,
+ ):
+ """
+ Instantiate the data Lineage class.
+ Arguments:
+ config : REQUIRED : Either ConnectObject instance or a config file to connect to the sandbox.
+ union : REQUIRED : The union schema you want to analyze. Default: https://ns.adobe.com/xdm/context/profile__union
+ Possible values:
+ 'https://ns.adobe.com/xdm/context/experienceevent__union'
+ 'https://ns.adobe.com/experience/journeyOrchestration/stepEvents/journeyStepEvent__union'
+ 'https://ns.adobe.com/experience/journeyOrchestration/stepEvents/journeyStepEvent__union'
+ 'https://ns.adobe.com/xdm/context/segmentdefinition__union'
+ 'https://ns.adobe.com/experience/customerJourneyManagement/ajoEntity__union'
+ region : OPTIONAL : If you are using a different region than the one automatically assigned (default : nld2, possible option: va7,aus5)
+ Additional kwargs will update the header.
+ """
+ if union is None:
+ raise ValueError("Requires the usage of an union schema definition")
+ self.union = union
+ self.classId = self.union.split('__')[0]
+ self.config = config
+ self.region = region
+ self.sandbox = config.sandbox
+ self.schemaAPI = schema.Schema(config=config)
+ self.catalogAPI = catalog.Catalog(config=config)
+ self.segmentationAPI = segmentation.Segmentation(config=config)
+ self.flowAPI = flowservice.FlowService(config=config)
+ self.unionSchema = schemamanager.SchemaManager(union,config=config)
+ df_union = self.unionSchema.to_dataframe(queryPath=True)
+ self.df_union = df_union.set_index('querypath',drop=True)
+ self.__schemaInfo__(config=config)
+ self.__datasetInfo__()
+ self.__audienceInfo__()
+ self.__flowserviceInfoDestinations__()
+ self.__flowserviceInfoSource__()
+ self.__audienceInfo__()
+
+
+ def __schemaInfo__(self,config)->None:
+ """
+ Extract the information of schema.
+ Provide the following attributes:
+ * schemaManagers : dict {$id:schemaManager}
+
+ """
+ schemas = self.schemaAPI.getSchemas(classFilter=self.classId)
+ list_schemaIds = [sch.get('$id') for sch in schemas]
+ none_params = [None for _ in range(len(list_schemaIds))]
+ config_params = [deepcopy(config) for _ in range(len(list_schemaIds))]
+ self.schemaManagers = {}
+ with ThreadPoolExecutor(max_workers=10) as executor:
+ schemaDetails = list(executor.map(schemamanager.SchemaManager, list_schemaIds,none_params,none_params,none_params,none_params,config_params))
+ for sch in schemaDetails:
+ self.schemaManagers[sch.id] = sch
+
+ def __audienceInfo__(self)->None:
+ """
+ Extract the segmentation information
+ Provide the following attributes:
+ * audiences : list of audiences
+ * audiences_definitions : dict { id : {definition, class}}
+ """
+ audiences = self.segmentationAPI.getAudiences()
+ self.audiences_definitions = {
+ seg['id']:{
+ 'name':seg.get('name'),
+ 'definition':seg,
+ 'format' : seg.get('expression',{}).get('format'),
+ 'class':[el.get("$ref") for el in seg.get('definedOn',[{}])]
+ }
+ for seg
+ in audiences
+ if self.union in [el.get("$ref") for el in seg.get('definedOn',[{}])]
+ }
+ self.paths_audiences = {path:{} for path in self.df_union['path'].to_list()}
+ for segId in self.audiences_definitions:
+ paths = self.segmentationAPI.extractPaths(self.audiences_definitions[segId].get('definition'))
+ for path in paths:
+ if path in self.paths_audiences.keys():
+ self.paths_audiences[path][segId] = {
+ "name": self.audiences_definitions[segId]["name"]
+ }
+
+ def __datasetInfo__(self):
+ """
+ Extract the dataset information
+ Provide the following attributes:
+ * dict_datasetId_name : dict { id : name }
+ * observableSchemas : dict { id : ObsSchema}
+ * observable_df : dict { id : df }
+ * dataset_schema : dict { id : schema $id }
+ * datasets : list (of dataset ID)
+ """
+ datasets = self.catalogAPI.getDataSets(output='list')
+ enabledDatasets = []
+ self.dict_datasetId_name = {}
+ list_enabled_datasetIds = []
+ for ds in datasets:
+ if 'enabled:true' in ds.get('tags',{}).get('unifiedProfile',[]):
+ enabledDatasets.append(ds)
+ self.dict_datasetId_name[ds['id']] = ds['name']
+ list_enabled_datasetIds.append(ds['id'])
+ with ThreadPoolExecutor(max_workers=10) as executor:
+ observableSchemasList = list(executor.map(self.catalogAPI.getDataSetObservableSchema, list_enabled_datasetIds,[True]*len(list_enabled_datasetIds)))
+ self.observableSchemas = {}
+ self.observable_df = {}
+ self.dataset_schema = {}
+ self.datasets = []
+ for element in observableSchemasList:
+ obs = catalog.ObservableSchemaManager(element)
+ if obs.schemaId is not None:
+ datasetSchema = self.schemaAPI.getSchema(obs.schemaId)
+ if datasetSchema.get('meta:class') == self.classId:
+ self.datasets.append(obs.datasetId)
+ self.observableSchemas[element.get('datasetId')] = obs
+ self.dataset_schema[element.get('datasetId')] = datasetSchema
+ self.observable_df[element.get('datasetId')] = self.observableSchemas[element.get('datasetId')].to_dataframe()
+
+ def __flowserviceInfoDestinations__(self)->dict:
+ """
+ Build the flow service data for destination
+ Provide the following attributes:
+ * destinationsPath : dict { id : {name:str, paths:list }
+ """
+ selectors = set()
+ destinationFlows = self.flowAPI.getFlows(onlyDestinations=True)
+ self.destinationsPath = {}
+ for destination in destinationFlows:
+ transformations = destination.get('transformations',[{}])
+ if len(transformations) > 0:
+ if transformations[0].get('name') == 'GeneralTransform':
+ name = destination['name']
+ transformationParams = destination.get('transformations',[{}])[0].get('params',{})
+ if 'profileSelectors' in transformationParams.keys():
+ for selector in transformationParams['profileSelectors'].get('selectors',[]):
+ selectors.add(selector.get('value',{}).get('path'))
+ self.destinationsPath[destination['id']]={
+ 'name':name,
+ "paths":list(selectors)
+ }
+
+ def __flowserviceInfoSource__(self)->dict:
+ """
+ Build the flow service data for source
+ Provide the following attributes:
+ * destinationsPath : dict { id : {name:str, datasetId:str,schemaRef:str }
+ """
+ sourceFlows = self.flowAPI.getFlows(onlySources=True)
+ self.sourceFlows = {}
+ def getTargetDetails(sourceConnId)->dict:
+ tmp_sourceConnection = self.flowAPI.getTargetConnection(sourceConnId)
+ return tmp_sourceConnection
+ def getFlowSpec(specId)->dict:
+ tmp_sourceSpec = self.flowAPI.getFlowSpec(specId)
+ return tmp_sourceSpec
+ list_targetIds = [source.get('targetConnectionIds')[0] for source in sourceFlows]
+ list_flowSpecIds = [source.get('flowSpec',{}).get('id') for source in sourceFlows if source.get('flowSpec',{}).get('id') is not None]
+ with ThreadPoolExecutor(max_workers=10) as executor:
+ targetconnections = list(executor.map(getTargetDetails, list_targetIds))
+ flowSpecs = list(executor.map(getFlowSpec, list_flowSpecIds))
+ for source in sourceFlows:
+ sourceName = source['name']
+ sourceId = source['id']
+ tmp_sourceTargetId = source.get('targetConnectionIds')[0]
+ tmp_sourceTarget = [item for item in targetconnections if item['id'] == tmp_sourceTargetId][0]
+ params = tmp_sourceTarget.get('params',{})
+ specId = source.get('flowSpec',{}).get('id')
+ frequency = None
+ if specId is not None:
+ tmp_sourceSpec = [item for item in flowSpecs if item['id'] == specId][0]
+ frequency = tmp_sourceSpec.get('attributes',{}).get('frequency')
+ datasetId = params.get('dataSetId',params.get('datasetId'))
+ if datasetId in self.datasets:
+ self.sourceFlows[sourceId] = {
+ 'name' : sourceName,
+ 'datasetId' : datasetId,
+ 'schemaRef' : self.dataset_schema[datasetId],
+ 'frequency':frequency
+ }
+
+
+ def __buildRelationships__(self,path:str)->dict:
+ """
+ Build relationship between a path and the different elements
+ Arguments:
+ path : REQUIRED : the path to analyze
+ """
+ result_dict = {'path':path}
+ if path in self.df_union.index:
+ result_dict['description'] = self.df_union.at[path,'description']
+ result_dict['fieldGroup'] = self.df_union.at[path,'fieldGroup']
+ result_dict['type'] = self.df_union.at[path,'type']
+ result_dict['schemas'] = {}
+ for schemaId in self.schemaManagers:
+ if path in self.schemaManagers[schemaId].to_dataframe()['path'].to_list():
+ result_dict['schemas'][schemaId] = self.schemaManagers[schemaId].title
+ result_dict['datasets'] = {}
+ for dsId in self.datasets:
+ if path in self.observable_df[dsId]['path'].to_list():
+ result_dict['datasets'][dsId] = self.dict_datasetId_name[dsId]
+ result_dict['destinationFlows'] = {}
+ for flowId in self.destinationsPath:
+ if path in self.destinationsPath[flowId]['paths']:
+ result_dict['destinationFlows'][flowId] = self.destinationsPath[flowId]['name']
+ result_dict['sourceFlows'] = {}
+ for sourceId in self.sourceFlows:
+ datasetId = self.sourceFlows[sourceId]['datasetId']
+ if path in self.observable_df[datasetId]['path'].to_list():
+ result_dict['sourceFlows'][sourceId] = {'name':self.sourceFlows[sourceId]['name'],'frequency':self.sourceFlows[sourceId]['frequency']}
+ result_dict['audiences'] = self.paths_audiences[path]
+ return result_dict
+
+ def analyzePaths(self,output:str='df')->Union[list,pd.DataFrame]:
+ """
+ Analyze the paths of your union schema
+ Arguments:
+ output : OPTIONAL : The type of output provided. Default "df", possible: "raw" (list)
+ """
+ list_dictionary = []
+ for path in self.df_union.path.to_list():
+ list_dictionary.append(self.analyzePath(path))
+ if output=='df':
+ df = pd.DataFrame(list_dictionary)
+ return df
+ return list_dictionary
+
+ def analyzePath(self,path:str=None,output:str='dict')->Union[dict,pd.DataFrame]:
+ """
+ Analyze a specific path
+ Arguments:
+ path : REQUIRED : The path to analyze
+ output : OPTIONAL : The type of output provided ('dict' (default) or 'dataframe' )
+ """
+ if path is None:
+ raise ValueError('path must be specified')
+ res = self.__buildRelationships__(path)
+ return res
+
+ def to_dataframe(self,save:bool=False)->pd.DataFrame:
+ """
+ Returns the union schema as dataframe.
+ Arguments:
+ save : OPTIONAL : If the dataframe is to be saved in a file
+ """
+ return self.unionSchema.to_dataframe(save=save)
+
+ def to_dict(self)->dict:
+ """
+ Returns the union schema as dictionary.
+ """
+ return self.unionSchema.to_dict()
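Note: the new UpsFieldsAnalyzer class above is what the CLI commands call under the hood, and it can also be used directly. A minimal sketch, assuming my_config is an existing ConnectObject (or config dict) for the target sandbox; the field path is illustrative:

    from aepp.cli.upsfieldsanalyzer import UpsFieldsAnalyzer

    # Profile union (the default); pass the experienceevent__union URL instead for event analysis.
    analyzer = UpsFieldsAnalyzer(config=my_config)

    # One field: a dict covering the path's field group, type, schemas, datasets,
    # destination flows, source flows and audiences.
    info = analyzer.analyzePath("person.name.firstName")

    # Every path in the union schema, as a pandas DataFrame (output='raw' returns a list of dicts).
    df = analyzer.analyzePaths(output='df')

Initialization is the expensive part: the constructor walks schemas, datasets, audiences and flows up front (hence the "This will take few minutes..." messages in the CLI), so it pays to keep one instance per sandbox.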
{aepp-0.5.2 → aepp-0.5.2.post2/aepp.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: aepp
- Version: 0.5.2
+ Version: 0.5.2.post2
  Summary: Package to manage AEP API endpoint and some helper functions
  Author-email: Julien Piccini <piccini.julien@gmail.com>
  License: Apache-2.0
{aepp-0.5.2 → aepp-0.5.2.post2}/aepp.egg-info/SOURCES.txt
@@ -45,4 +45,5 @@ aepp.egg-info/entry_points.txt
  aepp.egg-info/requires.txt
  aepp.egg-info/top_level.txt
  aepp/cli/__init__.py
- aepp/cli/__main__.py
+ aepp/cli/__main__.py
+ aepp/cli/upsfieldsanalyzer.py
aepp-0.5.2/aepp/__version__.py
@@ -1 +0,0 @@
- __version__ = "0.5.2"