findly.unified-reporting-sdk 0.6.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. findly/__init__.py +0 -0
  2. findly/unified_reporting_sdk/__init__.py +10 -0
  3. findly/unified_reporting_sdk/data_sources/__init__.py +0 -0
  4. findly/unified_reporting_sdk/data_sources/common/__init__.py +0 -0
  5. findly/unified_reporting_sdk/data_sources/common/common_parser.py +213 -0
  6. findly/unified_reporting_sdk/data_sources/common/date_range_helper.py +33 -0
  7. findly/unified_reporting_sdk/data_sources/common/reports_client.py +116 -0
  8. findly/unified_reporting_sdk/data_sources/common/where_string_comparison.py +149 -0
  9. findly/unified_reporting_sdk/data_sources/fb_ads/__init__.py +0 -0
  10. findly/unified_reporting_sdk/data_sources/fb_ads/fb_ads_client.py +608 -0
  11. findly/unified_reporting_sdk/data_sources/fb_ads/fb_ads_query_args_parser.py +828 -0
  12. findly/unified_reporting_sdk/data_sources/fb_ads/metadata/action_breakdowns.csv +11 -0
  13. findly/unified_reporting_sdk/data_sources/fb_ads/metadata/breakdowns.csv +44 -0
  14. findly/unified_reporting_sdk/data_sources/fb_ads/metadata/dimensions.jsonl +75 -0
  15. findly/unified_reporting_sdk/data_sources/fb_ads/metadata/fields.csv +135 -0
  16. findly/unified_reporting_sdk/data_sources/fb_ads/metadata/metrics.jsonl +102 -0
  17. findly/unified_reporting_sdk/data_sources/ga4/__init__.py +0 -0
  18. findly/unified_reporting_sdk/data_sources/ga4/ga4_client.py +1127 -0
  19. findly/unified_reporting_sdk/data_sources/ga4/ga4_query_args_parser.py +751 -0
  20. findly/unified_reporting_sdk/data_sources/ga4/metadata/dimensions.jsonl +109 -0
  21. findly/unified_reporting_sdk/data_sources/gsc/__init__.py +0 -0
  22. findly/unified_reporting_sdk/data_sources/gsc/gsc_client.py +0 -0
  23. findly/unified_reporting_sdk/data_sources/gsc/gsc_service.py +55 -0
  24. findly/unified_reporting_sdk/protos/.gitignore +3 -0
  25. findly/unified_reporting_sdk/protos/__init__.py +5 -0
  26. findly/unified_reporting_sdk/urs.py +87 -0
  27. findly/unified_reporting_sdk/util/__init__.py +0 -0
  28. findly/unified_reporting_sdk/util/create_numeric_string_series.py +16 -0
  29. findly_unified_reporting_sdk-0.6.17.dist-info/LICENSE +674 -0
  30. findly_unified_reporting_sdk-0.6.17.dist-info/METADATA +99 -0
  31. findly_unified_reporting_sdk-0.6.17.dist-info/RECORD +32 -0
  32. findly_unified_reporting_sdk-0.6.17.dist-info/WHEEL +4 -0
findly/__init__.py ADDED
File without changes
findly/unified_reporting_sdk/__init__.py ADDED
@@ -0,0 +1,10 @@
+ from findly.unified_reporting_sdk.urs import Urs
+ from findly.unified_reporting_sdk.data_sources.common.reports_client import (
+     ReportsClient,
+ )
+ from findly.unified_reporting_sdk.protos.findly_semantic_layer_pb2 import (
+     DataSourceIntegration,
+     QueryArgs,
+ )
+
+ __all__ = ["Urs", "ReportsClient", "QueryArgs", "DataSourceIntegration"]
findly/unified_reporting_sdk/data_sources/common/common_parser.py ADDED
@@ -0,0 +1,213 @@
+ import re
+ import pandas as pd
+ from datetime import datetime
+ from dateutil.relativedelta import relativedelta
+
+ from typing import List, Optional, Callable, Tuple, Any, TypedDict
+ from findly.unified_reporting_sdk.protos.findly_semantic_layer_pb2 import (
+     QueryArgs,
+     DateStrRange,
+ )
+ from findly.unified_reporting_sdk.data_sources.common.date_range_helper import (
+     create_fallback_date_range,
+     parse_date_str_to_datetime,
+ )
+
+ NONE_VALUE = "none"
+ RESERVED_TOTAL = "RESERVED_TOTAL"
+
+
+ class DefaultFormattedDateRange(TypedDict):
+     since: str
+     until: str
+
+
+ def format_date_range_default(
+     start_date: datetime, end_date: datetime
+ ) -> DefaultFormattedDateRange:
+     return {
+         "since": start_date.strftime("%Y-%m-%d"),
+         "until": end_date.strftime("%Y-%m-%d"),
+     }
+
+
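For reference, a quick usage sketch of the default formatter defined above:

    from datetime import datetime

    rng = format_date_range_default(datetime(2024, 1, 1), datetime(2024, 3, 31))
    assert rng == {"since": "2024-01-01", "until": "2024-03-31"}
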
+ class CommonParser:
+     def __init__(self) -> None:
+         pass
+
+     @staticmethod
+     def get_date_ranges(
+         date_str_range_list: Optional[List[DateStrRange]] = None,
+         format_function: Callable[
+             [datetime, datetime], Any
+         ] = format_date_range_default,
+     ) -> Any:
+         fallback_start_date, fallback_end_date = create_fallback_date_range()
+
+         if date_str_range_list is None:
+             return [format_function(fallback_start_date, fallback_end_date)]
+
+         if len(date_str_range_list) == 0:
+             raise ValueError("date_str_range_list cannot be empty")
+
+         def mapper(date_str_range: DateStrRange) -> Tuple[datetime, datetime]:
+             def create_candidate_date(date_str: str) -> Optional[datetime]:
+                 if not date_str or date_str.lower() == NONE_VALUE:
+                     return None
+                 return parse_date_str_to_datetime(date_str)
+
+             start_date_str = date_str_range.start_date
+             end_date_str = date_str_range.end_date
+
+             start_date_candidate = create_candidate_date(start_date_str)
+             end_date_candidate = create_candidate_date(end_date_str)
+
+             if start_date_candidate is None and end_date_candidate is None:
+                 raise ValueError("Both start and end dates cannot be None")
+             elif start_date_candidate is None:
+                 assert end_date_candidate
+                 start_date = end_date_candidate - relativedelta(years=1)
+                 end_date = end_date_candidate
+             elif end_date_candidate is None:
+                 start_date = start_date_candidate
+                 end_date = datetime.now()
+             else:
+                 if start_date_candidate > end_date_candidate:
+                     raise ValueError("Start date cannot be greater than end date")
+                 start_date = start_date_candidate
+                 end_date = end_date_candidate
+
+             return start_date, end_date
+
+         date_ranges_tuple = [
+             mapper(date_str_range) for date_str_range in date_str_range_list
+         ]
+
+         # Sort the date ranges in descending order of end_date; ties are
+         # broken by start_date, also descending.
+         date_ranges_tuple.sort(key=lambda x: (x[1], x[0]), reverse=True)
+         date_ranges = [
+             format_function(start_date, end_date)
+             for start_date, end_date in date_ranges_tuple
+         ]
+
+         return date_ranges
+
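A hedged usage sketch of `get_date_ranges`. `DateStrRange` is a generated protobuf message, so the keyword construction below assumes the generated class accepts its `start_date` and `end_date` fields this way:

    from findly.unified_reporting_sdk.protos.findly_semantic_layer_pb2 import DateStrRange

    ranges = CommonParser.get_date_ranges(
        date_str_range_list=[
            DateStrRange(start_date="2024-01-01", end_date="2024-03-31"),
            # A "none" start date is inferred as one year before the end date.
            DateStrRange(start_date="none", end_date="2024-06-30"),
        ]
    )
    # Sorted newest end date first:
    # [{"since": "2023-06-30", "until": "2024-06-30"},
    #  {"since": "2024-01-01", "until": "2024-03-31"}]
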
+     async def parse_query_args_to_sql(self, query: QueryArgs) -> str:
+         """
+         Parses the QueryArgs object into a SQL query.
+
+         Args:
+             query (QueryArgs): The query args object.
+
+         Returns:
+             str: The SQL query generated from the QueryArgs object.
+         """
+         query_parts = []
+
+         select_parts = []
+         # Metrics and metrics expressions
+         if query.metrics:
+             select_parts.append(", ".join(query.metrics))
+         elif query.metrics_expression:
+             select_parts.append(", ".join(query.metrics_expression))
+         if query.group_by_columns:
+             select_parts.append(", ".join(query.group_by_columns))
+         if len(select_parts) > 0:
+             select_str = "SELECT " + ", ".join(select_parts)
+             query_parts.append(select_str)
+
+         # WHERE clause (strip any leading WHERE keyword; it is re-added
+         # when the conditions are joined below)
+         conditions = []
+         if query.where_clause:
+             where_clause_modified = re.sub(
+                 r"\bwhere\b", "", query.where_clause, flags=re.IGNORECASE
+             ).strip()
+             conditions.append(where_clause_modified)
+
+         # Date conditions
+         if query.date_ranges:
+             sql_date_range = self.get_date_ranges(
+                 date_str_range_list=list(query.date_ranges)
+             )
+             for date_range in sql_date_range:
+                 start_date = date_range["since"]
+                 end_date = date_range["until"]
+                 if start_date == end_date:
+                     # Use an equality condition when the dates are the same
+                     conditions.append(f"date = '{start_date}'")
+                 else:
+                     # Use BETWEEN when the dates differ
+                     conditions.append(f"date BETWEEN '{start_date}' AND '{end_date}'")
+
+         if conditions:
+             query_parts.append(f"WHERE {' AND '.join(conditions)}")
+
+         # GROUP BY
+         if query.group_by_columns:
+             group_by_str = ", ".join(query.group_by_columns)
+             query_parts.append(f"GROUP BY {group_by_str}")
+
+         # HAVING
+         if query.having_clause:
+             having_clause_modified = re.sub(
+                 r"\bhaving\b", "", query.having_clause, flags=re.IGNORECASE
+             ).strip()
+             query_parts.append(f"HAVING {having_clause_modified}")
+
+         # ORDER BY
+         if query.order_by:
+             query_parts.append(f"ORDER BY {query.order_by}")
+
+         # LIMIT
+         if query.limit:
+             query_parts.append(f"LIMIT {query.limit}")
+
+         return "\n".join(query_parts)
+
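A worked sketch of the SQL this produces, assuming `QueryArgs` accepts these fields as protobuf constructor keywords:

    import asyncio

    query = QueryArgs(
        metrics=["sessions"],
        group_by_columns=["country"],
        where_clause="where country != '(not set)'",
        limit=10,
    )
    print(asyncio.run(CommonParser().parse_query_args_to_sql(query)))
    # SELECT sessions, country
    # WHERE country != '(not set)'
    # GROUP BY country
    # LIMIT 10
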
+     def equalize_dataframe_rows(
+         self, dataframes: List[pd.DataFrame], dimensions: List[str]
+     ) -> List[pd.DataFrame]:
+         if len(dataframes) <= 1 or not dimensions:
+             return dataframes
+
+         # Check that every dimension exists in every dataframe
+         if not all(all(dim in df.columns for dim in dimensions) for df in dataframes):
+             return dataframes
+
+         # Combine all dataframes into one to get all unique combinations of dimension values
+         combined_df = pd.concat(dataframes)
+         unique_combinations = combined_df[dimensions].drop_duplicates()
+
+         # Create a list to store the equalized dataframes
+         equalized_dataframes = []
+
+         # Merge each dataframe with the DataFrame of unique combinations
+         for i, df in enumerate(dataframes):
+             equalized_df = pd.merge(unique_combinations, df, how="left", on=dimensions)
+             equalized_df["origin"] = i
+             equalized_dataframes.append(equalized_df)
+
+         # Sort the combined dataframe by the 'origin' column to preserve the original order
+         combined_df = pd.concat(equalized_dataframes)
+         combined_df.sort_values(by="origin", inplace=True)
+
+         # Split the combined dataframe back into individual dataframes
+         equalized_dataframes = [
+             df.drop(columns="origin").reset_index(drop=True)
+             for _, df in combined_df.groupby("origin")
+         ]
+
+         # Ensure that rows with RESERVED_TOTAL always sort to the end of each dataframe
+         for df in equalized_dataframes:
+             df["sort"] = df.index
+             for dimension in dimensions:
+                 df["sort"] = df["sort"].where(
+                     df[dimension] != RESERVED_TOTAL, df["sort"] + len(df)
+                 )
+             df.sort_values(by="sort", inplace=True)
+             df.drop("sort", axis=1, inplace=True)
+             df.reset_index(drop=True, inplace=True)
+
+         return equalized_dataframes
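A small worked example of the row equalization: every frame ends up with the same set of dimension rows, and metric values missing from a frame are left as NaN.

    import pandas as pd

    df_a = pd.DataFrame({"country": ["US", "BR"], "sessions": [10, 5]})
    df_b = pd.DataFrame({"country": ["US"], "sessions": [7]})

    out_a, out_b = CommonParser().equalize_dataframe_rows([df_a, df_b], ["country"])
    # out_b gains a "BR" row with sessions == NaN, so both frames align row for row.
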
findly/unified_reporting_sdk/data_sources/common/date_range_helper.py ADDED
@@ -0,0 +1,33 @@
+ import datetime
+ from dateutil.relativedelta import relativedelta
+ from typing import Tuple
+
+ DateRange = Tuple[datetime.datetime, datetime.datetime]
+
+
+ def create_fallback_date_range() -> DateRange:
+     """
+     Returns:
+         Tuple[datetime.datetime, datetime.datetime]: A date range covering the time
+         between one year ago and now.
+     """
+     end_time = datetime.datetime.now()
+     start_time = end_time - relativedelta(years=1)
+
+     return start_time, end_time
+
+
+ def parse_date_str_to_datetime(date_str: str) -> datetime.datetime:
+     """
+     Parses a date string into a datetime object.
+
+     Args:
+         date_str (str): A string representing the date in the format 'YYYY-MM-DD' or 'YYYYMMDD'.
+
+     Returns:
+         datetime.datetime: A datetime object corresponding to the given date string.
+     """
+     if "-" in date_str:
+         return datetime.datetime.strptime(date_str, "%Y-%m-%d")
+     else:
+         return datetime.datetime.strptime(date_str, "%Y%m%d")
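A quick sketch of the two helpers; both accepted date formats parse to the same datetime:

    start, end = create_fallback_date_range()  # one year ago .. now

    assert parse_date_str_to_datetime("2024-05-01") == parse_date_str_to_datetime("20240501")
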
findly/unified_reporting_sdk/data_sources/common/reports_client.py ADDED
@@ -0,0 +1,116 @@
+ import pandas as pd
+ from abc import ABC, abstractmethod
+ from typing import List, Tuple, Optional
+
+ from findly.unified_reporting_sdk.protos.findly_semantic_layer_pb2 import (
+     Dimension,
+     Metric,
+     QueryArgs,
+ )
+
+
+ class ReportsClient(ABC):
+     @abstractmethod
+     async def list_property_ids(self, **kwargs: str) -> Optional[List[str]]:
+         """
+         List all property ids for the authenticated user.
+
+         Returns:
+             Optional[List[str]]: A list of property ids.
+         """
+
+     @abstractmethod
+     async def query(
+         self, query_args: QueryArgs, property_id: str, **kwargs: str
+     ) -> Optional[Tuple[List[pd.DataFrame], List[pd.DataFrame]]]:
+         """
+         Executes the integration API request based on the SQL query parts.
+
+         Args:
+             query_args (QueryArgs): The parts of the SQL query to execute.
+             property_id (str): The property ID to execute the query for.
+
+         Returns:
+             Optional[Tuple[List[pd.DataFrame], List[pd.DataFrame]]]: A tuple containing
+                 two lists of pandas DataFrames, or None if the query failed.
+         """
+         pass
+
+     @abstractmethod
+     async def get_dimension_values(
+         self, dimension: Dimension, top_n: int, property_id: str, **kwargs: str
+     ) -> Optional[List[str]]:
+         """
+         Retrieves a sample of the top N values of a dimension.
+
+         Args:
+             dimension (Dimension): The dimension to retrieve the values for.
+             top_n (int): The number of top values to retrieve.
+             property_id (str): The property ID to retrieve the values for.
+
+         Returns:
+             Optional[List[str]]: A list of the top N dimension values, or None if the retrieval failed.
+         """
+         pass
+
+     @abstractmethod
+     async def list_dimensions(
+         self, property_id: str, **kwargs: str
+     ) -> Optional[List[Dimension]]:
+         """
+         Retrieves a list of all property dimensions from the API.
+
+         Args:
+             property_id (str): The property ID to retrieve the dimensions for.
+
+         Returns:
+             Optional[List[Dimension]]: A list of all dimensions, or None if the retrieval failed.
+         """
+         pass
+
+     @abstractmethod
+     async def list_metrics(
+         self, property_id: str, **kwargs: str
+     ) -> Optional[List[Metric]]:
+         """
+         Retrieves a list of all property metrics from the API.
+
+         Args:
+             property_id (str): The property ID to retrieve the metrics for.
+
+         Returns:
+             Optional[List[Metric]]: A list of all metrics, or None if the retrieval failed.
+         """
+         pass
+
+     @abstractmethod
+     async def get_dimension_from_name(
+         self, dimension_name: str, property_id: str, **kwargs: str
+     ) -> Optional[Dimension]:
+         """
+         Retrieves a dimension object by its name.
+
+         Args:
+             dimension_name (str): The name of the dimension to retrieve.
+             property_id (str): The property ID to retrieve the dimension for.
+
+         Returns:
+             Optional[Dimension]: The dimension with the given name, or None if the dimension was not found.
+         """
+         pass
+
+     @abstractmethod
+     async def get_metric_from_name(
+         self, metric_name: str, property_id: str, **kwargs: str
+     ) -> Optional[Metric]:
+         """
+         Retrieves a metric object by its name.
+
+         Args:
+             metric_name (str): The name of the metric to retrieve.
+             property_id (str): The property ID to retrieve the metric for.
+
+         Returns:
+             Optional[Metric]: The metric with the given name, or None if the metric was not found.
+         """
+         pass
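A hedged sketch of a minimal concrete subclass. The class name and canned return values are purely illustrative; a real client, such as the GA4 or Facebook Ads clients elsewhere in this package, would call the upstream API instead.

    class InMemoryReportsClient(ReportsClient):
        # Illustrative stub that satisfies the abstract interface.

        async def list_property_ids(self, **kwargs: str) -> Optional[List[str]]:
            return ["property-1"]

        async def query(
            self, query_args: QueryArgs, property_id: str, **kwargs: str
        ) -> Optional[Tuple[List[pd.DataFrame], List[pd.DataFrame]]]:
            return [pd.DataFrame()], [pd.DataFrame()]

        async def get_dimension_values(
            self, dimension: Dimension, top_n: int, property_id: str, **kwargs: str
        ) -> Optional[List[str]]:
            return []

        async def list_dimensions(
            self, property_id: str, **kwargs: str
        ) -> Optional[List[Dimension]]:
            return []

        async def list_metrics(
            self, property_id: str, **kwargs: str
        ) -> Optional[List[Metric]]:
            return []

        async def get_dimension_from_name(
            self, dimension_name: str, property_id: str, **kwargs: str
        ) -> Optional[Dimension]:
            return None

        async def get_metric_from_name(
            self, metric_name: str, property_id: str, **kwargs: str
        ) -> Optional[Metric]:
            return None
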
findly/unified_reporting_sdk/data_sources/common/where_string_comparison.py ADDED
@@ -0,0 +1,149 @@
+ # Contains helpers for extracting the WHERE conditions when we are
+ # comparing strings.
+
+ import dataclasses
+ from dataclasses import dataclass
+ import logging
+ from typing import List, Optional
+ from sqlglot import parse_one
+ from sqlglot import expressions as sqlglot_expressions
+ from sqlglot import Expression
+
+ LOGGER = logging.getLogger(__name__)
+
+
+ @dataclass
+ class WhereClauseInformation:
+     column_name: str
+     column_operator: str
+     column_value: str
+     is_not_condition: bool = False
+
+
+ def parse_where_columns_from_sql_query(
+     sql_query: str,
+     dialect: str,
+     where_clause_str: Optional[str] = None,
+ ) -> Optional[List[WhereClauseInformation]]:
+     try:
+         where_clause_maybe: Optional[Expression] = parse_one(
+             sql=sql_query,
+             read=dialect.lower(),
+         ).args.get("where")
+
+         if where_clause_maybe is None:
+             LOGGER.info(
+                 {
+                     "msg": "parse_where_column_condition_no_where_clause",
+                     "where_clause": where_clause_str,
+                     "sql_query": sql_query,
+                 }
+             )
+             return None
+
+         where_clause: Expression = where_clause_maybe
+         where_clause_list: list = list(
+             # TODO: Too complicated to type this right now; it uses a
+             # base class to generate the information.
+             where_clause.find_all((sqlglot_expressions.Predicate,))  # type: ignore
+         )
+         where_clause_values: List[WhereClauseInformation] = []
+         for condition in where_clause_list:
+             not_condition = False
+             try:
+                 if condition.parent.key.lower() == "not":
+                     not_condition = True
+             except Exception:
+                 pass
+
+             column_operator_used_for_where_clause = condition.key
+             if column_operator_used_for_where_clause == "in":
+                 # This is an IN operator, so we get back an expression list;
+                 # handle it by iterating over the expressions.
+                 for expression in condition.expressions:
+                     where_clause_values.append(
+                         WhereClauseInformation(
+                             column_name=condition.this.sql(),
+                             column_operator=condition.key,
+                             column_value=expression.sql(),
+                             is_not_condition=not_condition,
+                         )
+                     )
+             elif column_operator_used_for_where_clause == "between":
+                 where_clause_values.append(
+                     WhereClauseInformation(
+                         column_name=condition.args["this"].sql(),
+                         column_operator=condition.key,
+                         column_value=f"{condition.args['low'].sql()} AND {condition.args['high'].sql()}",
+                         is_not_condition=not_condition,
+                     )
+                 )
+             else:
+                 # We fail to parse expressions like CAST(A AS B). For now this
+                 # only happens with join conditions and with dates; since
+                 # neither is needed for the edit-distance fix, we can guard
+                 # against the exception and keep going.
+                 try:
+                     where_clause_values.append(
+                         WhereClauseInformation(
+                             column_name=condition.this.sql(),
+                             column_operator=condition.key,
+                             column_value=condition.expression.sql(),
+                             is_not_condition=not_condition,
+                         )
+                     )
+                 except Exception as e:
+                     LOGGER.warning(
+                         {
+                             "msg": "error_parsing_where_condition",
+                             "condition": condition.this.sql(),
+                             "where_clause": where_clause_str,
+                             "sql_query": sql_query,
+                             "error": str(e),
+                         }
+                     )
+         LOGGER.info(
+             {
+                 "msg": "parse_where_column_condition_values",
+                 "where_clause": where_clause_str,
+                 "sql_query": sql_query,
+                 "where_clause_values": [
+                     dataclasses.asdict(value) for value in where_clause_values
+                 ],
+             }
+         )
+         return where_clause_values
+     except Exception as e:
+         LOGGER.error(
+             {
+                 "msg": "error_parse_where_column_condition",
+                 "where_clause": where_clause_str,
+                 "sql_query": sql_query,
+                 "error": str(e),
+             }
+         )
+         return None
+
+
+ # We parse the WHERE columns and just log them, to see whether we can do it
+ # correctly. If all of this works, we can use edit distance here to fix things.
+ def parse_where_column_condition(
+     where_clause_str: str,
+     dialect: str,
+ ) -> Optional[List[WhereClauseInformation]]:
+     dummy_select_string = "select * from table"
+
+     # If the where_clause_str doesn't start with WHERE, we need to add it.
+     if not where_clause_str.lower().startswith("where"):
+         where_clause_str = "where " + where_clause_str
+
+     # The where clause now starts with WHERE, so appending it to the dummy
+     # select yields a complete statement.
+     complete_sql = dummy_select_string + " " + where_clause_str
+     return parse_where_columns_from_sql_query(
+         sql_query=complete_sql,
+         dialect=dialect,
+         where_clause_str=where_clause_str,
+     )
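A hedged usage sketch (requires sqlglot; the operator keys such as "eq" and "in" come from sqlglot's lowercased expression class names, so the exact values below are an assumption about the installed sqlglot version):

    infos = parse_where_column_condition(
        where_clause_str="country = 'US' AND medium IN ('cpc', 'organic')",
        dialect="mysql",
    )
    # Roughly:
    # [WhereClauseInformation(column_name="country", column_operator="eq",
    #                         column_value="'US'", is_not_condition=False),
    #  WhereClauseInformation(column_name="medium", column_operator="in",
    #                         column_value="'cpc'", is_not_condition=False),
    #  WhereClauseInformation(column_name="medium", column_operator="in",
    #                         column_value="'organic'", is_not_condition=False)]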