findly.unified-reporting-sdk 0.6.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. findly/__init__.py +0 -0
  2. findly/unified_reporting_sdk/__init__.py +10 -0
  3. findly/unified_reporting_sdk/data_sources/__init__.py +0 -0
  4. findly/unified_reporting_sdk/data_sources/common/__init__.py +0 -0
  5. findly/unified_reporting_sdk/data_sources/common/common_parser.py +213 -0
  6. findly/unified_reporting_sdk/data_sources/common/date_range_helper.py +33 -0
  7. findly/unified_reporting_sdk/data_sources/common/reports_client.py +116 -0
  8. findly/unified_reporting_sdk/data_sources/common/where_string_comparison.py +149 -0
  9. findly/unified_reporting_sdk/data_sources/fb_ads/__init__.py +0 -0
  10. findly/unified_reporting_sdk/data_sources/fb_ads/fb_ads_client.py +608 -0
  11. findly/unified_reporting_sdk/data_sources/fb_ads/fb_ads_query_args_parser.py +828 -0
  12. findly/unified_reporting_sdk/data_sources/fb_ads/metadata/action_breakdowns.csv +11 -0
  13. findly/unified_reporting_sdk/data_sources/fb_ads/metadata/breakdowns.csv +44 -0
  14. findly/unified_reporting_sdk/data_sources/fb_ads/metadata/dimensions.jsonl +75 -0
  15. findly/unified_reporting_sdk/data_sources/fb_ads/metadata/fields.csv +135 -0
  16. findly/unified_reporting_sdk/data_sources/fb_ads/metadata/metrics.jsonl +102 -0
  17. findly/unified_reporting_sdk/data_sources/ga4/__init__.py +0 -0
  18. findly/unified_reporting_sdk/data_sources/ga4/ga4_client.py +1127 -0
  19. findly/unified_reporting_sdk/data_sources/ga4/ga4_query_args_parser.py +751 -0
  20. findly/unified_reporting_sdk/data_sources/ga4/metadata/dimensions.jsonl +109 -0
  21. findly/unified_reporting_sdk/data_sources/gsc/__init__.py +0 -0
  22. findly/unified_reporting_sdk/data_sources/gsc/gsc_client.py +0 -0
  23. findly/unified_reporting_sdk/data_sources/gsc/gsc_service.py +55 -0
  24. findly/unified_reporting_sdk/protos/.gitignore +3 -0
  25. findly/unified_reporting_sdk/protos/__init__.py +5 -0
  26. findly/unified_reporting_sdk/urs.py +87 -0
  27. findly/unified_reporting_sdk/util/__init__.py +0 -0
  28. findly/unified_reporting_sdk/util/create_numeric_string_series.py +16 -0
  29. findly_unified_reporting_sdk-0.6.17.dist-info/LICENSE +674 -0
  30. findly_unified_reporting_sdk-0.6.17.dist-info/METADATA +99 -0
  31. findly_unified_reporting_sdk-0.6.17.dist-info/RECORD +32 -0
  32. findly_unified_reporting_sdk-0.6.17.dist-info/WHEEL +4 -0
findly/__init__.py ADDED
File without changes
findly/unified_reporting_sdk/__init__.py ADDED
@@ -0,0 +1,10 @@
+ from findly.unified_reporting_sdk.urs import Urs
+ from findly.unified_reporting_sdk.data_sources.common.reports_client import (
+     ReportsClient,
+ )
+ from findly.unified_reporting_sdk.protos.findly_semantic_layer_pb2 import (
+     DataSourceIntegration,
+     QueryArgs,
+ )
+
+ __all__ = ["Urs", "ReportsClient", "QueryArgs", "DataSourceIntegration"]
findly/unified_reporting_sdk/data_sources/common/common_parser.py ADDED
@@ -0,0 +1,213 @@
+ import re
+ import pandas as pd
+ from datetime import datetime
+ from dateutil.relativedelta import relativedelta
+
+ from typing import List, Optional, Callable, Tuple, Any, TypedDict
+ from findly.unified_reporting_sdk.protos.findly_semantic_layer_pb2 import (
+     QueryArgs,
+     DateStrRange,
+ )
+ from findly.unified_reporting_sdk.data_sources.common.date_range_helper import (
+     create_fallback_date_range,
+     parse_date_str_to_datetime,
+ )
+
+ NONE_VALUE = "none"
+ RESERVED_TOTAL = "RESERVED_TOTAL"
+
+
+ class DefaultFormattedDateRange(TypedDict):
+     since: str
+     until: str
+
+
+ def format_date_range_default(
+     start_date: datetime, end_date: datetime
+ ) -> DefaultFormattedDateRange:
+     return {
+         "since": start_date.strftime("%Y-%m-%d"),
+         "until": end_date.strftime("%Y-%m-%d"),
+     }
+
+
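For reference, a quick usage sketch of the default formatter defined above:

    from datetime import datetime

    rng = format_date_range_default(datetime(2024, 1, 1), datetime(2024, 3, 31))
    assert rng == {"since": "2024-01-01", "until": "2024-03-31"}
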
+ class CommonParser:
+     def __init__(self) -> None:
+         pass
+
+     @staticmethod
+     def get_date_ranges(
+         date_str_range_list: Optional[List[DateStrRange]] = None,
+         format_function: Callable[
+             [datetime, datetime], Any
+         ] = format_date_range_default,
+     ) -> Any:
+         fallback_start_date, fallback_end_date = create_fallback_date_range()
+
+         if date_str_range_list is None:
+             return [format_function(fallback_start_date, fallback_end_date)]
+
+         if len(date_str_range_list) == 0:
+             raise ValueError("date_str_range_list cannot be empty")
+
+         def mapper(date_str_range: DateStrRange) -> Tuple[datetime, datetime]:
+             def create_candidate_date(date_str: str) -> Optional[datetime]:
+                 if not date_str or date_str.lower() == NONE_VALUE:
+                     return None
+                 return parse_date_str_to_datetime(date_str)
+
+             start_date_str = date_str_range.start_date
+             end_date_str = date_str_range.end_date
+
+             start_date_candidate = create_candidate_date(start_date_str)
+             end_date_candidate = create_candidate_date(end_date_str)
+
+             if start_date_candidate is None and end_date_candidate is None:
+                 raise ValueError("Both start and end dates cannot be None")
+             elif start_date_candidate is None:
+                 assert end_date_candidate
+                 start_date = end_date_candidate - relativedelta(years=1)
+                 end_date = end_date_candidate
+             elif end_date_candidate is None:
+                 start_date = start_date_candidate
+                 end_date = datetime.now()
+             else:
+                 if start_date_candidate > end_date_candidate:
+                     raise ValueError("Start date cannot be greater than end date")
+                 start_date = start_date_candidate
+                 end_date = end_date_candidate
+
+             return start_date, end_date
+
+         date_ranges_tuple = [
+             mapper(date_str_range) for date_str_range in date_str_range_list
+         ]
+
+         # Sort the date ranges in descending order of end_date; ties are
+         # broken by start_date, also descending.
+         date_ranges_tuple.sort(key=lambda x: (x[1], x[0]), reverse=True)
+         date_ranges = [
+             format_function(start_date, end_date)
+             for start_date, end_date in date_ranges_tuple
+         ]
+
+         return date_ranges
+
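A hedged usage sketch of `get_date_ranges`. `DateStrRange` is a generated protobuf message, so the keyword construction below assumes the generated class accepts its `start_date` and `end_date` fields this way:

    from findly.unified_reporting_sdk.protos.findly_semantic_layer_pb2 import DateStrRange

    ranges = CommonParser.get_date_ranges(
        date_str_range_list=[
            DateStrRange(start_date="2024-01-01", end_date="2024-03-31"),
            # A "none" start date is inferred as one year before the end date.
            DateStrRange(start_date="none", end_date="2024-06-30"),
        ]
    )
    # Sorted newest end date first:
    # [{"since": "2023-06-30", "until": "2024-06-30"},
    #  {"since": "2024-01-01", "until": "2024-03-31"}]
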
+     async def parse_query_args_to_sql(self, query: QueryArgs) -> str:
+         """
+         Parses the QueryArgs object into a SQL query.
+
+         Args:
+             query (QueryArgs): The query args object.
+
+         Returns:
+             str: The SQL query generated from the QueryArgs object.
+         """
+         query_parts = []
+
+         select_parts = []
+         # Metrics and metrics expressions
+         if query.metrics:
+             select_parts.append(", ".join(query.metrics))
+         elif query.metrics_expression:
+             select_parts.append(", ".join(query.metrics_expression))
+         if query.group_by_columns:
+             select_parts.append(", ".join(query.group_by_columns))
+         if len(select_parts) > 0:
+             select_str = "SELECT " + ", ".join(select_parts)
+             query_parts.append(select_str)
+
+         # WHERE clause (strip any leading WHERE keyword; it is re-added
+         # when the conditions are joined below)
+         conditions = []
+         if query.where_clause:
+             where_clause_modified = re.sub(
+                 r"\bwhere\b", "", query.where_clause, flags=re.IGNORECASE
+             ).strip()
+             conditions.append(where_clause_modified)
+
+         # Date conditions
+         if query.date_ranges:
+             sql_date_range = self.get_date_ranges(
+                 date_str_range_list=list(query.date_ranges)
+             )
+             for date_range in sql_date_range:
+                 start_date = date_range["since"]
+                 end_date = date_range["until"]
+                 if start_date == end_date:
+                     # Use an equality condition when the dates are the same
+                     conditions.append(f"date = '{start_date}'")
+                 else:
+                     # Use BETWEEN when the dates differ
+                     conditions.append(f"date BETWEEN '{start_date}' AND '{end_date}'")
+
+         if conditions:
+             query_parts.append(f"WHERE {' AND '.join(conditions)}")
+
+         # GROUP BY
+         if query.group_by_columns:
+             group_by_str = ", ".join(query.group_by_columns)
+             query_parts.append(f"GROUP BY {group_by_str}")
+
+         # HAVING
+         if query.having_clause:
+             having_clause_modified = re.sub(
+                 r"\bhaving\b", "", query.having_clause, flags=re.IGNORECASE
+             ).strip()
+             query_parts.append(f"HAVING {having_clause_modified}")
+
+         # ORDER BY
+         if query.order_by:
+             query_parts.append(f"ORDER BY {query.order_by}")
+
+         # LIMIT
+         if query.limit:
+             query_parts.append(f"LIMIT {query.limit}")
+
+         return "\n".join(query_parts)
+
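A worked sketch of the SQL this produces, assuming `QueryArgs` accepts these fields as protobuf constructor keywords:

    import asyncio

    query = QueryArgs(
        metrics=["sessions"],
        group_by_columns=["country"],
        where_clause="where country != '(not set)'",
        limit=10,
    )
    print(asyncio.run(CommonParser().parse_query_args_to_sql(query)))
    # SELECT sessions, country
    # WHERE country != '(not set)'
    # GROUP BY country
    # LIMIT 10
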
+     def equalize_dataframe_rows(
+         self, dataframes: List[pd.DataFrame], dimensions: List[str]
+     ) -> List[pd.DataFrame]:
+         if len(dataframes) <= 1 or not dimensions:
+             return dataframes
+
+         # Check that every dimension exists in every dataframe
+         if not all(all(dim in df.columns for dim in dimensions) for df in dataframes):
+             return dataframes
+
+         # Combine all dataframes into one to get all unique combinations of dimension values
+         combined_df = pd.concat(dataframes)
+         unique_combinations = combined_df[dimensions].drop_duplicates()
+
+         # Create a list to store the equalized dataframes
+         equalized_dataframes = []
+
+         # Merge each dataframe with the DataFrame of unique combinations
+         for i, df in enumerate(dataframes):
+             equalized_df = pd.merge(unique_combinations, df, how="left", on=dimensions)
+             equalized_df["origin"] = i
+             equalized_dataframes.append(equalized_df)
+
+         # Sort the combined dataframe by the 'origin' column to preserve the original order
+         combined_df = pd.concat(equalized_dataframes)
+         combined_df.sort_values(by="origin", inplace=True)
+
+         # Split the combined dataframe back into individual dataframes
+         equalized_dataframes = [
+             df.drop(columns="origin").reset_index(drop=True)
+             for _, df in combined_df.groupby("origin")
+         ]
+
+         # Ensure that rows with RESERVED_TOTAL always sort to the end of each dataframe
+         for df in equalized_dataframes:
+             df["sort"] = df.index
+             for dimension in dimensions:
+                 df["sort"] = df["sort"].where(
+                     df[dimension] != RESERVED_TOTAL, df["sort"] + len(df)
+                 )
+             df.sort_values(by="sort", inplace=True)
+             df.drop("sort", axis=1, inplace=True)
+             df.reset_index(drop=True, inplace=True)
+
+         return equalized_dataframes
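A small worked example of the row equalization: every frame ends up with the same set of dimension rows, and metric values missing from a frame are left as NaN.

    import pandas as pd

    df_a = pd.DataFrame({"country": ["US", "BR"], "sessions": [10, 5]})
    df_b = pd.DataFrame({"country": ["US"], "sessions": [7]})

    out_a, out_b = CommonParser().equalize_dataframe_rows([df_a, df_b], ["country"])
    # out_b gains a "BR" row with sessions == NaN, so both frames align row for row.
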
findly/unified_reporting_sdk/data_sources/common/date_range_helper.py ADDED
@@ -0,0 +1,33 @@
+ import datetime
+ from dateutil.relativedelta import relativedelta
+ from typing import Tuple
+
+ DateRange = Tuple[datetime.datetime, datetime.datetime]
+
+
+ def create_fallback_date_range() -> DateRange:
+     """
+     Returns:
+         Tuple[datetime.datetime, datetime.datetime]: A date range covering the time
+         between one year ago and now.
+     """
+     end_time = datetime.datetime.now()
+     start_time = end_time - relativedelta(years=1)
+
+     return start_time, end_time
+
+
+ def parse_date_str_to_datetime(date_str: str) -> datetime.datetime:
+     """
+     Parses a date string into a datetime object.
+
+     Args:
+         date_str (str): A string representing the date in the format 'YYYY-MM-DD' or 'YYYYMMDD'.
+
+     Returns:
+         datetime.datetime: A datetime object corresponding to the given date string.
+     """
+     if "-" in date_str:
+         return datetime.datetime.strptime(date_str, "%Y-%m-%d")
+     else:
+         return datetime.datetime.strptime(date_str, "%Y%m%d")
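A quick sketch of the two helpers; both accepted date formats parse to the same datetime:

    start, end = create_fallback_date_range()  # one year ago .. now

    assert parse_date_str_to_datetime("2024-05-01") == parse_date_str_to_datetime("20240501")
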
findly/unified_reporting_sdk/data_sources/common/reports_client.py ADDED
@@ -0,0 +1,116 @@
+ import pandas as pd
+ from abc import ABC, abstractmethod
+ from typing import List, Tuple, Optional
+
+ from findly.unified_reporting_sdk.protos.findly_semantic_layer_pb2 import (
+     Dimension,
+     Metric,
+     QueryArgs,
+ )
+
+
+ class ReportsClient(ABC):
+     @abstractmethod
+     async def list_property_ids(self, **kwargs: str) -> Optional[List[str]]:
+         """
+         List all property ids for the authenticated user.
+
+         Returns:
+             Optional[List[str]]: A list of property ids.
+         """
+
+     @abstractmethod
+     async def query(
+         self, query_args: QueryArgs, property_id: str, **kwargs: str
+     ) -> Optional[Tuple[List[pd.DataFrame], List[pd.DataFrame]]]:
+         """
+         Executes the integration API request based on the SQL query parts.
+
+         Args:
+             query_args (QueryArgs): The parts of the SQL query to execute.
+             property_id (str): The property ID to execute the query for.
+
+         Returns:
+             Optional[Tuple[List[pd.DataFrame], List[pd.DataFrame]]]: A tuple containing
+                 two lists of pandas DataFrames, or None if the query failed.
+         """
+         pass
+
+     @abstractmethod
+     async def get_dimension_values(
+         self, dimension: Dimension, top_n: int, property_id: str, **kwargs: str
+     ) -> Optional[List[str]]:
+         """
+         Retrieves a sample of the top N values of a dimension.
+
+         Args:
+             dimension (Dimension): The dimension to retrieve the values for.
+             top_n (int): The number of top values to retrieve.
+             property_id (str): The property ID to retrieve the values for.
+
+         Returns:
+             Optional[List[str]]: A list of the top N dimension values, or None if the retrieval failed.
+         """
+         pass
+
+     @abstractmethod
+     async def list_dimensions(
+         self, property_id: str, **kwargs: str
+     ) -> Optional[List[Dimension]]:
+         """
+         Retrieves a list of all property dimensions from the API.
+
+         Args:
+             property_id (str): The property ID to retrieve the dimensions for.
+
+         Returns:
+             Optional[List[Dimension]]: A list of all dimensions, or None if the retrieval failed.
+         """
+         pass
+
+     @abstractmethod
+     async def list_metrics(
+         self, property_id: str, **kwargs: str
+     ) -> Optional[List[Metric]]:
+         """
+         Retrieves a list of all property metrics from the API.
+
+         Args:
+             property_id (str): The property ID to retrieve the metrics for.
+
+         Returns:
+             Optional[List[Metric]]: A list of all metrics, or None if the retrieval failed.
+         """
+         pass
+
+     @abstractmethod
+     async def get_dimension_from_name(
+         self, dimension_name: str, property_id: str, **kwargs: str
+     ) -> Optional[Dimension]:
+         """
+         Retrieves a dimension object by its name.
+
+         Args:
+             dimension_name (str): The name of the dimension to retrieve.
+             property_id (str): The property ID to retrieve the dimension for.
+
+         Returns:
+             Optional[Dimension]: The dimension with the given name, or None if the dimension was not found.
+         """
+         pass
+
+     @abstractmethod
+     async def get_metric_from_name(
+         self, metric_name: str, property_id: str, **kwargs: str
+     ) -> Optional[Metric]:
+         """
+         Retrieves a metric object by its name.
+
+         Args:
+             metric_name (str): The name of the metric to retrieve.
+             property_id (str): The property ID to retrieve the metric for.
+
+         Returns:
+             Optional[Metric]: The metric with the given name, or None if the metric was not found.
+         """
+         pass
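A hedged sketch of a minimal concrete subclass. The class name and canned return values are purely illustrative; a real client, such as the GA4 or Facebook Ads clients elsewhere in this package, would call the upstream API instead.

    class InMemoryReportsClient(ReportsClient):
        # Illustrative stub that satisfies the abstract interface.

        async def list_property_ids(self, **kwargs: str) -> Optional[List[str]]:
            return ["property-1"]

        async def query(
            self, query_args: QueryArgs, property_id: str, **kwargs: str
        ) -> Optional[Tuple[List[pd.DataFrame], List[pd.DataFrame]]]:
            return [pd.DataFrame()], [pd.DataFrame()]

        async def get_dimension_values(
            self, dimension: Dimension, top_n: int, property_id: str, **kwargs: str
        ) -> Optional[List[str]]:
            return []

        async def list_dimensions(
            self, property_id: str, **kwargs: str
        ) -> Optional[List[Dimension]]:
            return []

        async def list_metrics(
            self, property_id: str, **kwargs: str
        ) -> Optional[List[Metric]]:
            return []

        async def get_dimension_from_name(
            self, dimension_name: str, property_id: str, **kwargs: str
        ) -> Optional[Dimension]:
            return None

        async def get_metric_from_name(
            self, metric_name: str, property_id: str, **kwargs: str
        ) -> Optional[Metric]:
            return None
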
findly/unified_reporting_sdk/data_sources/common/where_string_comparison.py ADDED
@@ -0,0 +1,149 @@
+ # Contains helpers for extracting the WHERE conditions when we are
+ # comparing strings.
+
+ import dataclasses
+ from dataclasses import dataclass
+ import logging
+ from typing import List, Optional
+ from sqlglot import parse_one
+ from sqlglot import expressions as sqlglot_expressions
+ from sqlglot import Expression
+
+ LOGGER = logging.getLogger(__name__)
+
+
+ @dataclass
+ class WhereClauseInformation:
+     column_name: str
+     column_operator: str
+     column_value: str
+     is_not_condition: bool = False
+
+
+ def parse_where_columns_from_sql_query(
+     sql_query: str,
+     dialect: str,
+     where_clause_str: Optional[str] = None,
+ ) -> Optional[List[WhereClauseInformation]]:
+     try:
+         where_clause_maybe: Optional[Expression] = parse_one(
+             sql=sql_query,
+             read=dialect.lower(),
+         ).args.get("where")
+
+         if where_clause_maybe is None:
+             LOGGER.info(
+                 {
+                     "msg": "parse_where_column_condition_no_where_clause",
+                     "where_clause": where_clause_str,
+                     "sql_query": sql_query,
+                 }
+             )
+             return None
+
+         where_clause: Expression = where_clause_maybe
+         where_clause_list: list = list(
+             # TODO: Too complicated to type this right now; it uses a
+             # base class to generate the information.
+             where_clause.find_all((sqlglot_expressions.Predicate,))  # type: ignore
+         )
+         where_clause_values: List[WhereClauseInformation] = []
+         for condition in where_clause_list:
+             not_condition = False
+             try:
+                 if condition.parent.key.lower() == "not":
+                     not_condition = True
+             except Exception:
+                 pass
+
+             column_operator_used_for_where_clause = condition.key
+             if column_operator_used_for_where_clause == "in":
+                 # This is an IN operator, so we get back an expression list;
+                 # handle it by iterating over the expressions.
+                 for expression in condition.expressions:
+                     where_clause_values.append(
+                         WhereClauseInformation(
+                             column_name=condition.this.sql(),
+                             column_operator=condition.key,
+                             column_value=expression.sql(),
+                             is_not_condition=not_condition,
+                         )
+                     )
+             elif column_operator_used_for_where_clause == "between":
+                 where_clause_values.append(
+                     WhereClauseInformation(
+                         column_name=condition.args["this"].sql(),
+                         column_operator=condition.key,
+                         column_value=f"{condition.args['low'].sql()} AND {condition.args['high'].sql()}",
+                         is_not_condition=not_condition,
+                     )
+                 )
+             else:
+                 # We fail to parse expressions like CAST(A AS B). For now this
+                 # only happens with join conditions and with dates; since
+                 # neither is needed for the edit-distance fix, we can guard
+                 # against the exception and keep going.
+                 try:
+                     where_clause_values.append(
+                         WhereClauseInformation(
+                             column_name=condition.this.sql(),
+                             column_operator=condition.key,
+                             column_value=condition.expression.sql(),
+                             is_not_condition=not_condition,
+                         )
+                     )
+                 except Exception as e:
+                     LOGGER.warning(
+                         {
+                             "msg": "error_parsing_where_condition",
+                             "condition": condition.this.sql(),
+                             "where_clause": where_clause_str,
+                             "sql_query": sql_query,
+                             "error": str(e),
+                         }
+                     )
+         LOGGER.info(
+             {
+                 "msg": "parse_where_column_condition_values",
+                 "where_clause": where_clause_str,
+                 "sql_query": sql_query,
+                 "where_clause_values": [
+                     dataclasses.asdict(value) for value in where_clause_values
+                 ],
+             }
+         )
+         return where_clause_values
+     except Exception as e:
+         LOGGER.error(
+             {
+                 "msg": "error_parse_where_column_condition",
+                 "where_clause": where_clause_str,
+                 "sql_query": sql_query,
+                 "error": str(e),
+             }
+         )
+         return None
+
+
+ # We parse the WHERE columns and just log them, to see whether we can do it
+ # correctly. If all of this works, we can use edit distance here to fix things.
+ def parse_where_column_condition(
+     where_clause_str: str,
+     dialect: str,
+ ) -> Optional[List[WhereClauseInformation]]:
+     dummy_select_string = "select * from table"
+
+     # If the where_clause_str doesn't start with WHERE, we need to add it.
+     if not where_clause_str.lower().startswith("where"):
+         where_clause_str = "where " + where_clause_str
+
+     # The where clause now starts with WHERE, so appending it to the dummy
+     # select yields a complete statement.
+     complete_sql = dummy_select_string + " " + where_clause_str
+     return parse_where_columns_from_sql_query(
+         sql_query=complete_sql,
+         dialect=dialect,
+         where_clause_str=where_clause_str,
+     )
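A hedged usage sketch (requires sqlglot; the operator keys such as "eq" and "in" come from sqlglot's lowercased expression class names, so the exact values below are an assumption about the installed sqlglot version):

    infos = parse_where_column_condition(
        where_clause_str="country = 'US' AND medium IN ('cpc', 'organic')",
        dialect="mysql",
    )
    # Roughly:
    # [WhereClauseInformation(column_name="country", column_operator="eq",
    #                         column_value="'US'", is_not_condition=False),
    #  WhereClauseInformation(column_name="medium", column_operator="in",
    #                         column_value="'cpc'", is_not_condition=False),
    #  WhereClauseInformation(column_name="medium", column_operator="in",
    #                         column_value="'organic'", is_not_condition=False)]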