cloe-logging 0.3.7 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cloe_logging/__init__.py ADDED
@@ -0,0 +1,3 @@
+from .logger_factory import LoggerFactory
+
+__all__ = ["LoggerFactory"]
cloe_logging/decorators/__init__.py ADDED
@@ -0,0 +1,3 @@
+from .devops_decorator import build_logger
+
+__all__ = ["build_logger"]
cloe_logging/decorators/devops_decorator.py ADDED
@@ -0,0 +1,63 @@
+import functools
+import logging
+import sys
+import time
+from typing import Any
+from collections.abc import Callable
+
+from cloe_logging.formatters import DevOpsFormatter
+
+
+def filter_arg_logger(arg: Any) -> bool:
+    """
+    Filter out all arguments that shall NOT be printed, i.e.,
+    - strings with 25 or more characters
+    - dictionaries
+    - logging.Logger instances
+    """
+    match arg:
+        case str() if len(arg) >= 25:
+            result = False
+        case dict():
+            result = False
+        case logging.Logger():
+            result = False
+        case _:
+            result = True
+    return result
+
+
+def init_logging() -> logging.Logger:
+    logger = logging.getLogger("azure-pipeline-logger")
+    logger.setLevel(logging.INFO)
+    section_formatter = DevOpsFormatter(section_info=True)
+    section_handler = logging.StreamHandler()
+    section_handler.setFormatter(section_formatter)
+    logger.addHandler(section_handler)
+    return logger
+
+
+def build_logger():
+    def log_decorator_info(func: Callable):
+        @functools.wraps(func)
+        def log_decorator_wrapper(*args, **kwargs):
+            logger = init_logging()
+            args_passed_in_function = [repr(a) for a in args if filter_arg_logger(a)]
+            kwargs_passed_in_function = [f"{k}={v!r}" for k, v in kwargs.items()]
+            formatted_arguments = ", ".join(args_passed_in_function + kwargs_passed_in_function)
+
+            logger.info(f"##### START {func.__name__} WITH args [ {formatted_arguments} ] #####\n")
+
+            try:
+                start = time.time()
+                value = func(*args, **kwargs)
+                end = time.time()
+                logger.info(f"\n##### END {func.__name__} DURATION [ '{round(end-start)}'s ] #####")
+            except:
+                logger.error(f"ERROR: {str(sys.exc_info()[1])}")
+                raise
+            return value
+
+        return log_decorator_wrapper
+
+    return log_decorator_info
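A minimal usage sketch of the decorator above, assuming the package is installed; the decorated function `copy_artifacts` and its arguments are illustrative only.

    from cloe_logging.decorators import build_logger

    @build_logger()
    def copy_artifacts(source: str, target: str, retries: int = 3) -> None:
        # Placeholder body; the decorator logs START/END group markers and the call duration.
        pass

    copy_artifacts("src", "dst", retries=1)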
cloe_logging/formatters/__init__.py ADDED
@@ -0,0 +1,4 @@
+from .dict_formatter import DictFormatter
+from .devops_formatter import DevOpsFormatter
+
+__all__ = ["DictFormatter", "DevOpsFormatter"]
cloe_logging/formatters/devops_formatter.py ADDED
@@ -0,0 +1,84 @@
+import logging
+import re
+
+
+class DevOpsFormatter(logging.Formatter):
+    error_format = "##vso[task.logissue type=error]%(levelname)s -- %(name)s -- %(message)s"
+    warning_format = "##vso[task.logissue type=warning]%(levelname)s -- %(name)s -- %(message)s"
+    dbg_fmt = "DBG: %(module)s: %(lineno)d: %(msg)s"
+    info_format = "%(message)s"  # "%(name)s -- %(message)s"
+    section_format = "%(name)s -- %(message)s"
+
+    def __init__(self, fmt="%(levelno)s: %(msg)s", section_info=False):
+        super().__init__(fmt=fmt)
+        self._section_info = section_info
+
+    def parse_progress(self, message: str) -> str:
+        """
+        Parses the message for progress information.
+        """
+        progress_pattern = re.compile(r".*PROGRESS\s\[\s*'(?P<x>\d+)\/(?P<y>\d+)'\s*\].*", re.IGNORECASE | re.MULTILINE)
+
+        if progress_pattern.match(message):
+            progress_match = progress_pattern.search(message)
+            if progress_match:
+                x = int(progress_match.group("x"))
+                y = int(progress_match.group("y"))
+                progress_value = round((x / y) * 100)
+                return f"##vso[task.setprogress value={progress_value};]script progress\n"
+
+        return ""
+
+    def parse_group_start(self, message: str) -> str:
+        """
+        Parses whether the message marks a group start and prepends a command string to the message.
+        """
+        start_pattern = re.compile(
+            r".*#####\s*START\s*(?P<gname>.+?)\sWITH.*", re.IGNORECASE | re.MULTILINE | re.DOTALL
+        )
+
+        if start_pattern.match(message):
+            start_match = start_pattern.search(message)
+            if start_match:
+                return f"##[group]{start_match.group('gname')}\n"
+        return ""
+
+    def parse_group_end(self, message: str) -> str:
+        """
+        Parses whether the message marks a group end and appends a command string to the message.
+        """
+        end_pattern = re.compile(r".*#####\sEND.*", re.IGNORECASE | re.MULTILINE | re.DOTALL)
+
+        if end_pattern.match(message):
+            return "\n##[endgroup]"
+        return ""
+
+    def format(self, record):
+        # Save the original format configured by the user
+        # when the logger formatter was instantiated
+        format_orig = self._style._fmt
+
+        if record.levelno == logging.INFO:
+            record_message = record.msg  # f"{record.name} -- {record.msg}"
+
+            return f"{self.parse_progress(record.msg)}{self.parse_group_start(record.msg)}{record_message}{self.parse_group_end(record.msg)}"
+
+        # Replace the original format with one customized by logging level
+        if record.levelno == logging.DEBUG:
+            self._style._fmt = DevOpsFormatter.dbg_fmt
+        elif record.levelno == logging.INFO and not self._section_info:
+            self._style._fmt = DevOpsFormatter.info_format
+
+        elif record.levelno == logging.INFO and self._section_info:
+            self._style._fmt = DevOpsFormatter.section_format
+
+        elif record.levelno == logging.ERROR:
+            self._style._fmt = DevOpsFormatter.error_format
+
+        # Call the original formatter class to do the grunt work
+        result = logging.Formatter.format(self, record)
+
+        # Restore the original format configured by the user
+        self._style._fmt = format_orig
+
+        return result
cloe_logging/formatters/dict_formatter.py ADDED
@@ -0,0 +1,44 @@
+import logging
+import json
+
+
+class DictFormatter(logging.Formatter):
+    def __init__(
+        self,
+        column_split_char: str = "|",
+        key_value_split_char: str = ":",
+        fmt=None,
+        datefmt=None,
+        style="%",
+        skip_missing_key_value_split_char: bool = False,
+    ):
+        super().__init__(fmt, datefmt, style)
+        self.column_split_char: str = column_split_char
+        self.key_value_split_char: str = key_value_split_char
+        self.skip_missing_key_value_split_char: bool = skip_missing_key_value_split_char
+
+    def format(self, record):
+        """
+        Converts a formatted string to a dictionary.
+
+        Parameters:
+            record: The log record to be converted to a dictionary.
+
+        Returns:
+            str: The converted dictionary as a JSON string.
+        """
+        log_record = super().format(record)
+        parts = [part.strip() for part in log_record.split(self.column_split_char)]
+        result_dict = {
+            "timestamp": self.formatTime(record),
+            "level": record.levelname,
+        }
+        for part in parts:
+            if self.key_value_split_char in part:
+                key, value = part.split(self.key_value_split_char, maxsplit=1)
+                result_dict[key.strip()] = value.strip()
+            else:
+                if self.skip_missing_key_value_split_char:
+                    continue
+                raise ValueError(f"Each part of the record must contain the key_value_split_char. Part: {part}")
+        return json.dumps(result_dict)
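A small sketch of how the formatter above turns a pipe-separated message into a JSON string; the logger name and message values are illustrative.

    import logging
    from cloe_logging.formatters import DictFormatter

    handler = logging.StreamHandler()
    handler.setFormatter(DictFormatter(column_split_char="|", key_value_split_char=":"))
    logger = logging.getLogger("dict-formatter-demo")
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    # Emits something like: {"timestamp": "...", "level": "INFO", "job": "ingest", "status": "ok"}
    logger.info("job: ingest | status: ok")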
cloe_logging/handlers/__init__.py ADDED
@@ -0,0 +1,5 @@
+from .unity_catalog_handler import UnityCatalogHandler
+from .log_analytics_handler import LogAnalyticsHandler
+from .snowflake_handler import SnowflakeHandler
+
+__all__ = ["UnityCatalogHandler", "LogAnalyticsHandler", "SnowflakeHandler"]
cloe_logging/handlers/log_analytics_handler.py ADDED
@@ -0,0 +1,217 @@
+import base64
+import hashlib
+import hmac
+import logging
+import os
+from datetime import datetime
+
+try:
+    import requests
+except ImportError:
+    requests = None  # type: ignore
+    print("Optional dependency 'log_analytics' is not installed. Some functionalities may not be available.")
+
+from cloe_logging.utility.serializer import create_logserializer
+
+
+class LogAnalyticsHandler(logging.Handler):
+    """A custom logging handler for Azure Log Analytics.
+
+    The handler will by default always send the timestamp and loglevel of the log message.
+
+    Attributes:
+        METHOD (str): The HTTP method for the requests.
+        RESOURCE (str): The resource path for the requests.
+        CONTENT_TYPE (str): The content type for the requests.
+    """
+
+    METHOD = "POST"
+    RESOURCE = "/api/logs"
+    CONTENT_TYPE = "application/json; charset=utf-8"
+
+    def __init__(
+        self,
+        workspace_id: str | None = None,
+        shared_key: str | None = None,
+        log_type: str | None = None,
+        test_connectivity: bool = True,
+        column_split_char: str = "|",
+        key_value_split_char: str = ":",
+        **kwargs,  # required to work with the Factory
+    ):
+        """Initializes a new instance of the LogAnalyticsHandler class.
+
+        Args:
+            workspace_id (str): The workspace ID for Azure Log Analytics.
+            shared_key (str): The shared key for Azure Log Analytics.
+            log_type (str): The log type for Azure Log Analytics.
+            column_split_char (str, optional): The character used to split columns in the log message. Defaults to "|".
+            key_value_split_char (str, optional): The character used to split keys and values in the log message.
+                Defaults to ":".
+            test_connectivity (bool, optional): Whether to test connectivity to Azure Log Analytics when initializing
+                the handler. Defaults to True.
+        """
+        self.column_split_char: str = column_split_char
+        self.key_value_split_char: str = key_value_split_char
+        self.workspace_id: str | None = workspace_id or os.environ.get("LOG_ANALYTICS_WORKSPACE_ID")
+        self.shared_key: str | None = shared_key or os.environ.get("LOG_ANALYTICS_WORKSPACE_SHARED_KEY")
+        self.log_type: str | None = log_type or os.environ.get("LOG_TYPE")
+        if not self.workspace_id or not self.shared_key or not self.log_type:
+            raise ValueError(
+                "The workspace_id, shared_key, and log_type must be provided or set as environment variables."
+            )
+        logging.Handler.__init__(self)
+        self.session = requests.Session()
+        formatter = logging.Formatter("timestamp:%(asctime)s | level: %(levelname)-8s | %(message)s")
+        self.setFormatter(formatter)
+        self.serializer = create_logserializer()
+        self.serializer.column_split_char = self.column_split_char
+        self.serializer.key_value_split_char = self.key_value_split_char
+        if test_connectivity:
+            self.test_connectivity()
+
+    def test_connectivity(self):
+        """Checks the connectivity to the Log Analytics workspace without sending a log.
+
+        Raises:
+            ValueError: If the connection to Azure Log Analytics fails.
+        """
+
+        class FakeRecord(logging.LogRecord):
+            """Mock Record to use in the emit method."""
+
+            def __init__(self, msg, level=logging.INFO):
+                name = "test"
+                pathname = "test_path"
+                lineno = 1
+                args = ()
+                exc_info = None
+                super().__init__(
+                    name,
+                    level,
+                    pathname,
+                    lineno,
+                    msg,
+                    args,
+                    exc_info,
+                    func=None,
+                    sinfo=None,
+                )
+                self.levelname = "INFO"
+
+            def getMessage(self):
+                return self.msg
+
+        try:
+            self.emit(FakeRecord(msg=f"''{self.key_value_split_char}''"))
+        except ValueError as err:
+            raise ValueError(f"Failed to connect to Azure Log Analytics: {str(err)}") from err
+
+    def __eq__(self, other):
+        """Checks if two LogAnalyticsHandler instances are equal.
+
+        Instances are considered equal if they have the same workspace_id, shared_key, and log_type.
+        This will prevent the same handler from being added multiple times to a single logger.
+
+        Args:
+            other (LogAnalyticsHandler): The other LogAnalyticsHandler instance to compare with.
+
+        Returns:
+            bool: True if instances are equal, False otherwise.
+        """
+        if isinstance(other, LogAnalyticsHandler):
+            return (
+                self.workspace_id == other.workspace_id
+                and self.shared_key == other.shared_key
+                and self.log_type == other.log_type
+            )
+        return False
+
+    def __hash__(self):
+        """Generates a unique hash value for the object.
+
+        This method overrides the built-in `__hash__` method to generate a unique hash value for the object,
+        which is particularly useful for using the object in sets or as keys in dictionaries.
+
+        The hash value is computed based on the 'workspace_id', 'shared_key', and 'log_type' attributes of the object.
+        """
+        return hash((self.workspace_id, self.shared_key, self.log_type))
+
+    def _build_signature(self, date, content_length):
+        """Builds the signature for the request.
+
+        Args:
+            date (str): The date of the request.
+            content_length (int): The length of the content in the request.
+
+        Returns:
+            str: The authorization signature for the request.
+        """
+        x_headers = "x-ms-date:" + date
+        string_to_hash = f"{self.METHOD}\n{content_length}\n{self.CONTENT_TYPE}\n{x_headers}\n{self.RESOURCE}"
+        bytes_to_hash = bytes(string_to_hash, encoding="utf-8")
+        decoded_key = base64.b64decode(self.shared_key)
+        encoded_hash = base64.b64encode(
+            hmac.new(decoded_key, bytes_to_hash, digestmod=hashlib.sha256).digest(),
+        ).decode()
+        authorization = f"SharedKey {self.workspace_id}:{encoded_hash}"
+        return authorization
+
+    def _make_message_compliant(self, input_string):
+        """Encodes the input string as UTF-8 to make it compliant.
+
+        Args:
+            input_string (str): The string to be encoded.
+
+        Returns:
+            bytes: The UTF-8 encoded string.
+        """
+        return str(input_string).encode("utf-8")
+
+    def _get_url(self):
+        """Generates the URL for the Azure Log Analytics workspace.
+
+        Returns:
+            str: The URL of the Azure Log Analytics workspace.
+        """
+        uri = f"https://{self.workspace_id}.ods.opinsights.azure.com{self.RESOURCE}?api-version=2016-04-01"
+        return uri
+
+    def emit(self, record: logging.LogRecord):
+        """Sends the log message to Azure Log Analytics.
+
+        Args:
+            record (logging.LogRecord): The record instance with the log message.
+
+        Raises:
+            ValueError: If record.msg is not a string, or if failed to send log to Azure Log Analytics.
+
+        Note:
+            This method uses the following methods:
+            - the configured serializer to convert the log message to a dictionary.
+            - _make_message_compliant to make the log message compliant.
+            - _build_signature to build the signature for the request.
+            - _get_url to get the URL of the Azure Log Analytics workspace.
+        """
+        try:
+            log_message = self.format(record)
+            log_message_dict = self.serializer.serialize(log_message)
+            compliant_log_message = self._make_message_compliant(str(log_message_dict))
+            content_length = len(compliant_log_message)
+            rfc1123date = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S GMT")
+            signature = self._build_signature(rfc1123date, content_length)
+
+            headers = {
+                "content-type": self.CONTENT_TYPE,
+                "Authorization": signature,
+                "Log-Type": self.log_type,
+                "x-ms-date": rfc1123date,
+            }
+            response = self.session.post(self._get_url(), data=compliant_log_message, headers=headers, timeout=30)
+            response.raise_for_status()
+        except AttributeError as exc:
+            raise ValueError(exc) from exc
+        except requests.exceptions.RequestException as exc:
+            raise ValueError(f"Failed to send log to Azure Log Analytics: {exc}") from exc
+        except Exception as exc:
+            raise ValueError(exc) from exc
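A hedged construction sketch for the handler above, assuming the 'log-analytics' extra (requests) is installed; the workspace ID, shared key, and log type are placeholders, and test_connectivity is disabled so nothing is sent on instantiation.

    import logging
    from cloe_logging.handlers import LogAnalyticsHandler

    handler = LogAnalyticsHandler(
        workspace_id="<workspace-id>",     # placeholder
        shared_key="<base64-shared-key>",  # placeholder
        log_type="AppLogs",                # placeholder custom log type
        test_connectivity=False,
    )
    logger = logging.getLogger("log-analytics-demo")
    logger.addHandler(handler)
    logger.info("job: ingest | status: ok")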
cloe_logging/handlers/snowflake_handler.py ADDED
@@ -0,0 +1,120 @@
+import logging
+import os
+
+try:
+    from cloe_util_snowflake_connector.connection_parameters import ConnectionParameters
+    from cloe_util_snowflake_connector.snowflake_interface import SnowflakeInterface
+except ImportError:
+    ConnectionParameters = None  # type: ignore
+    SnowflakeInterface = None  # type: ignore
+    print("Optional dependency 'snowflake' is not installed. Some functionalities may not be available.")
+from cloe_logging.utility.serializer import create_logserializer
+
+
+class SnowflakeHandler(logging.Handler):
+    """A custom logging handler for Snowflake.
+
+    The handler will by default always send the timestamp and loglevel of the log message.
+    """
+
+    def __init__(
+        self,
+        target_db: str = "",
+        target_schema: str = "",
+        target_table: str = "",
+        column_split_char: str = "|",
+        key_value_split_char: str = ":",
+        **kwargs,  # required to work with the Factory
+    ):
+        """Initializes a new instance of the SnowflakeHandler class.
+
+        Args:
+            target_db: The name of the database to send logs to.
+            target_schema: The name of the schema to send logs to.
+            target_table: The name of the table to send logs to.
+            column_split_char (str, optional): The character used to split columns in the log message. Defaults to "|".
+            key_value_split_char (str, optional): The character used to split keys and values in the log message.
+                Defaults to ":".
+        """
+        self.column_split_char: str = column_split_char
+        self.key_value_split_char: str = key_value_split_char
+        self.target_db: str = os.environ.get("CLOE_SNOWFLAKE_DATABASE", target_db)
+        self.target_schema: str = os.environ.get("CLOE_SNOWFLAKE_SCHEMA", target_schema)
+        self.target_table: str = os.environ.get("CLOE_SNOWFLAKE_TABLE", target_table)
+        logging.Handler.__init__(self)
+        self.connection = self._get_snowflake_connection()
+        formatter = logging.Formatter("timestamp:%(asctime)s | level: %(levelname)-8s | %(message)s")
+        self.setFormatter(formatter)
+        self.serializer = create_logserializer()
+
+    def __eq__(self, other):
+        """Checks if two SnowflakeHandler instances are equal.
+
+        Instances are considered equal if they have the same target_db, target_schema, and target_table.
+        This will prevent the same handler from being added multiple times to a single logger.
+
+        Args:
+            other (SnowflakeHandler): The other SnowflakeHandler instance to compare with.
+
+        Returns:
+            bool: True if instances are equal, False otherwise.
+        """
+        if isinstance(other, SnowflakeHandler):
+            return (
+                self.target_db == other.target_db
+                and self.target_schema == other.target_schema
+                and self.target_table == other.target_table
+            )
+        return False
+
+    def __hash__(self):
+        """Generates a unique hash value for the object.
+
+        This method overrides the built-in `__hash__` method to generate a unique hash value for the object,
+        which is particularly useful for using the object in sets or as keys in dictionaries.
+
+        The hash value is computed based on the target_db, target_schema, and target_table attributes.
+        """
+        return hash((self.target_db, self.target_schema, self.target_table))
+
+    def _get_snowflake_connection(self) -> SnowflakeInterface:
+        conn_params = ConnectionParameters.init_from_env_variables()
+        snowflake_conn = SnowflakeInterface(conn_params)
+        return snowflake_conn
+
+    def _parse_dict_to_sql_insert(self, input_dict: dict) -> str:
+        """
+        Generate a SQL INSERT statement from a dictionary.
+
+        Parameters:
+            input_dict (dict): A dictionary where keys are column names and values
+                are the data to insert.
+
+        Returns:
+            str: A SQL INSERT statement as a string.
+        """
+        columns = ", ".join(input_dict.keys())
+        values = ", ".join(f"'{str(v)}'" for v in input_dict.values())
+        sql_statement = (
+            f"INSERT INTO {self.target_db}.{self.target_schema}.{self.target_table} ({columns}) VALUES ({values})"
+        )
+
+        return sql_statement
+
+    def emit(self, record: logging.LogRecord):
+        """Sends the log message to Snowflake.
+
+        Args:
+            record (logging.LogRecord): The record instance with the log message.
+
+        Raises:
+            ValueError: If record.msg is not a string, or if failed to send log to Snowflake.
+
+        """
+        try:
+            log_message = self.format(record)
+            log_message_dict = self.serializer.serialize(log_message)
+            log_insert_statement = self._parse_dict_to_sql_insert(log_message_dict)
+            self.connection.run_one_with_return(log_insert_statement)
+        except Exception as exc:
+            raise ValueError(exc) from exc
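A brief sketch of the handler above, assuming the 'snowflake' extra is installed and the connector's environment variables (read by ConnectionParameters.init_from_env_variables) are set; the database, schema, and table names are placeholders.

    import logging
    from cloe_logging.handlers import SnowflakeHandler

    handler = SnowflakeHandler(target_db="LOGS", target_schema="PUBLIC", target_table="APP_LOG")
    logger = logging.getLogger("snowflake-demo")
    logger.addHandler(handler)
    logger.info("job: ingest | status: ok")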
cloe_logging/handlers/unity_catalog_handler.py ADDED
@@ -0,0 +1,154 @@
+import json
+import logging
+from typing import cast
+
+try:
+    from databricks.sdk import WorkspaceClient
+    from databricks.sdk.service.sql import ExecuteStatementRequestOnWaitTimeout
+except ImportError:
+    WorkspaceClient = None  # type: ignore
+    ExecuteStatementRequestOnWaitTimeout = None  # type: ignore
+    print("Optional dependency 'databricks' is not installed. Some functionalities may not be available.")
+
+from ..formatters import DictFormatter
+
+
+class UnityCatalogHandler(logging.Handler):
+    """A custom logging handler for Databricks Unity Catalog.
+
+    The handler will by default always send the timestamp and loglevel of the log message.
+    """
+
+    def __init__(
+        self,
+        catalog: str | None = None,
+        schema: str | None = None,
+        table: str | None = None,
+        columns: dict[str, str] | None = None,
+        workspace_url: str | None = None,
+        warehouse_id: str | None = None,
+        column_split_char: str = "|",
+        key_value_split_char: str = ":",
+        workspace_client: WorkspaceClient | None = None,
+        formatter: DictFormatter | None = None,
+        **kwargs,  # required to work with the Factory
+    ):
+        """Initializes a new instance of the UnityCatalogHandler class.
+
+        Note:
+            The handler will reuse the existing authentication from the Azure and Databricks CLI or any
+            other spark connection that is already established.
+
+        Args:
+            catalog: The name of the catalog to send logs to.
+            schema: The name of the schema to send logs to.
+            table: The name of the table to send logs to.
+            columns: A dictionary of column names and their corresponding data types.
+            workspace_url: The URL of the Azure Databricks workspace.
+            warehouse_id: The ID of the Databricks warehouse.
+            column_split_char: The character used to split columns in the log message. Defaults to "|".
+            key_value_split_char: The character used to split keys and values in the log message. Defaults to ":".
+            workspace_client: An instance of WorkspaceClient for dependency injection.
+            formatter: An instance of DictFormatter for dependency injection.
+        """
+        self.workspace_url = workspace_url
+        self.column_split_char = column_split_char
+        self.key_value_split_char = key_value_split_char
+        self.catalog = catalog
+        self.schema = schema
+        self.table = table
+        self.warehouse_id = cast(str, warehouse_id)
+        if not all([self.catalog, self.schema, self.table, self.warehouse_id, self.workspace_url]):
+            raise ValueError(
+                "You must provide a workspace_url, warehouse_id, catalog, schema, and table to create a DatabricksHandler."
+            )
+        self.table_identifier = f"{self.catalog}.{self.schema}.{self.table}"
+        self.workspace_client = workspace_client or WorkspaceClient(host=self.workspace_url)
+        super().__init__(**kwargs)
+        self.setFormatter(
+            formatter or DictFormatter(column_split_char=column_split_char, key_value_split_char=key_value_split_char)
+        )
+        self.ensure_table_exists(columns)
+
+    def ensure_table_exists(self, columns: dict[str, str] | None) -> None:
+        """Ensure that the table exists in the catalog.
+
+        This method will create the table in the catalog if it does not already exist.
+
+        Args:
+            columns: A dictionary of column names and their corresponding data types.
+
+        Raises:
+            ValueError: If the columns dictionary is empty.
+        """
+        if not columns:
+            raise ValueError("You must provide a dictionary of columns to create the logging table.")
+        columns = {**columns, "timestamp": "timestamp", "level": "string"}
+        table_exists = self.workspace_client.tables.exists(self.table_identifier).table_exists is True
+        if table_exists is False:
+            columns["timestamp"] = "TIMESTAMP"
+            columns["level"] = "STRING"
+            columns_definition = ", ".join([f"{col_name} {col_type}" for col_name, col_type in columns.items()])
+
+            self.workspace_client.statement_execution.execute_statement(
+                statement=f"CREATE TABLE IF NOT EXISTS {self.table_identifier} ({columns_definition})",
+                warehouse_id=self.warehouse_id,
+                wait_timeout="30s",
+            )
+
+    def __eq__(self, other: object) -> bool:
+        """Checks if two UnityCatalogHandler instances are equal.
+
+        Instances are considered equal if they have the same catalog, schema, and table.
+        This will prevent the same handler from being added multiple times to a single logger.
+
+        Args:
+            other: The other UnityCatalogHandler instance to compare with.
+
+        Returns:
+            True if instances are equal, False otherwise.
+        """
+        return (
+            isinstance(other, UnityCatalogHandler)
+            and self.catalog == other.catalog
+            and self.schema == other.schema
+            and self.table == other.table
+        )
+
+    def __hash__(self):
+        """Generates a unique hash value for the object.
+
+        This method overrides the built-in `__hash__` method to generate a unique hash value for the object,
+        which is particularly useful for using the object in sets or as keys in dictionaries.
+        The hash value is computed based on the catalog, schema, and table attributes.
+        """
+        return hash((self.catalog, self.schema, self.table))
+
+    def _parse_dict_to_sql_insert(self, input_dict: dict) -> str:
+        """Generate a SQL INSERT statement from a dictionary.
+
+        Parameters:
+            input_dict (dict): A dictionary where keys are column names and values are the data to insert.
+        """
+        columns = ", ".join(input_dict.keys())
+        values = ", ".join(f"'{str(v)}'" for v in input_dict.values())
+        split_values = values.split(", ")
+        timestamp = split_values[0]
+        casted_timestamp = f"to_timestamp({timestamp}, 'yyyy-MM-dd HH:mm:ss,SSS')"
+        joined_values = ", ".join([casted_timestamp] + split_values[1:])
+        sql_statement = f"INSERT INTO {self.table_identifier} ({columns}) VALUES ({joined_values})"
+        return sql_statement
+
+    def emit(self, record: logging.LogRecord) -> None:
+        """Send the log record to the Unity Catalog table.
+
+        Args:
+            record (logging.LogRecord): The log record to insert.
+        """
+        log_message = self.format(record)
+        log_insert_statement = self._parse_dict_to_sql_insert(json.loads(log_message))
+        self.workspace_client.statement_execution.execute_statement(
+            statement=log_insert_statement,
+            warehouse_id=self.warehouse_id,
+            on_wait_timeout=ExecuteStatementRequestOnWaitTimeout.CONTINUE,
+        )
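A hedged construction sketch for the handler above, assuming the 'databricks' extra is installed and CLI authentication is in place; catalog, schema, table, columns, workspace URL, and warehouse ID are placeholders.

    import logging
    from cloe_logging.handlers import UnityCatalogHandler

    handler = UnityCatalogHandler(
        catalog="main",                    # placeholder
        schema="observability",            # placeholder
        table="app_log",                   # placeholder
        columns={"job": "STRING", "status": "STRING"},
        workspace_url="https://adb-<id>.azuredatabricks.net",  # placeholder
        warehouse_id="<warehouse-id>",     # placeholder
    )
    logger = logging.getLogger("uc-demo")
    logger.addHandler(handler)
    logger.info("job: ingest | status: ok")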
cloe_logging/logger_factory.py ADDED
@@ -0,0 +1,159 @@
+import logging
+from collections.abc import Callable
+
+from cloe_logging.handlers import UnityCatalogHandler, SnowflakeHandler, LogAnalyticsHandler
+
+
+class LoggerFactory:
+    DEFAULT_COLUMN_SPLIT_CHAR = "|"
+    DEFAULT_KEY_VALUE_SPLIT_CHAR = ":"
+
+    @staticmethod
+    def get_logger(
+        handler_types: str | list[str],
+        logger_name: str = __name__,
+        logging_level: int = logging.INFO,
+        log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        **kwargs,
+    ) -> logging.Logger:
+        """Creates a logger with the specified handler types.
+
+        Args:
+            handler_types: The type of handler to use for the logger.
+            logger_name: The name of the logger.
+            logging_level: The logging level for the logger.
+            log_format: The format of the log messages.
+            kwargs: Additional arguments to pass to the handler.
+
+        Note:
+            Supported handler types are "console", "file", "unity_catalog", "snowflake", and "log_analytics".
+
+        Returns:
+            The logger with the specified handler types.
+        """
+        logger = logging.getLogger(logger_name)
+        logger.setLevel(logging_level)
+        if isinstance(handler_types, str):
+            handler_types = [handler_types]
+
+        for handler_type in handler_types:
+            handler = LoggerFactory.get_handler(handler_type, log_format, **kwargs)
+            LoggerFactory.add_handler_if_not_exists(logger, handler)
+        return logger
+
+    @staticmethod
+    def get_handler(
+        handler_type: str,
+        log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        **kwargs,
+    ) -> logging.Handler:
+        HANDLER_FUNCTIONS: dict[str, Callable] = {
+            "console": LoggerFactory.get_console_handler,
+            "file": LoggerFactory.get_file_handler,
+            "unity_catalog": LoggerFactory.get_unity_catalog_handler,
+            "snowflake": LoggerFactory.get_snowflake_handler,
+            "log_analytics": LoggerFactory.get_log_analytics_handler,
+        }
+        handler = HANDLER_FUNCTIONS[handler_type](**kwargs, log_format=log_format)
+        return handler
+
+    @staticmethod
+    def get_console_handler(log_format: str, **kwargs) -> logging.Handler:
+        handler = logging.StreamHandler()
+        handler.setFormatter(logging.Formatter(log_format))
+        return handler
+
+    @staticmethod
+    def get_file_handler(
+        log_format: str,
+        filename: str | None = None,
+        mode: str = "a",
+        encoding: str | None = None,
+        delay: bool = False,
+        **kwargs,
+    ) -> logging.Handler:
+        if filename is None:
+            raise ValueError("filename is required for file logger")
+        handler = logging.FileHandler(filename, mode, encoding, delay)
+        handler.setFormatter(logging.Formatter(log_format))
+        return handler
+
+    @classmethod
+    def get_unity_catalog_handler(
+        cls,
+        uc_table_name: str,
+        uc_catalog_name: str,
+        uc_schema_name: str,
+        uc_table_columns: dict[str, str],
+        workspace_url: str,
+        warehouse_id: str,
+        column_split_char: str = DEFAULT_COLUMN_SPLIT_CHAR,
+        key_value_split_char: str = DEFAULT_KEY_VALUE_SPLIT_CHAR,
+        **kwargs,
+    ) -> logging.Handler:
+        return UnityCatalogHandler(
+            catalog=uc_catalog_name,
+            schema=uc_schema_name,
+            table=uc_table_name,
+            columns=uc_table_columns,
+            workspace_url=workspace_url,
+            warehouse_id=warehouse_id,
+            column_split_char=column_split_char,
+            key_value_split_char=key_value_split_char,
+        )
+
+    @classmethod
+    def get_snowflake_handler(
+        cls,
+        target_db: str,
+        target_schema: str,
+        target_table: str,
+        column_split_char: str = DEFAULT_COLUMN_SPLIT_CHAR,
+        key_value_split_char: str = DEFAULT_KEY_VALUE_SPLIT_CHAR,
+        **kwargs,
+    ) -> logging.Handler:
+        return SnowflakeHandler(
+            target_db=target_db,
+            target_schema=target_schema,
+            target_table=target_table,
+            column_split_char=column_split_char,
+            key_value_split_char=key_value_split_char,
+        )
+
+    @classmethod
+    def get_log_analytics_handler(
+        cls,
+        workspace_id: str,
+        shared_key: str,
+        log_type: str,
+        test_connectivity: bool,
+        column_split_char: str = DEFAULT_COLUMN_SPLIT_CHAR,
+        key_value_split_char: str = DEFAULT_KEY_VALUE_SPLIT_CHAR,
+        **kwargs,
+    ) -> logging.Handler:
+        return LogAnalyticsHandler(
+            workspace_id=workspace_id,
+            shared_key=shared_key,
+            log_type=log_type,
+            test_connectivity=test_connectivity,
+            column_split_char=column_split_char,
+            key_value_split_char=key_value_split_char,
+        )
+
+    @staticmethod
+    def add_handler_if_not_exists(logger: logging.Logger, handler: logging.Handler) -> logging.Logger:
+        """Adds a handler to the logger if it does not already exist.
+
+        Args:
+            logger: The logger to add the handler to.
+            handler: The handler to add to the logger.
+
+        Returns:
+            The logger with the handler added.
+        """
+        if len(logger.handlers) > 0:
+            if not any([isinstance(h, handler.__class__) for h in logger.handlers]):
+                logger.addHandler(handler)
+        else:
+            logger.addHandler(handler)
+        return logger
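A hedged usage sketch of the factory above, combining a console and a file handler; the logger name and file path are illustrative, and extra keyword arguments are forwarded to the individual handler constructors.

    import logging
    from cloe_logging import LoggerFactory

    logger = LoggerFactory.get_logger(
        handler_types=["console", "file"],
        logger_name="pipeline",
        logging_level=logging.DEBUG,
        filename="pipeline.log",  # placeholder path, required by the file handler
    )
    logger.info("job: ingest | status: ok")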
cloe_logging/py.typed ADDED
File without changes
cloe_logging/utility/__init__.py ADDED
@@ -0,0 +1,3 @@
+from .serializer import LogSerializer
+
+__all__ = ["LogSerializer"]
cloe_logging/utility/serializer.py ADDED
@@ -0,0 +1,80 @@
+from abc import ABC, abstractmethod
+
+
+class LogSerializer(ABC):
+    """
+    Used to serialize log records into various formats.
+
+    Currently supported formats:
+    - 'dict'
+    """
+
+    @abstractmethod
+    def serialize(self, log_record):
+        """
+        Serializes a log record into the specified format.
+
+        Args:
+            log_record: The log record to be serialized. The concrete serializer
+                determines the output format; currently only "dict" is supported.
+
+        Returns:
+            dict: The serialized log record in dictionary format.
+
+        Raises:
+            NotImplementedError: If the specified format is not supported.
+        """
+        pass
+
+
+class DictSerializer(LogSerializer):
+    def __init__(
+        self,
+        column_split_char: str = "|",
+        key_value_split_char: str = ":",
+    ):
+        self.column_split_char: str = column_split_char
+        self.key_value_split_char: str = key_value_split_char
+
+    def serialize(self, log_record):
+        """
+        Converts a formatted string to a dictionary.
+
+        Parameters:
+            log_record: The string to be converted to a dictionary.
+            column_split_char: The character that separates different key-value pairs in the string (default is "|").
+            key_value_split_char: The character that separates keys from values in the string (default is ":").
+
+        Returns:
+            dict: The converted dictionary.
+
+        Raises:
+            ValueError: If the log_record is not a string, or if any part of the log_record does not contain the key_value_split_char.
+        """
+        if not isinstance(log_record, str):
+            raise ValueError("record must be a string.")
+        parts = [part.strip() for part in log_record.split(self.column_split_char)]
+        result_dict = {}
+
+        for part in parts:
+            try:
+                key, value = part.split(self.key_value_split_char, maxsplit=1)
+            except ValueError as exc:
+                raise ValueError(
+                    f"Each part of the record must contain the key_value_split_char. Error: {str(exc)}",
+                ) from exc
+            result_dict[key.strip()] = value.strip()
+        return result_dict
+
+
+def create_logserializer(format="dict"):
+    logserializers = {
+        "dict": DictSerializer,
+    }
+    try:
+        serializer = logserializers[format]()
+    except KeyError as exc:
+        raise NotImplementedError(
+            f"The selected format is not supported yet. Error: {str(exc)}",
+        ) from exc
+    return serializer
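A short sketch of the serializer factory above; the sample message is illustrative.

    from cloe_logging.utility.serializer import create_logserializer

    serializer = create_logserializer("dict")
    # Returns {'timestamp': '2024-01-01 12:00:00,000', 'level': 'INFO', 'job': 'ingest'}
    print(serializer.serialize("timestamp: 2024-01-01 12:00:00,000 | level: INFO | job: ingest"))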
cloe_logging-0.3.7.dist-info/METADATA ADDED
@@ -0,0 +1,14 @@
+Metadata-Version: 2.4
+Name: cloe-logging
+Version: 0.3.7
+Summary: A Standardized Solution for logging to various targets.
+Home-page: https://initions.com/
+Author: initions
+Author-email: ICSMC_EXT_PYPIORG@accenture.com
+License: MIT
+Requires-Python: <3.12,>=3.11
+Requires-Dist: pydantic<3.0.0,>=2.7.0
+Requires-Dist: typing-extensions<5.0.0,>=4.12.2
+Requires-Dist: databricks-sdk>=0.39.0; extra == 'databricks'
+Requires-Dist: requests<3.0.0,>=2.31.0; extra == 'log-analytics'
+Requires-Dist: cloe-util-snowflake-connector<2.0.0,>=1.0.2; extra == 'snowflake'
cloe_logging-0.3.7.dist-info/RECORD ADDED
@@ -0,0 +1,17 @@
+cloe_logging/__init__.py,sha256=3rsW-XtJbAaENMtN9ah2qRZyQnZOtqTnkCE_OqXBLNw,74
+cloe_logging/logger_factory.py,sha256=az_XcRSrUCge4yG3HXNqFPSPRJ0V8EMKmjPPl54UIms,5541
+cloe_logging/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+cloe_logging/decorators/__init__.py,sha256=ZFfADCScnpZ3ofxNgle1UybF322cdH9vuIGCx3S9wUU,71
+cloe_logging/decorators/devops_decorator.py,sha256=1jCfNa22lU87u-QPNUt6fVLJOCINhZncYz76UjUh2XE,1959
+cloe_logging/formatters/__init__.py,sha256=0FbiRRJbo4zYgNb2QSxY5aGthu8evt9Gex5c_-jxCIs,136
+cloe_logging/formatters/devops_formatter.py,sha256=KwarCm_EPpRV-cZk6nGM3GZVNKN39FNOofXfC0KfkQg,3301
+cloe_logging/formatters/dict_formatter.py,sha256=PsbNXxdCZxtEC9g-nIukhdQwCcsCfz42EumHPHFt9-4,1553
+cloe_logging/handlers/__init__.py,sha256=xgYTK6qXJEgNFPZE6yHXwS1o9bmGuQQZ7so_7BncW7A,236
+cloe_logging/handlers/log_analytics_handler.py,sha256=2Z-Hg2mWgvqAbO3nvego9AjDy3PGgzPQOxVWL7x5GAY,9038
+cloe_logging/handlers/snowflake_handler.py,sha256=bZR5UIJIlivAJT3IeniVoFXlsQhq5-YDuSUH4qrXeL0,5054
+cloe_logging/handlers/unity_catalog_handler.py,sha256=aXIiwDq9tHHheAFMusHhEI9uvYpI39KCscuVntFvNLA,6879
+cloe_logging/utility/__init__.py,sha256=wp758l5P1M20bNUbbGojSkBUscFUgRd_TLLdnHgQ_l8,70
+cloe_logging/utility/serializer.py,sha256=uPdmpawqM3WXZQVxz63deCalvKJYxRNecxjBQeDzTIY,2639
+cloe_logging-0.3.7.dist-info/METADATA,sha256=TCZNiRj3PJJpq8PcKGqkiYRpYWnesboKsv5TDGKdAM8,1033
+cloe_logging-0.3.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+cloe_logging-0.3.7.dist-info/RECORD,,
cloe_logging-0.3.7.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.27.0
+Root-Is-Purelib: true
+Tag: py3-none-any