ecodev-core 0.0.67__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ """
2
+ Module implementing helper methods working on enums
3
+ """
4
+ from enum import Enum
5
+ from typing import Type
6
+ from typing import Union
7
+
8
+ from ecodev_core.safe_utils import stringify
9
+
10
+
11
def enum_converter(field: Union[str, float],
                   enum_type: Type,
                   default: Union[Enum, None] = None
                   ) -> Union[Enum, None]:
    """
    Build an enum_type member out of the stringified field.

    Returns default whenever the conversion raises a ValueError.
    """
    try:
        converted = enum_type(stringify(field))
    except ValueError:
        converted = default
    return converted
@@ -0,0 +1,79 @@
1
+ """
2
+ Module implementing a connection to an elastic search instance, and basic insertion/retrieval.
3
+ """
4
+ from typing import Any
5
+ from typing import Union
6
+
7
+ import progressbar
8
+ from elasticsearch import Elasticsearch
9
+ from elasticsearch import helpers
10
+ from pydantic_settings import BaseSettings
11
+ from pydantic_settings import SettingsConfigDict
12
+
13
+ from ecodev_core.logger import logger_get
14
+ from ecodev_core.settings import SETTINGS
15
+
16
# Module wide elasticsearch client, None until it is first requested
ES_CLIENT: Union[Elasticsearch, None] = None
# Logger of this module
log = logger_get(__name__)
# Default number of operations sent per bulk request
ES_BATCH_SIZE = 5000
19
+
20
+
21
class ESAuth(BaseSettings):
    """
    Elasticsearch connection settings.

    model_config points pydantic-settings at the '.env' file and at the 'ES_' prefix.
    """
    # all fields have a default, so the class can be instantiated without any configuration
    host: str = ''
    user: str = ''
    password: str = ''
    port: int = 9200
    index: str = ''
    model_config = SettingsConfigDict(env_prefix='ES_', env_file='.env')
31
+
32
+
33
ES_AUTH = ESAuth()
ES_SETTINGS = SETTINGS.elastic_search  # type: ignore[attr-defined]
# For each parameter, a truthy value found in SETTINGS wins over the ESAuth one
_HOST = ES_SETTINGS.host or ES_AUTH.host
_PORT = ES_SETTINGS.port or ES_AUTH.port
_USER = ES_SETTINGS.user or ES_AUTH.user
_PASSWD = ES_SETTINGS.password or ES_AUTH.password
_INDEX = ES_SETTINGS.index or ES_AUTH.index
37
+
38
+
39
def get_es_client():
    """
    Return the module wide elasticsearch client, creating it on the first call
    """
    global ES_CLIENT

    if ES_CLIENT is not None:
        return ES_CLIENT

    url = f'http://{_HOST}:{_PORT}/'
    ES_CLIENT = Elasticsearch(url, basic_auth=[_USER, _PASSWD])
    return ES_CLIENT
49
+
50
+
51
def create_es_index(body: dict) -> None:
    """
    (Re)create the configured es index.

    An attempt is first made to delete the index named _INDEX, then the index is created
    out of the passed body.

    Args:
        body: forwarded as is to client.indices.create
    """
    client = get_es_client()
    try:
        client.indices.delete(index=_INDEX)
    except Exception as error:
        # Best effort deletion (presumably the index just does not exist yet): do not fail,
        # but keep a trace of the reason instead of silently swallowing it.
        log.debug(f'index {_INDEX} not deleted before creation: {error}')
    client.indices.create(index=_INDEX, body=body)
    log.info(f'index {_INDEX} created')
62
+
63
+
64
def insert_es_fields(operations: list[dict], batch_size: int = ES_BATCH_SIZE) -> None:
    """
    Generic es insertion: send operations to the configured index, batch_size at a time.

    Args:
        operations: the bulk operations to send
        batch_size: maximal number of operations per helpers.bulk call
    """
    client = get_es_client()
    # Copy the operations once (the copy used to be redone for every single batch)
    all_operations = list(operations)
    batches = [all_operations[start:start + batch_size]
               for start in range(0, len(all_operations), batch_size)]
    log.info('indexing fields')
    for batch in progressbar.progressbar(batches, redirect_stdout=False):
        helpers.bulk(client, batch, index=_INDEX)
73
+
74
+
75
def retrieve_es_fields(body: dict[str, Any]) -> list[dict]:
    """
    Run the search described by body against the configured index.

    NOTE(review): the result of client.search is returned untouched, confirm that it
    matches the declared list[dict] return type.
    """
    client = get_es_client()
    return client.search(index=_INDEX, body=body)
@@ -0,0 +1,134 @@
1
+ """
2
+ Module implementing helper methods working on lists
3
+ """
4
+ from collections import defaultdict
5
+ from itertools import groupby
6
+ from typing import Any
7
+ from typing import Callable
8
+ from typing import Dict
9
+ from typing import Iterator
10
+ from typing import List
11
+ from typing import Optional
12
+ from typing import Tuple
13
+ from typing import Union
14
+
15
+
16
def group_by_value(list_to_group: List[Any]) -> Dict[Any, List[int]]:
    """
    Map each distinct value of list_to_group to the positions at which it occurs.

    Values are used as dictionary keys (think about overriding equality and hash if you
    pass your custom classes). Positions are stored in increasing order.
    """
    positions: Dict[Any, List[int]] = defaultdict(list)
    for position, value in enumerate(list_to_group):
        positions[value].append(position)
    return positions
31
+
32
+
33
def first_or_default(sequence: Union[List[Any], None],
                     condition: Union[Callable, None] = None,
                     default: Optional[Any] = None
                     ) -> Union[Any, None]:
    """
    Returns the first element of sequence (the first one fulfilling condition if a condition
    is passed), or default if sequence is empty/None or if no element qualifies.
    """
    if not sequence:
        return default

    for elt in sequence:
        if condition is None or condition(elt):
            return elt
    return default
46
+
47
+
48
def sort_by_keys(unsorted_dict: dict, reverse: bool = False) -> dict:
    """
    Build a new dictionary holding the items of unsorted_dict, ordered by key.
    Descending order if reverse is True, ascending otherwise.
    """
    ordered_items = sorted(unsorted_dict.items(), reverse=reverse)
    return {key: value for key, value in ordered_items}
55
+
56
+
57
def sort_by_values(unsorted_dict: dict, reverse: bool = False) -> dict:
    """
    Build a new dictionary holding the items of unsorted_dict, ordered by value.
    Descending order if reverse is True, ascending otherwise.
    """
    def _value_of(item):
        return item[1]

    ordered_items = sorted(unsorted_dict.items(), key=_value_of, reverse=reverse)
    return dict(ordered_items)
64
+
65
+
66
+ def first_func_or_default(sequence: list[Callable] | None,
67
+ elt: Any,
68
+ condition: Callable | None = None,
69
+ default: Any | None = None
70
+ ) -> Any | None:
71
+ """
72
+ Returns the first element of a functional sequence if a certain criteria is met
73
+ or default value if the criteria is never met.
74
+ The criteria is like so:
75
+ - If no condition is provided, then
76
+ just check that func applied on elt is not None
77
+ - If a condition is provided, then
78
+ check that condition applied on func(elt) is not None
79
+ """
80
+ if not sequence:
81
+ return default
82
+
83
+ return next((func(elt) for func in sequence if (condition or (lambda x: x))(func(elt))),
84
+ default)
85
+
86
+
87
def group_by(sequence: List[Any], key: Union[Callable, None]) -> Iterator[Tuple[Any, List[Any]]]:
    """
    Extension of itertools groupby method.

    Differences with the raw groupby:
        - the sequence is sorted (with the same key) before being grouped, so that equal keys
          always end up in a single group
        - each group is yielded as a list rather than as an iterator, which is more convenient
          whenever a group has to be iterated more than once
    """
    ordered = sorted(sequence, key=key)
    for group_key, members in groupby(ordered, key=key):
        yield group_key, list(members)
98
+
99
+
100
def lselect(sequence: List[Any], condition: Union[Callable, None] = None) -> List[Any]:
    """
    Return the list of the elements of sequence fulfilling condition.
    Without condition, the truthy elements are kept.
    """
    if condition is None:
        return [elt for elt in sequence if elt]
    return [elt for elt in sequence if condition(elt)]
105
+
106
+
107
def lselectfirst(sequence: List[Any], condition: Union[Callable, None] = None) -> Union[Any, None]:
    """
    Return the first element of sequence fulfilling condition, None if there is no such element.
    Without condition, the first truthy element is returned.

    The scan stops at the first match: condition is not evaluated on the following elements.
    """
    return next(filter(condition, sequence), None)
113
+
114
+
115
def first_transformed_or_default(sequence: List[Any], transformation: Callable) -> Union[Any, None]:
    """
    Apply transformation to the elements of sequence, in order, and return the first result
    that is not None. Return None if there is no such result.
    """
    for elt in sequence:
        transformed = transformation(elt)
        if transformed is not None:
            return transformed
    return None
121
+
122
+
123
def dict_to_class(data: dict):
    """
    Convert the (possibly nested) dictionaries stored in data to classes.

    The returned object is a dictionary with the same keys as data. Each value that is itself
    a dictionary is replaced by a class named after its key, whose attributes are the
    (recursively converted) items of that dictionary. All other values are kept as they are.
    """
    converted = {}
    for name, value in data.items():
        if isinstance(value, dict):
            converted[name] = type(name, (), dict_to_class(value))
        else:
            converted[name] = value
    return converted
128
+
129
+
130
+ def list_tuple_to_dict(data: list[tuple]) -> list[dict[str, Any]] | None:
131
+ """
132
+ Transforms the result of a sqlmodel query into a list of Dict
133
+ """
134
+ return [x._asdict() for x in data] if data else None # type: ignore[attr-defined]
ecodev_core/logger.py ADDED
@@ -0,0 +1,122 @@
1
+ """
2
+ Helpers for pretty logging
3
+ """
4
+ import logging
5
+ import sys
6
+ import traceback
7
+
8
# Names of the loggers that logger_get caps to the ERROR level
LIBS = [
    'azure',
    'passlib',
    'trimesh',
    'fiona',
    'urllib3',
    'botocore',
    'boto',
    'boto3',
    's3transfer',
]
10
+
11
+
12
def log_critical(message: str, logger):
    """
    Log message, then the current traceback, both through logger.error
    """
    for text in (message, traceback.format_exc()):
        logger.error(text)
18
+
19
+
20
def logger_get(name, level=logging.DEBUG):
    """
    Return the logger called name, configured with level and with a MyFormatter.

    Also calls logging.basicConfig on stdout and caps all LIBS loggers to ERROR.
    """
    logging.basicConfig(stream=sys.stdout, level=level)
    for lib in LIBS:
        _safe_log_setter(lib)
    named_logger = logging.getLogger(name)
    config_log(named_logger, level, MyFormatter())
    return named_logger
30
+
31
+
32
+ def _safe_log_setter(lib: str) -> None:
33
+ """
34
+ Safe logger. ERROR level not to be swamped by verbose library info.
35
+ """
36
+ try:
37
+ logging.getLogger(lib).setLevel(logging.ERROR)
38
+ except Exception:
39
+ pass
40
+
41
+
42
class MyFormatter(logging.Formatter):
    """
    Formatter printing coloured log lines of limited width.

    The module:funcName:lineno part is truncated/padded to cpath_width characters. Messages
    longer than message_width characters are wrapped on several lines, all sharing the same
    prefix.

    Typical format (colour escape codes omitted):
    2016-10-26 14:20:21 | DEBUG | 1234 | logger:log_me:57                 | This is a log
    """
    message_width = 110
    cpath_width = 32
    date_fmt = '%Y-%m-%d %H:%M:%S'

    pink = '\x1b[35m'
    green = '\x1b[32m'
    yellow = '\x1b[33m'
    red = '\x1b[31m'
    bold_red = '\x1b[31;1m'
    reset = '\x1b[0m'

    # Colour prefix per standard logging level
    FORMATS = {
        logging.DEBUG: pink,
        logging.INFO: green,
        logging.WARNING: yellow,
        logging.ERROR: red,
        logging.CRITICAL: bold_red,
    }

    def format(self, record):
        """
        Format record as '<time> | <level> | <process> | <module:func:line> | <message>',
        one such line per (wrapped) line of the message.
        """
        initial_record = f'{record.module}:{record.funcName}:{record.lineno}'
        cpath = initial_record[-self.cpath_width:].ljust(self.cpath_width)
        time = self.formatTime(record, self.date_fmt)
        prefix = f'{time} | {record.levelname} | {record.process} | {cpath}'

        # fixing max length
        limited_lines = []
        for line in record.getMessage().split('\n'):
            while len(line) > self.message_width:
                # split on the last space of the allowed width if any, hard split otherwise
                if (last_space_position := line[:self.message_width - 1].rfind(' ')) > 0:
                    splitting_position = last_space_position
                else:
                    splitting_position = self.message_width
                limited_lines.append(line[:splitting_position])
                line = line[splitting_position:]

            # don't forget end of line
            limited_lines.append(line)

        # formatting final message
        final_message = '\n'.join(f'{prefix} | {line}' for line in limited_lines).rstrip()

        # custom levels (absent from FORMATS) are printed without colour instead of failing
        colour = self.FORMATS.get(record.levelno, '')
        return f'{colour}{final_message}{self.reset}'
95
+
96
+
97
def config_log(logger, level, formatter):
    """ Configures the passed logger.

    Sets its level, disables propagation to its ancestors, and attaches a stdout handler
    using the passed formatter. All modules should log like this:
    '''
    from ecodev_core.logger import logger_get
    log = logger_get(__name__)
    '''
    If the function is called more than once on the same logger, no second stdout handler
    is added, to avoid duplicate logging.

    Args:
        logger: logging object
        level: Logging level
        formatter: Logging format

    """
    logger.setLevel(level)
    logger.propagate = False

    console_handler = logging.StreamHandler(stream=sys.stdout)
    target_stream = console_handler.stream
    target_name = getattr(target_stream, 'name', None)

    def _writes_to_target(handler) -> bool:
        """
        True if handler already writes to the stream targeted by console_handler
        """
        # Not all handlers own a stream, and not all streams own a name
        stream = getattr(handler, 'stream', None)
        if stream is None:
            return False
        if stream is target_stream:
            return True
        stream_name = getattr(stream, 'name', None)
        return stream_name is not None and stream_name == target_name

    if not any(_writes_to_target(handler) for handler in logger.handlers):
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)
@@ -0,0 +1,69 @@
1
+ """
2
+ Module implementing some utility methods on pandas types
3
+ """
4
+ import tempfile
5
+ from base64 import b64decode
6
+ from pathlib import Path
7
+ from typing import Any
8
+ from typing import Callable
9
+ from typing import Dict
10
+ from typing import Optional
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+
15
+
16
def pd_equals(prediction: pd.DataFrame, gt_path: Path) -> None:
    """
    Assert that prediction is equal to the ground truth csv stored at gt_path.

    Since some Nones are serialized as Nans by pandas (heavy type inference), the prediction
    is first written to a temporary csv and reloaded, so that both ground truth and prediction
    benefit from the same type inferences before being compared.

    Args:
        prediction: the DataFrame to check
        gt_path: location of the ground truth csv

    Raises:
        AssertionError: (from pd.testing.assert_frame_equal) if the two frames differ
    """
    with tempfile.TemporaryDirectory() as folder:
        tmp_path = Path(folder) / 'tmp.csv'
        prediction.to_csv(tmp_path, index=False)
        reloaded_prediction = pd.read_csv(tmp_path)
    pd.testing.assert_frame_equal(reloaded_prediction, pd.read_csv(gt_path))
27
+
28
+
29
def jsonify_series(row: pd.Series) -> Dict:
    """
    Convert a serie into a dictionary in which float nans are replaced by Nones
    """
    json_ready = {}
    for key, value in row.to_dict().items():
        is_nan = isinstance(value, float) and np.isnan(value)
        json_ready[key] = None if is_nan else value
    return json_ready
35
+
36
+
37
def get_excelfile(contents: str) -> pd.ExcelFile:
    """
    Function which converts user xlsx file upload into a pd.ExcelFile

    Args:
        contents: '<header>,<base64 encoded file>' string (exactly one comma expected)

    Raises:
        ValueError: if contents does not hold exactly one comma
    """
    from io import BytesIO

    # the part located before the comma is not used
    _, content_string = contents.split(',')
    # pandas deprecated raw bytes inputs: hand the decoded file over as a file-like object
    return pd.ExcelFile(BytesIO(b64decode(content_string)))
44
+
45
+
46
def safe_drop_columns(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
    """
    Returns df without the passed columns. Columns that are not present in df are ignored.
    """
    columns_to_drop = [col for col in columns if col in df.columns]
    return df.drop(columns=columns_to_drop)
53
+
54
+
55
def is_null(value: Any) -> bool:
    """
    Checks if a value is null (None or float nan) or not
    """
    if value is None:
        return True
    return isinstance(value, float) and np.isnan(value)
60
+
61
+
62
def get_value(column: str, method: Callable, row: pd.Series) -> Optional[Any]:
    """
    Apply method to row[column] and return the result.
    Return None if column is not in the row index or if row[column] is null.
    """
    if column in row.index and not is_null(row[column]):
        return method(row[column])
    return None
@@ -0,0 +1,21 @@
1
+ """
2
+ Module implementing all permission levels an application user can have
3
+ """
4
+ from enum import Enum
5
+ from enum import unique
6
+
7
+
8
@unique
class Permission(str, Enum):
    """
    All permission levels an application user can have.

    Members are strings (str mixin) and @unique forbids two members sharing the same value.
    """
    # NOTE(review): Consultant and Client do not follow the upper case naming of the other members
    ADMIN = 'Admin'
    Consultant = 'Consultant'
    Client = 'Client'
    CLIENT_ADMIN = 'Client Admin'
    DENIED_PERMISSION = 'Denied Permission'
    FORM_ADMIN = 'Form Admin'
    VALIDATOR = 'Validator'
    APPRAISER = 'Appraiser'
    USER = 'User'
@@ -0,0 +1,33 @@
1
+ """
2
+ Simple Pydantic wrapper classes around BaseModel to accommodate for orm and frozen cases
3
+ """
4
+ from pydantic import BaseModel
5
+ from pydantic import ConfigDict
6
+
7
+
8
class Basic(BaseModel):
    """
    Non frozen pydantic model accepting arbitrary types
    """
    model_config = ConfigDict(arbitrary_types_allowed=True, frozen=False)
13
+
14
+
15
class Frozen(BaseModel):
    """
    Pydantic model configured with frozen=True
    """
    model_config = ConfigDict(frozen=True)
20
+
21
+
22
class CustomFrozen(Frozen):
    """
    Frozen subclass that additionally accepts arbitrary (custom) types.

    NOTE(review): relies on pydantic merging this model_config with the Frozen one - confirm.
    """
    model_config = ConfigDict(arbitrary_types_allowed=True)
27
+
28
+
29
class OrmFrozen(CustomFrozen):
    """
    CustomFrozen subclass that can additionally be built from the attributes of an
    orm like object (from_attributes=True).

    NOTE(review): relies on pydantic merging this model_config with the parent ones - confirm.
    """
    model_config = ConfigDict(from_attributes=True)
@@ -0,0 +1,52 @@
1
+ """
2
+ Module regrouping low level reading and writing helper methods
3
+ """
4
+ import json
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Dict
8
+ from typing import List
9
+ from typing import Union
10
+
11
+ import yaml
12
+
13
+
14
def write_json_file(json_data: Union[Dict, List], file_path: Path):
    """
    Write json_data at file_path location (utf-8, 4 spaces indentation).

    The file is created with a umask of 0. The umask that was in force before the call
    is restored afterwards, so that the rest of the process is left unaffected.
    """
    previous_umask = os.umask(0)
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(json.dumps(json_data, indent=4))
    finally:
        os.umask(previous_umask)
21
+
22
+
23
def load_json_file(file_path: Path):
    """
    Parse and return the content of the (utf-8 encoded) json file located at file_path
    """
    with open(file_path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)
31
+
32
+
33
def load_yaml_file(file_path: Path):
    """
    Load a yaml file at file_path location.

    The file is read as utf-8 (as done for json files in load_json_file) rather than with
    the platform dependent default encoding, and parsed with yaml.safe_load.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        loaded_yaml = yaml.safe_load(file)

    return loaded_yaml
41
+
42
+
43
def make_dir(directory: Path):
    """
    Helper that create the directory "directory" (and its missing parents) if it doesn't
    exist yet.

    Directories are created with a umask of 0. The umask that was in force before the call
    is restored afterwards, so that the rest of the process is left unaffected.

    Raises:
        OSError: if the creation failed and directory does not exist afterwards
    """
    previous_umask = os.umask(0)
    try:
        os.makedirs(directory)
    except OSError as error:
        # An already existing directory is fine. Path() also accepts str inputs.
        if not Path(directory).is_dir():
            raise OSError(f'directory={directory!r} should exist but does not.: {error}') from error
    finally:
        os.umask(previous_umask)