lomas-core 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ Metadata-Version: 2.1
2
+ Name: lomas-core
3
+ Version: 0.3.4
4
+ Summary: Lomas core.
5
+ Home-page: https://github.com/dscc-admin-ch/lomas/
6
+ Author: Data Science Competence Center, Swiss Federal Statistical Office
7
+ Author-email: dscc@bfs.admin.ch
8
+ License: MIT
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Natural Language :: English
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Classifier: Topic :: Security
20
+ Requires-Python: >=3.11, <3.13
21
+ Description-Content-Type: text/markdown
22
+ Requires-Dist: diffprivlib==0.6.5
23
+ Requires-Dist: diffprivlib_logger>=0.0.3
24
+ Requires-Dist: fastapi>=0.111.1
25
+ Requires-Dist: numpy>=1.26.4
26
+ Requires-Dist: opendp==0.10.0
27
+ Requires-Dist: opendp_logger>=0.3.0
28
+ Requires-Dist: pandas>=2.2.2
29
+ Requires-Dist: pymongo>=4.6.3
30
+ Requires-Dist: scikit-learn>=1.4.2
31
+ Requires-Dist: smartnoise-synth>=1.0.4
32
+ Requires-Dist: smartnoise_synth_logger>=0.0.3
33
+
34
+ <h1 align="center">
35
+ <picture>
36
+ <source media="(prefers-color-scheme: dark)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_darkmode_txt.png" width="300">
37
+ <source media="(prefers-color-scheme: light)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png" width="300">
38
+ <img alt="This is what is displayed on Pypi" src="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png">
39
+ </picture>
40
+ </h1><br>
41
+
42
+ # Core
43
+ See the technical documentation of core: https://dscc-admin-ch.github.io/lomas-docs/core_api.html.
@@ -0,0 +1,10 @@
1
+ <h1 align="center">
2
+ <picture>
3
+ <source media="(prefers-color-scheme: dark)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_darkmode_txt.png" width="300">
4
+ <source media="(prefers-color-scheme: light)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png" width="300">
5
+ <img alt="This is what is displayed on Pypi" src="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png">
6
+ </picture>
7
+ </h1><br>
8
+
9
+ # Core
10
+ See the technical documentation of core: https://dscc-admin-ch.github.io/lomas-docs/core_api.html.
File without changes
@@ -0,0 +1,34 @@
1
+ from enum import StrEnum
2
+
3
# Server error messages
# Generic message returned to clients on HTTP 500 responses; the detailed
# error is only logged server-side (see error_handler) so that internal
# details do not leak to users.
INTERNAL_SERVER_ERROR = (
    "Internal server error. Please contact the administrator of this service."
)
7
+
8
+
9
class DPLibraries(StrEnum):
    """Name of DP Library used in the query."""

    # Values are the wire-format identifiers used in request/response models.
    SMARTNOISE_SQL = "smartnoise_sql"
    SMARTNOISE_SYNTH = "smartnoise_synth"
    OPENDP = "opendp"
    DIFFPRIVLIB = "diffprivlib"
16
+
17
+
18
# Smartnoise synth
class SSynthMarginalSynthesizer(StrEnum):
    """Marginal Synthesizer models for smartnoise synth."""

    # Values match the synthesizer names accepted by the snsynth package.
    AIM = "aim"
    MWEM = "mwem"
    MST = "mst"
    PAC_SYNTH = "pacsynth"
26
+
27
+
28
class SSynthGanSynthesizer(StrEnum):
    """GAN Synthesizer models for smartnoise synth."""

    # Values match the synthesizer names accepted by the snsynth package.
    DP_CTGAN = "dpctgan"
    PATE_CTGAN = "patectgan"
    PATE_GAN = "pategan"
    DP_GAN = "dpgan"
@@ -0,0 +1,125 @@
1
+ from typing import Type
2
+
3
+ from fastapi import FastAPI, Request, status
4
+ from fastapi.responses import JSONResponse
5
+ from pymongo.errors import WriteConcernError
6
+
7
+ from lomas_core.constants import INTERNAL_SERVER_ERROR
8
+ from lomas_core.logger import LOG
9
+
10
+
11
class InvalidQueryException(Exception):
    """
    Custom exception for invalid queries.

    For example, this exception will occur when the query:
    - is not an opendp measurement
    - cannot be reconstructed properly (for opendp and diffprivlib)
    """

    def __init__(self, error_message: str) -> None:
        """Invalid Query Exception initialisation.

        Args:
            error_message (str): Message explaining why the query is invalid.
        """
        # Forward to Exception so str(exc) and exc.args carry the message
        # (previously both were empty).
        super().__init__(error_message)
        self.error_message = error_message
27
+
28
+
29
class ExternalLibraryException(Exception):
    """
    Custom exception for issues within external libraries.

    This exception will occur when the processes fail within the
    external libraries (smartnoise-sql, opendp, diffprivlib)
    """

    def __init__(self, library: str, error_message: str) -> None:
        """External Query Exception initialisation.

        Args:
            library (str): Name of the external library that failed.
            error_message (str): Message describing the library failure.
        """
        # Forward to Exception so str(exc) and exc.args carry the message
        # (previously both were empty).
        super().__init__(error_message)
        self.library = library
        self.error_message = error_message
46
+
47
+
48
class UnauthorizedAccessException(Exception):
    """
    Custom exception for unauthorized access:

    (unknown user, no access to dataset, etc)
    """

    def __init__(self, error_message: str) -> None:
        """Unauthorized Access Exception initialisation.

        Args:
            error_message (str): Message explaining the denied access.
        """
        # Forward to Exception so str(exc) and exc.args carry the message.
        super().__init__(error_message)
        self.error_message = error_message
57
+
58
+
59
class InternalServerException(Exception):
    """Custom exception for issues within server internal functionalities."""

    def __init__(self, error_message: str) -> None:
        """Internal Server Exception initialisation.

        Args:
            error_message (str): Internal description of the failure
                (logged server-side, never sent to clients).
        """
        # Forward to Exception so str(exc) and exc.args carry the message.
        super().__init__(error_message)
        self.error_message = error_message
64
+
65
+
66
# Exception types the server treats as expected / recognized failures.
# NOTE(review): WriteConcernError (pymongo) has no dedicated handler in this
# module — presumably it is caught by callers of KNOWN_EXCEPTIONS; confirm.
KNOWN_EXCEPTIONS: tuple[Type[BaseException], ...] = (
    ExternalLibraryException,
    InternalServerException,
    InvalidQueryException,
    UnauthorizedAccessException,
    WriteConcernError,
)
73
+
74
+
75
# Custom exception handlers
def add_exception_handlers(app: FastAPI) -> None:
    """
    Translates custom exceptions to JSONResponses.

    Registers one handler per custom exception type:
    - InvalidQueryException       -> 400 Bad Request
    - ExternalLibraryException    -> 422 Unprocessable Entity
    - UnauthorizedAccessException -> 403 Forbidden
    - InternalServerException     -> 500 Internal Server Error

    Args:
        app (FastAPI): A fastapi App.
    """

    @app.exception_handler(InvalidQueryException)
    async def invalid_query_exception_handler(
        _: Request, exc: InvalidQueryException
    ) -> JSONResponse:
        # Lazy %-args: the message is only built if the log level is enabled.
        LOG.info("InvalidQueryException raised: %s", exc.error_message)
        return JSONResponse(
            status_code=status.HTTP_400_BAD_REQUEST,
            content={"InvalidQueryException": exc.error_message},
        )

    @app.exception_handler(ExternalLibraryException)
    async def external_library_exception_handler(
        _: Request, exc: ExternalLibraryException
    ) -> JSONResponse:
        LOG.info("ExternalLibraryException raised: %s", exc.error_message)
        return JSONResponse(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            content={
                "ExternalLibraryException": exc.error_message,
                "library": exc.library,
            },
        )

    @app.exception_handler(UnauthorizedAccessException)
    async def unauthorized_access_exception_handler(
        _: Request, exc: UnauthorizedAccessException
    ) -> JSONResponse:
        LOG.info("UnauthorizedAccessException raised: %s", exc.error_message)
        return JSONResponse(
            status_code=status.HTTP_403_FORBIDDEN,
            content={"UnauthorizedAccessException": exc.error_message},
        )

    @app.exception_handler(InternalServerException)
    async def internal_server_exception_handler(
        _: Request, exc: InternalServerException
    ) -> JSONResponse:
        # Internal failures indicate a server-side bug: log at ERROR (was
        # INFO). The client only receives the generic message so internal
        # details do not leak.
        LOG.error("InternalServerException raised: %s", exc.error_message)
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={"InternalServerException": INTERNAL_SERVER_ERROR},
        )
@@ -0,0 +1,29 @@
1
+ import logging
2
+ import logging.config
3
+
4
# Logging configuration: one console (stdout) handler at INFO level attached
# to the root logger, so every module logger inherits it.
LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "standard": {
            # Implicit string concatenation keeps the format readable.
            # The previous backslash continuation inside the string literal
            # leaked the source indentation whitespace into every log line.
            "format": (
                "%(asctime)s - %(levelname)s - "
                "[%(filename)s:%(lineno)s - %(funcName)s()] - %(message)s"
            )
        }
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "formatter": "standard",
            "stream": "ext://sys.stdout",
            "level": "INFO",
        }
    },
    "root": {
        "level": "INFO",
        "handlers": ["console"],
        "propagate": True,
    },
}
logging.config.dictConfig(LOGGING_CONFIG)

# Module-wide logger; getLogger("") returns the root logger configured above.
LOG = logging.getLogger("")
File without changes
@@ -0,0 +1,249 @@
1
+ from datetime import datetime
2
+ from typing import Annotated, Any, Dict, List, Literal, Optional, Union
3
+
4
+ from pydantic import BaseModel, Discriminator, Field, Tag, model_validator
5
+
6
+ from lomas_core.models.constants import (
7
+ CARDINALITY_FIELD,
8
+ CATEGORICAL_TYPE_PREFIX,
9
+ DB_TYPE_FIELD,
10
+ TYPE_FIELD,
11
+ MetadataColumnType,
12
+ Precision,
13
+ PrivateDatabaseType,
14
+ )
15
+
16
+ # Dataset of User
17
+ # -----------------------------------------------------------------------------
18
+
19
+
20
class DatasetOfUser(BaseModel):
    """BaseModel for information about a user's budget on a dataset."""

    dataset_name: str
    # Budget initially granted to the user on this dataset.
    initial_epsilon: float
    initial_delta: float
    # Budget already consumed by the user's queries on this dataset.
    total_spent_epsilon: float
    total_spent_delta: float
28
+
29
+
30
+ # User
31
+ # -----------------------------------------------------------------------------
32
+
33
+
34
class User(BaseModel):
    """BaseModel for a user in a user collection."""

    user_name: str
    # Global switch: when False the user may not submit any query.
    may_query: bool
    # Datasets this user has access to, with per-dataset budgets.
    datasets_list: List[DatasetOfUser]
40
+
41
+
42
class UserCollection(BaseModel):
    """BaseModel for users collection."""

    users: List[User]
46
+
47
+
48
+ # Dataset Access Data
49
+ # -----------------------------------------------------------------------------
50
+
51
+
52
class DSAccess(BaseModel):
    """BaseModel for access info to a private dataset.

    Base class; concrete subclasses narrow database_type to a Literal that
    acts as the discriminator (see DSInfo).
    """

    database_type: str
56
+
57
+
58
class DSPathAccess(DSAccess):
    """BaseModel for a local dataset."""

    # Discriminator value for path-based (local file) access.
    database_type: Literal[PrivateDatabaseType.PATH]  # type: ignore
    path: str
63
+
64
+
65
class DSS3Access(DSAccess):
    """BaseModel for a dataset on S3."""

    # Discriminator value for S3-based access.
    database_type: Literal[PrivateDatabaseType.S3]  # type: ignore
    endpoint_url: str
    bucket: str
    key: str
    # Optional inline credentials; when absent, credentials are presumably
    # resolved via credentials_name against S3CredentialsConfig — confirm.
    access_key_id: Optional[str] = None
    secret_access_key: Optional[str] = None
    credentials_name: str
75
+
76
+
77
class DSInfo(BaseModel):
    """BaseModel for a dataset."""

    dataset_name: str
    # Access info for the private data; the "database_type" field selects
    # between path and S3 variants.
    dataset_access: Annotated[
        Union[DSPathAccess, DSS3Access], Field(discriminator=DB_TYPE_FIELD)
    ]
    # Access info for the dataset's metadata file, same discrimination.
    metadata_access: Annotated[
        Union[DSPathAccess, DSS3Access], Field(discriminator=DB_TYPE_FIELD)
    ]
87
+
88
+
89
class DatasetsCollection(BaseModel):
    """BaseModel for datasets collection."""

    datasets: List[DSInfo]
93
+
94
+
95
+ # Metadata
96
+ # -----------------------------------------------------------------------------
97
+
98
+
99
class ColumnMetadata(BaseModel):
    """Base model for column metadata."""

    # Whether the column identifies an individual (privacy unit).
    private_id: bool = False
    nullable: bool = False
    # See issue #323 for checking this and validating.

    # Optional partition-related bounds; all must be strictly positive.
    max_partition_length: Optional[Annotated[int, Field(gt=0)]] = None
    max_influenced_partitions: Optional[Annotated[int, Field(gt=0)]] = None
    max_partition_contributions: Optional[Annotated[int, Field(gt=0)]] = None
109
+
110
+
111
class StrMetadata(ColumnMetadata):
    """Model for string metadata."""

    type: Literal[MetadataColumnType.STRING]
115
+
116
+
117
class CategoricalColumnMetadata(ColumnMetadata):
    """Model for categorical column metadata.

    Subclasses declare the `categories` and `cardinality` fields checked by
    the validator below.
    """

    @model_validator(mode="after")
    def validate_categories(self):
        """Makes sure number of categories matches cardinality."""
        if len(self.categories) == self.cardinality:
            return self
        raise ValueError("Number of categories should be equal to cardinality.")
126
+
127
+
128
class StrCategoricalMetadata(CategoricalColumnMetadata):
    """Model for categorical string metadata."""

    type: Literal[MetadataColumnType.STRING]
    # Must equal len(categories); enforced by the parent validator.
    cardinality: int
    categories: List[str]
134
+
135
+
136
class BoundedColumnMetadata(ColumnMetadata):
    """Model for columns with bounded data.

    Subclasses declare the `lower` and `upper` fields checked by the
    validator below.
    """

    @model_validator(mode="after")
    def validate_bounds(self):
        """Validates column bounds."""
        both_bounds_set = self.lower is not None and self.upper is not None
        if both_bounds_set and self.lower > self.upper:
            raise ValueError("Lower bound cannot be larger than upper bound.")
        return self
150
+
151
+
152
class IntMetadata(BoundedColumnMetadata):
    """Model for integer column metadata."""

    type: Literal[MetadataColumnType.INT]
    # Bit width of the integer type (32 or 64).
    precision: Precision
    # Inclusive bounds; lower <= upper enforced by the parent validator.
    lower: int
    upper: int
159
+
160
+
161
class IntCategoricalMetadata(CategoricalColumnMetadata):
    """Model for integer categorical column metadata."""

    type: Literal[MetadataColumnType.INT]
    # Bit width of the integer type (32 or 64).
    precision: Precision
    # Must equal len(categories); enforced by the parent validator.
    cardinality: int
    categories: List[int]
168
+
169
+
170
class FloatMetadata(BoundedColumnMetadata):
    """Model for float column metadata."""

    type: Literal[MetadataColumnType.FLOAT]
    # Bit width of the float type (32 or 64).
    precision: Precision
    # Inclusive bounds; lower <= upper enforced by the parent validator.
    lower: float
    upper: float
177
+
178
+
179
class BooleanMetadata(ColumnMetadata):
    """Model for boolean column metadata."""

    type: Literal[MetadataColumnType.BOOLEAN]
183
+
184
+
185
class DatetimeMetadata(BoundedColumnMetadata):
    """Model for datetime column metadata."""

    type: Literal[MetadataColumnType.DATETIME]
    # Inclusive bounds; lower <= upper enforced by the parent validator.
    lower: datetime
    upper: datetime
191
+
192
+
193
def get_column_metadata_discriminator(v: Any) -> str:
    """Discriminator function for determining the type of column metadata.

    Works on both raw dicts (during validation) and model instances
    (during serialization).

    Args:
        v (Any): The unparsed column metadata (either dict or class object)

    Raises:
        ValueError: If the column type cannot be found.

    Returns:
        str: The metadata string type.
    """
    if isinstance(v, dict):
        col_type = v.get(TYPE_FIELD)
        has_cardinality = CARDINALITY_FIELD in v
    else:
        # Default to None instead of letting getattr raise AttributeError,
        # so a missing type attribute surfaces as the uniform ValueError
        # below (previously it raised AttributeError for objects).
        col_type = getattr(v, TYPE_FIELD, None)
        has_cardinality = hasattr(v, CARDINALITY_FIELD)

    if not isinstance(col_type, str):
        raise ValueError("Could not find column type.")

    # String/int columns carrying a cardinality are the categorical variants.
    if (
        col_type in (MetadataColumnType.STRING, MetadataColumnType.INT)
        and has_cardinality
    ):
        col_type = f"{CATEGORICAL_TYPE_PREFIX}{col_type}"

    return col_type
226
+
227
+
228
class Metadata(BaseModel):
    """BaseModel for a metadata format."""

    # NOTE(review): field semantics follow smartnoise-sql style metadata
    # (max_ids / rows / row_privacy / censor_dims) — confirm against the
    # metadata files consumed by the server.
    max_ids: Annotated[int, Field(gt=0)]
    rows: Annotated[int, Field(gt=0)]
    row_privacy: bool
    censor_dims: Optional[bool] = False
    # Per-column metadata. The concrete model is chosen by the custom
    # discriminator, which distinguishes categorical variants by the
    # presence of a "cardinality" field.
    columns: Dict[
        str,
        Annotated[
            Union[
                Annotated[StrMetadata, Tag(MetadataColumnType.STRING)],
                Annotated[StrCategoricalMetadata, Tag(MetadataColumnType.CAT_STRING)],
                Annotated[IntMetadata, Tag(MetadataColumnType.INT)],
                Annotated[IntCategoricalMetadata, Tag(MetadataColumnType.CAT_INT)],
                Annotated[FloatMetadata, Tag(MetadataColumnType.FLOAT)],
                Annotated[BooleanMetadata, Tag(MetadataColumnType.BOOLEAN)],
                Annotated[DatetimeMetadata, Tag(MetadataColumnType.DATETIME)],
            ],
            Discriminator(get_column_metadata_discriminator),
        ],
    ]
@@ -0,0 +1,104 @@
1
+ from typing import Annotated, List, Literal, Union
2
+
3
+ from pydantic import BaseModel, ConfigDict, Field
4
+
5
+ from lomas_core.models.constants import (
6
+ AdminDBType,
7
+ PrivateDatabaseType,
8
+ TimeAttackMethod,
9
+ )
10
+
11
+
12
class TimeAttack(BaseModel):
    """BaseModel for configs to prevent timing attacks."""

    # Countermeasure to apply ("jitter" or "stall").
    method: TimeAttackMethod
    # Magnitude of the countermeasure; presumably seconds — confirm with
    # the server code applying it.
    magnitude: float
17
+
18
+
19
class Server(BaseModel):
    """BaseModel for uvicorn server configs."""

    time_attack: TimeAttack
    host_ip: str
    host_port: int
    log_level: str
    # Auto-reload on code change (development only).
    reload: bool
    workers: int
28
+
29
+
30
class DBConfig(BaseModel):
    """BaseModel for database type config.

    Marker base class; concrete subclasses add a db_type Literal used as
    the discriminator in Config.admin_database.
    """
32
+
33
+
34
class YamlDBConfig(DBConfig):
    """BaseModel for dataset store configs in case of a Yaml database."""

    # Discriminator value selecting the YAML backend.
    db_type: Literal[AdminDBType.YAML]  # type: ignore
    db_file: str
39
+
40
+
41
class MongoDBConfig(DBConfig):
    """BaseModel for dataset store configs in case of a MongoDB database."""

    # Discriminator value selecting the MongoDB backend.
    db_type: Literal[AdminDBType.MONGODB]  # type: ignore
    address: str
    port: int
    username: str
    password: str
    db_name: str
    # Connection-pool tuning forwarded to the pymongo client.
    max_pool_size: int
    min_pool_size: int
    max_connecting: int
53
+
54
+
55
class PrivateDBCredentials(BaseModel):
    """BaseModel for private database credentials.

    Marker base class; concrete subclasses add a db_type Literal.
    """
57
+
58
+
59
class S3CredentialsConfig(PrivateDBCredentials):
    """BaseModel for S3 database credentials."""

    # extra="allow" keeps unknown credential fields instead of rejecting them.
    model_config = ConfigDict(extra="allow")

    db_type: Literal[PrivateDatabaseType.S3]  # type: ignore
    # Name used by DSS3Access entries to reference this credential set.
    credentials_name: str
    access_key_id: str
    secret_access_key: str
68
+
69
+
70
class OpenDPConfig(BaseModel):
    """BaseModel for openDP library config."""

    # OpenDP feature flags enabled when building pipelines.
    contrib: bool
    floating_point: bool
    honest_but_curious: bool
76
+
77
+
78
class DPLibraryConfig(BaseModel):
    """BaseModel for DP libraries config."""

    opendp: OpenDPConfig
82
+
83
+
84
class Config(BaseModel):
    """Server runtime config."""

    # Develop mode
    develop_mode: bool

    # Server configs
    server: Server

    # A limit on the rate which users can submit answers
    submit_limit: float

    # Administration database backend, selected by the "db_type" field.
    admin_database: Annotated[
        Union[MongoDBConfig, YamlDBConfig], Field(discriminator="db_type")
    ]

    # NOTE(review): Union with a single member and a discriminator on a List
    # field are unusual — presumably kept so more credential types can be
    # added later; confirm before simplifying.
    private_db_credentials: List[Union[S3CredentialsConfig]] = Field(
        ..., discriminator="db_type"
    )

    dp_libraries: DPLibraryConfig
@@ -0,0 +1,67 @@
1
+ from enum import IntEnum, StrEnum
2
+
3
# Field names
# -----------------------------------------------------------------------------

# Field holding the database type on dataset/metadata access models;
# used as the pydantic discriminator in DSInfo.
DB_TYPE_FIELD = "database_type"
# Field holding the column type on column metadata models.
TYPE_FIELD = "type"
# Presence of this field marks a column's metadata as categorical.
CARDINALITY_FIELD = "cardinality"
9
+
10
+
11
+ # Metadata
12
+ # -----------------------------------------------------------------------------
13
+
14
+
15
class MetadataColumnType(StrEnum):
    """Column types for metadata."""

    # CAT_* values are the base type prefixed with CATEGORICAL_TYPE_PREFIX;
    # they serve as discriminator tags for the categorical model variants.
    STRING = "string"
    CAT_STRING = "categorical_string"
    INT = "int"
    CAT_INT = "categorical_int"
    FLOAT = "float"
    BOOLEAN = "boolean"
    DATETIME = "datetime"
25
+
26
+
27
# Prefix prepended to a base column type ("string", "int") to form the
# discriminator tag of its categorical variant (e.g. "categorical_string").
CATEGORICAL_TYPE_PREFIX = "categorical_"
28
+
29
+
30
class Precision(IntEnum):
    """Precision of integer and float data."""

    # Values are the width in bits of the corresponding numeric type.
    SINGLE = 32
    DOUBLE = 64
35
+
36
+
37
+ # Config / Dataset Connectors
38
+ # -----------------------------------------------------------------------------
39
+
40
+
41
+ class ConfigKeys(StrEnum):
42
+ """Keys of the configuration file."""
43
+
44
+ RUNTIME_ARGS: str = "runtime_args"
45
+ SETTINGS: str = "settings"
46
+
47
+
48
+ class AdminDBType(StrEnum):
49
+ """Types of administration databases."""
50
+
51
+ YAML: str = "yaml"
52
+ MONGODB: str = "mongodb"
53
+
54
+
55
class TimeAttackMethod(StrEnum):
    """Possible methods against timing attacks."""

    # Add random noise to response time.
    JITTER = "jitter"
    # Pad response time to a fixed duration.
    STALL = "stall"
60
+
61
+
62
# Private Databases
class PrivateDatabaseType(StrEnum):
    """Type of Private Database for the private data."""

    PATH = "PATH_DB"
    S3 = "S3_DB"
@@ -0,0 +1,170 @@
1
+ from typing import List, Optional, Union
2
+
3
+ from pydantic import BaseModel, ConfigDict, Field
4
+
5
+ from lomas_core.constants import (
6
+ DPLibraries,
7
+ SSynthGanSynthesizer,
8
+ SSynthMarginalSynthesizer,
9
+ )
10
+ from lomas_core.error_handler import InternalServerException
11
+
12
+
13
class LomasRequestModel(BaseModel):
    """Base class for all types of requests to the lomas server.

    We differentiate between requests and queries:
    - a request does not necessarily require an algorithm
      to be executed on the private dataset (e.g. some cost requests).
    - a query requires executing an algorithm on a private
      dataset (or a potentially a dummy).
    """

    # Name of the dataset the request targets.
    dataset_name: str
24
+
25
+
26
class GetDummyDataset(LomasRequestModel):
    """Model input to get a dummy dataset."""

    # Number of rows of the generated dummy dataset (strictly positive).
    dummy_nb_rows: int = Field(..., gt=0)
    # Seed for reproducible dummy data generation.
    dummy_seed: int
31
+
32
+
33
class QueryModel(LomasRequestModel):
    """
    Base input model for any query on a dataset.

    We differentiate between requests and queries:
    - a request does not necessarily require an algorithm
      to be executed on the private dataset (e.g. some cost requests).
    - a query requires executing an algorithm on a private
      dataset (or a potentially a dummy).
    """
43
+
44
+
45
class DummyQueryModel(QueryModel):
    """Input model for a query on a dummy dataset."""

    # Number of rows of the generated dummy dataset (strictly positive).
    dummy_nb_rows: int = Field(..., gt=0)
    # Seed for reproducible dummy data generation.
    dummy_seed: int
50
+
51
+
52
+ # SmartnoiseSQL
53
+ # ----------------------------------------------------------------------------
54
class SmartnoiseSQLRequestModel(LomasRequestModel):
    """Base input model for a smartnoise-sql request."""

    # SQL query to execute with differential privacy.
    query_str: str
    epsilon: float = Field(..., gt=0)
    delta: float = Field(..., gt=0)
    # Per-statistic DP mechanism overrides forwarded to smartnoise-sql.
    mechanisms: dict
61
+
62
+
63
class SmartnoiseSQLQueryModel(SmartnoiseSQLRequestModel, QueryModel):
    """Base input model for a smartnoise-sql query."""

    # Whether smartnoise-sql post-processing is applied to the result.
    postprocess: bool
67
+
68
+
69
class SmartnoiseSQLDummyQueryModel(SmartnoiseSQLQueryModel, DummyQueryModel):
    """Input model for a smartnoise-sql query on a dummy dataset."""
71
+
72
+
73
+ # SmartnoiseSynth
74
+ # ----------------------------------------------------------------------------
75
class SmartnoiseSynthRequestModel(LomasRequestModel):
    """Base input model for a SmartnoiseSynth request."""

    # Synthesizer model, either a marginal or a GAN-based one.
    synth_name: Union[SSynthMarginalSynthesizer, SSynthGanSynthesizer]
    epsilon: float = Field(..., gt=0)
    delta: Optional[float] = None
    # Columns of the dataset to synthesize.
    select_cols: List
    # Extra keyword parameters forwarded to the synthesizer.
    synth_params: dict
    nullable: bool
    # Serialized constraints for the synthesizer (smartnoise_synth_logger).
    constraints: str
85
+
86
+
87
class SmartnoiseSynthQueryModel(SmartnoiseSynthRequestModel, QueryModel):
    """Base input model for a smartnoise-synth query."""

    # If True, return the fitted synthesizer model; otherwise return samples.
    return_model: bool
    condition: str
    nb_samples: int
93
+
94
+
95
class SmartnoiseSynthDummyQueryModel(SmartnoiseSynthQueryModel, DummyQueryModel):
    """Input model for a smartnoise-synth query on a dummy dataset."""

    # Same as normal query.
    # NOTE(review): these fields are already inherited from
    # SmartnoiseSynthQueryModel; the re-declaration looks redundant but is
    # kept as-is to avoid changing field ordering in the schema.
    return_model: bool
    condition: str
    nb_samples: int
102
+
103
+
104
+ # OpenDP
105
+ # ----------------------------------------------------------------------------
106
class OpenDPRequestModel(LomasRequestModel):
    """Base input model for an opendp request."""

    # use_attribute_docstrings exposes the field docstring below in the
    # generated schema documentation.
    model_config = ConfigDict(use_attribute_docstrings=True)
    opendp_json: str
    """Opendp pipeline."""
    # Delta to fix when the pipeline's privacy loss is an epsilon(delta)
    # curve — semantics inferred from the name; confirm with the handler.
    fixed_delta: Optional[float] = None
113
+
114
+
115
class OpenDPQueryModel(OpenDPRequestModel, QueryModel):
    """Base input model for an opendp query."""
117
+
118
+
119
class OpenDPDummyQueryModel(OpenDPRequestModel, DummyQueryModel):
    """Input model for an opendp query on a dummy dataset."""
121
+
122
+
123
+ # DiffPrivLib
124
+ # ----------------------------------------------------------------------------
125
class DiffPrivLibRequestModel(LomasRequestModel):
    """Base input model for a diffprivlib request."""

    # Serialized diffprivlib pipeline (diffprivlib_logger format).
    diffprivlib_json: str
    feature_columns: list
    target_columns: Optional[list]
    # Fraction of the dataset held out for testing (exclusive 0..1).
    test_size: float = Field(..., gt=0.0, lt=1.0)
    test_train_split_seed: int
    imputer_strategy: str
134
+
135
+
136
class DiffPrivLibQueryModel(DiffPrivLibRequestModel, QueryModel):
    """Base input model for a diffprivlib query."""
138
+
139
+
140
class DiffPrivLibDummyQueryModel(DiffPrivLibQueryModel, DummyQueryModel):
    """Input model for a DiffPrivLib query on a dummy dataset."""
142
+
143
+
144
+ # Utils
145
+ # ----------------------------------------------------------------------------
146
+
147
+
148
def model_input_to_lib(request: LomasRequestModel) -> DPLibraries:
    """Return the type of DP library given a LomasRequestModel.

    Args:
        request (LomasRequestModel): The user request

    Raises:
        InternalServerException: If the library type cannot be determined.

    Returns:
        DPLibraries: The type of library for the request.
    """
    # Plain isinstance dispatch, checked in the same order as before; each
    # base request class also matches its query / dummy-query subclasses.
    if isinstance(request, SmartnoiseSQLRequestModel):
        return DPLibraries.SMARTNOISE_SQL
    if isinstance(request, SmartnoiseSynthRequestModel):
        return DPLibraries.SMARTNOISE_SYNTH
    if isinstance(request, OpenDPRequestModel):
        return DPLibraries.OPENDP
    if isinstance(request, DiffPrivLibRequestModel):
        return DPLibraries.DIFFPRIVLIB
    raise InternalServerException("Cannot find library type for given model.")
@@ -0,0 +1,171 @@
1
+ from typing import Annotated, Dict, List, Literal, Union
2
+
3
+ import pandas as pd
4
+ from diffprivlib.validation import DiffprivlibMixin
5
+ from pydantic import (
6
+ BaseModel,
7
+ ConfigDict,
8
+ Discriminator,
9
+ PlainSerializer,
10
+ PlainValidator,
11
+ ValidationInfo,
12
+ field_validator,
13
+ )
14
+ from snsynth import Synthesizer
15
+
16
+ from lomas_core.constants import DPLibraries
17
+ from lomas_core.models.utils import (
18
+ dataframe_from_dict,
19
+ dataframe_to_dict,
20
+ deserialize_model,
21
+ serialize_model,
22
+ )
23
+
24
+
25
class ResponseModel(BaseModel):
    """Base model for any response from the server."""
27
+
28
+
29
class InitialBudgetResponse(ResponseModel):
    """Model for responses to initial budget queries."""

    # Budget granted to the user on the dataset at account creation.
    initial_epsilon: float
    initial_delta: float
34
+
35
+
36
class SpentBudgetResponse(ResponseModel):
    """Model for responses to spent budget queries."""

    # Budget already consumed by the user's queries.
    total_spent_epsilon: float
    total_spent_delta: float
41
+
42
+
43
class RemainingBudgetResponse(ResponseModel):
    """Model for responses to remaining budget queries."""

    # Budget still available to the user (initial minus spent).
    remaining_epsilon: float
    remaining_delta: float
48
+
49
+
50
class DummyDsResponse(ResponseModel):
    """Model for responses to dummy dataset requests."""

    # pandas.DataFrame is not a pydantic-native type.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Column name -> pandas dtype string, used to restore types on decode.
    dtypes: Dict[str, str]
    # Columns that must be parsed back into datetimes after deserialization.
    datetime_columns: List[str]
    dummy_df: Annotated[pd.DataFrame, PlainSerializer(dataframe_to_dict)]

    @field_validator("dummy_df", mode="before")
    @classmethod
    def deserialize_dummy_df(
        cls, v: pd.DataFrame | dict, info: ValidationInfo
    ) -> pd.DataFrame:
        """Decodes the dict representation of the dummy df with correct types.

        Only does so if the input value is not already a dataframe.

        Args:
            v (pd.DataFrame | dict): The dataframe to decode.
            info (ValidationInfo): Validation info to access other model fields.

        Returns:
            pd.DataFrame: The decoded dataframe.
        """
        if isinstance(v, pd.DataFrame):
            return v

        # NOTE(review): relies on "dtypes" and "datetime_columns" being
        # declared before "dummy_df", so they are already validated and
        # available in info.data here.
        dtypes = info.data["dtypes"]
        datetime_columns = info.data["datetime_columns"]
        dummy_df = dataframe_from_dict(v)
        dummy_df = dummy_df.astype(dtypes)
        for col in datetime_columns:
            dummy_df[col] = pd.to_datetime(dummy_df[col])
        return dummy_df
83
+
84
+
85
class CostResponse(ResponseModel):
    """Model for responses to cost estimation requests."""

    # Privacy budget that the query would consume (or consumed).
    epsilon: float
    delta: float
90
+
91
+
92
+ # Query Responses
93
+ # -----------------------------------------------------------------------------
94
+
95
+
96
+ # DiffPrivLib
97
# DiffPrivLib
class DiffPrivLibQueryResult(BaseModel):
    """Model for diffprivlib query result."""

    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Discriminator tag for the QueryResponse result union.
    res_type: Literal[DPLibraries.DIFFPRIVLIB] = DPLibraries.DIFFPRIVLIB
    score: float
    # Fitted pipeline, transported as a base64-encoded pickle.
    model: Annotated[
        DiffprivlibMixin,
        PlainSerializer(serialize_model),
        PlainValidator(deserialize_model),
    ]
108
+
109
+
110
+ # SmartnoiseSQL
111
# SmartnoiseSQL
class SmartnoiseSQLQueryResult(BaseModel):
    """Type for smartnoise_sql result type."""

    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Discriminator tag for the QueryResponse result union.
    res_type: Literal[DPLibraries.SMARTNOISE_SQL] = DPLibraries.SMARTNOISE_SQL
    # Query result, transported in pandas "tight" dict format.
    df: Annotated[
        pd.DataFrame,
        PlainSerializer(dataframe_to_dict),
        PlainValidator(dataframe_from_dict),
    ]
121
+
122
+
123
+ # SmartnoiseSynth
124
# SmartnoiseSynth
class SmartnoiseSynthModel(BaseModel):
    """Type for smartnoise_synth result when it is a pickled model."""

    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Discriminator tag for the QueryResponse result union.
    res_type: Literal[DPLibraries.SMARTNOISE_SYNTH] = DPLibraries.SMARTNOISE_SYNTH
    # Fitted synthesizer, transported as a base64-encoded pickle.
    model: Annotated[
        Synthesizer, PlainSerializer(serialize_model), PlainValidator(deserialize_model)
    ]
132
+
133
+
134
class SmartnoiseSynthSamples(BaseModel):
    """Type for smartnoise_synth result when it is a dataframe of samples."""

    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Distinct discriminator tag: the same library can also return a model
    # (SmartnoiseSynthModel), so the library name alone would be ambiguous.
    res_type: Literal["sn_synth_samples"] = "sn_synth_samples"
    # Synthetic samples, transported in pandas "tight" dict format.
    df_samples: Annotated[
        pd.DataFrame,
        PlainSerializer(dataframe_to_dict),
        PlainValidator(dataframe_from_dict),
    ]
144
+
145
+
146
+ # OpenDP
147
# OpenDP
class OpenDPQueryResult(BaseModel):
    """Type for opendp result."""

    # Discriminator tag for the QueryResponse result union.
    res_type: Literal[DPLibraries.OPENDP] = DPLibraries.OPENDP
    value: Union[int, float, List[Union[int, float]]]
152
+
153
+
154
+ # Response object
155
# Response object
# Union of all possible query results; discriminated by "res_type".
QueryResultTypeAlias = Union[
    DiffPrivLibQueryResult,
    SmartnoiseSQLQueryResult,
    SmartnoiseSynthModel,
    SmartnoiseSynthSamples,
    OpenDPQueryResult,
]
162
+
163
+
164
class QueryResponse(CostResponse):
    """Model for responses to queries.

    Inherits the spent epsilon/delta from CostResponse.
    """

    # User who submitted the query.
    requested_by: str
    # Library-specific result, selected by the "res_type" tag.
    result: Annotated[
        QueryResultTypeAlias,
        Discriminator("res_type"),
    ]
@@ -0,0 +1,69 @@
1
+ import pickle
2
+ from base64 import b64decode, b64encode
3
+ from typing import Any
4
+
5
+ import pandas as pd
6
+
7
# Orient used for DataFrame <-> dict round trips; "tight" preserves index
# and column names so the frame can be reconstructed losslessly.
PANDAS_SERIALIZATION_ORIENT = "tight"
8
+
9
+
10
def dataframe_to_dict(df: pd.DataFrame) -> dict:
    """Transforms pandas dataframe into a dictionary.

    Args:
        df (pd.DataFrame): The dataframe to "serialize".

    Returns:
        dict: The pandas dataframe in dictionary format.
    """
    # "tight" preserves index and column names so that dataframe_from_dict
    # can reconstruct the frame losslessly.
    return df.to_dict(orient="tight")
20
+
21
+
22
+ def dataframe_from_dict(serialized_df: pd.DataFrame | dict) -> pd.DataFrame:
23
+ """Transforms input dict into pandas dataframe.
24
+
25
+ If the input is already a dataframe, it is simply returned unmodified.
26
+
27
+ Args:
28
+ serialized_df (pd.DataFrame | dict): Dataframe in dict format.
29
+ Or pd.Dataframe.
30
+
31
+ Returns:
32
+ pd.DataFrame: The transformed dataframe.
33
+ """
34
+ if isinstance(serialized_df, pd.DataFrame):
35
+ return serialized_df
36
+
37
+ return pd.DataFrame.from_dict(serialized_df, orient=PANDAS_SERIALIZATION_ORIENT)
38
+
39
+
40
def serialize_model(model: Any) -> str:
    """
    Serialise a python object into an utf-8 string.

    Fitted Smartnoise Synth synthesizer or fitted DiffPrivLib pipeline.

    NOTE: the payload is a pickle — it must only be exchanged with trusted
    peers, since unpickling untrusted data allows arbitrary code execution.

    Args:
        model (Any): An object to serialise

    Returns:
        str: string of serialised model
    """
    # pickle -> base64 -> utf-8 text, safe to embed in JSON responses.
    return b64encode(pickle.dumps(model)).decode("utf-8")
54
+
55
+
56
def deserialize_model(serialized_model: Any) -> Any:
    """Deserialize a base64 encoded byte string into a python object.

    Inputs that are not strings are assumed to be deserialized already and
    are returned unchanged.

    NOTE: pickle.loads executes code on malicious input — only feed this
    strings produced by serialize_model on a trusted peer.

    Args:
        serialized_model (Any): Encoded python object.

    Returns:
        Any: Deserialized python object.
    """
    if not isinstance(serialized_model, str):
        return serialized_model

    return pickle.loads(b64decode(serialized_model))
@@ -0,0 +1,43 @@
1
+ Metadata-Version: 2.1
2
+ Name: lomas-core
3
+ Version: 0.3.4
4
+ Summary: Lomas core.
5
+ Home-page: https://github.com/dscc-admin-ch/lomas/
6
+ Author: Data Science Competence Center, Swiss Federal Statistical Office
7
+ Author-email: dscc@bfs.admin.ch
8
+ License: MIT
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Natural Language :: English
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Classifier: Topic :: Security
20
+ Requires-Python: >=3.11, <3.13
21
+ Description-Content-Type: text/markdown
22
+ Requires-Dist: diffprivlib==0.6.5
23
+ Requires-Dist: diffprivlib_logger>=0.0.3
24
+ Requires-Dist: fastapi>=0.111.1
25
+ Requires-Dist: numpy>=1.26.4
26
+ Requires-Dist: opendp==0.10.0
27
+ Requires-Dist: opendp_logger>=0.3.0
28
+ Requires-Dist: pandas>=2.2.2
29
+ Requires-Dist: pymongo>=4.6.3
30
+ Requires-Dist: scikit-learn>=1.4.2
31
+ Requires-Dist: smartnoise-synth>=1.0.4
32
+ Requires-Dist: smartnoise_synth_logger>=0.0.3
33
+
34
+ <h1 align="center">
35
+ <picture>
36
+ <source media="(prefers-color-scheme: dark)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_darkmode_txt.png" width="300">
37
+ <source media="(prefers-color-scheme: light)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png" width="300">
38
+ <img alt="This is what is displayed on Pypi" src="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png">
39
+ </picture>
40
+ </h1><br>
41
+
42
+ # Core
43
+ See the technical documentation of core: https://dscc-admin-ch.github.io/lomas-docs/core_api.html.
@@ -0,0 +1,19 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ lomas_core/__init__.py
5
+ lomas_core/constants.py
6
+ lomas_core/error_handler.py
7
+ lomas_core/logger.py
8
+ lomas_core.egg-info/PKG-INFO
9
+ lomas_core.egg-info/SOURCES.txt
10
+ lomas_core.egg-info/dependency_links.txt
11
+ lomas_core.egg-info/requires.txt
12
+ lomas_core.egg-info/top_level.txt
13
+ lomas_core/models/__init__.py
14
+ lomas_core/models/collections.py
15
+ lomas_core/models/config.py
16
+ lomas_core/models/constants.py
17
+ lomas_core/models/requests.py
18
+ lomas_core/models/responses.py
19
+ lomas_core/models/utils.py
@@ -0,0 +1,11 @@
1
+ diffprivlib==0.6.5
2
+ diffprivlib_logger>=0.0.3
3
+ fastapi>=0.111.1
4
+ numpy>=1.26.4
5
+ opendp==0.10.0
6
+ opendp_logger>=0.3.0
7
+ pandas>=2.2.2
8
+ pymongo>=4.6.3
9
+ scikit-learn>=1.4.2
10
+ smartnoise-synth>=1.0.4
11
+ smartnoise_synth_logger>=0.0.3
@@ -0,0 +1 @@
1
+ lomas_core
@@ -0,0 +1,34 @@
1
+ [tool.black]
2
+ line-length = 88
3
+ include = '\.pyi?$'
4
+
5
+ [tool.isort]
6
+ multi_line_output = 3
7
+ include_trailing_comma = true
8
+ force_grid_wrap = 0
9
+ use_parentheses = true
10
+ ensure_newline_before_comments = true
11
+ line_length = 88
12
+ split_on_trailing_comma = true
13
+ src_paths = ["lomas_core"]
14
+
15
+ [tool.flake8]
16
+ max-line-length = 88
17
+
18
+ [tool.mypy]
19
+ disable_error_code = ["import-untyped", "import-not-found", "attr-defined"]
20
+
21
+ [tool.pylint.MASTER]
22
+ max-line-length = 88
23
+
24
+ [tool.pylint."MESSAGES CONTROL"]
25
+ disable = [
26
+ "E0401", # import-error
27
+ "C0114", # missing-module-docstring
28
+ "W1203", # use lazy % formatting in logging functions,
29
+ "R0903", # too-few-public-methods
30
+ ]
31
+
32
+ [tool.pydocstringformatter]
33
+ write = true
34
+ max-line-length = 88
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,49 @@
1
import pathlib

from setuptools import find_packages, setup

# Read the long description from the README next to this file.
# An explicit encoding avoids depending on the platform locale default
# (read_text() without it can fail on non-UTF-8 locales).
this_directory = pathlib.Path(__file__).parent.resolve()
long_description = (this_directory / "README.md").read_text(encoding="utf-8")


setup(
    name="lomas-core",
    packages=find_packages(),
    version="0.3.4",
    description="Lomas core.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/dscc-admin-ch/lomas/",
    author="Data Science Competence Center, Swiss Federal Statistical Office",
    author_email="dscc@bfs.admin.ch",
    license="MIT",
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
        "Topic :: Scientific/Engineering",
        "Topic :: Security",
    ],
    python_requires=">=3.11, <3.13",
    install_requires=[
        "diffprivlib==0.6.5",
        "diffprivlib_logger>=0.0.3",
        "fastapi>=0.111.1",
        "numpy>=1.26.4",
        "opendp==0.10.0",
        "opendp_logger>=0.3.0",
        "pandas>=2.2.2",
        "pymongo>=4.6.3",
        "scikit-learn>=1.4.2",
        "smartnoise-synth>=1.0.4",
        "smartnoise_synth_logger>=0.0.3",
    ],
)