lomas-core 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lomas_core-0.3.4/PKG-INFO +43 -0
- lomas_core-0.3.4/README.md +10 -0
- lomas_core-0.3.4/lomas_core/__init__.py +0 -0
- lomas_core-0.3.4/lomas_core/constants.py +34 -0
- lomas_core-0.3.4/lomas_core/error_handler.py +125 -0
- lomas_core-0.3.4/lomas_core/logger.py +29 -0
- lomas_core-0.3.4/lomas_core/models/__init__.py +0 -0
- lomas_core-0.3.4/lomas_core/models/collections.py +249 -0
- lomas_core-0.3.4/lomas_core/models/config.py +104 -0
- lomas_core-0.3.4/lomas_core/models/constants.py +67 -0
- lomas_core-0.3.4/lomas_core/models/requests.py +170 -0
- lomas_core-0.3.4/lomas_core/models/responses.py +171 -0
- lomas_core-0.3.4/lomas_core/models/utils.py +69 -0
- lomas_core-0.3.4/lomas_core.egg-info/PKG-INFO +43 -0
- lomas_core-0.3.4/lomas_core.egg-info/SOURCES.txt +19 -0
- lomas_core-0.3.4/lomas_core.egg-info/dependency_links.txt +1 -0
- lomas_core-0.3.4/lomas_core.egg-info/requires.txt +11 -0
- lomas_core-0.3.4/lomas_core.egg-info/top_level.txt +1 -0
- lomas_core-0.3.4/pyproject.toml +34 -0
- lomas_core-0.3.4/setup.cfg +4 -0
- lomas_core-0.3.4/setup.py +49 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: lomas-core
|
|
3
|
+
Version: 0.3.4
|
|
4
|
+
Summary: Lomas core.
|
|
5
|
+
Home-page: https://github.com/dscc-admin-ch/lomas/
|
|
6
|
+
Author: Data Science Competence Center, Swiss Federal Statistical Office
|
|
7
|
+
Author-email: dscc@bfs.admin.ch
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Natural Language :: English
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
|
+
Classifier: Topic :: Security
|
|
20
|
+
Requires-Python: >=3.11, <3.13
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: diffprivlib==0.6.5
|
|
23
|
+
Requires-Dist: diffprivlib_logger>=0.0.3
|
|
24
|
+
Requires-Dist: fastapi>=0.111.1
|
|
25
|
+
Requires-Dist: numpy>=1.26.4
|
|
26
|
+
Requires-Dist: opendp==0.10.0
|
|
27
|
+
Requires-Dist: opendp_logger>=0.3.0
|
|
28
|
+
Requires-Dist: pandas>=2.2.2
|
|
29
|
+
Requires-Dist: pymongo>=4.6.3
|
|
30
|
+
Requires-Dist: scikit-learn>=1.4.2
|
|
31
|
+
Requires-Dist: smartnoise-synth>=1.0.4
|
|
32
|
+
Requires-Dist: smartnoise_synth_logger>=0.0.3
|
|
33
|
+
|
|
34
|
+
<h1 align="center">
|
|
35
|
+
<picture>
|
|
36
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_darkmode_txt.png" width="300">
|
|
37
|
+
<source media="(prefers-color-scheme: light)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png" width="300">
|
|
38
|
+
<img alt="This is what is displayed on Pypi" src="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png">
|
|
39
|
+
</picture>
|
|
40
|
+
</h1><br>
|
|
41
|
+
|
|
42
|
+
# Core
|
|
43
|
+
See the technical documentation of core: https://dscc-admin-ch.github.io/lomas-docs/core_api.html.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
<h1 align="center">
|
|
2
|
+
<picture>
|
|
3
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_darkmode_txt.png" width="300">
|
|
4
|
+
<source media="(prefers-color-scheme: light)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png" width="300">
|
|
5
|
+
<img alt="This is what is displayed on Pypi" src="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png">
|
|
6
|
+
</picture>
|
|
7
|
+
</h1><br>
|
|
8
|
+
|
|
9
|
+
# Core
|
|
10
|
+
See the technical documentation of core: https://dscc-admin-ch.github.io/lomas-docs/core_api.html.
|
|
File without changes
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from enum import StrEnum
|
|
2
|
+
|
|
3
|
+
# Server error messages
|
|
4
|
+
INTERNAL_SERVER_ERROR = (
|
|
5
|
+
"Internal server error. Please contact the administrator of this service."
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DPLibraries(StrEnum):
|
|
10
|
+
"""Name of DP Library used in the query."""
|
|
11
|
+
|
|
12
|
+
SMARTNOISE_SQL = "smartnoise_sql"
|
|
13
|
+
SMARTNOISE_SYNTH = "smartnoise_synth"
|
|
14
|
+
OPENDP = "opendp"
|
|
15
|
+
DIFFPRIVLIB = "diffprivlib"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Smartnoise synth
|
|
19
|
+
class SSynthMarginalSynthesizer(StrEnum):
|
|
20
|
+
"""Marginal Synthesizer models for smartnoise synth."""
|
|
21
|
+
|
|
22
|
+
AIM = "aim"
|
|
23
|
+
MWEM = "mwem"
|
|
24
|
+
MST = "mst"
|
|
25
|
+
PAC_SYNTH = "pacsynth"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class SSynthGanSynthesizer(StrEnum):
|
|
29
|
+
"""GAN Synthesizer models for smartnoise synth."""
|
|
30
|
+
|
|
31
|
+
DP_CTGAN = "dpctgan"
|
|
32
|
+
PATE_CTGAN = "patectgan"
|
|
33
|
+
PATE_GAN = "pategan"
|
|
34
|
+
DP_GAN = "dpgan"
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from typing import Type
|
|
2
|
+
|
|
3
|
+
from fastapi import FastAPI, Request, status
|
|
4
|
+
from fastapi.responses import JSONResponse
|
|
5
|
+
from pymongo.errors import WriteConcernError
|
|
6
|
+
|
|
7
|
+
from lomas_core.constants import INTERNAL_SERVER_ERROR
|
|
8
|
+
from lomas_core.logger import LOG
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class InvalidQueryException(Exception):
|
|
12
|
+
"""
|
|
13
|
+
Custom exception for invalid queries.
|
|
14
|
+
|
|
15
|
+
For example, this exception will occur when the query:
|
|
16
|
+
- is not an opendp measurement
|
|
17
|
+
- cannot be reconstructed properly (for opendp and diffprivlib)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
def __init__(self, error_message: str) -> None:
|
|
21
|
+
"""Invalid Query Exception initialisation.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
error_message (str): _description_
|
|
25
|
+
"""
|
|
26
|
+
self.error_message = error_message
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ExternalLibraryException(Exception):
|
|
30
|
+
"""
|
|
31
|
+
Custom exception for issues within external libraries.
|
|
32
|
+
|
|
33
|
+
This exception will occur when the processes fail within the
|
|
34
|
+
external libraries (smartnoise-sql, opendp, diffprivlib)
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
def __init__(self, library: str, error_message: str) -> None:
|
|
38
|
+
"""External Query Exception initialisation.
|
|
39
|
+
|
|
40
|
+
Args:
|
|
41
|
+
library (str): _description_
|
|
42
|
+
error_message (str): _description_
|
|
43
|
+
"""
|
|
44
|
+
self.library = library
|
|
45
|
+
self.error_message = error_message
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class UnauthorizedAccessException(Exception):
|
|
49
|
+
"""
|
|
50
|
+
Custom exception for unauthorized access:
|
|
51
|
+
|
|
52
|
+
(unknown user, no access to dataset, etc)
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
def __init__(self, error_message: str) -> None:
|
|
56
|
+
self.error_message = error_message
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class InternalServerException(Exception):
|
|
60
|
+
"""Custom exception for issues within server internal functionalities."""
|
|
61
|
+
|
|
62
|
+
def __init__(self, error_message: str) -> None:
|
|
63
|
+
self.error_message = error_message
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
KNOWN_EXCEPTIONS: tuple[Type[BaseException], ...] = (
|
|
67
|
+
ExternalLibraryException,
|
|
68
|
+
InternalServerException,
|
|
69
|
+
InvalidQueryException,
|
|
70
|
+
UnauthorizedAccessException,
|
|
71
|
+
WriteConcernError,
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Custom exception handlers
|
|
76
|
+
def add_exception_handlers(app: FastAPI) -> None:
|
|
77
|
+
"""
|
|
78
|
+
Translates custom exceptions to JSONResponses.
|
|
79
|
+
|
|
80
|
+
Args:
|
|
81
|
+
app (FastAPI): A fastapi App.
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
@app.exception_handler(InvalidQueryException)
|
|
85
|
+
async def invalid_query_exception_handler(
|
|
86
|
+
_: Request, exc: InvalidQueryException
|
|
87
|
+
) -> JSONResponse:
|
|
88
|
+
LOG.info(f"InvalidQueryException raised: {exc.error_message}")
|
|
89
|
+
return JSONResponse(
|
|
90
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
91
|
+
content={"InvalidQueryException": exc.error_message},
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
@app.exception_handler(ExternalLibraryException)
|
|
95
|
+
async def external_library_exception_handler(
|
|
96
|
+
_: Request, exc: ExternalLibraryException
|
|
97
|
+
) -> JSONResponse:
|
|
98
|
+
LOG.info(f"ExternalLibraryException raised: {exc.error_message}")
|
|
99
|
+
return JSONResponse(
|
|
100
|
+
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
|
101
|
+
content={
|
|
102
|
+
"ExternalLibraryException": exc.error_message,
|
|
103
|
+
"library": exc.library,
|
|
104
|
+
},
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
@app.exception_handler(UnauthorizedAccessException)
|
|
108
|
+
async def unauthorized_access_exception_handler(
|
|
109
|
+
_: Request, exc: UnauthorizedAccessException
|
|
110
|
+
) -> JSONResponse:
|
|
111
|
+
LOG.info(f"UnauthorizedAccessException raised: {exc.error_message}")
|
|
112
|
+
return JSONResponse(
|
|
113
|
+
status_code=status.HTTP_403_FORBIDDEN,
|
|
114
|
+
content={"UnauthorizedAccessException": exc.error_message},
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
@app.exception_handler(InternalServerException)
|
|
118
|
+
async def internal_server_exception_handler(
|
|
119
|
+
_: Request, exc: InternalServerException
|
|
120
|
+
) -> JSONResponse:
|
|
121
|
+
LOG.info(f"InternalServerException raised: {exc.error_message}")
|
|
122
|
+
return JSONResponse(
|
|
123
|
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
124
|
+
content={"InternalServerException": INTERNAL_SERVER_ERROR},
|
|
125
|
+
)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import logging.config
|
|
3
|
+
|
|
4
|
+
LOGGING_CONFIG = {
|
|
5
|
+
"version": 1,
|
|
6
|
+
"disable_existing_loggers": False,
|
|
7
|
+
"formatters": {
|
|
8
|
+
"standard": {
|
|
9
|
+
"format": "%(asctime)s - %(levelname)s - \
|
|
10
|
+
[%(filename)s:%(lineno)s - %(funcName)s()] - %(message)s"
|
|
11
|
+
}
|
|
12
|
+
},
|
|
13
|
+
"handlers": {
|
|
14
|
+
"console": {
|
|
15
|
+
"class": "logging.StreamHandler",
|
|
16
|
+
"formatter": "standard",
|
|
17
|
+
"stream": "ext://sys.stdout",
|
|
18
|
+
"level": "INFO",
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"root": {
|
|
22
|
+
"level": "INFO",
|
|
23
|
+
"handlers": ["console"],
|
|
24
|
+
"propagate": True,
|
|
25
|
+
},
|
|
26
|
+
}
|
|
27
|
+
logging.config.dictConfig(LOGGING_CONFIG)
|
|
28
|
+
|
|
29
|
+
LOG = logging.getLogger("")
|
|
File without changes
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Annotated, Any, Dict, List, Literal, Optional, Union
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Discriminator, Field, Tag, model_validator
|
|
5
|
+
|
|
6
|
+
from lomas_core.models.constants import (
|
|
7
|
+
CARDINALITY_FIELD,
|
|
8
|
+
CATEGORICAL_TYPE_PREFIX,
|
|
9
|
+
DB_TYPE_FIELD,
|
|
10
|
+
TYPE_FIELD,
|
|
11
|
+
MetadataColumnType,
|
|
12
|
+
Precision,
|
|
13
|
+
PrivateDatabaseType,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Dataset of User
|
|
17
|
+
# -----------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DatasetOfUser(BaseModel):
|
|
21
|
+
"""BaseModel for informations of a user on a dataset."""
|
|
22
|
+
|
|
23
|
+
dataset_name: str
|
|
24
|
+
initial_epsilon: float
|
|
25
|
+
initial_delta: float
|
|
26
|
+
total_spent_epsilon: float
|
|
27
|
+
total_spent_delta: float
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# User
|
|
31
|
+
# -----------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class User(BaseModel):
|
|
35
|
+
"""BaseModel for a user in a user collection."""
|
|
36
|
+
|
|
37
|
+
user_name: str
|
|
38
|
+
may_query: bool
|
|
39
|
+
datasets_list: List[DatasetOfUser]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class UserCollection(BaseModel):
|
|
43
|
+
"""BaseModel for users collection."""
|
|
44
|
+
|
|
45
|
+
users: List[User]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Dataset Access Data
|
|
49
|
+
# -----------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class DSAccess(BaseModel):
|
|
53
|
+
"""BaseModel for access info to a private dataset."""
|
|
54
|
+
|
|
55
|
+
database_type: str
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class DSPathAccess(DSAccess):
|
|
59
|
+
"""BaseModel for a local dataset."""
|
|
60
|
+
|
|
61
|
+
database_type: Literal[PrivateDatabaseType.PATH] # type: ignore
|
|
62
|
+
path: str
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class DSS3Access(DSAccess):
|
|
66
|
+
"""BaseModel for a dataset on S3."""
|
|
67
|
+
|
|
68
|
+
database_type: Literal[PrivateDatabaseType.S3] # type: ignore
|
|
69
|
+
endpoint_url: str
|
|
70
|
+
bucket: str
|
|
71
|
+
key: str
|
|
72
|
+
access_key_id: Optional[str] = None
|
|
73
|
+
secret_access_key: Optional[str] = None
|
|
74
|
+
credentials_name: str
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class DSInfo(BaseModel):
|
|
78
|
+
"""BaseModel for a dataset."""
|
|
79
|
+
|
|
80
|
+
dataset_name: str
|
|
81
|
+
dataset_access: Annotated[
|
|
82
|
+
Union[DSPathAccess, DSS3Access], Field(discriminator=DB_TYPE_FIELD)
|
|
83
|
+
]
|
|
84
|
+
metadata_access: Annotated[
|
|
85
|
+
Union[DSPathAccess, DSS3Access], Field(discriminator=DB_TYPE_FIELD)
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class DatasetsCollection(BaseModel):
|
|
90
|
+
"""BaseModel for datasets collection."""
|
|
91
|
+
|
|
92
|
+
datasets: List[DSInfo]
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# Metadata
|
|
96
|
+
# -----------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class ColumnMetadata(BaseModel):
|
|
100
|
+
"""Base model for column metadata."""
|
|
101
|
+
|
|
102
|
+
private_id: bool = False
|
|
103
|
+
nullable: bool = False
|
|
104
|
+
# See issue #323 for checking this and validating.
|
|
105
|
+
|
|
106
|
+
max_partition_length: Optional[Annotated[int, Field(gt=0)]] = None
|
|
107
|
+
max_influenced_partitions: Optional[Annotated[int, Field(gt=0)]] = None
|
|
108
|
+
max_partition_contributions: Optional[Annotated[int, Field(gt=0)]] = None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class StrMetadata(ColumnMetadata):
|
|
112
|
+
"""Model for string metadata."""
|
|
113
|
+
|
|
114
|
+
type: Literal[MetadataColumnType.STRING]
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class CategoricalColumnMetadata(ColumnMetadata):
|
|
118
|
+
"""Model for categorical column metadata."""
|
|
119
|
+
|
|
120
|
+
@model_validator(mode="after")
|
|
121
|
+
def validate_categories(self):
|
|
122
|
+
"""Makes sure number of categories matches cardinality."""
|
|
123
|
+
if len(self.categories) != self.cardinality:
|
|
124
|
+
raise ValueError("Number of categories should be equal to cardinality.")
|
|
125
|
+
return self
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class StrCategoricalMetadata(CategoricalColumnMetadata):
|
|
129
|
+
"""Model for categorical string metadata."""
|
|
130
|
+
|
|
131
|
+
type: Literal[MetadataColumnType.STRING]
|
|
132
|
+
cardinality: int
|
|
133
|
+
categories: List[str]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class BoundedColumnMetadata(ColumnMetadata):
|
|
137
|
+
"""Model for columns with bounded data."""
|
|
138
|
+
|
|
139
|
+
@model_validator(mode="after")
|
|
140
|
+
def validate_bounds(self):
|
|
141
|
+
"""Validates column bounds."""
|
|
142
|
+
if (
|
|
143
|
+
self.lower is not None
|
|
144
|
+
and self.upper is not None
|
|
145
|
+
and self.lower > self.upper
|
|
146
|
+
):
|
|
147
|
+
raise ValueError("Lower bound cannot be larger than upper bound.")
|
|
148
|
+
|
|
149
|
+
return self
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class IntMetadata(BoundedColumnMetadata):
|
|
153
|
+
"""Model for integer column metadata."""
|
|
154
|
+
|
|
155
|
+
type: Literal[MetadataColumnType.INT]
|
|
156
|
+
precision: Precision
|
|
157
|
+
lower: int
|
|
158
|
+
upper: int
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class IntCategoricalMetadata(CategoricalColumnMetadata):
|
|
162
|
+
"""Model for integer categorical column metadata."""
|
|
163
|
+
|
|
164
|
+
type: Literal[MetadataColumnType.INT]
|
|
165
|
+
precision: Precision
|
|
166
|
+
cardinality: int
|
|
167
|
+
categories: List[int]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class FloatMetadata(BoundedColumnMetadata):
|
|
171
|
+
"""Model for float column metadata."""
|
|
172
|
+
|
|
173
|
+
type: Literal[MetadataColumnType.FLOAT]
|
|
174
|
+
precision: Precision
|
|
175
|
+
lower: float
|
|
176
|
+
upper: float
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
class BooleanMetadata(ColumnMetadata):
|
|
180
|
+
"""Model for boolean column metadata."""
|
|
181
|
+
|
|
182
|
+
type: Literal[MetadataColumnType.BOOLEAN]
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class DatetimeMetadata(BoundedColumnMetadata):
|
|
186
|
+
"""Model for datetime column metadata."""
|
|
187
|
+
|
|
188
|
+
type: Literal[MetadataColumnType.DATETIME]
|
|
189
|
+
lower: datetime
|
|
190
|
+
upper: datetime
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def get_column_metadata_discriminator(v: Any) -> str:
|
|
194
|
+
"""Discriminator function for determining the type of column metadata.
|
|
195
|
+
|
|
196
|
+
Args:
|
|
197
|
+
v (Any): The unparsed column metadata (either dict or class object)
|
|
198
|
+
|
|
199
|
+
Raises:
|
|
200
|
+
ValueError: If the column type cannot be found.
|
|
201
|
+
|
|
202
|
+
Returns:
|
|
203
|
+
str: The metadata string type.
|
|
204
|
+
"""
|
|
205
|
+
if isinstance(v, dict):
|
|
206
|
+
col_type = v.get(TYPE_FIELD)
|
|
207
|
+
else:
|
|
208
|
+
col_type = getattr(v, TYPE_FIELD)
|
|
209
|
+
|
|
210
|
+
if (
|
|
211
|
+
col_type
|
|
212
|
+
in (
|
|
213
|
+
MetadataColumnType.STRING,
|
|
214
|
+
MetadataColumnType.INT,
|
|
215
|
+
)
|
|
216
|
+
) and (
|
|
217
|
+
((isinstance(v, dict)) and CARDINALITY_FIELD in v)
|
|
218
|
+
or (hasattr(v, CARDINALITY_FIELD))
|
|
219
|
+
):
|
|
220
|
+
col_type = f"{CATEGORICAL_TYPE_PREFIX}{col_type}"
|
|
221
|
+
|
|
222
|
+
if not isinstance(col_type, str):
|
|
223
|
+
raise ValueError("Could not find column type.")
|
|
224
|
+
|
|
225
|
+
return col_type
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class Metadata(BaseModel):
|
|
229
|
+
"""BaseModel for a metadata format."""
|
|
230
|
+
|
|
231
|
+
max_ids: Annotated[int, Field(gt=0)]
|
|
232
|
+
rows: Annotated[int, Field(gt=0)]
|
|
233
|
+
row_privacy: bool
|
|
234
|
+
censor_dims: Optional[bool] = False
|
|
235
|
+
columns: Dict[
|
|
236
|
+
str,
|
|
237
|
+
Annotated[
|
|
238
|
+
Union[
|
|
239
|
+
Annotated[StrMetadata, Tag(MetadataColumnType.STRING)],
|
|
240
|
+
Annotated[StrCategoricalMetadata, Tag(MetadataColumnType.CAT_STRING)],
|
|
241
|
+
Annotated[IntMetadata, Tag(MetadataColumnType.INT)],
|
|
242
|
+
Annotated[IntCategoricalMetadata, Tag(MetadataColumnType.CAT_INT)],
|
|
243
|
+
Annotated[FloatMetadata, Tag(MetadataColumnType.FLOAT)],
|
|
244
|
+
Annotated[BooleanMetadata, Tag(MetadataColumnType.BOOLEAN)],
|
|
245
|
+
Annotated[DatetimeMetadata, Tag(MetadataColumnType.DATETIME)],
|
|
246
|
+
],
|
|
247
|
+
Discriminator(get_column_metadata_discriminator),
|
|
248
|
+
],
|
|
249
|
+
]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from typing import Annotated, List, Literal, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
4
|
+
|
|
5
|
+
from lomas_core.models.constants import (
|
|
6
|
+
AdminDBType,
|
|
7
|
+
PrivateDatabaseType,
|
|
8
|
+
TimeAttackMethod,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TimeAttack(BaseModel):
|
|
13
|
+
"""BaseModel for configs to prevent timing attacks."""
|
|
14
|
+
|
|
15
|
+
method: TimeAttackMethod
|
|
16
|
+
magnitude: float
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Server(BaseModel):
|
|
20
|
+
"""BaseModel for uvicorn server configs."""
|
|
21
|
+
|
|
22
|
+
time_attack: TimeAttack
|
|
23
|
+
host_ip: str
|
|
24
|
+
host_port: int
|
|
25
|
+
log_level: str
|
|
26
|
+
reload: bool
|
|
27
|
+
workers: int
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DBConfig(BaseModel):
|
|
31
|
+
"""BaseModel for database type config."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class YamlDBConfig(DBConfig):
|
|
35
|
+
"""BaseModel for dataset store configs in case of a Yaml database."""
|
|
36
|
+
|
|
37
|
+
db_type: Literal[AdminDBType.YAML] # type: ignore
|
|
38
|
+
db_file: str
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class MongoDBConfig(DBConfig):
|
|
42
|
+
"""BaseModel for dataset store configs in case of a MongoDB database."""
|
|
43
|
+
|
|
44
|
+
db_type: Literal[AdminDBType.MONGODB] # type: ignore
|
|
45
|
+
address: str
|
|
46
|
+
port: int
|
|
47
|
+
username: str
|
|
48
|
+
password: str
|
|
49
|
+
db_name: str
|
|
50
|
+
max_pool_size: int
|
|
51
|
+
min_pool_size: int
|
|
52
|
+
max_connecting: int
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class PrivateDBCredentials(BaseModel):
|
|
56
|
+
"""BaseModel for private database credentials."""
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class S3CredentialsConfig(PrivateDBCredentials):
|
|
60
|
+
"""BaseModel for S3 database credentials."""
|
|
61
|
+
|
|
62
|
+
model_config = ConfigDict(extra="allow")
|
|
63
|
+
|
|
64
|
+
db_type: Literal[PrivateDatabaseType.S3] # type: ignore
|
|
65
|
+
credentials_name: str
|
|
66
|
+
access_key_id: str
|
|
67
|
+
secret_access_key: str
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class OpenDPConfig(BaseModel):
|
|
71
|
+
"""BaseModel for openDP librairy config."""
|
|
72
|
+
|
|
73
|
+
contrib: bool
|
|
74
|
+
floating_point: bool
|
|
75
|
+
honest_but_curious: bool
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class DPLibraryConfig(BaseModel):
|
|
79
|
+
"""BaseModel for DP librairies config."""
|
|
80
|
+
|
|
81
|
+
opendp: OpenDPConfig
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class Config(BaseModel):
|
|
85
|
+
"""Server runtime config."""
|
|
86
|
+
|
|
87
|
+
# Develop mode
|
|
88
|
+
develop_mode: bool
|
|
89
|
+
|
|
90
|
+
# Server configs
|
|
91
|
+
server: Server
|
|
92
|
+
|
|
93
|
+
# A limit on the rate which users can submit answers
|
|
94
|
+
submit_limit: float
|
|
95
|
+
|
|
96
|
+
admin_database: Annotated[
|
|
97
|
+
Union[MongoDBConfig, YamlDBConfig], Field(discriminator="db_type")
|
|
98
|
+
]
|
|
99
|
+
|
|
100
|
+
private_db_credentials: List[Union[S3CredentialsConfig]] = Field(
|
|
101
|
+
..., discriminator="db_type"
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
dp_libraries: DPLibraryConfig
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from enum import IntEnum, StrEnum
|
|
2
|
+
|
|
3
|
+
# Field names
|
|
4
|
+
# -----------------------------------------------------------------------------
|
|
5
|
+
|
|
6
|
+
DB_TYPE_FIELD = "database_type"
|
|
7
|
+
TYPE_FIELD = "type"
|
|
8
|
+
CARDINALITY_FIELD = "cardinality"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Metadata
|
|
12
|
+
# -----------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MetadataColumnType(StrEnum):
|
|
16
|
+
"""Column types for metadata."""
|
|
17
|
+
|
|
18
|
+
STRING = "string"
|
|
19
|
+
CAT_STRING = "categorical_string"
|
|
20
|
+
INT = "int"
|
|
21
|
+
CAT_INT = "categorical_int"
|
|
22
|
+
FLOAT = "float"
|
|
23
|
+
BOOLEAN = "boolean"
|
|
24
|
+
DATETIME = "datetime"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
CATEGORICAL_TYPE_PREFIX = "categorical_"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Precision(IntEnum):
|
|
31
|
+
"""Precision of integer and float data."""
|
|
32
|
+
|
|
33
|
+
SINGLE = 32
|
|
34
|
+
DOUBLE = 64
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Config / Dataset Connectors
|
|
38
|
+
# -----------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ConfigKeys(StrEnum):
|
|
42
|
+
"""Keys of the configuration file."""
|
|
43
|
+
|
|
44
|
+
RUNTIME_ARGS: str = "runtime_args"
|
|
45
|
+
SETTINGS: str = "settings"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class AdminDBType(StrEnum):
|
|
49
|
+
"""Types of administration databases."""
|
|
50
|
+
|
|
51
|
+
YAML: str = "yaml"
|
|
52
|
+
MONGODB: str = "mongodb"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TimeAttackMethod(StrEnum):
|
|
56
|
+
"""Possible methods against timing attacks."""
|
|
57
|
+
|
|
58
|
+
JITTER = "jitter"
|
|
59
|
+
STALL = "stall"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Private Databases
|
|
63
|
+
class PrivateDatabaseType(StrEnum):
|
|
64
|
+
"""Type of Private Database for the private data."""
|
|
65
|
+
|
|
66
|
+
PATH = "PATH_DB"
|
|
67
|
+
S3 = "S3_DB"
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
4
|
+
|
|
5
|
+
from lomas_core.constants import (
|
|
6
|
+
DPLibraries,
|
|
7
|
+
SSynthGanSynthesizer,
|
|
8
|
+
SSynthMarginalSynthesizer,
|
|
9
|
+
)
|
|
10
|
+
from lomas_core.error_handler import InternalServerException
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LomasRequestModel(BaseModel):
|
|
14
|
+
"""Base class for all types of requests to the lomas server.
|
|
15
|
+
|
|
16
|
+
We differentiate between requests and queries:
|
|
17
|
+
- a request does not necessarily require an algorithm
|
|
18
|
+
to be executed on the private dataset (e.g. some cost requests).
|
|
19
|
+
- a query requires executing an algorithm on a private
|
|
20
|
+
dataset (or a potentially a dummy).
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
dataset_name: str
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class GetDummyDataset(LomasRequestModel):
|
|
27
|
+
"""Model input to get a dummy dataset."""
|
|
28
|
+
|
|
29
|
+
dummy_nb_rows: int = Field(..., gt=0)
|
|
30
|
+
dummy_seed: int
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class QueryModel(LomasRequestModel):
|
|
34
|
+
"""
|
|
35
|
+
Base input model for any query on a dataset.
|
|
36
|
+
|
|
37
|
+
We differentiate between requests and queries:
|
|
38
|
+
- a request does not necessarily require an algorithm
|
|
39
|
+
to be executed on the private dataset (e.g. some cost requests).
|
|
40
|
+
- a query requires executing an algorithm on a private
|
|
41
|
+
dataset (or a potentially a dummy).
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class DummyQueryModel(QueryModel):
|
|
46
|
+
"""Input model for a query on a dummy dataset."""
|
|
47
|
+
|
|
48
|
+
dummy_nb_rows: int = Field(..., gt=0)
|
|
49
|
+
dummy_seed: int
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# SmartnoiseSQL
|
|
53
|
+
# ----------------------------------------------------------------------------
|
|
54
|
+
class SmartnoiseSQLRequestModel(LomasRequestModel):
|
|
55
|
+
"""Base input model for a smarnoise-sql request."""
|
|
56
|
+
|
|
57
|
+
query_str: str
|
|
58
|
+
epsilon: float = Field(..., gt=0)
|
|
59
|
+
delta: float = Field(..., gt=0)
|
|
60
|
+
mechanisms: dict
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class SmartnoiseSQLQueryModel(SmartnoiseSQLRequestModel, QueryModel):
|
|
64
|
+
"""Base input model for a smartnoise-sql query."""
|
|
65
|
+
|
|
66
|
+
postprocess: bool
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class SmartnoiseSQLDummyQueryModel(SmartnoiseSQLQueryModel, DummyQueryModel):
|
|
70
|
+
"""Input model for a smartnoise-sql query on a dummy dataset."""
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# SmartnoiseSynth
|
|
74
|
+
# ----------------------------------------------------------------------------
|
|
75
|
+
class SmartnoiseSynthRequestModel(LomasRequestModel):
|
|
76
|
+
"""Base input model for a SmartnoiseSynth request."""
|
|
77
|
+
|
|
78
|
+
synth_name: Union[SSynthMarginalSynthesizer, SSynthGanSynthesizer]
|
|
79
|
+
epsilon: float = Field(..., gt=0)
|
|
80
|
+
delta: Optional[float] = None
|
|
81
|
+
select_cols: List
|
|
82
|
+
synth_params: dict
|
|
83
|
+
nullable: bool
|
|
84
|
+
constraints: str
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class SmartnoiseSynthQueryModel(SmartnoiseSynthRequestModel, QueryModel):
|
|
88
|
+
"""Base input model for a smarnoise-synth query."""
|
|
89
|
+
|
|
90
|
+
return_model: bool
|
|
91
|
+
condition: str
|
|
92
|
+
nb_samples: int
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class SmartnoiseSynthDummyQueryModel(SmartnoiseSynthQueryModel, DummyQueryModel):
|
|
96
|
+
"""Input model for a smarnoise-synth query on a dummy dataset."""
|
|
97
|
+
|
|
98
|
+
# Same as normal query.
|
|
99
|
+
return_model: bool
|
|
100
|
+
condition: str
|
|
101
|
+
nb_samples: int
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# OpenDP
|
|
105
|
+
# ----------------------------------------------------------------------------
|
|
106
|
+
class OpenDPRequestModel(LomasRequestModel):
|
|
107
|
+
"""Base input model for an opendp request."""
|
|
108
|
+
|
|
109
|
+
model_config = ConfigDict(use_attribute_docstrings=True)
|
|
110
|
+
opendp_json: str
|
|
111
|
+
"""Opendp pipeline."""
|
|
112
|
+
fixed_delta: Optional[float] = None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class OpenDPQueryModel(OpenDPRequestModel, QueryModel):
|
|
116
|
+
"""Base input model for an opendp query."""
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class OpenDPDummyQueryModel(OpenDPRequestModel, DummyQueryModel):
|
|
120
|
+
"""Input model for an opendp query on a dummy dataset."""
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# DiffPrivLib
|
|
124
|
+
# ----------------------------------------------------------------------------
|
|
125
|
+
class DiffPrivLibRequestModel(LomasRequestModel):
|
|
126
|
+
"""Base input model for a diffprivlib request."""
|
|
127
|
+
|
|
128
|
+
diffprivlib_json: str
|
|
129
|
+
feature_columns: list
|
|
130
|
+
target_columns: Optional[list]
|
|
131
|
+
test_size: float = Field(..., gt=0.0, lt=1.0)
|
|
132
|
+
test_train_split_seed: int
|
|
133
|
+
imputer_strategy: str
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class DiffPrivLibQueryModel(DiffPrivLibRequestModel, QueryModel):
|
|
137
|
+
"""Base input model for a diffprivlib query."""
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class DiffPrivLibDummyQueryModel(DiffPrivLibQueryModel, DummyQueryModel):
|
|
141
|
+
"""Input model for a DiffPrivLib query on a dummy dataset."""
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Utils
|
|
145
|
+
# ----------------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def model_input_to_lib(request: LomasRequestModel) -> DPLibraries:
|
|
149
|
+
"""Return the type of DP library given a LomasRequestModel.
|
|
150
|
+
|
|
151
|
+
Args:
|
|
152
|
+
request (LomasRequestModel): The user request
|
|
153
|
+
|
|
154
|
+
Raises:
|
|
155
|
+
InternalServerException: If the library type cannot be determined.
|
|
156
|
+
|
|
157
|
+
Returns:
|
|
158
|
+
DPLibraries: The type of library for the request.
|
|
159
|
+
"""
|
|
160
|
+
match request:
|
|
161
|
+
case SmartnoiseSQLRequestModel():
|
|
162
|
+
return DPLibraries.SMARTNOISE_SQL
|
|
163
|
+
case SmartnoiseSynthRequestModel():
|
|
164
|
+
return DPLibraries.SMARTNOISE_SYNTH
|
|
165
|
+
case OpenDPRequestModel():
|
|
166
|
+
return DPLibraries.OPENDP
|
|
167
|
+
case DiffPrivLibRequestModel():
|
|
168
|
+
return DPLibraries.DIFFPRIVLIB
|
|
169
|
+
case _:
|
|
170
|
+
raise InternalServerException("Cannot find library type for given model.")
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
from typing import Annotated, Dict, List, Literal, Union
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from diffprivlib.validation import DiffprivlibMixin
|
|
5
|
+
from pydantic import (
|
|
6
|
+
BaseModel,
|
|
7
|
+
ConfigDict,
|
|
8
|
+
Discriminator,
|
|
9
|
+
PlainSerializer,
|
|
10
|
+
PlainValidator,
|
|
11
|
+
ValidationInfo,
|
|
12
|
+
field_validator,
|
|
13
|
+
)
|
|
14
|
+
from snsynth import Synthesizer
|
|
15
|
+
|
|
16
|
+
from lomas_core.constants import DPLibraries
|
|
17
|
+
from lomas_core.models.utils import (
|
|
18
|
+
dataframe_from_dict,
|
|
19
|
+
dataframe_to_dict,
|
|
20
|
+
deserialize_model,
|
|
21
|
+
serialize_model,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ResponseModel(BaseModel):
    """Base model for any response from the server.

    Carries no fields itself; concrete response models subclass it.
    """
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class InitialBudgetResponse(ResponseModel):
    """Model for responses to initial budget queries."""

    initial_epsilon: float  # Initially allocated epsilon (DP budget).
    initial_delta: float  # Initially allocated delta (DP budget).
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class SpentBudgetResponse(ResponseModel):
    """Model for responses to spent budget queries."""

    total_spent_epsilon: float  # Epsilon consumed so far.
    total_spent_delta: float  # Delta consumed so far.
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class RemainingBudgetResponse(ResponseModel):
    """Model for responses to remaining budget queries."""

    remaining_epsilon: float  # Epsilon still available to the user.
    remaining_delta: float  # Delta still available to the user.
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class DummyDsResponse(ResponseModel):
    """Model for responses to dummy dataset requests."""

    # dummy_df is a pd.DataFrame, which pydantic cannot validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Column name -> pandas dtype string, applied when decoding dummy_df.
    dtypes: Dict[str, str]
    # Columns restored with pd.to_datetime when decoding dummy_df.
    datetime_columns: List[str]
    # NOTE: declared AFTER dtypes and datetime_columns on purpose — the
    # "before" validator below reads them from info.data, which only
    # contains fields already validated (declaration order matters).
    dummy_df: Annotated[pd.DataFrame, PlainSerializer(dataframe_to_dict)]

    @field_validator("dummy_df", mode="before")
    @classmethod
    def deserialize_dummy_df(
        cls, v: pd.DataFrame | dict, info: ValidationInfo
    ) -> pd.DataFrame:
        """Decodes the dict representation of the dummy df with correct types.

        Only does so if the input value is not already a dataframe.

        Args:
            v (pd.DataFrame | dict): The dataframe to decode.
            info (ValidationInfo): Validation info to access other model fields.

        Returns:
            pd.DataFrame: The decoded dataframe.
        """
        # Already a dataframe: nothing to decode.
        if isinstance(v, pd.DataFrame):
            return v

        # Fields declared (and validated) before dummy_df are available here.
        dtypes = info.data["dtypes"]
        datetime_columns = info.data["datetime_columns"]
        dummy_df = dataframe_from_dict(v)
        dummy_df = dummy_df.astype(dtypes)
        # Restore datetime columns explicitly via pd.to_datetime.
        for col in datetime_columns:
            dummy_df[col] = pd.to_datetime(dummy_df[col])
        return dummy_df
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class CostResponse(ResponseModel):
    """Model for responses to cost estimation requests."""

    epsilon: float  # Epsilon cost of the query.
    delta: float  # Delta cost of the query.
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Query Responses
|
|
93
|
+
# -----------------------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# DiffPrivLib
class DiffPrivLibQueryResult(BaseModel):
    """Model for diffprivlib query result."""

    # model is a DiffprivlibMixin, which pydantic cannot validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Discriminator tag; lets QueryResponse.result select this variant.
    res_type: Literal[DPLibraries.DIFFPRIVLIB] = DPLibraries.DIFFPRIVLIB
    # Score reported for the model — presumably a fit/accuracy metric;
    # TODO confirm against the diffprivlib query handler.
    score: float
    # (De)serialized as a base64-pickled blob via the shared model helpers.
    model: Annotated[
        DiffprivlibMixin,
        PlainSerializer(serialize_model),
        PlainValidator(deserialize_model),
    ]
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# SmartnoiseSQL
class SmartnoiseSQLQueryResult(BaseModel):
    """Type for smartnoise_sql result type."""

    # df is a pd.DataFrame, which pydantic cannot validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Discriminator tag; lets QueryResponse.result select this variant.
    res_type: Literal[DPLibraries.SMARTNOISE_SQL] = DPLibraries.SMARTNOISE_SQL
    # Result table, (de)serialized as a dict via the shared dataframe helpers.
    df: Annotated[
        pd.DataFrame,
        PlainSerializer(dataframe_to_dict),
        PlainValidator(dataframe_from_dict),
    ]
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# SmartnoiseSynth
class SmartnoiseSynthModel(BaseModel):
    """Type for smartnoise_synth result when it is a pickled model."""

    # model is a Synthesizer, which pydantic cannot validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Discriminator tag; lets QueryResponse.result select this variant.
    res_type: Literal[DPLibraries.SMARTNOISE_SYNTH] = DPLibraries.SMARTNOISE_SYNTH
    # (De)serialized as a base64-pickled blob via the shared model helpers.
    model: Annotated[
        Synthesizer, PlainSerializer(serialize_model), PlainValidator(deserialize_model)
    ]
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class SmartnoiseSynthSamples(BaseModel):
    """Type for smartnoise_synth result when it is a dataframe of samples."""

    # df_samples is a pd.DataFrame, which pydantic cannot validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # NOTE: a plain string tag (not a DPLibraries member) so the two
    # smartnoise_synth result variants remain distinguishable by res_type.
    res_type: Literal["sn_synth_samples"] = "sn_synth_samples"
    # Synthetic samples, (de)serialized via the shared dataframe helpers.
    df_samples: Annotated[
        pd.DataFrame,
        PlainSerializer(dataframe_to_dict),
        PlainValidator(dataframe_from_dict),
    ]
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# OpenDP
class OpenDPQueryResult(BaseModel):
    """Type for opendp result."""

    # Discriminator tag; lets QueryResponse.result select this variant.
    res_type: Literal[DPLibraries.OPENDP] = DPLibraries.OPENDP
    # Scalar or list of scalars returned by the OpenDP query.
    value: Union[int, float, List[Union[int, float]]]
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# Response object
# Union of every query-result variant; discriminated on the res_type
# field (see QueryResponse.result).
QueryResultTypeAlias = Union[
    DiffPrivLibQueryResult,
    SmartnoiseSQLQueryResult,
    SmartnoiseSynthModel,
    SmartnoiseSynthSamples,
    OpenDPQueryResult,
]
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class QueryResponse(CostResponse):
    """Model for responses to queries.

    Extends CostResponse, so the actual epsilon/delta cost of the query is
    returned together with its result.
    """

    # User who issued the query.
    requested_by: str
    # Library-specific result; pydantic picks the concrete variant by
    # matching the res_type discriminator field.
    result: Annotated[
        QueryResultTypeAlias,
        Discriminator("res_type"),
    ]
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import pickle
|
|
2
|
+
from base64 import b64decode, b64encode
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
# Orient shared by every DataFrame <-> dict conversion in this module.
PANDAS_SERIALIZATION_ORIENT = "tight"


def dataframe_to_dict(df: pd.DataFrame) -> dict:
    """Encode a pandas dataframe as a plain dictionary.

    Args:
        df (pd.DataFrame): The dataframe to "serialize".

    Returns:
        dict: Dictionary representation of the dataframe, built with the
            module-wide serialization orient.
    """
    encoded = df.to_dict(orient=PANDAS_SERIALIZATION_ORIENT)
    return encoded
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def dataframe_from_dict(serialized_df: pd.DataFrame | dict) -> pd.DataFrame:
    """Rebuild a pandas dataframe from its dict representation.

    A value that is already a dataframe is returned as-is.

    Args:
        serialized_df (pd.DataFrame | dict): Dataframe in dict format,
            or an already-constructed pd.DataFrame.

    Returns:
        pd.DataFrame: The reconstructed (or passed-through) dataframe.
    """
    if not isinstance(serialized_df, pd.DataFrame):
        return pd.DataFrame.from_dict(
            serialized_df, orient=PANDAS_SERIALIZATION_ORIENT
        )
    return serialized_df
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def serialize_model(model: Any) -> str:
    """
    Serialise a python object into an utf-8 string.

    Typical inputs are a fitted Smartnoise Synth synthesizer or a fitted
    DiffPrivLib pipeline.

    Args:
        model (Any): An object to serialise

    Returns:
        str: string of serialised model
    """
    # Pickle first, then base64-encode so the bytes survive as plain text.
    return b64encode(pickle.dumps(model)).decode("utf-8")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def deserialize_model(serialized_model: Any) -> Any:
    """Deserialize a base64 encoded byte string into a python object.

    Any non-string input is assumed to already be deserialized and is
    returned unchanged.

    NOTE: this unpickles its input — pickle.loads can execute arbitrary
    code, so only ever feed it trusted data.

    Args:
        serialized_model (Any): Encoded python object.

    Returns:
        Any: Deserialized python object.
    """
    if not isinstance(serialized_model, str):
        return serialized_model
    return pickle.loads(b64decode(serialized_model))
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: lomas-core
|
|
3
|
+
Version: 0.3.4
|
|
4
|
+
Summary: Lomas core.
|
|
5
|
+
Home-page: https://github.com/dscc-admin-ch/lomas/
|
|
6
|
+
Author: Data Science Competence Center, Swiss Federal Statistical Office
|
|
7
|
+
Author-email: dscc@bfs.admin.ch
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Natural Language :: English
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
|
+
Classifier: Topic :: Security
|
|
20
|
+
Requires-Python: >=3.11, <3.13
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: diffprivlib==0.6.5
|
|
23
|
+
Requires-Dist: diffprivlib_logger>=0.0.3
|
|
24
|
+
Requires-Dist: fastapi>=0.111.1
|
|
25
|
+
Requires-Dist: numpy>=1.26.4
|
|
26
|
+
Requires-Dist: opendp==0.10.0
|
|
27
|
+
Requires-Dist: opendp_logger>=0.3.0
|
|
28
|
+
Requires-Dist: pandas>=2.2.2
|
|
29
|
+
Requires-Dist: pymongo>=4.6.3
|
|
30
|
+
Requires-Dist: scikit-learn>=1.4.2
|
|
31
|
+
Requires-Dist: smartnoise-synth>=1.0.4
|
|
32
|
+
Requires-Dist: smartnoise_synth_logger>=0.0.3
|
|
33
|
+
|
|
34
|
+
<h1 align="center">
|
|
35
|
+
<picture>
|
|
36
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_darkmode_txt.png" width="300">
|
|
37
|
+
<source media="(prefers-color-scheme: light)" srcset="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png" width="300">
|
|
38
|
+
<img alt="This is what is displayed on Pypi" src="https://github.com/dscc-admin-ch/lomas/blob/wip_322_darkmode-logo/images/lomas_logo_txt.png">
|
|
39
|
+
</picture>
|
|
40
|
+
</h1><br>
|
|
41
|
+
|
|
42
|
+
# Core
|
|
43
|
+
See the technical documentation of core: https://dscc-admin-ch.github.io/lomas-docs/core_api.html.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
setup.py
|
|
4
|
+
lomas_core/__init__.py
|
|
5
|
+
lomas_core/constants.py
|
|
6
|
+
lomas_core/error_handler.py
|
|
7
|
+
lomas_core/logger.py
|
|
8
|
+
lomas_core.egg-info/PKG-INFO
|
|
9
|
+
lomas_core.egg-info/SOURCES.txt
|
|
10
|
+
lomas_core.egg-info/dependency_links.txt
|
|
11
|
+
lomas_core.egg-info/requires.txt
|
|
12
|
+
lomas_core.egg-info/top_level.txt
|
|
13
|
+
lomas_core/models/__init__.py
|
|
14
|
+
lomas_core/models/collections.py
|
|
15
|
+
lomas_core/models/config.py
|
|
16
|
+
lomas_core/models/constants.py
|
|
17
|
+
lomas_core/models/requests.py
|
|
18
|
+
lomas_core/models/responses.py
|
|
19
|
+
lomas_core/models/utils.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
lomas_core
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
[tool.black]
|
|
2
|
+
line-length = 88
|
|
3
|
+
include = '\.pyi?$'
|
|
4
|
+
|
|
5
|
+
[tool.isort]
|
|
6
|
+
multi_line_output = 3
|
|
7
|
+
include_trailing_comma = true
|
|
8
|
+
force_grid_wrap = 0
|
|
9
|
+
use_parentheses = true
|
|
10
|
+
ensure_newline_before_comments = true
|
|
11
|
+
line_length = 88
|
|
12
|
+
split_on_trailing_comma = true
|
|
13
|
+
src_paths = ["lomas_core"]
|
|
14
|
+
|
|
15
|
+
[tool.flake8]
|
|
16
|
+
max-line-length = 88
|
|
17
|
+
|
|
18
|
+
[tool.mypy]
|
|
19
|
+
disable_error_code = ["import-untyped", "import-not-found", "attr-defined"]
|
|
20
|
+
|
|
21
|
+
[tool.pylint.MASTER]
|
|
22
|
+
max-line-length = 88
|
|
23
|
+
|
|
24
|
+
[tool.pylint."MESSAGES CONTROL"]
|
|
25
|
+
disable = [
|
|
26
|
+
"E0401", # import-error
|
|
27
|
+
"C0114", # missing-module-docstring
|
|
28
|
+
"W1203", # use lazy % formatting in logging functions,
|
|
29
|
+
"R0903", # too-few-public-methods
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[tool.pydocstringformatter]
|
|
33
|
+
write = true
|
|
34
|
+
max-line-length = 88
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import pathlib

from setuptools import find_packages, setup

# Long description for PyPI comes straight from the package README.
# An explicit encoding keeps the build independent of the machine's
# locale (read_text() without one uses the locale's default codec).
this_directory = pathlib.Path(__file__).parent.resolve()
long_description = (this_directory / "README.md").read_text(encoding="utf-8")


setup(
    name="lomas-core",
    packages=find_packages(),
    version="0.3.4",
    description="Lomas core.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/dscc-admin-ch/lomas/",
    author="Data Science Competence Center, Swiss Federal Statistical Office",
    author_email="dscc@bfs.admin.ch",
    license="MIT",
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Natural Language :: English",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
        "Topic :: Scientific/Engineering",
        "Topic :: Security",
    ],
    python_requires=">=3.11, <3.13",
    install_requires=[
        "diffprivlib==0.6.5",
        "diffprivlib_logger>=0.0.3",
        "fastapi>=0.111.1",
        "numpy>=1.26.4",
        "opendp==0.10.0",
        "opendp_logger>=0.3.0",
        "pandas>=2.2.2",
        "pymongo>=4.6.3",
        "scikit-learn>=1.4.2",
        "smartnoise-synth>=1.0.4",
        "smartnoise_synth_logger>=0.0.3",
    ],
)
|