kvk-connect 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kvk_connect/__init__.py +11 -0
- kvk_connect/api/__init__.py +4 -0
- kvk_connect/api/client.py +183 -0
- kvk_connect/api/endpoints.py +24 -0
- kvk_connect/api/session.py +34 -0
- kvk_connect/cli/main.py +26 -0
- kvk_connect/db/__init__.py +0 -0
- kvk_connect/db/basisprofiel_reader.py +67 -0
- kvk_connect/db/basisprofiel_writer.py +73 -0
- kvk_connect/db/init.py +25 -0
- kvk_connect/db/kvkvestigingen_reader.py +41 -0
- kvk_connect/db/kvkvestigingen_writer.py +73 -0
- kvk_connect/db/signaal_reader.py +23 -0
- kvk_connect/db/signaal_writer.py +73 -0
- kvk_connect/db/vestigingenprofiel_reader.py +66 -0
- kvk_connect/db/vestigingsprofiel_writer.py +92 -0
- kvk_connect/logging_config.py +27 -0
- kvk_connect/mappers/__init__.py +1 -0
- kvk_connect/mappers/kvk_record_mapper.py +100 -0
- kvk_connect/mappers/map_mutatie_abonnement_api_to_mutatieabonnement.py +11 -0
- kvk_connect/mappers/map_vestigingen_api_to_vestigingsnummers.py +14 -0
- kvk_connect/mappers/map_vestigingsprofiel_api_to_vestigingsprofiel_domain.py +41 -0
- kvk_connect/models/__init__.py +0 -0
- kvk_connect/models/api/__init__.py +0 -0
- kvk_connect/models/api/abonnementen_api.py +42 -0
- kvk_connect/models/api/basisprofiel_api.py +233 -0
- kvk_connect/models/api/mutatie_abonnementen_api.py +40 -0
- kvk_connect/models/api/mutatiesignalen_api.py +44 -0
- kvk_connect/models/api/vestigingen_api.py +73 -0
- kvk_connect/models/api/vestigingsprofiel_api.py +71 -0
- kvk_connect/models/domain/__init__.py +6 -0
- kvk_connect/models/domain/basisprofiel.py +65 -0
- kvk_connect/models/domain/kvkvestigingsnummersdomain.py +28 -0
- kvk_connect/models/domain/mutatie_abonnement.py +20 -0
- kvk_connect/models/domain/vestigingsadresdomain.py +62 -0
- kvk_connect/models/domain/vestigingsadressendomain.py +48 -0
- kvk_connect/models/domain/vestigingsprofiel_domain.py +58 -0
- kvk_connect/models/orm/base.py +5 -0
- kvk_connect/models/orm/basisprofiel_orm.py +52 -0
- kvk_connect/models/orm/kvkvestigingen_orm.py +53 -0
- kvk_connect/models/orm/signaal_orm.py +40 -0
- kvk_connect/models/orm/vestigingsprofiel_orm.py +58 -0
- kvk_connect/services/__init__.py +4 -0
- kvk_connect/services/record_service.py +66 -0
- kvk_connect/utils/__init__.py +5 -0
- kvk_connect/utils/env.py +16 -0
- kvk_connect/utils/formatting.py +11 -0
- kvk_connect/utils/rate_limit.py +21 -0
- kvk_connect/utils/tools.py +131 -0
- kvk_connect-0.1.6.dist-info/METADATA +352 -0
- kvk_connect-0.1.6.dist-info/RECORD +52 -0
- kvk_connect-0.1.6.dist-info/WHEEL +4 -0
kvk_connect/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""KVK Connect - Python client for KVK API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from kvk_connect.api.client import KVKApiClient
|
|
6
|
+
from kvk_connect.services.record_service import KVKRecordService
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"KVKApiClient",
|
|
10
|
+
"KVKRecordService",
|
|
11
|
+
]
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
from requests import Response
|
|
8
|
+
|
|
9
|
+
from ..models.api.basisprofiel_api import BasisProfielAPI
|
|
10
|
+
from ..models.api.mutatiesignalen_api import MutatiesAPI
|
|
11
|
+
from ..models.api.vestigingen_api import VestigingenAPI
|
|
12
|
+
from ..models.api.vestigingsprofiel_api import VestigingsProfielAPI
|
|
13
|
+
from ..utils.rate_limit import global_rate_limit
|
|
14
|
+
from . import endpoints
|
|
15
|
+
from .session import create_session_with_retries
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class KVKApiClient:
    """HTTP client for the KVK API.

    Wraps the session returned by ``create_session_with_retries`` and sends the
    API key in the ``apikey`` header. Every ``*_raw`` method returns the decoded
    JSON body, or ``None`` when the server answers with an HTTP error status;
    the non-raw variants convert that JSON into the matching API model through
    its ``from_dict`` constructor.

    The client can be used as a context manager, which closes the session on
    exit.
    """

    def __init__(self, api_key: str, base_url: str = endpoints.DEFAULT_BASE_URL):
        """Create the HTTP session and store the connection settings.

        Args:
            api_key: Value sent in the ``apikey`` request header.
            base_url: Root URL that every endpoint is resolved against.
        """
        self.session = create_session_with_retries()
        self.session.headers.update({"apikey": api_key})
        self.base_url = base_url
        # Timeout handed to ``session.get`` by every call except
        # ``get_vestigingen_raw``, which passes its own value.
        self.timeout = 600

    def __enter__(self) -> KVKApiClient:
        """Return the client itself so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the underlying session when the ``with`` block ends."""
        self.close()

    def close(self):
        """Close the underlying HTTP session."""
        self.session.close()

    @staticmethod
    def _get_error_payload(resp: Response) -> str:
        """Extract error payload from response as string for logging."""
        try:
            return str(resp.json())
        except ValueError:
            # Every JSON decode error raised by ``Response.json`` is a ValueError
            # subclass, so a single clause covers all of them.
            return resp.text or "No error details available"

    @staticmethod
    def _format_utc(moment: datetime.datetime) -> str:
        """Format *moment* as ``YYYY-MM-DDTHH:MM:SSZ``.

        Timezone-aware values are converted to UTC first, because the trailing
        ``Z`` claims UTC. Naive values are formatted unchanged.
        """
        if moment.tzinfo is not None and moment.utcoffset() is not None:
            moment = moment.astimezone(datetime.timezone.utc)
        return moment.strftime("%Y-%m-%dT%H:%M:%SZ")

    def _url(self, url: str) -> str:
        """Rebase an endpoint URL onto ``self.base_url``.

        The builders in ``endpoints`` always start from
        ``endpoints.DEFAULT_BASE_URL``. When this client was created with a
        different ``base_url``, that default prefix is swapped for the
        configured one; otherwise the URL is returned untouched.
        """
        default_base = endpoints.DEFAULT_BASE_URL
        if self.base_url == default_base or not url.startswith(default_base):
            return url
        path = url[len(default_base):].lstrip("/")
        return f"{self.base_url.rstrip('/')}/{path}"

    def _get_json(
        self,
        url: str,
        subject: str,
        params: dict | None = None,
        timeout: float | None = None,
        debug_format: str | None = None,
    ) -> dict | None:
        """Perform a GET request and return its decoded JSON body.

        Args:
            url: Fully resolved request URL.
            subject: Identifier that is included in log messages.
            params: Optional query parameters.
            timeout: Request timeout; ``self.timeout`` is used when omitted.
            debug_format: Optional ``logger.debug`` format string that receives
                ``(subject, data, url)`` after a successful request.

        Returns:
            The decoded JSON body, or ``None`` when ``raise_for_status`` raised
            a ``requests.HTTPError``.
        """
        try:
            resp = self.session.get(url, params=params, timeout=self.timeout if timeout is None else timeout)
            resp.raise_for_status()
            # Decode once and reuse the result for both the log line and the return value.
            data = resp.json()
        except requests.HTTPError as e:
            logger.warning("KVK API error for nummer %s: %s", subject, e)
            # Compare with None explicitly: an error Response is falsy.
            failed = e.response
            details = self._get_error_payload(failed) if failed is not None else "No error details available"
            logger.warning("Mogelijke error: %s", details)
            return None
        if debug_format is not None:
            logger.debug(debug_format, subject, data, url)
        return data

    @global_rate_limit()
    def get_mutatie_signaal_raw(self, abonnement_id: str, signaal_id: str) -> dict | None:
        """Get raw mutatie signaal data from KVK API.

        Args:
            abonnement_id (str): Abonnement ID.
            signaal_id (str): Signaal ID.

        Returns:
            The original JSON, or None on an HTTP error.
        """
        url = self._url(endpoints.mutatieservice_signaal(abonnement_id, signaal_id))
        return self._get_json(url, abonnement_id)

    @global_rate_limit()
    def get_mutaties_raw(
        self, abonnement_id: str, from_time: datetime.datetime, to_time: datetime.datetime, page: int, size: int
    ) -> dict | None:
        """Get raw mutaties data from KVK API.

        Args:
            abonnement_id (str): Abonnement ID.
            from_time (datetime): Start datetime for mutaties.
            to_time (datetime): End datetime for mutaties.
            page (int): Page number.
            size (int): Number of items per page.

        Returns:
            The original JSON, or None on an HTTP error.
        """
        url = self._url(endpoints.mutatieservice(abonnement_id))
        params = {
            "vanaf": self._format_utc(from_time),
            "tot": self._format_utc(to_time),
            "pagina": str(page),
            "aantal": str(size),
        }
        return self._get_json(url, abonnement_id, params=params)

    def get_mutaties(
        self, abonnement_id: str, from_time: datetime.datetime, to_time: datetime.datetime, page: int, size: int
    ) -> MutatiesAPI | None:
        """Get mutaties from KVK API as a ``MutatiesAPI`` model, or None on an HTTP error."""
        data = self.get_mutaties_raw(abonnement_id, from_time, to_time, page, size)
        return None if data is None else MutatiesAPI.from_dict(data)

    @global_rate_limit()
    def get_basisinformatie_raw(self, kvk_nummer: str, geo_data: bool = True) -> dict | None:
        """Get raw basisinformatie data from KVK API.

        Args:
            kvk_nummer (str): KVK nummer.
            geo_data (bool): Include geo data or not.

        Returns:
            The original JSON, or None on an HTTP error.
        """
        url = self._url(endpoints.basisprofiel(kvk_nummer))
        logger.debug("KVK API url: %s", url)
        return self._get_json(
            url,
            kvk_nummer,
            params={"geoData": geo_data},
            debug_format="KVK Basisinformatie Raw response for kvk nummer %s: %s, with url: %s",
        )

    def get_basisinformatie(self, kvk_nummer: str, geo_data: bool = True) -> BasisProfielAPI | None:
        """Get basisinformatie from KVK API as a ``BasisProfielAPI`` model, or None on an HTTP error."""
        data = self.get_basisinformatie_raw(kvk_nummer, geo_data)
        return None if data is None else BasisProfielAPI.from_dict(data)

    @global_rate_limit()
    def get_vestigingen_raw(self, kvk_nummer: str) -> dict | None:
        """Get raw vestigingen data from KVK API.

        Args:
            kvk_nummer (str): KVK nummer.

        Returns:
            The original JSON, or None on an HTTP error.
        """
        url = self._url(endpoints.vestigingen(kvk_nummer))
        # NOTE(review): this call uses a fixed 60 timeout instead of self.timeout;
        # kept as-is, confirm whether the difference is intentional.
        return self._get_json(
            url,
            kvk_nummer,
            timeout=60,
            debug_format="KVK Vestigingen Raw response for kvk nummer %s: %s, with url: %s",
        )

    def get_vestigingen(self, kvk_nummer: str) -> VestigingenAPI | None:
        """Get vestigingen from KVK API as a ``VestigingenAPI`` model, or None on an HTTP error."""
        data = self.get_vestigingen_raw(kvk_nummer)
        return None if data is None else VestigingenAPI.from_dict(data)

    @global_rate_limit()
    def get_vestigingsprofiel_raw(self, vestigingsnummer: str, geo_data: bool = True) -> dict | None:
        """Get raw vestigingsprofiel data from KVK API.

        Args:
            vestigingsnummer (str): Vestigingsnummer.
            geo_data (bool): Include geo data or not.

        Returns:
            The original JSON, or None on an HTTP error.
        """
        url = self._url(endpoints.vestigingsprofiel(vestigingsnummer))
        return self._get_json(
            url,
            vestigingsnummer,
            params={"geoData": geo_data},
            debug_format="KVK VestigingenProfiel Raw response for vestigingen nummer %s: %s, with url: %s",
        )

    def get_vestigingsprofiel(self, vestigingsnummer: str, geo_data: bool = True) -> VestigingsProfielAPI | None:
        """Get vestigingsprofiel from KVK API as a ``VestigingsProfielAPI`` model, or None on an HTTP error."""
        data = self.get_vestigingsprofiel_raw(vestigingsnummer, geo_data)
        return None if data is None else VestigingsProfielAPI.from_dict(data)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# ruff: noqa: D103
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
# Root URL of the KVK API; can be overridden with the KVK_BASE_URL environment variable.
DEFAULT_BASE_URL = os.getenv("KVK_BASE_URL", "https://api.kvk.nl/api/v1")


def _endpoint(*segments: str) -> str:
    """Join *segments* onto ``DEFAULT_BASE_URL`` as percent-encoded path parts.

    Each segment is escaped with ``safe=""`` so that a value containing ``/``,
    ``?`` or ``#`` cannot change which resource the URL points at. Plain
    alphanumeric identifiers (and ``-``, ``_``, ``.``, ``~``) pass through
    unchanged.
    """
    from urllib.parse import quote  # local import keeps the file's import block untouched

    path = "/".join(quote(str(segment), safe="") for segment in segments)
    return f"{DEFAULT_BASE_URL}/{path}"


def basisprofiel(kvk_nummer: str) -> str:
    """Return the URL of the basisprofiel for *kvk_nummer*."""
    return _endpoint("basisprofielen", kvk_nummer)


def vestigingen(kvk_nummer: str) -> str:
    """Return the URL of the vestigingen list for *kvk_nummer*."""
    return _endpoint("basisprofielen", kvk_nummer, "vestigingen")


def vestigingsprofiel(vestigingsnummer: str) -> str:
    """Return the URL of the vestigingsprofiel for *vestigingsnummer*."""
    return _endpoint("vestigingsprofielen", vestigingsnummer)


def mutatieservice(abonnement_id: str) -> str:
    """Return the URL of the abonnement identified by *abonnement_id*."""
    return _endpoint("abonnementen", abonnement_id)


def mutatieservice_signaal(abonnement_id: str, signaal_id: str) -> str:
    """Return the URL of signaal *signaal_id* within abonnement *abonnement_id*."""
    return _endpoint("abonnementen", abonnement_id, "signalen", signaal_id)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from requests import Session
|
|
4
|
+
from requests.adapters import HTTPAdapter
|
|
5
|
+
from urllib3.util.retry import Retry
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_session_with_retries(
    retries: int = 3, backoff_factor: float = 1.0, status_forcelist: tuple[int, ...] = (429, 500, 502, 503, 504)
) -> Session:
    """Create a requests.Session with automatic retry logic.

    Args:
        retries: Value passed to ``Retry`` as ``total``.
        backoff_factor: Value passed to ``Retry`` as ``backoff_factor``.
        status_forcelist: HTTP status codes passed to ``Retry`` as ``status_forcelist``.

    Returns:
        A session whose ``http://`` and ``https://`` prefixes are both mounted
        on one adapter carrying the retry configuration for GET, POST, PUT and
        DELETE requests.
    """
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
            allowed_methods=["GET", "POST", "PUT", "DELETE"],
        )
    )

    http = Session()
    for scheme in ("http://", "https://"):
        http.mount(scheme, retrying_adapter)

    logger.debug(
        "Retry strategy configured: total=%d, backoff_factor=%f, status_forcelist=%s",
        retries,
        backoff_factor,
        status_forcelist,
    )
    return http
|
kvk_connect/cli/main.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
|
|
5
|
+
from kvk_connect import KVKApiClient
|
|
6
|
+
from kvk_connect.utils import get_env
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def run():
    """Run the KVK Connect command-line tool.

    Parses ``--kvk`` (required) and ``--geo`` / ``--no-geo`` (geo data is
    included by default), reads the API key from the ``KVK_API_KEY_PROD``
    environment setting, fetches the basisprofiel for the given KVK number and
    prints it as a dict, or prints ``No data`` when nothing was returned.
    """
    parser = argparse.ArgumentParser(prog="kvk-connect")
    parser.add_argument("--kvk", help="KVK number", required=True)
    # BooleanOptionalAction also registers --no-geo; the previous
    # store_true/default=True combination could never yield False.
    parser.add_argument(
        "--geo",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Include geo data (use --no-geo to leave it out)",
    )
    args = parser.parse_args()

    api_key = get_env("KVK_API_KEY_PROD", required=True) or "NO_KEY_FOUND"
    client = KVKApiClient(api_key)
    try:
        basis = client.get_basisinformatie(args.kvk, geo_data=args.geo)
        if not basis:
            print("No data")
            return
        print(basis.to_dict())
    finally:
        # Always release the HTTP session, also when the lookup raises.
        client.close()
|
|
File without changes
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import random
|
|
2
|
+
|
|
3
|
+
from sqlalchemy import func, select
|
|
4
|
+
from sqlalchemy.engine import Engine
|
|
5
|
+
from sqlalchemy.orm import Session
|
|
6
|
+
|
|
7
|
+
from kvk_connect.models.orm.basisprofiel_orm import BasisProfielORM
|
|
8
|
+
from kvk_connect.models.orm.signaal_orm import SignaalORM
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BasisProfielReader:
    """Read-only queries that compare the signalen table with the basisprofielen table."""

    def __init__(self, engine: Engine):
        """Store the engine used to open a session for each query."""
        self.engine = engine

    def get_missing_kvk_nummers(self, limit: int = 50) -> list[str]:
        """Return a random sample of KVK numbers present in signalen but not in basisprofielen.

        At most ``limit * 5`` distinct candidates are fetched, and at most
        ``limit`` of them are returned, so the backlog is worked through
        gradually over repeated calls.
        """
        same_kvk_nummer = SignaalORM.kvknummer == BasisProfielORM.kvk_nummer
        query = (
            select(SignaalORM.kvknummer)
            .outerjoin(BasisProfielORM, same_kvk_nummer)
            .where(BasisProfielORM.kvk_nummer.is_(None))
            .distinct()
            .limit(limit * 5)
        )
        with Session(self.engine) as db:
            candidates = list(db.execute(query).scalars().all())

        # Draw the random sample from the fetched candidates.
        sample_size = min(limit, len(candidates))
        return random.sample(candidates, sample_size)

    def get_missing_kvk_nummers_count(self) -> int:
        """Return how many distinct KVK numbers are present in signalen but not in basisprofielen."""
        same_kvk_nummer = SignaalORM.kvknummer == BasisProfielORM.kvk_nummer
        query = (
            select(func.count(func.distinct(SignaalORM.kvknummer)))
            .outerjoin(BasisProfielORM, same_kvk_nummer)
            .where(BasisProfielORM.kvk_nummer.is_(None))
        )
        with Session(self.engine) as db:
            total = db.execute(query).scalar()
        return total or 0

    def get_outdated_kvk_nummers(self) -> list[str]:
        """Return distinct KVK numbers whose basisprofiel is older than one of their signalen.

        Only signalen without a vestigingsnummer are considered, so updates
        that concern a vestigingsprofiel are left out.
        """
        same_kvk_nummer = SignaalORM.kvknummer == BasisProfielORM.kvk_nummer
        signaal_is_newer = SignaalORM.timestamp > BasisProfielORM.last_updated
        without_vestiging = SignaalORM.vestigingsnummer.is_(None)
        query = (
            select(SignaalORM.kvknummer)
            .join(BasisProfielORM, same_kvk_nummer)
            .where(signaal_is_newer, without_vestiging)
            .distinct()
        )
        with Session(self.engine) as db:
            return list(db.execute(query).scalars().all())
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import UTC, datetime
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import Engine
|
|
5
|
+
from sqlalchemy.orm import Session, sessionmaker
|
|
6
|
+
|
|
7
|
+
from kvk_connect.models.domain import BasisProfielDomain
|
|
8
|
+
from kvk_connect.models.orm.basisprofiel_orm import BasisProfielORM
|
|
9
|
+
from kvk_connect.utils.tools import parse_kvk_datum
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BasisProfielWriter:
    """Context manager that merges BasisProfielDomain objects into the database in batches."""

    # The default batch size is kept low (1) to minimise database locking.
    def __init__(self, engine: Engine, batch_size: int = 1):
        """Prepare a session factory bound to *engine*; no session is opened yet."""
        logger.info("Initializing BasisProfielWriter, met batch size: %d", batch_size)
        self.Session = sessionmaker(bind=engine)
        self.batch_size = batch_size
        self._session: Session | None = None
        self._count = 0

    def __enter__(self):
        """Open a new session for the duration of the context."""
        self._session = self.Session()
        return self

    def __exit__(self, exc_type, exc, tb):
        """Commit when the block succeeded, roll back otherwise, and always close the session."""
        try:
            if exc is not None:
                if self._session:
                    self._session.rollback()
            else:
                self.flush()
        finally:
            if self._session:
                self._session.close()
            self._session = None

    def flush(self) -> None:
        """Commit the open session, if there is one."""
        if self._session:
            self._session.commit()

    def add(self, domain_basisprofiel: BasisProfielDomain) -> None:
        """Merge one basisprofiel into the session, committing after every ``batch_size`` additions.

        Raises:
            RuntimeError: If the writer is used outside its context manager.
        """
        if not self._session:
            raise RuntimeError("Session not initialized. Use context manager.")

        record = self._to_orm(domain_basisprofiel)
        record.last_updated = datetime.now(UTC)
        self._session.merge(record)

        self._count += 1
        batch_complete = self._count % self.batch_size == 0
        if batch_complete:
            self._session.commit()

    @staticmethod
    def _to_orm(api_obj: BasisProfielDomain) -> BasisProfielORM:
        """Build a BasisProfielORM row from the domain object, parsing both registration dates."""
        copied_fields = (
            "kvk_nummer",
            "naam",
            "hoofdactiviteit",
            "hoofdactiviteit_omschrijving",
            "activiteit_overig",
            "rechtsvorm",
            "rechtsvorm_uitgebreid",
            "eerste_handelsnaam",
            "totaal_werkzame_personen",
            "websites",
        )
        values = {field: getattr(api_obj, field) for field in copied_fields}
        values["registratie_datum_aanvang"] = parse_kvk_datum(api_obj.registratie_datum_aanvang)
        values["registratie_datum_einde"] = parse_kvk_datum(api_obj.registratie_datum_einde)
        return BasisProfielORM(**values)
|
kvk_connect/db/init.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from sqlalchemy import inspect
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from sqlalchemy.engine import Engine
|
|
10
|
+
from sqlalchemy.orm import DeclarativeBase
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def ensure_database_initialized(engine: Engine, base: type[DeclarativeBase]) -> None:
    """Create any missing tables declared on *base* and log how many are present.

    Calls ``base.metadata.create_all(engine)`` and then counts the table names
    reported by the engine inspector that also appear in ``base.metadata.tables``.
    """
    logger.info("Ensuring tables exist for %s...", base.__name__)
    base.metadata.create_all(engine)

    existing_names = inspect(engine).get_table_names()
    table_count = sum(1 for table_name in existing_names if table_name in base.metadata.tables)
    logger.info("Database initialized: %s table(s) ready", table_count)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from sqlalchemy import select
|
|
2
|
+
from sqlalchemy.engine import Engine
|
|
3
|
+
from sqlalchemy.orm import Session
|
|
4
|
+
|
|
5
|
+
from kvk_connect.models.orm.basisprofiel_orm import BasisProfielORM
|
|
6
|
+
from kvk_connect.models.orm.kvkvestigingen_orm import VestigingenORM
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class KvKVestigingenReader:
    """Read-only queries that compare the basisprofielen table with the kvkvestigingen table."""

    def __init__(self, engine: Engine):
        """Store the engine used to open a session for each query."""
        self.engine = engine

    def get_missing_kvk_nummers(self) -> list[str]:
        """Return distinct KVK numbers present in basisprofielen but not yet in kvkvestigingen."""
        same_kvk_nummer = BasisProfielORM.kvk_nummer == VestigingenORM.kvk_nummer
        query = (
            select(BasisProfielORM.kvk_nummer)
            .select_from(BasisProfielORM)
            .outerjoin(VestigingenORM, same_kvk_nummer)
            .where(VestigingenORM.kvk_nummer.is_(None))
            .distinct()
        )
        with Session(self.engine) as db:
            return list(db.execute(query).scalars().all())

    def get_outdated_vestigingen(self) -> list[str]:
        """Return distinct KVK numbers whose vestigingen are out of date.

        A KVK number is out of date when its basisprofiel was updated more
        recently than one of its vestigingen rows.
        """
        same_kvk_nummer = BasisProfielORM.kvk_nummer == VestigingenORM.kvk_nummer
        profiel_is_newer = BasisProfielORM.last_updated > VestigingenORM.last_updated
        query = (
            select(BasisProfielORM.kvk_nummer)
            .join(VestigingenORM, same_kvk_nummer)
            .where(profiel_is_newer)
            .distinct()
        )
        with Session(self.engine) as db:
            return list(db.execute(query).scalars().all())
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import UTC, datetime
|
|
3
|
+
|
|
4
|
+
from sqlalchemy.orm import Session, sessionmaker
|
|
5
|
+
|
|
6
|
+
from kvk_connect.models.domain import KvKVestigingsNummersDomain
|
|
7
|
+
from kvk_connect.models.orm.kvkvestigingen_orm import VestigingenORM
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class KvKVestigingenWriter:
    """Context manager that merges the vestigingsnummers of a KVK number into the database."""

    # Low batch size by default to avoid locking issues
    def __init__(self, engine, batch_size: int = 1):
        """Prepare a session factory bound to *engine*; no session is opened yet.

        Args:
            engine: Engine the session factory is bound to.
            batch_size: Number of merged rows after which ``add`` commits.
        """
        logger.info("Initializing KvKVestigingenWriter, met batch size: %d", batch_size)
        self.Session = sessionmaker(bind=engine)
        self.batch_size = batch_size
        self._session: Session | None = None
        self._count = 0

    def __enter__(self):
        """Start a new database session."""
        self._session = self.Session()
        return self

    def __exit__(self, exc_type, exc, tb):
        """Commit when the block succeeded, roll back otherwise, and always close the session."""
        try:
            if exc is None:
                self.flush()
            elif self._session:
                self._session.rollback()
        finally:
            if self._session:
                self._session.close()
            self._session = None

    def flush(self) -> None:
        """Commit the open session, if there is one, and reset the pending-row counter."""
        if self._session:
            self._session.commit()
            # Nothing is pending after a commit, so the next batch starts from zero.
            self._count = 0

    def add(self, domain_kvkvestigingen: KvKVestigingsNummersDomain) -> None:
        """Write every vestigingsnummer of the domain object to the database.

        One row is merged per vestigingsnummer, each carrying the KVK number
        and a shared ``last_updated`` timestamp. When the domain object has no
        vestigingsnummers, a single row with
        ``VestigingenORM.SENTINEL_VESTIGINGSNUMMER`` is merged instead.

        Args:
            domain_kvkvestigingen: Domain object with the KVK number and its
                list of vestigingsnummers.

        Raises:
            RuntimeError: If the writer is used outside its context manager.
        """
        if not self._session:
            raise RuntimeError("Session not initialized. Use context manager.")

        timestamp = datetime.now(UTC)
        # Fall back to the sentinel value when there are no vestigingen.
        vestigingsnummers = domain_kvkvestigingen.vestigingsnummers or [VestigingenORM.SENTINEL_VESTIGINGSNUMMER]

        # Merge all vestigingen that belong to this KVK number.
        for vestigingsnummer in vestigingsnummers:
            orm_obj = VestigingenORM(
                kvk_nummer=domain_kvkvestigingen.kvk_nummer, vestigingsnummer=vestigingsnummer, last_updated=timestamp
            )
            self._session.merge(orm_obj)

        # Count every row merged for this KVK number towards the batch.
        self._count += len(vestigingsnummers)

        # Commit once the batch size has been reached.
        if self._count >= self.batch_size:
            self._session.commit()
            self._count = 0
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
|
|
3
|
+
from sqlalchemy import func, select
|
|
4
|
+
from sqlalchemy.orm import sessionmaker
|
|
5
|
+
|
|
6
|
+
from kvk_connect.models.orm.signaal_orm import SignaalORM
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SignaalReader:
    """Read-only queries on the timestamps stored in the signalen table."""

    def __init__(self, engine):
        """Prepare a session factory bound to *engine*."""
        self.Session = sessionmaker(bind=engine)

    def get_last_timestamp(self) -> datetime | None:
        """Return the latest stored signaal timestamp, or None if the table is empty."""
        newest = select(func.max(SignaalORM.timestamp))
        with self.Session() as db:
            return db.execute(newest).scalar()

    def get_first_timestamp(self) -> datetime | None:
        """Return the earliest stored signaal timestamp, or None if the table is empty."""
        oldest = select(func.min(SignaalORM.timestamp))
        with self.Session() as db:
            return db.execute(oldest).scalar()
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# ruff: noqa: D102
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
from sqlalchemy.orm import Session, sessionmaker
|
|
5
|
+
|
|
6
|
+
from kvk_connect.models.api.mutatiesignalen_api import MutatieSignaal
|
|
7
|
+
from kvk_connect.models.orm.signaal_orm import SignaalORM
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SignaalWriter:
    """Context manager that stores MutatieSignaal objects in the database in batches.

    With ``upsert=True`` every signaal is merged individually and a commit
    happens after each ``batch_size`` additions. With ``upsert=False`` rows are
    collected in a buffer and written with ``bulk_save_objects`` once the
    buffer holds ``batch_size`` rows.
    """

    def __init__(self, engine, batch_size: int = 10, upsert: bool = True):
        """Prepare a session factory bound to *engine*; no session is opened yet.

        Args:
            engine: Engine the session factory is bound to.
            batch_size: Number of rows per commit.
            upsert: Merge rows one by one when true, bulk-save them otherwise.
        """
        logger.info("Initializing SignaalWriter, met batch size: %d", batch_size)
        self.Session = sessionmaker(bind=engine)
        self.batch_size = batch_size
        self.upsert = upsert
        self._session: Session | None = None
        self._buffer: list[SignaalORM] = []
        self._count = 0

    def __enter__(self):
        """Create a new session on entry of context manager."""
        self._session = self.Session()
        return self

    def __exit__(self, exc_type, exc, tb):
        """Commit when the block succeeded, roll back otherwise, and always close the session."""
        try:
            if exc is None:
                self.flush()
            elif self._session:
                self._session.rollback()
        finally:
            # Rows still buffered belong to this context only; drop them so a
            # rolled-back batch cannot be written by a later context.
            self._buffer.clear()
            if self._session:
                self._session.close()
            self._session = None

    def add(self, api_signaal: MutatieSignaal) -> None:
        """Queue one signaal for storage, committing whenever a batch is complete.

        Raises:
            RuntimeError: If the writer is used outside its context manager.
        """
        if not self._session:
            raise RuntimeError("Session not initialized. Use context manager.")

        orm_obj = self._to_orm(api_signaal)
        if self.upsert:
            self._session.merge(orm_obj)  # upsert per row
            self._count += 1
            if self._count % self.batch_size == 0:
                self._session.commit()
        else:
            self._buffer.append(orm_obj)
            if len(self._buffer) >= self.batch_size:
                self._session.bulk_save_objects(self._buffer)
                self._session.commit()
                self._buffer.clear()

    def flush(self) -> None:
        """Write any buffered rows and commit the open session; do nothing without a session."""
        if not self._session:
            return
        if not self.upsert and self._buffer:
            self._session.bulk_save_objects(self._buffer)
            self._buffer.clear()
        self._session.commit()

    @staticmethod
    def _to_orm(s: MutatieSignaal) -> SignaalORM:
        """Copy the fields of a MutatieSignaal into a new SignaalORM row."""
        return SignaalORM(
            id=s.id,
            timestamp=s.timestamp,
            kvknummer=s.kvknummer,
            signaal_type=s.signaal_type,
            vestigingsnummer=s.vestigingsnummer,
        )
|