kvk-connect 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. kvk_connect/__init__.py +11 -0
  2. kvk_connect/api/__init__.py +4 -0
  3. kvk_connect/api/client.py +183 -0
  4. kvk_connect/api/endpoints.py +24 -0
  5. kvk_connect/api/session.py +34 -0
  6. kvk_connect/cli/main.py +26 -0
  7. kvk_connect/db/__init__.py +0 -0
  8. kvk_connect/db/basisprofiel_reader.py +67 -0
  9. kvk_connect/db/basisprofiel_writer.py +73 -0
  10. kvk_connect/db/init.py +25 -0
  11. kvk_connect/db/kvkvestigingen_reader.py +41 -0
  12. kvk_connect/db/kvkvestigingen_writer.py +73 -0
  13. kvk_connect/db/signaal_reader.py +23 -0
  14. kvk_connect/db/signaal_writer.py +73 -0
  15. kvk_connect/db/vestigingenprofiel_reader.py +66 -0
  16. kvk_connect/db/vestigingsprofiel_writer.py +92 -0
  17. kvk_connect/logging_config.py +27 -0
  18. kvk_connect/mappers/__init__.py +1 -0
  19. kvk_connect/mappers/kvk_record_mapper.py +100 -0
  20. kvk_connect/mappers/map_mutatie_abonnement_api_to_mutatieabonnement.py +11 -0
  21. kvk_connect/mappers/map_vestigingen_api_to_vestigingsnummers.py +14 -0
  22. kvk_connect/mappers/map_vestigingsprofiel_api_to_vestigingsprofiel_domain.py +41 -0
  23. kvk_connect/models/__init__.py +0 -0
  24. kvk_connect/models/api/__init__.py +0 -0
  25. kvk_connect/models/api/abonnementen_api.py +42 -0
  26. kvk_connect/models/api/basisprofiel_api.py +233 -0
  27. kvk_connect/models/api/mutatie_abonnementen_api.py +40 -0
  28. kvk_connect/models/api/mutatiesignalen_api.py +44 -0
  29. kvk_connect/models/api/vestigingen_api.py +73 -0
  30. kvk_connect/models/api/vestigingsprofiel_api.py +71 -0
  31. kvk_connect/models/domain/__init__.py +6 -0
  32. kvk_connect/models/domain/basisprofiel.py +65 -0
  33. kvk_connect/models/domain/kvkvestigingsnummersdomain.py +28 -0
  34. kvk_connect/models/domain/mutatie_abonnement.py +20 -0
  35. kvk_connect/models/domain/vestigingsadresdomain.py +62 -0
  36. kvk_connect/models/domain/vestigingsadressendomain.py +48 -0
  37. kvk_connect/models/domain/vestigingsprofiel_domain.py +58 -0
  38. kvk_connect/models/orm/base.py +5 -0
  39. kvk_connect/models/orm/basisprofiel_orm.py +52 -0
  40. kvk_connect/models/orm/kvkvestigingen_orm.py +53 -0
  41. kvk_connect/models/orm/signaal_orm.py +40 -0
  42. kvk_connect/models/orm/vestigingsprofiel_orm.py +58 -0
  43. kvk_connect/services/__init__.py +4 -0
  44. kvk_connect/services/record_service.py +66 -0
  45. kvk_connect/utils/__init__.py +5 -0
  46. kvk_connect/utils/env.py +16 -0
  47. kvk_connect/utils/formatting.py +11 -0
  48. kvk_connect/utils/rate_limit.py +21 -0
  49. kvk_connect/utils/tools.py +131 -0
  50. kvk_connect-0.1.6.dist-info/METADATA +352 -0
  51. kvk_connect-0.1.6.dist-info/RECORD +52 -0
  52. kvk_connect-0.1.6.dist-info/WHEEL +4 -0
@@ -0,0 +1,11 @@
1
+ """KVK Connect - Python client for KVK API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from kvk_connect.api.client import KVKApiClient
6
+ from kvk_connect.services.record_service import KVKRecordService
7
+
8
+ __all__ = [
9
+ "KVKApiClient",
10
+ "KVKRecordService",
11
+ ]
@@ -0,0 +1,4 @@
1
+ # api package initialization
2
+ from .client import KVKApiClient
3
+
4
+ __all__ = ["KVKApiClient"]
@@ -0,0 +1,183 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import logging
5
+
6
+ import requests
7
+ from requests import Response
8
+
9
+ from ..models.api.basisprofiel_api import BasisProfielAPI
10
+ from ..models.api.mutatiesignalen_api import MutatiesAPI
11
+ from ..models.api.vestigingen_api import VestigingenAPI
12
+ from ..models.api.vestigingsprofiel_api import VestigingsProfielAPI
13
+ from ..utils.rate_limit import global_rate_limit
14
+ from . import endpoints
15
+ from .session import create_session_with_retries
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class KVKApiClient:
    """HTTP client for the KVK API.

    Every ``*_raw`` method performs a single GET request (wrapped by the
    ``global_rate_limit`` decorator) and returns the decoded JSON body, or ``None``
    when the API answers with an HTTP error status. The non-raw counterparts
    convert that JSON into the matching API model through its ``from_dict``
    constructor.
    """

    def __init__(self, api_key: str, base_url: str = endpoints.DEFAULT_BASE_URL):
        """Create a client that authenticates with *api_key*.

        Args:
            api_key (str): Value sent in the ``apikey`` header of every request.
            base_url (str): Root URL of the API. Request URLs are re-based onto this
                value whenever it differs from ``endpoints.DEFAULT_BASE_URL``.
        """
        self.session = create_session_with_retries()
        self.session.headers.update({"apikey": api_key})
        self.base_url = base_url
        # Per-request timeout in seconds, passed to ``session.get``.
        self.timeout = 600

    def close(self):  # noqa: D102
        self.session.close()

    @staticmethod
    def _get_error_payload(resp: Response) -> str:
        """Extract error payload from response as string for logging."""
        try:
            return str(resp.json())
        except (ValueError, requests.exceptions.JSONDecodeError):
            # Body is not JSON: fall back to the raw text, if there is any.
            return resp.text if resp.text else "No error details available"

    @staticmethod
    def _format_timestamp(moment: datetime.datetime) -> str:
        """Format *moment* as ``YYYY-MM-DDTHH:MM:SSZ``.

        Timezone-aware values are converted to UTC first so the trailing ``Z`` is
        truthful; naive values are formatted unchanged (they are taken to be UTC).
        """
        if moment.utcoffset() is not None:
            moment = moment.astimezone(datetime.timezone.utc)
        return moment.strftime("%Y-%m-%dT%H:%M:%SZ")

    def _resolve_url(self, url: str) -> str:
        """Re-base an ``endpoints`` URL onto ``self.base_url``.

        The ``endpoints`` helpers always build on ``endpoints.DEFAULT_BASE_URL``, so
        without this step a custom ``base_url`` passed to the constructor would be
        silently ignored.
        """
        default_base = endpoints.DEFAULT_BASE_URL
        if not self.base_url or self.base_url == default_base or not url.startswith(default_base):
            return url
        return self.base_url.rstrip("/") + url[len(default_base):]

    def _get_json(
        self,
        url: str,
        identifier: str,
        *,
        params: dict | None = None,
        timeout: float | None = None,
        debug_template: str | None = None,
    ) -> dict | None:
        """GET *url* and return the decoded JSON body, or ``None`` on an HTTP error status.

        Args:
            url (str): Fully resolved request URL.
            identifier (str): Number/ID reported in the log messages.
            params (dict | None): Optional query parameters.
            timeout (float | None): Timeout in seconds; ``self.timeout`` when omitted.
            debug_template (str | None): Optional lazy-format debug message taking
                ``(identifier, json, url)``.

        Only ``requests.HTTPError`` (raised by ``raise_for_status``) is handled here;
        any other exception raised by the session propagates to the caller.
        """
        resp = self.session.get(url, params=params, timeout=self.timeout if timeout is None else timeout)
        try:
            resp.raise_for_status()
        except requests.HTTPError as e:
            logger.warning("KVK API error for nummer %s: %s", identifier, e)
            logger.warning("Mogelijke error: %s", self._get_error_payload(resp))
            return None

        # Decode the body once and reuse it for both the debug log and the return value.
        data = resp.json()
        if debug_template is not None:
            logger.debug(debug_template, identifier, data, url)
        return data

    @global_rate_limit()
    def get_mutatie_signaal_raw(self, abonnement_id: str, signaal_id: str) -> dict | None:
        """Get raw mutatie signaal data from KVK API.

        Args:
            abonnement_id (str): Abonnement ID.
            signaal_id (str): Signaal ID.

        Returns:
            The original JSON, or None on an HTTP error status.
        """
        url = self._resolve_url(endpoints.mutatieservice_signaal(abonnement_id, signaal_id))
        return self._get_json(url, abonnement_id)

    @global_rate_limit()
    def get_mutaties_raw(
        self, abonnement_id: str, from_time: datetime.datetime, to_time: datetime.datetime, page: int, size: int
    ) -> dict | None:
        """Get raw mutaties data from KVK API.

        Args:
            abonnement_id (str): Abonnement ID.
            from_time (datetime): Start datetime for mutaties.
            to_time (datetime): End datetime for mutaties.
            page (int): Page number.
            size (int): Number of items per page.

        Returns:
            The original JSON, or None on an HTTP error status.
        """
        url = self._resolve_url(endpoints.mutatieservice(abonnement_id))
        params = {
            "vanaf": self._format_timestamp(from_time),
            "tot": self._format_timestamp(to_time),
            "pagina": str(page),
            "aantal": str(size),
        }
        return self._get_json(url, abonnement_id, params=params)

    def get_mutaties(
        self, abonnement_id: str, from_time: datetime.datetime, to_time: datetime.datetime, page: int, size: int
    ) -> MutatiesAPI | None:
        """Get mutaties from KVK API as a ``MutatiesAPI`` model, or None on error."""
        data = self.get_mutaties_raw(abonnement_id, from_time, to_time, page, size)
        return None if data is None else MutatiesAPI.from_dict(data)

    @global_rate_limit()
    def get_basisinformatie_raw(self, kvk_nummer: str, geo_data: bool = True) -> dict | None:
        """Get raw basisinformatie data from KVK API.

        Args:
            kvk_nummer (str): KVK nummer.
            geo_data (bool): Include geo data or not (sent as the ``geoData`` query parameter).

        Returns:
            The original JSON, or None on an HTTP error status.
        """
        url = self._resolve_url(endpoints.basisprofiel(kvk_nummer))
        logger.debug("KVK API url: %s", url)
        return self._get_json(
            url,
            kvk_nummer,
            params={"geoData": geo_data},
            debug_template="KVK Basisinformatie Raw response for kvk nummer %s: %s, with url: %s",
        )

    def get_basisinformatie(self, kvk_nummer: str, geo_data: bool = True) -> BasisProfielAPI | None:
        """Get basisinformatie from KVK API as a ``BasisProfielAPI`` model, or None on error."""
        data = self.get_basisinformatie_raw(kvk_nummer, geo_data)
        return None if data is None else BasisProfielAPI.from_dict(data)

    @global_rate_limit()
    def get_vestigingen_raw(self, kvk_nummer: str) -> dict | None:
        """Get raw vestigingen data from KVK API.

        Args:
            kvk_nummer (str): KVK nummer.

        Returns:
            The original JSON, or None on an HTTP error status.
        """
        url = self._resolve_url(endpoints.vestigingen(kvk_nummer))
        # NOTE(review): this endpoint uses a fixed 60 s timeout instead of
        # self.timeout like the other calls -- confirm whether that is intentional.
        return self._get_json(
            url,
            kvk_nummer,
            timeout=60,
            debug_template="KVK Vestigingen Raw response for kvk nummer %s: %s, with url: %s",
        )

    def get_vestigingen(self, kvk_nummer: str) -> VestigingenAPI | None:
        """Get vestigingen from KVK API as a ``VestigingenAPI`` model, or None on error."""
        data = self.get_vestigingen_raw(kvk_nummer)
        return None if data is None else VestigingenAPI.from_dict(data)

    @global_rate_limit()
    def get_vestigingsprofiel_raw(self, vestigingsnummer: str, geo_data: bool = True) -> dict | None:
        """Get raw vestigingsprofiel data from KVK API.

        Args:
            vestigingsnummer (str): Vestigingsnummer.
            geo_data (bool): Include geo data or not (sent as the ``geoData`` query parameter).

        Returns:
            The original JSON, or None on an HTTP error status.
        """
        url = self._resolve_url(endpoints.vestigingsprofiel(vestigingsnummer))
        return self._get_json(
            url,
            vestigingsnummer,
            params={"geoData": geo_data},
            debug_template="KVK VestigingenProfiel Raw response for vestigingen nummer %s: %s, with url: %s",
        )

    def get_vestigingsprofiel(self, vestigingsnummer: str, geo_data: bool = True) -> VestigingsProfielAPI | None:
        """Get vestigingsprofiel from KVK API as a ``VestigingsProfielAPI`` model, or None on error."""
        data = self.get_vestigingsprofiel_raw(vestigingsnummer, geo_data)
        return None if data is None else VestigingsProfielAPI.from_dict(data)
@@ -0,0 +1,24 @@
1
+ # ruff: noqa: D103
2
+ import os
3
+
4
# Root URL of the KVK API; override with the KVK_BASE_URL environment variable.
# A trailing slash is stripped so the builders below never emit "//" in a path.
DEFAULT_BASE_URL = os.getenv("KVK_BASE_URL", "https://api.kvk.nl/api/v1").rstrip("/")


def _segment(value: object) -> str:
    """Percent-encode *value* so it is safe to use as a single URL path segment."""
    from urllib.parse import quote

    # safe="" also encodes "/", so an identifier can never add path segments.
    return quote(str(value), safe="")


def basisprofiel(kvk_nummer: str) -> str:
    """Return the basisprofiel URL for *kvk_nummer*."""
    return f"{DEFAULT_BASE_URL}/basisprofielen/{_segment(kvk_nummer)}"


def vestigingen(kvk_nummer: str) -> str:
    """Return the URL listing the vestigingen of *kvk_nummer*."""
    return f"{DEFAULT_BASE_URL}/basisprofielen/{_segment(kvk_nummer)}/vestigingen"


def vestigingsprofiel(vestigingsnummer: str) -> str:
    """Return the vestigingsprofiel URL for *vestigingsnummer*."""
    return f"{DEFAULT_BASE_URL}/vestigingsprofielen/{_segment(vestigingsnummer)}"


def mutatieservice(abonnement_id: str) -> str:
    """Return the mutatieservice URL for the subscription *abonnement_id*."""
    return f"{DEFAULT_BASE_URL}/abonnementen/{_segment(abonnement_id)}"


def mutatieservice_signaal(abonnement_id: str, signaal_id: str) -> str:
    """Return the URL of signal *signaal_id* within subscription *abonnement_id*."""
    return f"{DEFAULT_BASE_URL}/abonnementen/{_segment(abonnement_id)}/signalen/{_segment(signaal_id)}"
@@ -0,0 +1,34 @@
1
+ import logging
2
+
3
+ from requests import Session
4
+ from requests.adapters import HTTPAdapter
5
+ from urllib3.util.retry import Retry
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
def create_session_with_retries(
    retries: int = 3, backoff_factor: float = 1.0, status_forcelist: tuple[int, ...] = (429, 500, 502, 503, 504)
) -> Session:
    """Create a ``requests.Session`` with automatic retry logic.

    Args:
        retries: Value passed as ``total`` to the urllib3 ``Retry`` policy.
        backoff_factor: Back-off factor passed to the ``Retry`` policy.
        status_forcelist: HTTP status codes that trigger a retry.

    Returns:
        A session whose ``http://`` and ``https://`` adapters share one retry policy
        covering GET, POST, PUT and DELETE requests.
    """
    # NOTE(review): Retry is left at its default raise_on_status, so exhausting the
    # retries on a status from status_forcelist presumably surfaces as
    # requests.exceptions.RetryError rather than an HTTPError -- confirm callers expect that.
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
            allowed_methods=["GET", "POST", "PUT", "DELETE"],
        )
    )

    session = Session()
    for scheme in ("http://", "https://"):
        session.mount(scheme, retrying_adapter)

    # Tip: set the "urllib3.util.retry" and "urllib3.connectionpool" loggers to
    # DEBUG to trace individual retry attempts.
    logger.debug(
        "Retry strategy configured: total=%d, backoff_factor=%f, status_forcelist=%s",
        retries,
        backoff_factor,
        status_forcelist,
    )
    return session
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+
5
+ from kvk_connect import KVKApiClient
6
+ from kvk_connect.utils import get_env
7
+
8
+
9
def run():
    """Start the CLI tool for KVK Connect.

    Parses ``--kvk`` (required) and ``--geo`` / ``--no-geo`` (geo data is included
    by default), fetches the basisprofiel for that KVK number and prints it as a
    dict, or prints "No data" when nothing was returned. The API key is read from
    the ``KVK_API_KEY_PROD`` environment setting.
    """
    parser = argparse.ArgumentParser(prog="kvk-connect")
    parser.add_argument("--kvk", help="KVK number", required=True)
    # BooleanOptionalAction keeps "--geo" working and adds "--no-geo"; the previous
    # store_true/default=True combination could never yield False.
    parser.add_argument(
        "--geo",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Include geo data in the response",
    )
    args = parser.parse_args()

    api_key = get_env("KVK_API_KEY_PROD", required=True) or "NO_KEY_FOUND"
    client = KVKApiClient(api_key)
    try:
        basis = client.get_basisinformatie(args.kvk, geo_data=args.geo)
        if not basis:
            print("No data")
            return
        print(basis.to_dict())
    finally:
        # Always release the underlying HTTP session.
        client.close()
File without changes
@@ -0,0 +1,67 @@
1
+ import random
2
+
3
+ from sqlalchemy import func, select
4
+ from sqlalchemy.engine import Engine
5
+ from sqlalchemy.orm import Session
6
+
7
+ from kvk_connect.models.orm.basisprofiel_orm import BasisProfielORM
8
+ from kvk_connect.models.orm.signaal_orm import SignaalORM
9
+
10
+
11
class BasisProfielReader:
    """Read-only queries relating the signalen table to the basisprofielen table."""

    def __init__(self, engine: Engine):
        self.engine = engine

    def get_missing_kvk_nummers(self, limit: int = 50) -> list[str]:
        """Return a random sample of KVK numbers present in signalen but not yet in basisprofielen.

        Numbers arriving through signalen may not be known yet. At most ``limit``
        of them are returned per call so the backlog is worked off gradually; the
        sample is drawn from up to ``limit * 5`` distinct candidates.
        """
        candidate_query = (
            select(SignaalORM.kvknummer)
            .outerjoin(BasisProfielORM, SignaalORM.kvknummer == BasisProfielORM.kvk_nummer)
            .where(BasisProfielORM.kvk_nummer.is_(None))
            .distinct()
            .limit(limit * 5)  # over-fetch so the random sample has something to choose from
        )
        with Session(self.engine) as session:
            candidates = list(session.execute(candidate_query).scalars().all())

        # Never request more items than were actually fetched.
        sample_size = min(limit, len(candidates))
        return random.sample(candidates, sample_size)

    def get_missing_kvk_nummers_count(self) -> int:
        """Return the number of distinct KVK numbers present in signalen but not yet in basisprofielen."""
        count_query = (
            select(func.count(func.distinct(SignaalORM.kvknummer)))
            .outerjoin(BasisProfielORM, SignaalORM.kvknummer == BasisProfielORM.kvk_nummer)
            .where(BasisProfielORM.kvk_nummer.is_(None))
        )
        with Session(self.engine) as session:
            total = session.execute(count_query).scalar()
        return total if total else 0

    def get_outdated_kvk_nummers(self) -> list[str]:
        """Return distinct KVK numbers whose basisprofiel is older than one of their signalen.

        Only signalen without a vestigingsnummer are considered, so updates that
        concern a vestigingsprofiel are excluded.
        """
        signal_is_newer = SignaalORM.timestamp > BasisProfielORM.last_updated
        concerns_basisprofiel = SignaalORM.vestigingsnummer.is_(None)
        outdated_query = (
            select(SignaalORM.kvknummer)
            .join(BasisProfielORM, SignaalORM.kvknummer == BasisProfielORM.kvk_nummer)
            .where(signal_is_newer, concerns_basisprofiel)
            .distinct()
        )
        with Session(self.engine) as session:
            return list(session.execute(outdated_query).scalars().all())
@@ -0,0 +1,73 @@
1
+ import logging
2
+ from datetime import UTC, datetime
3
+
4
+ from sqlalchemy import Engine
5
+ from sqlalchemy.orm import Session, sessionmaker
6
+
7
+ from kvk_connect.models.domain import BasisProfielDomain
8
+ from kvk_connect.models.orm.basisprofiel_orm import BasisProfielORM
9
+ from kvk_connect.utils.tools import parse_kvk_datum
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class BasisProfielWriter:
    """Context-managed writer that merges basisprofiel domain objects into the database.

    The session is opened on ``__enter__`` and committed (or rolled back when the
    block raised) and closed on ``__exit__``.
    """

    # The default batch size is kept at 1 to minimise database locking.
    def __init__(self, engine: Engine, batch_size: int = 1):
        logger.info("Initializing BasisProfielWriter, met batch size: %d", batch_size)
        self.Session = sessionmaker(bind=engine)
        self.batch_size = batch_size
        self._session: Session | None = None
        self._count = 0

    def __enter__(self):
        """Open a fresh session for the duration of the ``with`` block."""
        self._session = self.Session()
        return self

    def __exit__(self, exc_type, exc, tb):
        """Commit when the block succeeded, roll back otherwise; the session is always closed."""
        session = self._session
        try:
            if exc is not None:
                if session:
                    session.rollback()
            else:
                self.flush()
        finally:
            if session:
                session.close()
            self._session = None

    def flush(self) -> None:  # noqa: D102
        session = self._session
        if session:
            session.commit()

    def add(self, domain_basisprofiel: BasisProfielDomain) -> None:  # noqa: D102
        session = self._session
        if not session:
            raise RuntimeError("Session not initialized. Use context manager.")

        record = self._to_orm(domain_basisprofiel)
        record.last_updated = datetime.now(UTC)
        session.merge(record)

        # Commit every ``batch_size`` merged objects.
        self._count += 1
        batch_complete = self._count % self.batch_size == 0
        if batch_complete:
            session.commit()

    @staticmethod
    def _to_orm(api_obj: BasisProfielDomain) -> BasisProfielORM:
        """Copy the domain fields onto a new ORM row, parsing the two registration dates."""
        columns = {
            "kvk_nummer": api_obj.kvk_nummer,
            "naam": api_obj.naam,
            "hoofdactiviteit": api_obj.hoofdactiviteit,
            "hoofdactiviteit_omschrijving": api_obj.hoofdactiviteit_omschrijving,
            "activiteit_overig": api_obj.activiteit_overig,
            "rechtsvorm": api_obj.rechtsvorm,
            "rechtsvorm_uitgebreid": api_obj.rechtsvorm_uitgebreid,
            "eerste_handelsnaam": api_obj.eerste_handelsnaam,
            "totaal_werkzame_personen": api_obj.totaal_werkzame_personen,
            "websites": api_obj.websites,
            "registratie_datum_aanvang": parse_kvk_datum(api_obj.registratie_datum_aanvang),
            "registratie_datum_einde": parse_kvk_datum(api_obj.registratie_datum_einde),
        }
        return BasisProfielORM(**columns)
kvk_connect/db/init.py ADDED
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import TYPE_CHECKING
5
+
6
+ from sqlalchemy import inspect
7
+
8
+ if TYPE_CHECKING:
9
+ from sqlalchemy.engine import Engine
10
+ from sqlalchemy.orm import DeclarativeBase
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def ensure_database_initialized(engine: Engine, base: type[DeclarativeBase]) -> None:
    """Create any missing tables declared on *base* and log how many of them exist.

    Calls ``base.metadata.create_all(engine)`` and then counts the tables the
    inspector reports that are also declared in ``base.metadata``.
    """
    logger.info("Ensuring tables exist for %s...", base.__name__)

    metadata = base.metadata
    metadata.create_all(engine)

    existing_tables = inspect(engine).get_table_names()
    table_count = sum(1 for table_name in existing_tables if table_name in metadata.tables)
    logger.info("Database initialized: %s table(s) ready", table_count)
@@ -0,0 +1,41 @@
1
+ from sqlalchemy import select
2
+ from sqlalchemy.engine import Engine
3
+ from sqlalchemy.orm import Session
4
+
5
+ from kvk_connect.models.orm.basisprofiel_orm import BasisProfielORM
6
+ from kvk_connect.models.orm.kvkvestigingen_orm import VestigingenORM
7
+
8
+
9
class KvKVestigingenReader:
    """Read-only queries relating the basisprofielen table to the kvkvestigingen table."""

    def __init__(self, engine: Engine):
        self.engine = engine

    def get_missing_kvk_nummers(self) -> list[str]:
        """Return distinct KVK numbers present in basisprofielen but not yet in kvkvestigingen."""
        missing_query = (
            select(BasisProfielORM.kvk_nummer)
            .select_from(BasisProfielORM)
            .outerjoin(VestigingenORM, BasisProfielORM.kvk_nummer == VestigingenORM.kvk_nummer)
            .where(VestigingenORM.kvk_nummer.is_(None))
            .distinct()
        )
        with Session(self.engine) as session:
            return list(session.execute(missing_query).scalars().all())

    def get_outdated_vestigingen(self) -> list[str]:
        """Return distinct KVK numbers whose vestigingen are outdated.

        Outdated means the basisprofiel was updated more recently than the
        vestigingen row it is joined with.
        """
        profile_is_newer = BasisProfielORM.last_updated > VestigingenORM.last_updated
        outdated_query = (
            select(BasisProfielORM.kvk_nummer)
            .join(VestigingenORM, BasisProfielORM.kvk_nummer == VestigingenORM.kvk_nummer)
            .where(profile_is_newer)
            .distinct()
        )
        with Session(self.engine) as session:
            return list(session.execute(outdated_query).scalars().all())
@@ -0,0 +1,73 @@
1
+ import logging
2
+ from datetime import UTC, datetime
3
+
4
+ from sqlalchemy.orm import Session, sessionmaker
5
+
6
+ from kvk_connect.models.domain import KvKVestigingsNummersDomain
7
+ from kvk_connect.models.orm.kvkvestigingen_orm import VestigingenORM
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class KvKVestigingenWriter:
    """Context-managed writer that merges (kvk_nummer, vestigingsnummer) rows into the database.

    The session is opened on ``__enter__`` and committed (or rolled back when the
    block raised) and closed on ``__exit__``.
    """

    # Low batch size by default to avoid locking issues
    def __init__(self, engine, batch_size: int = 1):
        # The message previously named BasisProfielWriter (copy-paste slip).
        logger.info("Initializing KvKVestigingenWriter, met batch size: %d", batch_size)
        self.Session = sessionmaker(bind=engine)
        self.batch_size = batch_size
        self._session: Session | None = None
        # Number of rows merged since the last batch commit.
        self._count = 0

    def __enter__(self):
        """Start a new database session."""
        self._session = self.Session()
        return self

    def __exit__(self, exc_type, exc, tb):
        """Commit or roll back the session, then close it."""
        try:
            if exc is None:
                self.flush()
            else:
                if self._session:
                    self._session.rollback()
        finally:
            if self._session:
                self._session.close()
            self._session = None

    def flush(self) -> None:  # noqa: D102
        if self._session:
            self._session.commit()

    def add(self, domain_kvkvestigingen: KvKVestigingsNummersDomain) -> None:
        """Write every vestigingsnummer of the domain object to the database.

        One row is merged per vestigingsnummer, each carrying the object's
        kvk_nummer and a shared ``last_updated`` timestamp. When the object has no
        vestigingsnummers, a single row holding
        ``VestigingenORM.SENTINEL_VESTIGINGSNUMMER`` is written instead.

        Args:
            domain_kvkvestigingen: Domain object with a kvk_nummer and a list of vestigingsnummers.

        Raises:
            RuntimeError: If called outside the context manager (no open session).
        """
        if not self._session:
            raise RuntimeError("Session not initialized. Use context manager.")

        timestamp = datetime.now(UTC)
        # Fall back to the sentinel value when there are no vestigingen.
        vestigingsnummers = domain_kvkvestigingen.vestigingsnummers or [VestigingenORM.SENTINEL_VESTIGINGSNUMMER]

        # Merge all vestigingen of this KvK number.
        for vestigingsnummer in vestigingsnummers:
            orm_obj = VestigingenORM(
                kvk_nummer=domain_kvkvestigingen.kvk_nummer, vestigingsnummer=vestigingsnummer, last_updated=timestamp
            )
            self._session.merge(orm_obj)

        # Count every row of this KvK number towards the current batch.
        self._count += len(vestigingsnummers)

        # Commit once the batch size has been reached, then start a new batch.
        if self._count >= self.batch_size:
            self._session.commit()
            self._count = 0
@@ -0,0 +1,23 @@
1
+ from datetime import datetime
2
+
3
+ from sqlalchemy import func, select
4
+ from sqlalchemy.orm import sessionmaker
5
+
6
+ from kvk_connect.models.orm.signaal_orm import SignaalORM
7
+
8
+
9
class SignaalReader:
    """Read-only queries on the timestamps of stored signalen."""

    def __init__(self, engine):
        self.Session = sessionmaker(bind=engine)

    def _timestamp_aggregate(self, aggregate) -> datetime | None:
        """Run ``aggregate(SignaalORM.timestamp)`` and return its scalar result."""
        query = select(aggregate(SignaalORM.timestamp))
        with self.Session() as session:
            return session.execute(query).scalar()

    def get_last_timestamp(self) -> datetime | None:
        """Return the latest stored signaal timestamp, or None if the table is empty."""
        return self._timestamp_aggregate(func.max)

    def get_first_timestamp(self) -> datetime | None:
        """Return the earliest stored signaal timestamp, or None if the table is empty."""
        return self._timestamp_aggregate(func.min)
@@ -0,0 +1,73 @@
1
+ # ruff: noqa: D102
2
+ import logging
3
+
4
+ from sqlalchemy.orm import Session, sessionmaker
5
+
6
+ from kvk_connect.models.api.mutatiesignalen_api import MutatieSignaal
7
+ from kvk_connect.models.orm.signaal_orm import SignaalORM
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class SignaalWriter:
    """Context-managed writer that stores mutatie signalen in the database.

    In upsert mode (the default) every signaal is merged individually and a commit
    happens every ``batch_size`` rows. Otherwise signalen are buffered and written
    with ``bulk_save_objects`` once the buffer holds ``batch_size`` items.
    """

    def __init__(self, engine, batch_size: int = 10, upsert: bool = True):
        # The message previously named BasisProfielWriter (copy-paste slip).
        logger.info("Initializing SignaalWriter, met batch size: %d", batch_size)
        self.Session = sessionmaker(bind=engine)
        self.batch_size = batch_size
        self.upsert = upsert
        self._session: Session | None = None
        # Pending rows; only used when upsert is False.
        self._buffer: list[SignaalORM] = []
        # Number of rows merged so far; only used when upsert is True.
        self._count = 0

    def __enter__(self):
        """Create a new session on entry of context manager."""
        self._session = self.Session()
        return self

    def __exit__(self, exc_type, exc, tb):
        """Handle commit/rollback on exit of context manager; the session is always closed."""
        try:
            if exc is None:
                self.flush()
            else:
                if self._session:
                    self._session.rollback()
        finally:
            if self._session:
                self._session.close()
            self._session = None

    def add(self, api_signaal: MutatieSignaal) -> None:
        """Queue or merge one signaal, committing whenever a batch is complete.

        Raises:
            RuntimeError: If called outside the context manager (no open session).
        """
        if not self._session:
            raise RuntimeError("Session not initialized. Use context manager.")

        orm_obj = self._to_orm(api_signaal)
        if self.upsert:
            self._session.merge(orm_obj)  # upsert per row
            self._count += 1
            if self._count % self.batch_size == 0:
                self._session.commit()
        else:
            self._buffer.append(orm_obj)
            if len(self._buffer) >= self.batch_size:
                self._session.bulk_save_objects(self._buffer)
                self._session.commit()
                self._buffer.clear()

    def flush(self) -> None:
        """Write any buffered rows and commit; does nothing when no session is open."""
        if not self._session:
            return
        if not self.upsert and self._buffer:
            self._session.bulk_save_objects(self._buffer)
            self._buffer.clear()
        self._session.commit()

    @staticmethod
    def _to_orm(s: MutatieSignaal) -> SignaalORM:
        """Copy the API signaal fields onto a new ORM row."""
        return SignaalORM(
            id=s.id,
            timestamp=s.timestamp,
            kvknummer=s.kvknummer,
            signaal_type=s.signaal_type,
            vestigingsnummer=s.vestigingsnummer,
        )