docid 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docid-0.1.3.dist-info/METADATA +566 -0
- docid-0.1.3.dist-info/RECORD +14 -0
- docid-0.1.3.dist-info/WHEEL +5 -0
- docid-0.1.3.dist-info/entry_points.txt +3 -0
- docid-0.1.3.dist-info/top_level.txt +1 -0
- exef_docid/__init__.py +129 -0
- exef_docid/cli.py +340 -0
- exef_docid/cli_universal.py +517 -0
- exef_docid/document_id.py +720 -0
- exef_docid/document_id_universal.py +370 -0
- exef_docid/extractors/__init__.py +21 -0
- exef_docid/extractors/base.py +508 -0
- exef_docid/ocr_processor.py +579 -0
- exef_docid/pipeline.py +431 -0
|
@@ -0,0 +1,720 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Deterministyczny generator identyfikatorów dokumentów.
|
|
3
|
+
|
|
4
|
+
Generuje zawsze ten sam identyfikator dla tego samego dokumentu,
|
|
5
|
+
niezależnie od formatu źródłowego (skan, PDF, KSeF XML).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
import re
|
|
10
|
+
import uuid
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from datetime import date, datetime
|
|
13
|
+
from decimal import ROUND_HALF_UP, Decimal
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from typing import Optional, Union
|
|
16
|
+
|
|
17
|
+
# Namespace UUID dla EXEF (RFC 4122 UUID v5)
|
|
18
|
+
EXEF_NAMESPACE = uuid.UUID('a1b2c3d4-e5f6-7890-abcd-ef1234567890')
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DocumentType(Enum):
    """Document kinds supported by the system.

    Each member's value is the short code that appears as the middle
    segment of a generated document ID.
    """

    INVOICE = "FV"           # VAT invoice
    RECEIPT = "PAR"          # Fiscal receipt
    CONTRACT = "UMO"         # Contract
    BANK_STATEMENT = "WB"    # Bank statement
    CORRECTION = "KOR"       # Correcting invoice
    PROFORMA = "PRO"         # Pro-forma invoice
    ADVANCE = "ZAL"          # Advance-payment invoice
    BILL = "RAC"             # Bill (without VAT)
    CASH_IN = "KP"           # Cash received voucher ("Kasa Przyjmie")
    CASH_OUT = "KW"          # Cash paid-out voucher ("Kasa Wyda")
    DEBIT_NOTE = "NK"        # Accounting note
    DELIVERY_NOTE = "WZ"     # Goods issued note ("Wydanie zewnetrzne")
    RECEIPT_NOTE = "PZ"      # Goods received note ("Przyjecie zewnetrzne")
    EXPENSE_REPORT = "DEL"   # Business trip / expense settlement
    OTHER = "DOC"            # Any other document
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class CanonicalData:
    """Canonical document data from which an identifier is generated."""

    # Kind of document the canonical string describes.
    document_type: DocumentType
    # Normalized field values serialized into a single string.
    canonical_string: str

    # Optional original (un-normalized) source fields, kept for debugging.
    raw_fields: Optional[dict] = None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class NIPValidator:
    """Validator and normalizer for NIP tax identification numbers."""

    @staticmethod
    def normalize(nip: str) -> str:
        """
        Reduce a NIP to its bare form: no separators, no country prefix.

        Whitespace, hyphens and dots are removed first and only then is a
        leading two-letter country prefix dropped, so the prefix is also
        recognized when the input starts with stray whitespace or a
        separator (e.g. OCR output such as ``" PL 521-301-72-28"``).

        Args:
            nip: Raw NIP text; an empty or falsy value yields ``""``.

        Returns:
            The upper-cased text without separators and country prefix.
            The result is NOT guaranteed to be 10 digits - use
            :meth:`validate` for that.

        >>> NIPValidator.normalize("521-301-72-28")
        '5213017228'
        >>> NIPValidator.normalize("PL 521 301 72 28")
        '5213017228'
        >>> NIPValidator.normalize("  pl 521-301-72-28 ")
        '5213017228'
        """
        if not nip:
            return ""
        # Strip separators before looking for the prefix; otherwise leading
        # whitespace would hide the prefix from the '^' anchor.
        compact = re.sub(r'[\s\-\.]', '', nip.upper())
        return re.sub(r'^[A-Z]{2}', '', compact)

    @staticmethod
    def validate(nip: str) -> bool:
        """
        Check a NIP against its weighted mod-11 control digit.

        The input is normalized first. Only exactly ten ASCII digits are
        accepted; ``str.isdigit()`` is deliberately not used because it
        accepts characters such as superscript digits that ``int()``
        cannot convert.

        Args:
            nip: NIP in any form accepted by :meth:`normalize`.

        Returns:
            ``True`` when the weighted sum of the first nine digits modulo
            11 equals the tenth digit, ``False`` otherwise.

        >>> NIPValidator.validate("5213017228")
        True
        """
        digits = NIPValidator.normalize(nip)
        if re.fullmatch(r'[0-9]{10}', digits) is None:
            return False

        weights = (6, 5, 7, 2, 3, 4, 5, 6, 7)
        # zip() stops after the nine weights, leaving the control digit out.
        checksum = sum(int(digit) * weight for digit, weight in zip(digits, weights))
        return checksum % 11 == int(digits[9])
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class AmountNormalizer:
    """Normalizer for monetary amounts."""

    @staticmethod
    def normalize(amount: Union[str, float, Decimal]) -> str:
        """
        Normalize an amount to a string with exactly two decimal places.

        Numbers (``int``/``float``) are converted through ``str()`` so the
        shortest float representation is used; ``Decimal`` values are used
        directly. Strings are upper-cased, the characters ``Z``, ``Ł``,
        ``P``, ``L``, ``N`` and all whitespace are removed, commas become
        dots, and every dot except the last one is treated as a thousands
        separator. Rounding is ROUND_HALF_UP.

        Args:
            amount: Amount as text, number or ``Decimal``.

        Returns:
            The amount formatted as e.g. ``'1230.50'``. Anything that cannot
            be turned into a finite number (unparseable text, NaN, infinity,
            a value too large to quantize) yields ``"0.00"`` - the same
            fallback for every input type.

        >>> AmountNormalizer.normalize("1 230,50 zł")
        '1230.50'
        >>> AmountNormalizer.normalize(1230.5)
        '1230.50'
        """
        if isinstance(amount, Decimal):
            candidate = amount
        elif isinstance(amount, (int, float)):
            candidate = str(amount)
        else:
            # Drop the currency marker letters and whitespace.
            text = re.sub(r'[ZŁPLN\s]', '', str(amount).upper())
            # Decimal comma -> decimal point.
            text = text.replace(',', '.')
            # Only the last dot is the decimal point; earlier dots are
            # thousands separators and are removed.
            whole, dot, fraction = text.rpartition('.')
            candidate = f"{whole.replace('.', '')}.{fraction}" if dot else text

        try:
            value = Decimal(candidate)
            if not value.is_finite():
                # NaN / infinity cannot be expressed as a money amount.
                return "0.00"
            return str(value.quantize(Decimal('0.01'), rounding=ROUND_HALF_UP))
        except (ArithmeticError, ValueError):
            # decimal.InvalidOperation derives from ArithmeticError; it is
            # raised both for unparseable text and for values too large to
            # quantize at the current precision.
            return "0.00"
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class DateNormalizer:
    """Normalizer for dates."""

    # Textual layouts understood by normalize(); they are tried in order.
    FORMATS = [
        '%Y-%m-%d',  # 2025-01-15
        '%d-%m-%Y',  # 15-01-2025
        '%d.%m.%Y',  # 15.01.2025
        '%d/%m/%Y',  # 15/01/2025
        '%Y/%m/%d',  # 2025/01/15
        '%d %m %Y',  # 15 01 2025
        '%Y%m%d',    # 20250115
    ]

    @staticmethod
    def normalize(date_str: Union[str, date, datetime]) -> str:
        """
        Convert a date to ISO ``YYYY-MM-DD`` text.

        ``date``/``datetime`` objects are formatted directly. Any other
        value is converted to text, stripped, and parsed against FORMATS.
        If none matches, the first three digit groups found in the text are
        used when the first or the third has four digits (taken as the
        year); the groups are not checked for being a real calendar date.
        When that also fails the stripped text is returned unchanged.

        >>> DateNormalizer.normalize("15.01.2025")
        '2025-01-15'
        >>> DateNormalizer.normalize("2025-01-15")
        '2025-01-15'
        """
        # datetime is a subclass of date, so one check covers both.
        if isinstance(date_str, date):
            return date_str.strftime('%Y-%m-%d')

        text = str(date_str).strip()

        for layout in DateNormalizer.FORMATS:
            try:
                return datetime.strptime(text, layout).strftime('%Y-%m-%d')
            except ValueError:
                pass

        # Fallback: guess the layout from the digit groups alone.
        groups = re.findall(r'\d+', text)
        if len(groups) >= 3:
            first, second, third = groups[:3]
            if len(first) == 4:  # year comes first
                return f"{first}-{second.zfill(2)}-{third.zfill(2)}"
            if len(third) == 4:  # year comes last
                return f"{third}-{second.zfill(2)}-{first.zfill(2)}"

        return text  # could not be parsed - hand back the stripped input
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class InvoiceNumberNormalizer:
    """Normalizer for invoice numbers."""

    @staticmethod
    def normalize(number: str) -> str:
        """
        Bring an invoice number to a canonical upper-case, slash-separated form.

        Every run of whitespace, hyphens, underscores and slashes becomes a
        single ``/``; slashes at either end are dropped. An empty or falsy
        input yields ``""``.

        >>> InvoiceNumberNormalizer.normalize("fv/2025/00142")
        'FV/2025/00142'
        >>> InvoiceNumberNormalizer.normalize("FV 2025 142")
        'FV/2025/142'
        """
        if not number:
            return ""

        upper = number.upper().strip()
        # One pass: any mix of separator characters collapses to one slash.
        slashed = re.sub(r'[\s\-_/]+', '/', upper)
        return slashed.strip('/')
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class DocumentIDGenerator:
    """
    Generator of deterministic document identifiers.

    An ID has the form ``{PREFIX}-{TYPE}-{HASH16}`` where ``HASH16`` is the
    first 16 upper-case hex digits of the SHA-256 of a canonical string
    built from normalized business fields joined with ``|``. The same
    normalized fields therefore always give the same ID.

    NOTE(review): optional fields are appended positionally without a
    placeholder when absent, so e.g. a receipt with only ``receipt_number="X"``
    and one with only ``cash_register_number="X"`` share a canonical string.
    Changing that would change already-issued IDs, so it is left as is.
    """

    def __init__(self, prefix: str = "EXEF"):
        """
        Args:
            prefix: First segment of every generated ID (default ``EXEF``).
        """
        self.prefix = prefix

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _hash16(canonical_string: str) -> str:
        """Return the first 16 upper-case hex digits of the SHA-256 of the UTF-8 text."""
        return hashlib.sha256(canonical_string.encode('utf-8')).hexdigest()[:16].upper()

    @staticmethod
    def _name_digest(name: str) -> str:
        """Return 8 hex digits of the MD5 of the stripped, upper-cased name.

        Used so a person's name is not placed in the canonical string as
        plain text. MD5 is kept because replacing it would change IDs.
        """
        return hashlib.md5(name.strip().upper().encode()).hexdigest()[:8]

    def _build_id(self, document_type: DocumentType, parts: list, raw_fields: dict) -> str:
        """Join canonical parts with ``|``, wrap them in CanonicalData and produce the ID."""
        canonical = CanonicalData(
            document_type=document_type,
            canonical_string="|".join(parts),
            raw_fields=raw_fields,
        )
        return self._generate_id(canonical)

    # ------------------------------------------------------------------
    # Public generators
    # ------------------------------------------------------------------

    def generate_invoice_id(
        self,
        seller_nip: str,
        invoice_number: str,
        issue_date: Union[str, date],
        gross_amount: Union[str, float, Decimal],
        buyer_nip: Optional[str] = None
    ) -> str:
        """
        Generate the ID of a VAT invoice.

        Canonical fields: seller NIP | invoice number | issue date | gross amount.
        ``buyer_nip`` is only recorded in ``raw_fields``; it does not
        influence the ID.

        Resulting format (hash digits below are illustrative only):
        ``EXEF-FV-A7B3C9D2E1F04856``
        """
        parts = [
            NIPValidator.normalize(seller_nip),
            InvoiceNumberNormalizer.normalize(invoice_number),
            DateNormalizer.normalize(issue_date),
            AmountNormalizer.normalize(gross_amount),
        ]
        return self._build_id(DocumentType.INVOICE, parts, {
            'seller_nip': seller_nip,
            'invoice_number': invoice_number,
            'issue_date': issue_date,
            'gross_amount': gross_amount,
            'buyer_nip': buyer_nip,
        })

    def generate_receipt_id(
        self,
        seller_nip: str,
        receipt_date: Union[str, date],
        gross_amount: Union[str, float, Decimal],
        receipt_number: Optional[str] = None,
        cash_register_number: Optional[str] = None,
    ) -> str:
        """
        Generate the ID of a fiscal receipt.

        Canonical fields: seller NIP | date | amount, followed by the
        receipt number and then the cash register number when each is
        given (both stripped and upper-cased).

        Resulting format: ``EXEF-PAR-<16 hex digits>``
        """
        parts = [
            NIPValidator.normalize(seller_nip),
            DateNormalizer.normalize(receipt_date),
            AmountNormalizer.normalize(gross_amount),
        ]
        if receipt_number:
            parts.append(receipt_number.strip().upper())
        if cash_register_number:
            parts.append(cash_register_number.strip().upper())

        return self._build_id(DocumentType.RECEIPT, parts, {
            'seller_nip': seller_nip,
            'receipt_date': receipt_date,
            'gross_amount': gross_amount,
            'receipt_number': receipt_number,
            'cash_register_number': cash_register_number,
        })

    def generate_contract_id(
        self,
        party1_nip: str,
        party2_nip: str,
        contract_date: Union[str, date],
        contract_number: Optional[str] = None,
        contract_type: Optional[str] = None,
    ) -> str:
        """
        Generate the ID of a contract.

        Canonical fields: the two normalized NIPs in sorted order | date,
        followed by contract number and contract type when given. Sorting
        the NIPs makes the ID independent of the order of the parties.
        """
        first_nip, second_nip = sorted([
            NIPValidator.normalize(party1_nip),
            NIPValidator.normalize(party2_nip),
        ])
        parts = [first_nip, second_nip, DateNormalizer.normalize(contract_date)]
        if contract_number:
            parts.append(contract_number.strip().upper())
        if contract_type:
            parts.append(contract_type.strip().upper())

        return self._build_id(DocumentType.CONTRACT, parts, {
            'party1_nip': party1_nip,
            'party2_nip': party2_nip,
            'contract_date': contract_date,
            'contract_number': contract_number,
            'contract_type': contract_type,
        })

    def generate_bank_statement_id(
        self,
        account_number: str,
        statement_date: Union[str, date],
        statement_number: Optional[str] = None,
    ) -> str:
        """
        Generate the ID of a bank statement.

        Canonical fields: account number | date, followed by the statement
        number (stripped, case preserved) when given.

        Only whitespace and hyphens are removed from the account number;
        letters such as an IBAN country prefix are kept and are
        case-sensitive.
        NOTE(review): the original comment said "digits only" - confirm
        whether a stricter normalization was intended.
        """
        account = re.sub(r'[\s\-]', '', account_number)

        parts = [account, DateNormalizer.normalize(statement_date)]
        if statement_number:
            parts.append(statement_number.strip())

        return self._build_id(DocumentType.BANK_STATEMENT, parts, {
            'account_number': account_number,
            'statement_date': statement_date,
            'statement_number': statement_number,
        })

    def generate_correction_id(
        self,
        seller_nip: str,
        correction_number: str,
        issue_date: Union[str, date],
        original_invoice_number: str,
        gross_amount: Union[str, float, Decimal],
    ) -> str:
        """
        Generate the ID of a correcting invoice.

        Canonical fields: NIP | correction number | date | original
        invoice number | amount.
        """
        parts = [
            NIPValidator.normalize(seller_nip),
            InvoiceNumberNormalizer.normalize(correction_number),
            DateNormalizer.normalize(issue_date),
            InvoiceNumberNormalizer.normalize(original_invoice_number),
            AmountNormalizer.normalize(gross_amount),
        ]
        return self._build_id(DocumentType.CORRECTION, parts, {
            'seller_nip': seller_nip,
            'correction_number': correction_number,
            'issue_date': issue_date,
            'original_invoice_number': original_invoice_number,
            'gross_amount': gross_amount,
        })

    def generate_cash_receipt_id(
        self,
        document_number: str,
        document_date: Union[str, date],
        amount: Union[str, float, Decimal],
        issuer_nip: Optional[str] = None,
        payer_name: Optional[str] = None,
    ) -> str:
        """
        Generate the ID of a KP document (cash received voucher).

        Canonical fields: document number | date | amount, followed by the
        issuer NIP and a short digest of the payer name when given.
        """
        parts = [
            document_number.strip().upper(),
            DateNormalizer.normalize(document_date),
            AmountNormalizer.normalize(amount),
        ]
        if issuer_nip:
            parts.append(NIPValidator.normalize(issuer_nip))
        if payer_name:
            # The name enters the canonical string only as a digest.
            parts.append(self._name_digest(payer_name))

        return self._build_id(DocumentType.CASH_IN, parts, {
            'document_number': document_number,
            'document_date': document_date,
            'amount': amount,
            'issuer_nip': issuer_nip,
            'payer_name': payer_name,
        })

    def generate_cash_disbursement_id(
        self,
        document_number: str,
        document_date: Union[str, date],
        amount: Union[str, float, Decimal],
        issuer_nip: Optional[str] = None,
        recipient_name: Optional[str] = None,
    ) -> str:
        """
        Generate the ID of a KW document (cash paid-out voucher).

        Canonical fields: document number | date | amount, followed by the
        issuer NIP and a short digest of the recipient name when given.
        """
        parts = [
            document_number.strip().upper(),
            DateNormalizer.normalize(document_date),
            AmountNormalizer.normalize(amount),
        ]
        if issuer_nip:
            parts.append(NIPValidator.normalize(issuer_nip))
        if recipient_name:
            parts.append(self._name_digest(recipient_name))

        return self._build_id(DocumentType.CASH_OUT, parts, {
            'document_number': document_number,
            'document_date': document_date,
            'amount': amount,
            'issuer_nip': issuer_nip,
            'recipient_name': recipient_name,
        })

    def generate_bill_id(
        self,
        issuer_nip: str,
        bill_number: str,
        issue_date: Union[str, date],
        gross_amount: Union[str, float, Decimal],
    ) -> str:
        """
        Generate the ID of a bill (document without VAT).

        Canonical fields: issuer NIP | number | date | amount - the same
        fields as an invoice, but with a different document type code.
        """
        parts = [
            NIPValidator.normalize(issuer_nip),
            InvoiceNumberNormalizer.normalize(bill_number),
            DateNormalizer.normalize(issue_date),
            AmountNormalizer.normalize(gross_amount),
        ]
        return self._build_id(DocumentType.BILL, parts, {
            'issuer_nip': issuer_nip,
            'bill_number': bill_number,
            'issue_date': issue_date,
            'gross_amount': gross_amount,
        })

    def generate_debit_note_id(
        self,
        issuer_nip: str,
        note_number: str,
        issue_date: Union[str, date],
        amount: Union[str, float, Decimal],
        recipient_nip: Optional[str] = None,
    ) -> str:
        """
        Generate the ID of an accounting note.

        Canonical fields: issuer NIP | note number | date | amount,
        followed by the recipient NIP when given.
        """
        parts = [
            NIPValidator.normalize(issuer_nip),
            note_number.strip().upper(),
            DateNormalizer.normalize(issue_date),
            AmountNormalizer.normalize(amount),
        ]
        if recipient_nip:
            parts.append(NIPValidator.normalize(recipient_nip))

        return self._build_id(DocumentType.DEBIT_NOTE, parts, {
            'issuer_nip': issuer_nip,
            'note_number': note_number,
            'issue_date': issue_date,
            'amount': amount,
            'recipient_nip': recipient_nip,
        })

    def generate_delivery_note_id(
        self,
        issuer_nip: str,
        document_number: str,
        issue_date: Union[str, date],
        recipient_nip: Optional[str] = None,
    ) -> str:
        """
        Generate the ID of a WZ document (goods issued note).

        Canonical fields: issuer NIP | document number | date, followed by
        the recipient NIP when given.
        """
        parts = [
            NIPValidator.normalize(issuer_nip),
            document_number.strip().upper(),
            DateNormalizer.normalize(issue_date),
        ]
        if recipient_nip:
            parts.append(NIPValidator.normalize(recipient_nip))

        return self._build_id(DocumentType.DELIVERY_NOTE, parts, {
            'issuer_nip': issuer_nip,
            'document_number': document_number,
            'issue_date': issue_date,
            'recipient_nip': recipient_nip,
        })

    def generate_expense_report_id(
        self,
        employee_id: str,
        report_date: Union[str, date],
        total_amount: Union[str, float, Decimal],
        report_number: Optional[str] = None,
        company_nip: Optional[str] = None,
    ) -> str:
        """
        Generate the ID of a business trip / expense settlement.

        Canonical fields: employee ID | date | amount, followed by the
        report number and the company NIP when given.
        """
        parts = [
            employee_id.strip().upper(),
            DateNormalizer.normalize(report_date),
            AmountNormalizer.normalize(total_amount),
        ]
        if report_number:
            parts.append(report_number.strip().upper())
        if company_nip:
            parts.append(NIPValidator.normalize(company_nip))

        return self._build_id(DocumentType.EXPENSE_REPORT, parts, {
            'employee_id': employee_id,
            'report_date': report_date,
            'total_amount': total_amount,
            'report_number': report_number,
            'company_nip': company_nip,
        })

    def generate_generic_id(
        self,
        document_type: DocumentType,
        content_hash: str,
        document_date: Optional[Union[str, date]] = None,
        issuer_nip: Optional[str] = None,
    ) -> str:
        """
        Generate the ID of a document identified by a content hash.

        Canonical fields: the first 64 characters of ``content_hash``,
        followed by the date and the issuer NIP when given. The caller
        supplies the hash (e.g. computed from OCR text).
        """
        parts = [content_hash[:64]]  # at most 64 characters of the hash
        if document_date:
            parts.append(DateNormalizer.normalize(document_date))
        if issuer_nip:
            parts.append(NIPValidator.normalize(issuer_nip))

        return self._build_id(document_type, parts, {
            'content_hash': content_hash,
            'document_date': document_date,
            'issuer_nip': issuer_nip,
        })

    def _generate_id(self, canonical: CanonicalData) -> str:
        """
        Build the final identifier from canonical data.

        Format: ``{PREFIX}-{TYPE}-{HASH16}``, e.g. ``EXEF-FV-A7B3C9D2E1F04856``
        (hash digits illustrative).
        """
        digest = self._hash16(canonical.canonical_string)
        return f"{self.prefix}-{canonical.document_type.value}-{digest}"

    def verify_id(self, document_id: str, canonical_string: str) -> bool:
        """
        Check whether an ID's hash segment matches a canonical string.

        Only the last segment is compared; the prefix and type code are not
        checked. The ID is split from the right so that a prefix containing
        hyphens does not break verification.

        Args:
            document_id: ID in the ``{PREFIX}-{TYPE}-{HASH16}`` format.
            canonical_string: The ``|``-joined canonical fields.

        Returns:
            ``True`` when the hash segment equals the expected hash,
            ``False`` when it differs or the ID has fewer than three segments.
        """
        segments = document_id.rsplit('-', 2)
        if len(segments) != 3:
            return False
        return segments[2] == self._hash16(canonical_string)

    @staticmethod
    def parse_id(document_id: str) -> dict:
        """
        Split a document ID into its components.

        The ID is split from the right, so the prefix may itself contain
        hyphens.

        Args:
            document_id: ID in the ``{PREFIX}-{TYPE}-{HASH16}`` format.

        Returns:
            A dict with keys ``prefix``, ``type``, ``hash`` and
            ``document_type``; the last is the matching ``DocumentType``
            member, or ``None`` when the type code is unknown. For
            ``"EXEF-FV-A7B3C9D2E1F04856"`` the result holds prefix
            ``'EXEF'``, type ``'FV'``, hash ``'A7B3C9D2E1F04856'`` and
            ``DocumentType.INVOICE``.

        Raises:
            ValueError: If the ID has fewer than three segments.
        """
        segments = document_id.rsplit('-', 2)
        if len(segments) != 3:
            raise ValueError(f"Invalid document ID format: {document_id}")

        prefix, type_code, hash_value = segments

        # Map the type code back to its enum member, if there is one.
        doc_type = next((dt for dt in DocumentType if dt.value == type_code), None)

        return {
            'prefix': prefix,
            'type': type_code,
            'hash': hash_value,
            'document_type': doc_type,
        }
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
# Singleton dla wygody
|
|
708
|
+
_default_generator = DocumentIDGenerator()
|
|
709
|
+
|
|
710
|
+
def generate_invoice_id(*args, **kwargs) -> str:
    """Shortcut: forward all arguments to the module-level default generator's generate_invoice_id()."""
    delegate = _default_generator.generate_invoice_id
    return delegate(*args, **kwargs)
|
|
713
|
+
|
|
714
|
+
def generate_receipt_id(*args, **kwargs) -> str:
    """Shortcut: forward all arguments to the module-level default generator's generate_receipt_id()."""
    delegate = _default_generator.generate_receipt_id
    return delegate(*args, **kwargs)
|
|
717
|
+
|
|
718
|
+
def generate_contract_id(*args, **kwargs) -> str:
    """Shortcut: forward all arguments to the module-level default generator's generate_contract_id()."""
    delegate = _default_generator.generate_contract_id
    return delegate(*args, **kwargs)
|