docid 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,720 @@
1
+ """
2
+ Deterministyczny generator identyfikatorów dokumentów.
3
+
4
+ Generuje zawsze ten sam identyfikator dla tego samego dokumentu,
5
+ niezależnie od formatu źródłowego (skan, PDF, KSeF XML).
6
+ """
7
+
8
+ import hashlib
9
+ import re
10
+ import uuid
11
+ from dataclasses import dataclass
12
+ from datetime import date, datetime
13
+ from decimal import ROUND_HALF_UP, Decimal
14
+ from enum import Enum
15
+ from typing import Optional, Union
16
+
17
# Namespace UUID for EXEF (RFC 4122 UUID v5).
# NOTE(review): nothing else in this module references this constant; confirm
# whether code outside this module relies on it before changing or removing it.
EXEF_NAMESPACE = uuid.UUID('a1b2c3d4-e5f6-7890-abcd-ef1234567890')
19
+
20
+
21
class DocumentType(Enum):
    """Document kinds supported by the system.

    Each member's value is the short code that identifies the kind of
    document.
    """

    # VAT invoice
    INVOICE = "FV"
    # Fiscal receipt
    RECEIPT = "PAR"
    # Contract
    CONTRACT = "UMO"
    # Bank statement
    BANK_STATEMENT = "WB"
    # Correcting invoice
    CORRECTION = "KOR"
    # Pro forma invoice
    PROFORMA = "PRO"
    # Advance-payment invoice
    ADVANCE = "ZAL"
    # Bill (without VAT)
    BILL = "RAC"
    # Cash-in voucher ("Kasa Przyjmie", proof of payment received)
    CASH_IN = "KP"
    # Cash-out voucher ("Kasa Wyda", proof of payment made)
    CASH_OUT = "KW"
    # Accounting note
    DEBIT_NOTE = "NK"
    # Goods issued note ("Wydanie zewnętrzne")
    DELIVERY_NOTE = "WZ"
    # Goods received note ("Przyjęcie zewnętrzne")
    RECEIPT_NOTE = "PZ"
    # Business trip / expense settlement
    EXPENSE_REPORT = "DEL"
    # Any other document
    OTHER = "DOC"
38
+
39
+
40
@dataclass
class CanonicalData:
    """Canonical description of a document, used as input for ID generation."""

    # Kind of document being described.
    document_type: DocumentType
    # Canonical text representation of the document's identifying data.
    canonical_string: str
    # Optional source values, kept only to help with debugging.
    raw_fields: Optional[dict] = None
48
+
49
+
50
class NIPValidator:
    """Normalizes and validates NIP numbers (10-digit tax identifiers)."""

    # Weights applied to the first nine digits when computing the check digit.
    _WEIGHTS = (6, 5, 7, 2, 3, 4, 5, 6, 7)

    @staticmethod
    def normalize(nip: str) -> str:
        """Return ``nip`` without a country prefix and without separators.

        Surrounding whitespace is removed *before* the two-letter country
        prefix is stripped, so ``" PL521..."`` and ``"PL521..."`` normalize to
        the same value.

        Args:
            nip: Raw NIP text; may contain a country prefix, spaces, hyphens
                and dots. A falsy value yields ``""``.

        Returns:
            The upper-cased NIP with prefix and separators removed. The result
            is not checked for validity; use :meth:`validate` for that.

        >>> NIPValidator.normalize("521-301-72-28")
        '5213017228'
        >>> NIPValidator.normalize("PL 521 301 72 28")
        '5213017228'
        >>> NIPValidator.normalize("  pl5213017228 ")
        '5213017228'
        """
        if not nip:
            return ""
        # Strip first: the prefix pattern is anchored at the start of the text.
        cleaned = re.sub(r'^[A-Z]{2}', '', nip.strip().upper())
        return re.sub(r'[\s\-\.]', '', cleaned)

    @staticmethod
    def validate(nip: str) -> bool:
        """Check ``nip`` against the NIP check-digit algorithm.

        Args:
            nip: Raw or already normalized NIP.

        Returns:
            ``True`` when the normalized value consists of exactly ten ASCII
            digits and the weighted sum of the first nine, modulo 11, equals
            the tenth digit; ``False`` otherwise.

        >>> NIPValidator.validate("5213017228")
        True
        """
        normalized = NIPValidator.normalize(nip)
        # ASCII digits only: str.isdigit() also accepts characters such as
        # "²" that int() cannot convert, which would raise instead of
        # reporting an invalid number.
        if re.fullmatch(r'[0-9]{10}', normalized) is None:
            return False

        checksum = sum(
            int(digit) * weight
            for digit, weight in zip(normalized, NIPValidator._WEIGHTS)
        )
        # A remainder of 10 can never equal a single digit, so such numbers
        # are rejected by the comparison itself.
        return checksum % 11 == int(normalized[9])
85
+
86
+
87
class AmountNormalizer:
    """Normalizes monetary amounts to a canonical two-decimal string."""

    # Value returned when the input cannot be interpreted as a finite amount.
    _FALLBACK = "0.00"

    @staticmethod
    def normalize(amount: Union[str, float, Decimal]) -> str:
        """Return ``amount`` as a string with exactly two decimal places.

        Numbers and ``Decimal`` values are converted directly. Text is
        upper-cased, stripped of whitespace and of the letters used in
        "zł"/"PLN", and may use either a comma or a dot as the decimal
        separator; dots before the last separator are treated as thousands
        separators. Rounding is half-up.

        Args:
            amount: Amount as text, ``int``/``float`` or ``Decimal``.

        Returns:
            The normalized amount, e.g. ``'1230.50'``. Input that cannot be
            parsed, or that is not finite (NaN, infinity), yields ``'0.00'``.
            A result that rounds to zero is always ``'0.00'``, never
            ``'-0.00'``, so equal amounts have one canonical spelling.

        >>> AmountNormalizer.normalize("1 230,50 zł")
        '1230.50'
        >>> AmountNormalizer.normalize(1230.5)
        '1230.50'
        >>> AmountNormalizer.normalize("-0,001")
        '0.00'
        """
        try:
            value = AmountNormalizer._to_decimal(amount)
            if not value.is_finite():
                return AmountNormalizer._FALLBACK
            quantized = value.quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
        except ArithmeticError:
            # decimal.InvalidOperation derives from ArithmeticError; it is
            # raised for unparseable text and for values too large to
            # quantize within the current context precision.
            return AmountNormalizer._FALLBACK

        if quantized.is_zero():
            # Collapse "-0.00" into the single canonical zero.
            return AmountNormalizer._FALLBACK
        return str(quantized)

    @staticmethod
    def _to_decimal(amount: Union[str, float, Decimal]) -> Decimal:
        """Convert a supported input to ``Decimal`` (may raise ``InvalidOperation``)."""
        if isinstance(amount, Decimal):
            return amount
        if isinstance(amount, (int, float)):
            # Go through str() so 1230.5 becomes Decimal('1230.5') rather than
            # the exact binary expansion of the float.
            return Decimal(str(amount))

        text = str(amount).upper()
        # Remove currency letters and all whitespace.
        text = re.sub(r'[ZŁPLN\s]', '', text)
        # Use the dot as the only separator character.
        text = text.replace(',', '.')
        # Everything before the last dot is the integer part; dots inside it
        # were thousands separators.
        whole, separator, fraction = text.rpartition('.')
        if separator:
            text = f"{whole.replace('.', '')}.{fraction}"
        return Decimal(text)
123
+
124
+
125
class DateNormalizer:
    """Normalizes dates to ISO ``YYYY-MM-DD`` strings."""

    # Formats tried in order; the first one that parses the whole text wins.
    FORMATS = [
        '%Y-%m-%d',  # 2025-01-15
        '%d-%m-%Y',  # 15-01-2025
        '%d.%m.%Y',  # 15.01.2025
        '%d/%m/%Y',  # 15/01/2025
        '%Y/%m/%d',  # 2025/01/15
        '%d %m %Y',  # 15 01 2025
        '%Y%m%d',    # 20250115
    ]

    @staticmethod
    def normalize(date_str: Union[str, date, datetime]) -> str:
        """Return ``date_str`` as an ISO ``YYYY-MM-DD`` string.

        Args:
            date_str: A ``date``/``datetime`` object or date text in one of
                :attr:`FORMATS`. Other text is handled by a fallback that
                takes the first three digit groups and treats a four-digit
                first or third group as the year.

        Returns:
            The ISO date. When the text cannot be interpreted as a real
            calendar date, the stripped input is returned unchanged, so the
            fallback never produces an impossible date such as
            ``'2025-99-99'``.

        >>> DateNormalizer.normalize("15.01.2025")
        '2025-01-15'
        >>> DateNormalizer.normalize("2025-01-15")
        '2025-01-15'
        >>> DateNormalizer.normalize("99.99.2025")
        '99.99.2025'
        """
        # datetime is a subclass of date, so it must be checked first to drop
        # the time part. isoformat() always zero-pads the year.
        if isinstance(date_str, datetime):
            return date_str.date().isoformat()
        if isinstance(date_str, date):
            return date_str.isoformat()

        cleaned = str(date_str).strip()

        for fmt in DateNormalizer.FORMATS:
            try:
                return datetime.strptime(cleaned, fmt).date().isoformat()
            except ValueError:
                continue

        # Fallback: pick out the digit groups and guess the order from the
        # position of the four-digit group.
        digits = re.findall(r'\d+', cleaned)
        if len(digits) >= 3:
            if len(digits[0]) == 4:  # year first
                year, month, day = digits[0], digits[1], digits[2]
            elif len(digits[2]) == 4:  # year last
                year, month, day = digits[2], digits[1], digits[0]
            else:
                year = month = day = None

            if year is not None:
                try:
                    # Building a date object validates month and day ranges.
                    return date(int(year), int(month), int(day)).isoformat()
                except (ValueError, OverflowError):
                    pass

        return cleaned  # Could not be parsed: hand back the stripped input.
172
+
173
+
174
class InvoiceNumberNormalizer:
    """Normalizes invoice numbers to a single canonical spelling."""

    @staticmethod
    def normalize(number: str) -> str:
        """Return ``number`` upper-cased with ``/`` as the only separator.

        Runs of whitespace, hyphens and underscores become a single ``/``,
        repeated slashes are collapsed, and leading/trailing slashes are
        dropped. A falsy input yields ``""``.

        >>> InvoiceNumberNormalizer.normalize("fv/2025/00142")
        'FV/2025/00142'
        >>> InvoiceNumberNormalizer.normalize("FV 2025 142")
        'FV/2025/142'
        """
        if number:
            # Unify the alternative separators first, then squeeze the slashes.
            with_slashes = re.sub(r'[\s\-_]+', '/', number.upper().strip())
            return re.sub(r'/+', '/', with_slashes).strip('/')
        return ""
200
+
201
+
202
class DocumentIDGenerator:
    """Generator of deterministic document identifiers.

    The same normalized business data always yields the same ID, so the
    identifier does not depend on the source format of the document.

    IDs have the form ``{PREFIX}-{TYPE}-{HASH16}``, for example
    ``EXEF-FV-A7B3C9D2E1F04856`` (the hash shown is illustrative). ``TYPE`` is
    a ``DocumentType`` value and ``HASH16`` is the first 16 hex digits,
    upper-cased, of the SHA-256 of the canonical string. The canonical string
    is the list of normalized fields joined with ``|``.

    Note:
        Optional fields are appended to the canonical string only when they
        are provided, without placeholders for skipped ones. Two calls that
        differ only in *which* optional field carries a given value (for
        example a receipt number vs. a cash register number) therefore
        produce the same ID. This is kept as is so that existing IDs stay
        stable.
    """

    def __init__(self, prefix: str = "EXEF"):
        """
        Args:
            prefix: Identifier prefix (``EXEF`` by default). It may contain
                hyphens; :meth:`verify_id` and :meth:`parse_id` split IDs
                from the right.
        """
        self.prefix = prefix

    def generate_invoice_id(
        self,
        seller_nip: str,
        invoice_number: str,
        issue_date: Union[str, date],
        gross_amount: Union[str, float, Decimal],
        buyer_nip: Optional[str] = None
    ) -> str:
        """Generate the ID of a VAT invoice.

        Canonical fields: seller NIP | invoice number | issue date | gross
        amount. ``buyer_nip`` is recorded in the raw fields only and does not
        influence the ID.

        Returns:
            An ID of the form ``EXEF-FV-<16 hex digits>`` for the default
            prefix.
        """
        return self._build_id(
            DocumentType.INVOICE,
            [
                NIPValidator.normalize(seller_nip),
                InvoiceNumberNormalizer.normalize(invoice_number),
                DateNormalizer.normalize(issue_date),
                AmountNormalizer.normalize(gross_amount),
            ],
            {
                'seller_nip': seller_nip,
                'invoice_number': invoice_number,
                'issue_date': issue_date,
                'gross_amount': gross_amount,
                'buyer_nip': buyer_nip,
            },
        )

    def generate_receipt_id(
        self,
        seller_nip: str,
        receipt_date: Union[str, date],
        gross_amount: Union[str, float, Decimal],
        receipt_number: Optional[str] = None,
        cash_register_number: Optional[str] = None,
    ) -> str:
        """Generate the ID of a fiscal receipt.

        Canonical fields: seller NIP | date | gross amount, followed by the
        receipt number and the cash register number when they are given.

        Returns:
            An ID of the form ``EXEF-PAR-<16 hex digits>`` for the default
            prefix.
        """
        parts = [
            NIPValidator.normalize(seller_nip),
            DateNormalizer.normalize(receipt_date),
            AmountNormalizer.normalize(gross_amount),
        ]
        if receipt_number:
            parts.append(receipt_number.strip().upper())
        if cash_register_number:
            parts.append(cash_register_number.strip().upper())

        return self._build_id(
            DocumentType.RECEIPT,
            parts,
            {
                'seller_nip': seller_nip,
                'receipt_date': receipt_date,
                'gross_amount': gross_amount,
                'receipt_number': receipt_number,
                'cash_register_number': cash_register_number,
            },
        )

    def generate_contract_id(
        self,
        party1_nip: str,
        party2_nip: str,
        contract_date: Union[str, date],
        contract_number: Optional[str] = None,
        contract_type: Optional[str] = None,
    ) -> str:
        """Generate the ID of a contract.

        Canonical fields: both party NIPs (sorted) | date, followed by the
        contract number and the contract type when they are given. The NIPs
        are sorted so that the order of the parties does not affect the ID.
        """
        first_nip, second_nip = sorted([
            NIPValidator.normalize(party1_nip),
            NIPValidator.normalize(party2_nip),
        ])
        parts = [
            first_nip,
            second_nip,
            DateNormalizer.normalize(contract_date),
        ]
        if contract_number:
            parts.append(contract_number.strip().upper())
        if contract_type:
            parts.append(contract_type.strip().upper())

        return self._build_id(
            DocumentType.CONTRACT,
            parts,
            {
                'party1_nip': party1_nip,
                'party2_nip': party2_nip,
                'contract_date': contract_date,
                'contract_number': contract_number,
                'contract_type': contract_type,
            },
        )

    def generate_bank_statement_id(
        self,
        account_number: str,
        statement_date: Union[str, date],
        statement_number: Optional[str] = None,
    ) -> str:
        """Generate the ID of a bank statement.

        Canonical fields: account number | date, followed by the statement
        number when it is given.

        The account number is normalized by removing whitespace and hyphens,
        upper-casing it and dropping a leading ``PL`` country code that is
        followed by a digit, so ``"PL61 1090 ..."`` and ``"611090..."`` yield
        the same ID.
        """
        account = re.sub(r'[\s\-]', '', account_number).upper()
        # Drop the country code so the IBAN and bare spellings coincide.
        account = re.sub(r'^PL(?=\d)', '', account)

        parts = [
            account,
            DateNormalizer.normalize(statement_date),
        ]
        if statement_number:
            parts.append(statement_number.strip())

        return self._build_id(
            DocumentType.BANK_STATEMENT,
            parts,
            {
                'account_number': account_number,
                'statement_date': statement_date,
                'statement_number': statement_number,
            },
        )

    def generate_correction_id(
        self,
        seller_nip: str,
        correction_number: str,
        issue_date: Union[str, date],
        original_invoice_number: str,
        gross_amount: Union[str, float, Decimal],
    ) -> str:
        """Generate the ID of a correcting invoice.

        Canonical fields: seller NIP | correction number | issue date |
        original invoice number | gross amount.
        """
        return self._build_id(
            DocumentType.CORRECTION,
            [
                NIPValidator.normalize(seller_nip),
                InvoiceNumberNormalizer.normalize(correction_number),
                DateNormalizer.normalize(issue_date),
                InvoiceNumberNormalizer.normalize(original_invoice_number),
                AmountNormalizer.normalize(gross_amount),
            ],
            {
                'seller_nip': seller_nip,
                'correction_number': correction_number,
                'issue_date': issue_date,
                'original_invoice_number': original_invoice_number,
                'gross_amount': gross_amount,
            },
        )

    def generate_cash_receipt_id(
        self,
        document_number: str,
        document_date: Union[str, date],
        amount: Union[str, float, Decimal],
        issuer_nip: Optional[str] = None,
        payer_name: Optional[str] = None,
    ) -> str:
        """Generate the ID of a KP document (cash-in voucher).

        Canonical fields: document number | date | amount, followed by the
        issuer NIP and a short digest of the payer name when they are given.
        """
        return self._cash_document_id(
            DocumentType.CASH_IN,
            document_number,
            document_date,
            amount,
            issuer_nip,
            payer_name,
            'payer_name',
        )

    def generate_cash_disbursement_id(
        self,
        document_number: str,
        document_date: Union[str, date],
        amount: Union[str, float, Decimal],
        issuer_nip: Optional[str] = None,
        recipient_name: Optional[str] = None,
    ) -> str:
        """Generate the ID of a KW document (cash-out voucher).

        Canonical fields: document number | date | amount, followed by the
        issuer NIP and a short digest of the recipient name when they are
        given.
        """
        return self._cash_document_id(
            DocumentType.CASH_OUT,
            document_number,
            document_date,
            amount,
            issuer_nip,
            recipient_name,
            'recipient_name',
        )

    def generate_bill_id(
        self,
        issuer_nip: str,
        bill_number: str,
        issue_date: Union[str, date],
        gross_amount: Union[str, float, Decimal],
    ) -> str:
        """Generate the ID of a bill (without VAT).

        Canonical fields: issuer NIP | number | date | gross amount, i.e. the
        same fields as an invoice but with a different document type.
        """
        return self._build_id(
            DocumentType.BILL,
            [
                NIPValidator.normalize(issuer_nip),
                InvoiceNumberNormalizer.normalize(bill_number),
                DateNormalizer.normalize(issue_date),
                AmountNormalizer.normalize(gross_amount),
            ],
            {
                'issuer_nip': issuer_nip,
                'bill_number': bill_number,
                'issue_date': issue_date,
                'gross_amount': gross_amount,
            },
        )

    def generate_debit_note_id(
        self,
        issuer_nip: str,
        note_number: str,
        issue_date: Union[str, date],
        amount: Union[str, float, Decimal],
        recipient_nip: Optional[str] = None,
    ) -> str:
        """Generate the ID of an accounting (debit/credit) note.

        Canonical fields: issuer NIP | note number | date | amount, followed
        by the recipient NIP when it is given.
        """
        parts = [
            NIPValidator.normalize(issuer_nip),
            note_number.strip().upper(),
            DateNormalizer.normalize(issue_date),
            AmountNormalizer.normalize(amount),
        ]
        if recipient_nip:
            parts.append(NIPValidator.normalize(recipient_nip))

        return self._build_id(
            DocumentType.DEBIT_NOTE,
            parts,
            {
                'issuer_nip': issuer_nip,
                'note_number': note_number,
                'issue_date': issue_date,
                'amount': amount,
                'recipient_nip': recipient_nip,
            },
        )

    def generate_delivery_note_id(
        self,
        issuer_nip: str,
        document_number: str,
        issue_date: Union[str, date],
        recipient_nip: Optional[str] = None,
    ) -> str:
        """Generate the ID of a WZ document (goods issued note).

        Canonical fields: issuer NIP | document number | date, followed by
        the recipient NIP when it is given.
        """
        parts = [
            NIPValidator.normalize(issuer_nip),
            document_number.strip().upper(),
            DateNormalizer.normalize(issue_date),
        ]
        if recipient_nip:
            parts.append(NIPValidator.normalize(recipient_nip))

        return self._build_id(
            DocumentType.DELIVERY_NOTE,
            parts,
            {
                'issuer_nip': issuer_nip,
                'document_number': document_number,
                'issue_date': issue_date,
                'recipient_nip': recipient_nip,
            },
        )

    def generate_expense_report_id(
        self,
        employee_id: str,
        report_date: Union[str, date],
        total_amount: Union[str, float, Decimal],
        report_number: Optional[str] = None,
        company_nip: Optional[str] = None,
    ) -> str:
        """Generate the ID of a business trip / expense settlement.

        Canonical fields: employee ID | date | amount, followed by the report
        number and the company NIP when they are given.
        """
        parts = [
            employee_id.strip().upper(),
            DateNormalizer.normalize(report_date),
            AmountNormalizer.normalize(total_amount),
        ]
        if report_number:
            parts.append(report_number.strip().upper())
        if company_nip:
            parts.append(NIPValidator.normalize(company_nip))

        return self._build_id(
            DocumentType.EXPENSE_REPORT,
            parts,
            {
                'employee_id': employee_id,
                'report_date': report_date,
                'total_amount': total_amount,
                'report_number': report_number,
                'company_nip': company_nip,
            },
        )

    def generate_generic_id(
        self,
        document_type: DocumentType,
        content_hash: str,
        document_date: Optional[Union[str, date]] = None,
        issuer_nip: Optional[str] = None,
    ) -> str:
        """Generate the ID of a document identified by a content hash.

        Args:
            document_type: Type code to embed in the ID.
            content_hash: Hash of the document content (for example of OCR
                text); only its first 64 characters are used.
            document_date: Optional date appended to the canonical string.
            issuer_nip: Optional issuer NIP appended to the canonical string.
        """
        parts = [content_hash[:64]]  # at most 64 characters of the hash
        if document_date:
            parts.append(DateNormalizer.normalize(document_date))
        if issuer_nip:
            parts.append(NIPValidator.normalize(issuer_nip))

        return self._build_id(
            document_type,
            parts,
            {
                'content_hash': content_hash,
                'document_date': document_date,
                'issuer_nip': issuer_nip,
            },
        )

    def _cash_document_id(
        self,
        document_type: DocumentType,
        document_number: str,
        document_date: Union[str, date],
        amount: Union[str, float, Decimal],
        issuer_nip: Optional[str],
        counterparty_name: Optional[str],
        name_field: str,
    ) -> str:
        """Build the ID shared by the KP and KW vouchers.

        ``name_field`` is the raw-field key under which the counterparty name
        is recorded.
        """
        parts = [
            document_number.strip().upper(),
            DateNormalizer.normalize(document_date),
            AmountNormalizer.normalize(amount),
        ]
        if issuer_nip:
            parts.append(NIPValidator.normalize(issuer_nip))
        if counterparty_name:
            parts.append(self._name_digest(counterparty_name))

        return self._build_id(
            document_type,
            parts,
            {
                'document_number': document_number,
                'document_date': document_date,
                'amount': amount,
                'issuer_nip': issuer_nip,
                name_field: counterparty_name,
            },
        )

    @staticmethod
    def _name_digest(name: str) -> str:
        """Return the first 8 hex digits of the MD5 of the normalized name.

        The name is hashed so that it does not appear in the canonical string
        in clear text.
        NOTE(review): MD5 is kept because changing the digest would change
        existing IDs; confirm MD5 is available on restricted (e.g. FIPS)
        builds.
        """
        return hashlib.md5(name.strip().upper().encode()).hexdigest()[:8]

    def _build_id(self, document_type: DocumentType, parts: list, raw_fields: dict) -> str:
        """Join ``parts`` into canonical data and turn it into an ID."""
        canonical = CanonicalData(
            document_type=document_type,
            canonical_string="|".join(parts),
            raw_fields=raw_fields,
        )
        return self._generate_id(canonical)

    @staticmethod
    def _hash16(canonical_string: str) -> str:
        """Return the first 16 upper-case hex digits of the SHA-256 digest."""
        digest = hashlib.sha256(canonical_string.encode('utf-8')).hexdigest()
        return digest[:16].upper()

    def _generate_id(self, canonical: CanonicalData) -> str:
        """Build the final identifier from canonical data.

        Format: ``{PREFIX}-{TYPE}-{HASH16}``.
        """
        hash_hex = self._hash16(canonical.canonical_string)
        return f"{self.prefix}-{canonical.document_type.value}-{hash_hex}"

    def verify_id(self, document_id: str, canonical_string: str) -> bool:
        """Check that the hash part of ``document_id`` matches ``canonical_string``.

        Only the hash is compared; the prefix and the type code are not
        checked. The ID is split from the right, so prefixes that contain
        hyphens are handled.

        Returns:
            ``True`` when the ID has at least three hyphen-separated parts
            and its last part equals the expected hash, ``False`` otherwise.
        """
        parts = document_id.rsplit('-', 2)
        if len(parts) != 3:
            return False
        return parts[2] == self._hash16(canonical_string)

    @staticmethod
    def parse_id(document_id: str) -> dict:
        """Split a document identifier into its components.

        The ID is split from the right, so everything before the type code is
        treated as the prefix, including any hyphens it contains.

        Returns:
            A dict with the keys ``prefix``, ``type``, ``hash`` and
            ``document_type``; the latter is the matching ``DocumentType``
            member or ``None`` when the type code is unknown.

        Raises:
            ValueError: If the ID has fewer than three hyphen-separated
                parts.
        """
        parts = document_id.rsplit('-', 2)
        if len(parts) != 3:
            raise ValueError(f"Invalid document ID format: {document_id}")

        prefix, type_code, hash_value = parts

        # Look the type up by its code; unknown codes map to None.
        try:
            doc_type = DocumentType(type_code)
        except ValueError:
            doc_type = None

        return {
            'prefix': prefix,
            'type': type_code,
            'hash': hash_value,
            'document_type': doc_type,
        }
705
+
706
+
707
# Module-level generator instance with the default prefix, kept for
# convenience; the shortcut functions below delegate to it.
_default_generator = DocumentIDGenerator()
709
+
710
def generate_invoice_id(*args, **kwargs) -> str:
    """Shortcut for ``generate_invoice_id`` on the module-level default generator.

    All positional and keyword arguments are forwarded unchanged.
    """
    delegate = _default_generator.generate_invoice_id
    return delegate(*args, **kwargs)
713
+
714
def generate_receipt_id(*args, **kwargs) -> str:
    """Shortcut for ``generate_receipt_id`` on the module-level default generator.

    All positional and keyword arguments are forwarded unchanged.
    """
    delegate = _default_generator.generate_receipt_id
    return delegate(*args, **kwargs)
717
+
718
def generate_contract_id(*args, **kwargs) -> str:
    """Shortcut for ``generate_contract_id`` on the module-level default generator.

    All positional and keyword arguments are forwarded unchanged.
    """
    delegate = _default_generator.generate_contract_id
    return delegate(*args, **kwargs)