mikrowerk-edi-invoicing 0.3.6__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edi_invoice_parser/__init__.py +18 -1
- edi_invoice_parser/model/__init__.py +16 -1
- edi_invoice_parser/model/trade_document_types.py +139 -0
- edi_invoice_parser/parse_plain_pdf_file.py +5 -0
- edi_invoice_parser/pdf_llm_parser/__init__.py +5 -0
- edi_invoice_parser/pdf_llm_parser/google_gemini_parser.py +145 -0
- edi_invoice_parser/tests/test_parse_plain_pdf_invoice.py +71 -0
- edi_invoice_parser/util/timer_helper.py +11 -0
- {mikrowerk_edi_invoicing-0.3.6.dist-info → mikrowerk_edi_invoicing-0.5.0.dist-info}/METADATA +3 -2
- {mikrowerk_edi_invoicing-0.3.6.dist-info → mikrowerk_edi_invoicing-0.5.0.dist-info}/RECORD +13 -7
- {mikrowerk_edi_invoicing-0.3.6.dist-info → mikrowerk_edi_invoicing-0.5.0.dist-info}/WHEEL +1 -1
- {mikrowerk_edi_invoicing-0.3.6.dist-info → mikrowerk_edi_invoicing-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {mikrowerk_edi_invoicing-0.3.6.dist-info → mikrowerk_edi_invoicing-0.5.0.dist-info}/top_level.txt +0 -0
edi_invoice_parser/__init__.py
CHANGED
|
@@ -2,6 +2,9 @@ from .cross_industry_invoice_mapper import parse_and_map_x_rechnung
|
|
|
2
2
|
from .model.x_rechnung import (XRechnung, XRechnungTradeParty, XRechnungTradeAddress, XRechnungTradeContact,
|
|
3
3
|
XRechnungPaymentMeans, XRechnungBankAccount, XRechnungCurrency, XRechnungTradeLine,
|
|
4
4
|
XRechnungAppliedTradeTax, XRechnungFinancialCard)
|
|
5
|
+
from .model.trade_document_types import TradeDocument, TradeParty, TradePartyAddress, TradeCurrency, TradePartyContact, \
|
|
6
|
+
TradeLine, PaymentMeans, AppliedTradeTax, BankAccount, FinancialCard, ubl_doc_codes
|
|
7
|
+
from .parse_plain_pdf_file import analyze_document
|
|
5
8
|
|
|
6
9
|
__all__ = ["parse_and_map_x_rechnung",
|
|
7
10
|
"XRechnung",
|
|
@@ -13,5 +16,19 @@ __all__ = ["parse_and_map_x_rechnung",
|
|
|
13
16
|
"XRechnungCurrency",
|
|
14
17
|
"XRechnungTradeLine",
|
|
15
18
|
"XRechnungAppliedTradeTax",
|
|
16
|
-
"XRechnungFinancialCard"
|
|
19
|
+
"XRechnungFinancialCard",
|
|
20
|
+
"analyze_document",
|
|
21
|
+
"TradeDocument",
|
|
22
|
+
"TradeParty",
|
|
23
|
+
"TradePartyAddress",
|
|
24
|
+
"TradeCurrency",
|
|
25
|
+
"TradePartyContact",
|
|
26
|
+
"TradeLine",
|
|
27
|
+
"PaymentMeans",
|
|
28
|
+
"AppliedTradeTax",
|
|
29
|
+
"BankAccount",
|
|
30
|
+
"FinancialCard",
|
|
31
|
+
"ubl_doc_codes"
|
|
17
32
|
]
|
|
33
|
+
|
|
34
|
+
version = "0.5.0"
|
|
@@ -1,4 +1,19 @@
|
|
|
1
1
|
from .x_rechnung import XRechnung
|
|
2
|
+
from .trade_document_types import TradeDocument, TradeParty, TradePartyAddress, TradeCurrency, TradePartyContact, \
|
|
3
|
+
TradeLine, PaymentMeans, AppliedTradeTax, BankAccount, FinancialCard, ubl_doc_codes
|
|
2
4
|
from .xml_abstract_x_rechnung_parser import XMLAbstractXRechnungParser
|
|
3
5
|
|
|
4
|
-
__all__ = ["XRechnung",
|
|
6
|
+
__all__ = ["XRechnung",
|
|
7
|
+
"XMLAbstractXRechnungParser",
|
|
8
|
+
"TradeDocument",
|
|
9
|
+
"TradeParty",
|
|
10
|
+
"TradePartyAddress",
|
|
11
|
+
"TradeCurrency",
|
|
12
|
+
"TradePartyContact",
|
|
13
|
+
"TradeLine",
|
|
14
|
+
"PaymentMeans",
|
|
15
|
+
"AppliedTradeTax",
|
|
16
|
+
"BankAccount",
|
|
17
|
+
"FinancialCard",
|
|
18
|
+
"ubl_doc_codes"
|
|
19
|
+
]
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from decimal import Decimal
|
|
3
|
+
import datetime
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
UBL Document Type (XML Root) Description UNCL 1001 Code (Example)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# Maps a UBL document type (the XML root element name) to a tuple of
# (document type name, UNCL 1001 document code, human-readable description).
# NOTE(review): the numeric codes are labelled as examples in this module;
# verify them against the official UNTDID 1001 code list before relying on them.
ubl_doc_codes = {
    "ApplicationResponse": ('ApplicationResponse', 431, 'Response to an application/message'),
    "Catalogue": ('Catalogue', 71, 'Product catalogue'),
    "CatalogueRequest": ('CatalogueRequest', 171, 'Catalogue request'),
    "CreditNote": ('CreditNote', 381, 'Commercial Credit note'),
    "DebitNote": ('DebitNote', 383, 'Debit note'),
    "DespatchAdvice": ('DespatchAdvice', 250, 'Despatch advice (Advance Ship Notice)'),
    "Invoice": ('Invoice', 380, 'Commercial Invoice'),
    "Order": ('Order', 220, 'Order'),
    "OrderChange": ('OrderChange', 222, 'Order change'),
    "OrderResponse": ('OrderResponse', 255, 'Order response (confirmation/rejection)'),
    "Quotation": ('Quotation', 83, 'Quotation'),
    "RequestForQuotation": ('RequestForQuotation', 135, 'Request for quotation'),
    "RemittanceAdvice": ('RemittanceAdvice', 256, 'Remittance advice'),
    "Statement": ('Statement', 86, 'Account statement / Balance confirmation'),
    # The name element repeats the key, exactly like every other entry.
    "UtilityStatement": ('UtilityStatement', 490, 'Statement (electricity, gas, etc.)'),
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
|
|
29
|
+
class TradePartyAddress:
|
|
30
|
+
post_code: str = None
|
|
31
|
+
city_name: str = None
|
|
32
|
+
country_id: str = None
|
|
33
|
+
country_subdivision_id: str = None
|
|
34
|
+
address_line_1: str = None
|
|
35
|
+
address_line_2: str = None
|
|
36
|
+
address_line_3: str = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
|
|
40
|
+
class TradePartyContact:
|
|
41
|
+
name: str
|
|
42
|
+
department_name: str = None
|
|
43
|
+
telephone: str = None
|
|
44
|
+
fax: str = None
|
|
45
|
+
email: str = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class TradeParty:
    """A party to a trade document (for example its sender or receiver).

    ``name`` is required; the registration numbers, address and contact are
    optional and default to ``None``.
    """

    name: str
    vat_registration_number: str | None = None
    fiscal_registration_number: str | None = None
    legal_registration_number: str | None = None
    address: TradePartyAddress | None = None
    contact: TradePartyContact | None = None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
|
|
59
|
+
class AppliedTradeTax:
|
|
60
|
+
name: str
|
|
61
|
+
type_code: str = None
|
|
62
|
+
category_code: str = None
|
|
63
|
+
applicable_percent: Decimal = None
|
|
64
|
+
basis_amount: Decimal = None
|
|
65
|
+
calculated_amount: Decimal = None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
class TradeLine:
    """A single line item of a trade document.

    ``pos_number`` is required; every other field is optional and defaults
    to ``None``.
    """

    pos_number: int
    article_code: str | None = None
    name: str | None = None
    description: str | None = None
    quantity: Decimal | None = None
    unit_of_measure: str | None = None
    unit_price: Decimal | None = None
    total_net: Decimal | None = None
    tax: AppliedTradeTax | None = None
    total_amount: Decimal | None = None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass
class TradeCurrency:
    """An amount paired with its currency code; both fields are required."""

    # Monetary value.
    amount: Decimal
    # Code identifying the currency of ``amount``.
    currency_code: str
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@dataclass
|
|
89
|
+
class BankAccount:
|
|
90
|
+
iban: str
|
|
91
|
+
bic: str = None
|
|
92
|
+
name: str = None
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass
|
|
96
|
+
class FinancialCard:
|
|
97
|
+
id: str
|
|
98
|
+
cardholder_name: str | None = None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class PaymentMeans:
    """How a trade document is to be paid.

    Every field is optional and defaults to ``None``.
    """

    id: str | None = None
    type_code: str | None = None
    information: str | None = None
    financial_card: FinancialCard | None = None
    payee_account: BankAccount | None = None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@dataclass
class TradeDocument:
    """Model of a trade document.

    ``name`` and ``doc_type_code`` are required; every other field is
    optional and defaults to ``None``.
    """

    name: str
    doc_type_code: tuple  # Document Type Code: an entry of ubl_doc_codes
    doc_id: str | None = None
    # 'Date'. The module ``datetime`` is imported, so the class must be
    # spelled ``datetime.datetime``.
    # NOTE(review): type chosen from the field name; confirm callers do not
    # store plain ``datetime.date`` values here.
    issued_date_time: datetime.datetime | None = None
    languages: str | None = None  # 'Languages'
    notes: str | None = None  # 'Notes'
    sender_reference: str | None = None  # 'Buyer Reference'
    receiver_reference: str | None = None
    dispatch_reference: str | None = None
    sales_order_reference: str | None = None
    sender: TradeParty | None = None
    receiver: TradeParty | None = None
    currency_code: str | None = None  # 'Currency Code'
    payment_means: PaymentMeans | None = None
    payment_terms: str | None = None  # 'Payment Terms'
    line_total_amount: Decimal | None = None  # 'Line Total Amount'
    charge_total_amount: Decimal | None = None  # 'Charge Total Amount'
    allowance_total_amount: Decimal | None = None  # 'Allowance Total Amount'
    tax_basis_total_amount: Decimal | None = None
    tax_total_amount: Decimal | None = None  # 'Tax Grand Total Amount'
    grand_total_amount: Decimal | None = None  # 'Grand Total Amount'
    total_prepaid_amount: Decimal | None = None  # 'Total Prepaid Amount'
    due_payable_amount: Decimal | None = None  # 'Due Payable Amount'
    trade_line_items: list[TradeLine] | None = None
    applicable_trade_taxes: list[AppliedTradeTax] | None = None
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
from .pdf_llm_parser.google_gemini_parser import analyze_document as google_analyze_document
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def analyze_document(pdf_binary: bytes, api_key: str | None = None, model: str | None = None,
                     prompt: str | None = None) -> dict:
    """Analyze a plain PDF document and return the extracted data.

    Thin facade that forwards all arguments unchanged to the Google Gemini
    based parser.

    Args:
        pdf_binary: Raw bytes of the PDF document.
        api_key: API key passed through to the parser; ``None`` by default.
        model: Model name passed through to the parser; ``None`` by default.
        prompt: Prompt passed through to the parser; ``None`` by default.

    Returns:
        Whatever the Gemini parser returns for the document.
    """
    return google_analyze_document(pdf_binary, api_key=api_key, model=model, prompt=prompt)
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from google import genai
|
|
6
|
+
from google.genai import types
|
|
7
|
+
|
|
8
|
+
DEFAULT_LLM_MODEL = "gemini-2.5-flash-lite"
|
|
9
|
+
|
|
10
|
+
_logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
# Default (German) extraction prompt sent to the model together with the PDF.
# The example schema below is kept strictly valid JSON because the prompt
# itself demands valid JSON output from the model.
DEFAULT_PROMPT = """
Bitte analysiere das beigefügte Dokument und extrahiere alle relevanten Informationen als strukturiertes JSON.
Verwende exakt das folgende JSON-Schema und beachte die Anweisungen.

Wichtige Hinweise:
- Gib NUR valides JSON zurück. Kein umgebender Text oder Markdown-Formatierung.
- Bei fehlenden Werten verwende den JSON-Wert null.
- Gib alle Beträge als Zahlen (float oder integer), nicht als Strings.
- Formatiere alle Datumsangaben im Format YYYY-MM-DD.
- Gib das land als "ISO 3166-1 alpha-2" Code an, wenn nicht angegeben nehme "DE" an.
- Erkenne den Dokumententyp und klassifiziere ihn als: "Rechnung", "Gutschrift", "Auftrag", "Angebot", "Anfrage", "Auftragsbestätigung", "Bestellung"
- Erkenne für eine Gutschrift auch Varianten wie Credit Note, Rechnungsgutschrift, Erstattung
- Erkenne für eine Rechnung auch Varianten wie: "Rechnung","Proforma Rechnung","R-Nr.","Rechnung-Nr.","Rech.-Nr.","Invoice No.","Invoice Number","Kostenrechnung"
- Erkenne für eine Auftragsbestätigung auch Varianten wie: "Bestätigung", "Vertrag","Order Acknowledgement", "Order Confirmation"
- Erkenne für die Referenz auch Varianten wie: "Projektnummer", "Auftragsnummer", "Bestellnummer", "Referenznummer", "Kundenreferenz", "Reference"
- Ist der Dokumententyp nicht erkennbar klassifiziere ihn als: "sonstiges"
- Sind keine Positionen vorhanden erstelle eine zusammenfassung des Textes und die Summen oder Beträge, falls vorhanden
- Ist der Dokumententyp ist nicht erkennbar erstelle eine Zusammenfassung des Textes
- Erkenne auch Varianten wie: "MWST.", "VAT", "Ust.", etc. für die Umsatzsteuer.
- Erkenne für die Währung Varianten wie: "EUR", "€", "$", "USD", "CHF" oder sonstige Währungsbezeichnung oder Symbol
- Wenn keine Währung angegeben ist nimm "EUR" an
- Erkenne für das fälligkeitsdatum auch Varianten wie: "zahlbar bis", "fällig", "fällig bis", "zu liefern bis", "gültig bis"
- Erkenne ob eine Rechnung bereits bezahlt ist, ja oder nein, und vermerke die Zahlungsmethode wie: "Paypal", "Kreditkarte", "Überweisung", "Barzahlung"
- Eine Rechnung ist bezahlt "ja", wenn die Zahlungsmethode wie: "Paypal", "Kreditkarte" ist.
- Erkenne für die Steuernummer des Absenders auch: "Tax Number", "VAT Number", "VAT ID", "MWST ID", "UST ID", "Ust-id"

JSON-Schema:
{
  "dokumenttyp": "string",
  "dokumentnummer": "string",
  "referenz": "string",
  "dokumentendatum": "YYYY-MM-DD",
  "fälligkeitsdatum": "YYYY-MM-DD",
  "absender": {
    "name": "string",
    "adresse": "string",
    "plz": "string",
    "ort": "string",
    "land": "string",
    "steuernummer": "string"
  },
  "empfänger": {
    "name": "string",
    "adresse": "string",
    "plz": "string",
    "ort": "string",
    "land": "string"
  },
  "positionen": [
    {
      "POS": "number",
      "Bezeichnung": "string",
      "Menge": "number",
      "Einheit": "string",
      "einzelpreis": "number",
      "Betrag": "number",
      "mwst_satz": "number"
    }
  ],
  "summen": {
    "nettobetrag": "number",
    "umsatzsteuer": "number",
    "rechnungsbetrag": "number",
    "währung": "string"
  },
  "zahlungshinweise": "string",
  "zahlungsmethode": "string",
  "bezahlt": "string",
  "weitere_info": "string",
  "bankverbindung": "string",
  "zusammenfassung": "string"
}
"""
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _resolve_api_key(api_key):
    """Return a stripped, plausibly valid Gemini API key.

    Uses ``api_key`` when it is truthy, otherwise falls back to the
    ``GEMINI_API_KEY`` environment variable.

    Raises:
        RuntimeError: If no key is available, or the key is shorter than
            20 characters once surrounding whitespace is removed.
    """
    if not api_key:
        api_key = os.environ.get("GEMINI_API_KEY")
        if api_key is None:
            raise RuntimeError("Die Umgebungsvariable 'GEMINI_API_KEY' wurde nicht gefunden.")
    # Strip before validating so padding whitespace cannot satisfy the length check.
    api_key = api_key.strip()
    if len(api_key) < 20:
        raise RuntimeError("Der API Key scheint ungültig oder zu kurz zu sein.")
    return api_key


def analyze_document(pdf_binary: bytes, api_key, model: str, prompt) -> dict:
    """Analyze a PDF document with Gemini and return the extracted data.

    The PDF bytes and the prompt are sent to the Gemini API; the textual
    answer is stripped of Markdown code-fence markers and parsed as JSON.

    Args:
        pdf_binary: Raw bytes of the PDF document; must not be empty.
        api_key: Gemini API key. When falsy, the ``GEMINI_API_KEY``
            environment variable is used instead.
        model: Model name; ``DEFAULT_LLM_MODEL`` is used when falsy.
        prompt: Prompt text; ``DEFAULT_PROMPT`` is used when falsy.

    Returns:
        The parsed JSON answer of the model.

    Raises:
        ValueError: If ``pdf_binary`` is empty.
        RuntimeError: If no usable API key is available, or the model answer
            is missing or cannot be parsed as JSON.
        UserWarning: If the API request itself fails.
    """
    if not pdf_binary:
        _logger.error("Error no binaries supplied")
        raise ValueError("Error no binaries supplied")

    api_key = _resolve_api_key(api_key)
    client = genai.Client(api_key=api_key)
    model = model or DEFAULT_LLM_MODEL
    prompt = prompt or DEFAULT_PROMPT

    _logger.info("\nSende Anfrage an die Gemini API...")
    # Never write the key itself to the log; its length is enough for diagnostics.
    _logger.info("use API-KEY of length: %d", len(api_key))
    try:
        # Send the request with the PDF content and the prompt.
        response = client.models.generate_content(
            model=model,
            contents=[
                types.Part.from_bytes(
                    data=pdf_binary,
                    mime_type='application/pdf',
                ),
                prompt,
            ],
        )
    except KeyError as e:
        # The message names the environment variable this function actually reads.
        raise UserWarning("Fehler: Die Umgebungsvariable GEMINI_API_KEY wurde nicht gefunden.\n"
                          "Bitte setzen Sie den Schlüssel, z.B. mit 'export GEMINI_API_KEY=\"DEIN_API_SCHLÜSSEL\"'") from e
    except Exception as e:
        raise UserWarning(f"Ein unerwarteter Fehler ist aufgetreten: {e}") from e

    raw_text = response.text
    if raw_text is None:
        # NOTE(review): presumably the SDK yields None when the answer has no
        # text part; report that explicitly instead of an AttributeError text.
        raise RuntimeError("Ein unerwarteter Fehler ist aufgetreten: Die Modell-Antwort enthält keinen Text.")

    # Clean up and parse the answer.
    try:
        # The model sometimes wraps its answer in a Markdown code block;
        # remove the fence markers to obtain plain JSON.
        cleaned_response = raw_text.strip().replace("```json", "").replace("```", "").strip()
        return json.loads(cleaned_response)
    except json.JSONDecodeError as e:
        # One formatted message instead of two positional args (which rendered as a tuple).
        raise RuntimeError(f"Fehler beim Parsen der Modell-Antwort: {raw_text}") from e
    except Exception as e:
        raise RuntimeError(f"Ein unerwarteter Fehler ist aufgetreten: {e}") from e
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import os
|
|
3
|
+
import pathlib
|
|
4
|
+
from parameterized import parameterized
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
from . import get_checked_file_path
|
|
8
|
+
from edi_invoice_parser.parse_plain_pdf_file import analyze_document
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestPlainPdfInvoiceParser(unittest.TestCase):
    """Integration tests for the LLM based plain PDF parser.

    The tests read ``GEMINI_API_KEY`` from the environment and call
    ``analyze_document`` on the fixture PDFs.
    """

    @parameterized.expand([
        ('pdf', 'plain_pdf_invoices/25313 - Rechnung Konzepthausevent - 16-09-2025.pdf'),
        ('pdf', 'plain_pdf_invoices/57856 - 2509-7377.pdf'),
        ('pdf', 'plain_pdf_invoices/Bestätigung griffty GmbH VA 150919 SCCON_25.pdf'),
        # ('pdf', 'plain_pdf_invoices/Invoice INV-1013.pdf'),
        # ('pdf', 'plain_pdf_invoices/LGM-2509-784_Griffity_GmbH_10_09_2025_Rechnung.pdf'),
        # ('pdf', 'plain_pdf_invoices/Order GO-0741243.pdf'),
        # ('pdf', 'plain_pdf_invoices/Rechnung 30250628.pdf'),
        # ('pdf', 'plain_pdf_invoices/Rechnung-202511899-11267.pdf'),
        # ('pdf', 'plain_pdf_invoices/TS Rechnung TS2025-10586.pdf'),
        # ('pdf', 'plain_pdf_invoices/Verkaufsrechnung 01-137334.pdf'),
    ])
    def test_parse_pdf_invoice(self, file_type, file_path):
        """Parse one fixture PDF, print the result and store it as JSON next to the PDF."""
        _file_path, _exists, _is_dir = get_checked_file_path(file_path, __file__)
        self.assertEqual(file_type, 'pdf', "Only 'pdf' filetype is supported")
        self.assertTrue(_exists, f"file does not exist: {_file_path}")
        self.test_api_key_is_available()
        api_key = os.environ.get("GEMINI_API_KEY")

        filepath = pathlib.Path(_file_path)
        binary = filepath.read_bytes()
        invoice_data = analyze_document(binary, api_key=api_key)
        self.assertIsNotNone(invoice_data, "No result retrieved")

        # The prompt asks the model to use null for missing values, so nested
        # objects may be None rather than absent.
        sender = invoice_data.get('absender') or {}
        receiver = invoice_data.get('empfänger') or {}
        totals = invoice_data.get('summen') or {}

        print("\n------------------------------------------------------------")
        print(f"\nDokument Date: {file_path}")
        print("\n--- Extrahierte Dokumentdaten ---")
        # Example access to individual values
        print(f"\nDokumenttyp: {invoice_data.get('dokumenttyp', None)}")
        print(f"\ndokumentnummer: {invoice_data.get('dokumentnummer', None)}")
        print(f"\nAbsender: {sender.get('name', None)}")
        print(f"\nEmpfänger: {receiver.get('name', None)}")
        print(f"Fälligkeitsdatum: {invoice_data.get('fälligkeitsdatum', None)}")
        print(f"Gesamtbetrag: {totals.get('rechnungsbetrag', 'None')} €")
        print("\n------------------------------------------------------------")
        # Print the dictionary as a formatted JSON string
        result_json = json.dumps(invoice_data, indent=2, ensure_ascii=False)
        print(result_json)

        # Swap only the file suffix; a '.pdf' elsewhere in the path stays untouched.
        _out_file_path = filepath.with_suffix('.json')
        # ensure_ascii=False emits non-ASCII characters, so fix the encoding explicitly.
        with open(_out_file_path, "w", encoding="utf-8") as f:
            f.write(result_json)
        print(f"written result json to {_out_file_path}")
        print("\n---------------------------------")

    def test_api_key_is_available(self):
        """
        Check that the environment variable 'GEMINI_API_KEY' is set.

        IMPORTANT: never print the content of the key to logs!
        Only check that it exists and looks valid (e.g. by its length).
        """
        # Read the environment variable
        api_key = os.environ.get("GEMINI_API_KEY")

        # unittest assertions are used because bare asserts are stripped under -O.
        self.assertIsNotNone(api_key, "Die Umgebungsvariable 'GEMINI_API_KEY' wurde nicht gefunden.")
        self.assertGreater(len(api_key), 20, "Der API Key scheint ungültig oder zu kurz zu sein.")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
if __name__ == '__main__':
|
|
71
|
+
unittest.main()
|
{mikrowerk_edi_invoicing-0.3.6.dist-info → mikrowerk_edi_invoicing-0.5.0.dist-info}/METADATA
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mikrowerk_edi_invoicing
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary: Parser for EDI invoices in CII or UBL format
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Parser for EDI invoices in CII or UBL format or plain pdf with LLM support
|
|
5
5
|
Author: Mikrowerk a Gammadata Division
|
|
6
6
|
Author-email: info@mikrowerk.com
|
|
7
7
|
License: GNU Affero General Public License v3
|
|
@@ -22,6 +22,7 @@ Requires-Dist: factur-x==3.6
|
|
|
22
22
|
Requires-Dist: jsonpickle~=4.0.1
|
|
23
23
|
Requires-Dist: parameterized
|
|
24
24
|
Requires-Dist: schwifty
|
|
25
|
+
Requires-Dist: google-genai
|
|
25
26
|
Dynamic: author
|
|
26
27
|
Dynamic: author-email
|
|
27
28
|
Dynamic: classifier
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
edi_invoice_parser/__init__.py,sha256=
|
|
1
|
+
edi_invoice_parser/__init__.py,sha256=1fRVIy_xEyvR5MuiEuZJTxpBTN30uIxAVn_kqervWuM,1386
|
|
2
2
|
edi_invoice_parser/cross_industry_invoice_mapper.py,sha256=viUaEq71xjb9_KUVL9YCqWzfKwSa1ivcWnzb4SFdKCM,905
|
|
3
|
+
edi_invoice_parser/parse_plain_pdf_file.py,sha256=5CWHMHAlmqLzMLrPTLOFQ_liI6LcFFF0thpMF3j1SAk,282
|
|
3
4
|
edi_invoice_parser/cii_dom_parser/__init__.py,sha256=J9O0f-t1570oDgC_zWT6-MpjOYA6o3whazbe9ffQqjE,188
|
|
4
5
|
edi_invoice_parser/cii_dom_parser/dom_elements_helper.py,sha256=60_YyI85xWhIa92hHwIr_FLQvFuRcNrlCJjBi9S1s8o,3686
|
|
5
6
|
edi_invoice_parser/cii_dom_parser/pdf.py,sha256=1bbXWdqIGmrnYvkS284oc6ZVvH995vd9qB8-xdcDtf0,13270
|
|
@@ -20,18 +21,23 @@ edi_invoice_parser/cii_dom_parser/models/product.py,sha256=WRX0m5u6UTFv6Bvow1pED
|
|
|
20
21
|
edi_invoice_parser/cii_dom_parser/models/references.py,sha256=2LYJGJU5KkJse-UsUcQitbPWYdlT_lLFL6BvvQmUIoo,4468
|
|
21
22
|
edi_invoice_parser/cii_dom_parser/models/trade.py,sha256=exlMrVMzj9aMb0qI3c610G3GuB6Z5ttM4qkFfK5tn3o,6968
|
|
22
23
|
edi_invoice_parser/cii_dom_parser/models/tradelines.py,sha256=262opVT4pWXKEc_-HTEqVjm4yqQhvz639QNckEQCthY,5650
|
|
23
|
-
edi_invoice_parser/model/__init__.py,sha256=
|
|
24
|
+
edi_invoice_parser/model/__init__.py,sha256=kglbJax0bg64Wb0PIK8Vphdy7ApLiIb2qo23YFH25zg,695
|
|
25
|
+
edi_invoice_parser/model/trade_document_types.py,sha256=DBsdHOeu-W8CEOY7ScUd9yCZjUQFKlzco-om2BJIBSk,4079
|
|
24
26
|
edi_invoice_parser/model/x_rechnung.py,sha256=7RUuWh2STXn1fRNzO2-gP42i0tY1Jj2OBMykIKC3idY,10088
|
|
25
27
|
edi_invoice_parser/model/xml_abstract_x_rechnung_parser.py,sha256=puxCSC02zIXpfMY9xNLqY-w0aRv0y5ROHmNtmaV16o4,673
|
|
28
|
+
edi_invoice_parser/pdf_llm_parser/__init__.py,sha256=hJwV4HialAgphoAZ5SzklXgBrxA7KYfn1edKhUMxpwE,130
|
|
29
|
+
edi_invoice_parser/pdf_llm_parser/google_gemini_parser.py,sha256=Tqo3scK-vz8_r3DHuDmJbf3vLF5l9TGM8kH0f8nwXjw,6183
|
|
26
30
|
edi_invoice_parser/tests/__init__.py,sha256=gnkvp4ZsQ0g1L5r6fbyhvFNsSKp0PegdvVeQP_dVQSw,142
|
|
27
31
|
edi_invoice_parser/tests/test_iban_handling.py,sha256=suRaB9gxbNc2Dc7spjHmQyPBdXva98HF1js85wQWqPM,662
|
|
32
|
+
edi_invoice_parser/tests/test_parse_plain_pdf_invoice.py,sha256=g_oMG-UxlGWiliJw19_Yc6zKuieBKl7v27Oq3J965JM,3473
|
|
28
33
|
edi_invoice_parser/tests/test_parse_x_rechnung.py,sha256=K2g3jjfxh5gsTJtOvjWSzo-lPIdgnpMsuuewQa5BQq4,3591
|
|
29
34
|
edi_invoice_parser/ubl_sax_parser/__init__.py,sha256=P3QhOExirTKDRre-ReGBVv_GFZniEj_kOnWtUSNJGq0,91
|
|
30
35
|
edi_invoice_parser/ubl_sax_parser/xml_ubl_sax_parser.py,sha256=oSwqflp2nf_M5K6_iyAeNTYmDnCKIsgo5LO0BvPxRq0,16911
|
|
31
36
|
edi_invoice_parser/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
37
|
edi_invoice_parser/util/file_helper.py,sha256=4gdWbv8L9LSMraLKvGI1Z3NMcuGGy7JB1qFvNaW-yo4,767
|
|
33
|
-
|
|
34
|
-
mikrowerk_edi_invoicing-0.
|
|
35
|
-
mikrowerk_edi_invoicing-0.
|
|
36
|
-
mikrowerk_edi_invoicing-0.
|
|
37
|
-
mikrowerk_edi_invoicing-0.
|
|
38
|
+
edi_invoice_parser/util/timer_helper.py,sha256=X1XSV03iLZ4xfjELj_axlvNxzR2sOrJInXiv9HU2Fyg,284
|
|
39
|
+
mikrowerk_edi_invoicing-0.5.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
40
|
+
mikrowerk_edi_invoicing-0.5.0.dist-info/METADATA,sha256=61zrdg24cjGligw2IxwYWEBWz-pOoCwzjYC4juLkM78,1074
|
|
41
|
+
mikrowerk_edi_invoicing-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
42
|
+
mikrowerk_edi_invoicing-0.5.0.dist-info/top_level.txt,sha256=OyIJDXDBfR9f0EvTDTmEHdXEFHscjRqX1MxeOeT2VKM,19
|
|
43
|
+
mikrowerk_edi_invoicing-0.5.0.dist-info/RECORD,,
|
{mikrowerk_edi_invoicing-0.3.6.dist-info → mikrowerk_edi_invoicing-0.5.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{mikrowerk_edi_invoicing-0.3.6.dist-info → mikrowerk_edi_invoicing-0.5.0.dist-info}/top_level.txt
RENAMED
|
File without changes
|