mikrowerk-edi-invoicing 0.5.0__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edi_invoice_parser/__init__.py +3 -18
- edi_invoice_parser/cii_dom_parser/xml_cii_dom_parser.py +39 -38
- edi_invoice_parser/cross_industry_invoice_mapper.py +21 -5
- edi_invoice_parser/model/__init__.py +3 -5
- edi_invoice_parser/model/trade_document_types.py +60 -40
- edi_invoice_parser/model/xml_abstract_x_rechnung_parser.py +2 -2
- edi_invoice_parser/tests/__init__.py +2 -2
- edi_invoice_parser/tests/test_parse_x_rechnung.py +5 -5
- edi_invoice_parser/ubl_sax_parser/xml_ubl_sax_parser.py +45 -45
- {mikrowerk_edi_invoicing-0.5.0.dist-info → mikrowerk_edi_invoicing-0.6.1.dist-info}/METADATA +1 -1
- {mikrowerk_edi_invoicing-0.5.0.dist-info → mikrowerk_edi_invoicing-0.6.1.dist-info}/RECORD +14 -19
- edi_invoice_parser/model/x_rechnung.py +0 -265
- edi_invoice_parser/parse_plain_pdf_file.py +0 -5
- edi_invoice_parser/pdf_llm_parser/__init__.py +0 -5
- edi_invoice_parser/pdf_llm_parser/google_gemini_parser.py +0 -145
- edi_invoice_parser/tests/test_parse_plain_pdf_invoice.py +0 -71
- {mikrowerk_edi_invoicing-0.5.0.dist-info → mikrowerk_edi_invoicing-0.6.1.dist-info}/WHEEL +0 -0
- {mikrowerk_edi_invoicing-0.5.0.dist-info → mikrowerk_edi_invoicing-0.6.1.dist-info}/licenses/LICENSE +0 -0
- {mikrowerk_edi_invoicing-0.5.0.dist-info → mikrowerk_edi_invoicing-0.6.1.dist-info}/top_level.txt +0 -0
edi_invoice_parser/__init__.py
CHANGED
|
@@ -1,34 +1,19 @@
|
|
|
1
1
|
from .cross_industry_invoice_mapper import parse_and_map_x_rechnung
|
|
2
|
-
from .model.x_rechnung import (XRechnung, XRechnungTradeParty, XRechnungTradeAddress, XRechnungTradeContact,
|
|
3
|
-
XRechnungPaymentMeans, XRechnungBankAccount, XRechnungCurrency, XRechnungTradeLine,
|
|
4
|
-
XRechnungAppliedTradeTax, XRechnungFinancialCard)
|
|
5
2
|
from .model.trade_document_types import TradeDocument, TradeParty, TradePartyAddress, TradeCurrency, TradePartyContact, \
|
|
6
|
-
TradeLine,
|
|
7
|
-
from .parse_plain_pdf_file import analyze_document
|
|
3
|
+
TradeLine, TradePaymentMeans, AppliedTradeTax, BankAccount, FinancialCard, ubl_doc_codes
|
|
8
4
|
|
|
9
5
|
__all__ = ["parse_and_map_x_rechnung",
|
|
10
|
-
"XRechnung",
|
|
11
|
-
"XRechnungTradeParty",
|
|
12
|
-
"XRechnungTradeAddress",
|
|
13
|
-
"XRechnungTradeContact",
|
|
14
|
-
"XRechnungPaymentMeans",
|
|
15
|
-
"XRechnungBankAccount",
|
|
16
|
-
"XRechnungCurrency",
|
|
17
|
-
"XRechnungTradeLine",
|
|
18
|
-
"XRechnungAppliedTradeTax",
|
|
19
|
-
"XRechnungFinancialCard",
|
|
20
|
-
"analyze_document",
|
|
21
6
|
"TradeDocument",
|
|
22
7
|
"TradeParty",
|
|
23
8
|
"TradePartyAddress",
|
|
24
9
|
"TradeCurrency",
|
|
25
10
|
"TradePartyContact",
|
|
26
11
|
"TradeLine",
|
|
27
|
-
"
|
|
12
|
+
"TradePaymentMeans",
|
|
28
13
|
"AppliedTradeTax",
|
|
29
14
|
"BankAccount",
|
|
30
15
|
"FinancialCard",
|
|
31
16
|
"ubl_doc_codes"
|
|
32
17
|
]
|
|
33
18
|
|
|
34
|
-
version = "0.
|
|
19
|
+
version = "0.6.1"
|
|
@@ -4,46 +4,46 @@ This implements a mapper from a drafthorse parsed x-rechnung-xml to the internal
|
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
|
|
6
6
|
from .models.document import Document
|
|
7
|
-
|
|
7
|
+
from ..model.trade_document_types import (TradeDocument, TradeParty, TradeCurrency, TradeLine, TradePaymentMeans,
|
|
8
|
+
AppliedTradeTax, TradePartyAddress, TradePartyContact, BankAccount,
|
|
9
|
+
FinancialCard)
|
|
8
10
|
from ..model.xml_abstract_x_rechnung_parser import XMLAbstractXRechnungParser
|
|
9
|
-
from ..model.x_rechnung import (XRechnung, XRechnungCurrency, XRechnungTradeParty, XRechnungTradeAddress,
|
|
10
|
-
XRechnungTradeContact, XRechnungPaymentMeans, XRechnungFinancialCard,
|
|
11
|
-
XRechnungTradeLine, XRechnungAppliedTradeTax, XRechnungBankAccount)
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
class XRechnungCIIXMLParser(XMLAbstractXRechnungParser):
|
|
15
14
|
|
|
16
15
|
@classmethod
|
|
17
|
-
def parse_and_map_x_rechnung(cls, _xml: any) ->
|
|
16
|
+
def parse_and_map_x_rechnung(cls, _xml: any) -> TradeDocument:
|
|
18
17
|
doc = Document.parse(_xml)
|
|
19
18
|
return cls().map_to_x_rechnung(doc)
|
|
20
19
|
|
|
21
20
|
@classmethod
|
|
22
|
-
def map_to_x_rechnung(cls, doc: any) ->
|
|
21
|
+
def map_to_x_rechnung(cls, doc: any) -> TradeDocument:
|
|
23
22
|
"""
|
|
24
23
|
:param doc: Element, the parsed dom root element
|
|
25
24
|
:return:
|
|
26
25
|
"""
|
|
27
26
|
|
|
28
|
-
return
|
|
27
|
+
return TradeDocument(
|
|
29
28
|
name=f"{cls().TYPE_CODES.get(doc.header.type_code.get_string(), 'Unknown doc type ')} {doc.header.id.get_string()}",
|
|
29
|
+
doc_type_name=f"{cls().TYPE_CODES.get(doc.header.type_code.get_string(), 'Unknown doc type ')}",
|
|
30
30
|
doc_id=doc.header.id.get_string(),
|
|
31
31
|
doc_type_code=doc.header.type_code.get_string(),
|
|
32
32
|
issued_date_time=doc.header.issue_date_time.get_value(),
|
|
33
33
|
notes=doc.header.notes.get_string_elements("\n"),
|
|
34
34
|
languages=doc.header.languages.get_string_elements(";"),
|
|
35
|
-
|
|
35
|
+
receiver_reference=doc.trade.agreement.buyer_reference.get_string(),
|
|
36
36
|
order_reference=doc.trade.agreement.buyer_order.issuer_assigned_id.get_string(),
|
|
37
37
|
currency_code=doc.trade.settlement.currency_code.get_string(),
|
|
38
38
|
line_total_amount=doc.trade.settlement.monetary_summation.line_total.get_value(),
|
|
39
39
|
charge_total_amount=doc.trade.settlement.monetary_summation.charge_total.get_value(),
|
|
40
40
|
allowance_total_amount=doc.trade.settlement.monetary_summation.allowance_total.get_value(),
|
|
41
|
-
tax_basis_total_amount=
|
|
41
|
+
tax_basis_total_amount=TradeCurrency.from_currency_tuple(
|
|
42
42
|
doc.trade.settlement.monetary_summation.tax_basis_total.get_currency()),
|
|
43
|
-
tax_total_amount=[
|
|
43
|
+
tax_total_amount=[TradeCurrency.from_currency_tuple(tpl) for tpl in
|
|
44
44
|
doc.trade.settlement.monetary_summation.tax_total_other_currency.get_currencies()],
|
|
45
45
|
# list of currency
|
|
46
|
-
grand_total_amount=
|
|
46
|
+
grand_total_amount=TradeCurrency.from_currency_tuple(
|
|
47
47
|
doc.trade.settlement.monetary_summation.grand_total.get_currency()),
|
|
48
48
|
total_prepaid_amount=doc.trade.settlement.monetary_summation.prepaid_total.get_value(),
|
|
49
49
|
due_payable_amount=doc.trade.settlement.monetary_summation.due_amount.get_value(),
|
|
@@ -51,11 +51,12 @@ class XRechnungCIIXMLParser(XMLAbstractXRechnungParser):
|
|
|
51
51
|
payment_means=cls().map_payment_means(
|
|
52
52
|
doc.trade.settlement.payment_means) if doc.trade.settlement.payment_means else None,
|
|
53
53
|
payment_terms=doc.trade.settlement.terms.get_string_elements("\n"),
|
|
54
|
-
|
|
54
|
+
sender=cls().map_trade_party(doc.trade.agreement.seller) if hasattr(doc.trade.agreement,
|
|
55
55
|
"seller") else None,
|
|
56
56
|
invoicee=cls().map_trade_party(doc.trade.agreement.invoicee) if hasattr(doc.trade.agreement,
|
|
57
57
|
"invoicee") else None,
|
|
58
|
-
|
|
58
|
+
receiver=cls().map_trade_party(doc.trade.agreement.buyer) if hasattr(doc.trade.agreement,
|
|
59
|
+
"buyer") else None,
|
|
59
60
|
payee=cls().map_trade_party(doc.trade.agreement.payee) if hasattr(doc.trade.agreement, "payee") else None,
|
|
60
61
|
trade_line_items=cls().map_trade_line_items(doc.trade.items) if hasattr(doc.trade, "items") else None,
|
|
61
62
|
applicable_trade_taxes=cls().map_trade_taxes(doc.trade.settlement.trade_tax) if hasattr(
|
|
@@ -64,9 +65,9 @@ class XRechnungCIIXMLParser(XMLAbstractXRechnungParser):
|
|
|
64
65
|
)
|
|
65
66
|
|
|
66
67
|
@classmethod
|
|
67
|
-
def map_trade_party(cls, trade_party: any) ->
|
|
68
|
+
def map_trade_party(cls, trade_party: any) -> TradeParty:
|
|
68
69
|
_global_id_schema, _global_id = cls().map_first_id(trade_party.global_id)
|
|
69
|
-
return
|
|
70
|
+
return TradeParty(
|
|
70
71
|
name=trade_party.name.get_string(),
|
|
71
72
|
description=trade_party.description.get_string(),
|
|
72
73
|
global_id=_global_id,
|
|
@@ -77,8 +78,8 @@ class XRechnungCIIXMLParser(XMLAbstractXRechnungParser):
|
|
|
77
78
|
trade_party, 'tax_registrations') else None,
|
|
78
79
|
fiscal_registration_number=cls().map_tax_registration(trade_party.tax_registrations, 'FC') if hasattr(
|
|
79
80
|
trade_party, 'tax_registrations') else None,
|
|
80
|
-
|
|
81
|
-
|
|
81
|
+
address=cls().map_trade_address(trade_party.address) if hasattr(trade_party, 'address') else None,
|
|
82
|
+
contact=cls().map_trade_contact(trade_party.contact) if hasattr(trade_party, 'contact') else None,
|
|
82
83
|
id=trade_party.id.get_string() if hasattr(trade_party, 'id') else None,
|
|
83
84
|
)
|
|
84
85
|
|
|
@@ -111,8 +112,8 @@ class XRechnungCIIXMLParser(XMLAbstractXRechnungParser):
|
|
|
111
112
|
return None
|
|
112
113
|
|
|
113
114
|
@staticmethod
|
|
114
|
-
def map_trade_address(trade_address: any) ->
|
|
115
|
-
return
|
|
115
|
+
def map_trade_address(trade_address: any) -> TradePartyAddress:
|
|
116
|
+
return TradePartyAddress(
|
|
116
117
|
city_name=trade_address.city_name.get_string(),
|
|
117
118
|
country_id=trade_address.country_id.get_string(),
|
|
118
119
|
country_subdivision_id=trade_address.country_subdivision.get_string(),
|
|
@@ -123,8 +124,8 @@ class XRechnungCIIXMLParser(XMLAbstractXRechnungParser):
|
|
|
123
124
|
)
|
|
124
125
|
|
|
125
126
|
@staticmethod
|
|
126
|
-
def map_trade_contact(trade_contact: any) ->
|
|
127
|
-
return
|
|
127
|
+
def map_trade_contact(trade_contact: any) -> TradePartyContact:
|
|
128
|
+
return TradePartyContact(
|
|
128
129
|
name=trade_contact.person_name.get_string(),
|
|
129
130
|
email=trade_contact.email.get_string(),
|
|
130
131
|
telephone=trade_contact.telephone.get_string(),
|
|
@@ -133,8 +134,8 @@ class XRechnungCIIXMLParser(XMLAbstractXRechnungParser):
|
|
|
133
134
|
)
|
|
134
135
|
|
|
135
136
|
@staticmethod
|
|
136
|
-
def map_bank_account(payment_means: any) ->
|
|
137
|
-
return
|
|
137
|
+
def map_bank_account(payment_means: any) -> BankAccount:
|
|
138
|
+
return BankAccount(
|
|
138
139
|
iban="".join(payment_means.payee_account.iban.get_string().split()) if (hasattr(payment_means,
|
|
139
140
|
'payee_account')
|
|
140
141
|
and payment_means.payee_account.iban.get_string()) else None,
|
|
@@ -144,15 +145,15 @@ class XRechnungCIIXMLParser(XMLAbstractXRechnungParser):
|
|
|
144
145
|
)
|
|
145
146
|
|
|
146
147
|
@staticmethod
|
|
147
|
-
def map_financial_card(financial_card: any) ->
|
|
148
|
-
return
|
|
148
|
+
def map_financial_card(financial_card: any) -> FinancialCard:
|
|
149
|
+
return FinancialCard(
|
|
149
150
|
id=financial_card.id.get_string(),
|
|
150
151
|
cardholder_name=financial_card.cardholder_name.get_string(),
|
|
151
152
|
)
|
|
152
153
|
|
|
153
154
|
@classmethod
|
|
154
|
-
def map_payment_means(cls, payment_means: any) ->
|
|
155
|
-
return
|
|
155
|
+
def map_payment_means(cls, payment_means: any) -> TradePaymentMeans:
|
|
156
|
+
return TradePaymentMeans(
|
|
156
157
|
information=payment_means.information.get_string(),
|
|
157
158
|
type_code=payment_means.type_code.get_string(),
|
|
158
159
|
payee_account=cls().map_bank_account(payment_means) if hasattr(payment_means, 'payee_account') else None,
|
|
@@ -161,8 +162,8 @@ class XRechnungCIIXMLParser(XMLAbstractXRechnungParser):
|
|
|
161
162
|
)
|
|
162
163
|
|
|
163
164
|
@staticmethod
|
|
164
|
-
def map_trade_tax(trade_tax: any) ->
|
|
165
|
-
return
|
|
165
|
+
def map_trade_tax(trade_tax: any) -> AppliedTradeTax:
|
|
166
|
+
return AppliedTradeTax(
|
|
166
167
|
type_code=trade_tax.type_code.get_string(),
|
|
167
168
|
name=f"{trade_tax.type_code.get_string()} {trade_tax.rate_applicable_percent.get_value()}",
|
|
168
169
|
category_code=trade_tax.category_code.get_string(),
|
|
@@ -172,33 +173,33 @@ class XRechnungCIIXMLParser(XMLAbstractXRechnungParser):
|
|
|
172
173
|
)
|
|
173
174
|
|
|
174
175
|
@classmethod
|
|
175
|
-
def map_trade_taxes(cls, trade_taxes: any) -> [
|
|
176
|
+
def map_trade_taxes(cls, trade_taxes: any) -> [TradeLine]:
|
|
176
177
|
res = []
|
|
177
178
|
for child in trade_taxes.children:
|
|
178
179
|
res.append(cls().map_trade_tax(child))
|
|
179
180
|
return res
|
|
180
181
|
|
|
181
182
|
@classmethod
|
|
182
|
-
def map_trade_line(cls, trade_line: any) ->
|
|
183
|
-
return
|
|
183
|
+
def map_trade_line(cls, trade_line: any) -> TradeLine:
|
|
184
|
+
return TradeLine(
|
|
184
185
|
name=trade_line.product.name.get_string(),
|
|
185
186
|
description=trade_line.product.description.get_string(),
|
|
186
187
|
line_id=trade_line.document.line_id.get_string(),
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
188
|
+
unit_price=trade_line.agreement.net.amount.get_value(),
|
|
189
|
+
unit_price_gross=trade_line.agreement.gross.amount.get_value(),
|
|
190
|
+
quantity=trade_line.delivery.billed_quantity.get_value(),
|
|
190
191
|
global_product_id=trade_line.product.global_id.get_string(),
|
|
191
|
-
|
|
192
|
+
total_amount=trade_line.settlement.monetary_summation.total_amount.get_value(),
|
|
192
193
|
total_allowance_charge=trade_line.settlement.monetary_summation.total_allowance_charge.get_value(),
|
|
193
194
|
quantity_unit_code=trade_line.delivery.billed_quantity._unit_code,
|
|
194
195
|
seller_assigned_id=trade_line.product.seller_assigned_id.get_string(),
|
|
195
196
|
buyer_assigned_id=trade_line.product.buyer_assigned_id.get_string(),
|
|
196
197
|
global_product_scheme_id=trade_line.product.global_id._scheme_id,
|
|
197
|
-
|
|
198
|
+
tax=cls().map_trade_tax(trade_line.settlement.trade_tax)
|
|
198
199
|
)
|
|
199
200
|
|
|
200
201
|
@classmethod
|
|
201
|
-
def map_trade_line_items(cls, trade_line_items: any) -> [
|
|
202
|
+
def map_trade_line_items(cls, trade_line_items: any) -> [TradeLine]:
|
|
202
203
|
res = []
|
|
203
204
|
for child in trade_line_items.children:
|
|
204
205
|
res.append(cls().map_trade_line(child))
|
|
@@ -1,14 +1,18 @@
|
|
|
1
1
|
"""
|
|
2
2
|
This implements a mapper from a drafthorse parsed x-rechnung-xml to the internal XRechnung object
|
|
3
3
|
"""
|
|
4
|
+
import logging
|
|
4
5
|
from lxml import etree
|
|
5
6
|
|
|
6
|
-
from .model.
|
|
7
|
+
from .model.trade_document_types import TradeDocument
|
|
7
8
|
from .cii_dom_parser import XRechnungCIIXMLParser
|
|
8
9
|
from .ubl_sax_parser.xml_ubl_sax_parser import XRechnungUblXMLParser
|
|
10
|
+
from .model.xml_abstract_x_rechnung_parser import XMLAbstractXRechnungParser
|
|
9
11
|
|
|
12
|
+
_logger = logging.getLogger(__name__)
|
|
10
13
|
|
|
11
|
-
|
|
14
|
+
|
|
15
|
+
def parse_and_map_x_rechnung(_xml: bytes) -> TradeDocument:
|
|
12
16
|
"""
|
|
13
17
|
|
|
14
18
|
Args:
|
|
@@ -17,12 +21,24 @@ def parse_and_map_x_rechnung(_xml: bytes) -> XRechnung:
|
|
|
17
21
|
Returns: XRechnung
|
|
18
22
|
|
|
19
23
|
"""
|
|
24
|
+
_parser = get_xml_parser_for_doc_type(_xml)
|
|
25
|
+
if _parser is None:
|
|
26
|
+
raise ValueError('xml format not supported for any parser"')
|
|
27
|
+
return _parser.parse_and_map_x_rechnung(_xml)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_xml_parser_for_doc_type(_xml: bytes) -> XMLAbstractXRechnungParser:
|
|
20
31
|
_parser = None
|
|
21
32
|
tree = etree.fromstring(_xml)
|
|
22
33
|
if tree.tag == '{urn:un:unece:uncefact:data:standard:CrossIndustryInvoice:100}CrossIndustryInvoice':
|
|
23
34
|
_parser = XRechnungCIIXMLParser()
|
|
24
35
|
elif tree.tag == '{urn:oasis:names:specification:ubl:schema:xsd:Invoice-2}Invoice':
|
|
25
36
|
_parser = XRechnungUblXMLParser()
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
return _parser
|
|
37
|
+
else:
|
|
38
|
+
_logger.warning(f'No parser found, unsupported XML tag: {tree.tag}')
|
|
39
|
+
return _parser
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def check_if_parser_is_available(_xml: bytes) -> bool:
|
|
43
|
+
return get_xml_parser_for_doc_type(_xml) is not None
|
|
44
|
+
|
|
@@ -1,17 +1,15 @@
|
|
|
1
|
-
from .x_rechnung import XRechnung
|
|
2
1
|
from .trade_document_types import TradeDocument, TradeParty, TradePartyAddress, TradeCurrency, TradePartyContact, \
|
|
3
|
-
TradeLine,
|
|
2
|
+
TradeLine, TradePaymentMeans, AppliedTradeTax, BankAccount, FinancialCard, ubl_doc_codes
|
|
4
3
|
from .xml_abstract_x_rechnung_parser import XMLAbstractXRechnungParser
|
|
5
4
|
|
|
6
|
-
__all__ = ["
|
|
7
|
-
"XMLAbstractXRechnungParser",
|
|
5
|
+
__all__ = ["XMLAbstractXRechnungParser",
|
|
8
6
|
"TradeDocument",
|
|
9
7
|
"TradeParty",
|
|
10
8
|
"TradePartyAddress",
|
|
11
9
|
"TradeCurrency",
|
|
12
10
|
"TradePartyContact",
|
|
13
11
|
"TradeLine",
|
|
14
|
-
"
|
|
12
|
+
"TradePaymentMeans",
|
|
15
13
|
"AppliedTradeTax",
|
|
16
14
|
"BankAccount",
|
|
17
15
|
"FinancialCard",
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
|
-
from decimal import Decimal
|
|
3
2
|
import datetime
|
|
4
3
|
|
|
5
4
|
"""
|
|
@@ -38,7 +37,7 @@ class TradePartyAddress:
|
|
|
38
37
|
|
|
39
38
|
@dataclass
|
|
40
39
|
class TradePartyContact:
|
|
41
|
-
name: str
|
|
40
|
+
name: str = None
|
|
42
41
|
department_name: str = None
|
|
43
42
|
telephone: str = None
|
|
44
43
|
fax: str = None
|
|
@@ -47,47 +46,62 @@ class TradePartyContact:
|
|
|
47
46
|
|
|
48
47
|
@dataclass
|
|
49
48
|
class TradeParty:
|
|
50
|
-
name: str
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
49
|
+
name: str= None
|
|
50
|
+
description: str = None # 'Description'
|
|
51
|
+
global_id: int = 0 # 'Global ID'
|
|
52
|
+
global_id_schema: str = None # 'Global Schema'
|
|
53
|
+
id: str = None # 'id'
|
|
54
|
+
address: TradePartyAddress | None = None
|
|
55
|
+
contact: TradePartyContact | None = None
|
|
56
|
+
email: str = None # 'Email'
|
|
57
|
+
vat_registration_number: str | None = None
|
|
58
|
+
fiscal_registration_number: str | None = None
|
|
59
|
+
legal_registration_number: str | None = None
|
|
56
60
|
|
|
57
61
|
|
|
58
62
|
@dataclass
|
|
59
63
|
class AppliedTradeTax:
|
|
60
|
-
name: str
|
|
64
|
+
name: str = None
|
|
61
65
|
type_code: str = None
|
|
62
66
|
category_code: str = None
|
|
63
|
-
applicable_percent:
|
|
64
|
-
basis_amount:
|
|
65
|
-
calculated_amount:
|
|
67
|
+
applicable_percent: float = None
|
|
68
|
+
basis_amount: float = None
|
|
69
|
+
calculated_amount: float = None
|
|
66
70
|
|
|
67
71
|
|
|
68
72
|
@dataclass
|
|
69
73
|
class TradeLine:
|
|
70
|
-
|
|
71
|
-
article_code: str = None
|
|
72
|
-
name: str = None
|
|
73
|
-
description: str = None
|
|
74
|
-
quantity:
|
|
75
|
-
|
|
76
|
-
unit_price:
|
|
77
|
-
|
|
78
|
-
tax: AppliedTradeTax = None
|
|
79
|
-
|
|
74
|
+
line_id: int = 0
|
|
75
|
+
article_code: str | None = None
|
|
76
|
+
name: str | None = None
|
|
77
|
+
description: str | None = None
|
|
78
|
+
quantity: float = None
|
|
79
|
+
quantity_unit_code: str = None
|
|
80
|
+
unit_price: float = None
|
|
81
|
+
unit_price_gross: float = None
|
|
82
|
+
tax: AppliedTradeTax | None = None
|
|
83
|
+
total_amount_net: float = None
|
|
84
|
+
total_amount: float = None
|
|
85
|
+
total_allowance_charge: float = None
|
|
86
|
+
global_product_id: str = None # 'Global Product ID')
|
|
87
|
+
global_product_scheme_id: str = None # 'Global Product Scheme ID')
|
|
88
|
+
seller_assigned_id: str = None # 'Seller Assigned ID')
|
|
89
|
+
buyer_assigned_id: str = None # 'Buyer Assigned ID')
|
|
80
90
|
|
|
81
91
|
|
|
82
92
|
@dataclass
|
|
83
93
|
class TradeCurrency:
|
|
84
|
-
amount:
|
|
94
|
+
amount: float
|
|
85
95
|
currency_code: str
|
|
86
96
|
|
|
97
|
+
@staticmethod
|
|
98
|
+
def from_currency_tuple(currency_tuple: tuple) -> 'TradeCurrency':
|
|
99
|
+
return TradeCurrency(*currency_tuple)
|
|
100
|
+
|
|
87
101
|
|
|
88
102
|
@dataclass
|
|
89
103
|
class BankAccount:
|
|
90
|
-
iban: str
|
|
104
|
+
iban: str | None = None
|
|
91
105
|
bic: str = None
|
|
92
106
|
name: str = None
|
|
93
107
|
|
|
@@ -99,7 +113,7 @@ class FinancialCard:
|
|
|
99
113
|
|
|
100
114
|
|
|
101
115
|
@dataclass
|
|
102
|
-
class
|
|
116
|
+
class TradePaymentMeans:
|
|
103
117
|
id: str = None
|
|
104
118
|
type_code: str = None
|
|
105
119
|
information: str = None
|
|
@@ -112,28 +126,34 @@ class TradeDocument:
|
|
|
112
126
|
"""
|
|
113
127
|
Model of a Trade Document
|
|
114
128
|
"""
|
|
115
|
-
name: str
|
|
116
|
-
doc_type_code:
|
|
129
|
+
name: str = None
|
|
130
|
+
doc_type_code: int = 0 # Document Type Code: ubl_doc_codes
|
|
131
|
+
doc_type_name: str = None
|
|
117
132
|
doc_id: str = None
|
|
133
|
+
project: str = None
|
|
118
134
|
issued_date_time: datetime = None # 'Date'
|
|
135
|
+
delivered_date_time: datetime = None # 'Delivered Date'
|
|
119
136
|
languages: str = None # 'Languages'
|
|
120
137
|
notes: str = None # 'Notes'
|
|
121
138
|
sender_reference: str = None # 'Buyer Reference'
|
|
122
|
-
receiver_reference: str = None
|
|
123
|
-
dispatch_reference: str = None
|
|
124
|
-
|
|
139
|
+
receiver_reference: str | None = None
|
|
140
|
+
dispatch_reference: str | None = None
|
|
141
|
+
order_reference: str | None = None
|
|
125
142
|
sender: TradeParty = None
|
|
126
143
|
receiver: TradeParty = None
|
|
144
|
+
payee: TradeParty = None
|
|
145
|
+
invoicee: TradeParty = None
|
|
127
146
|
currency_code: str = None # 'Currency Code'
|
|
128
|
-
payment_means:
|
|
129
|
-
payment_terms: str = None # 'Payment Terms'
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
147
|
+
payment_means: TradePaymentMeans = None
|
|
148
|
+
payment_terms: str | None = None # 'Payment Terms'
|
|
149
|
+
due_date_time: datetime = None
|
|
150
|
+
line_total_amount: float = None # 'Line Total Amount'
|
|
151
|
+
charge_total_amount: float = None # 'Charge Total Amount'
|
|
152
|
+
allowance_total_amount: float = None # 'Allowance Total Amount'
|
|
153
|
+
tax_basis_total_amount: TradeCurrency = None
|
|
154
|
+
tax_total_amount: [TradeCurrency] = None # 'Tax Grand Total Amount'
|
|
155
|
+
grand_total_amount: TradeCurrency = None # 'Grand Total Amount'
|
|
156
|
+
total_prepaid_amount: float = None # 'Total Prepaid Amount'
|
|
157
|
+
due_payable_amount: float = None # 'Due Payable Amount'
|
|
138
158
|
trade_line_items: [TradeLine] = None
|
|
139
159
|
applicable_trade_taxes: [AppliedTradeTax] = None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
|
|
3
|
-
from ..model.
|
|
3
|
+
from ..model.trade_document_types import TradeDocument
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class XMLAbstractXRechnungParser(ABC):
|
|
@@ -17,5 +17,5 @@ class XMLAbstractXRechnungParser(ABC):
|
|
|
17
17
|
|
|
18
18
|
@staticmethod
|
|
19
19
|
@abstractmethod
|
|
20
|
-
def parse_and_map_x_rechnung(_xml: any) ->
|
|
20
|
+
def parse_and_map_x_rechnung(_xml: any) -> TradeDocument:
|
|
21
21
|
pass
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
from ..util.file_helper import get_checked_file_path
|
|
2
|
-
from ..model.
|
|
3
|
-
__all__ = ["get_checked_file_path", "
|
|
2
|
+
from ..model.trade_document_types import TradeDocument
|
|
3
|
+
__all__ = ["get_checked_file_path", "TradeDocument"]
|
|
@@ -6,7 +6,7 @@ from facturx import get_facturx_xml_from_pdf
|
|
|
6
6
|
|
|
7
7
|
from . import get_checked_file_path
|
|
8
8
|
from edi_invoice_parser.cross_industry_invoice_mapper import parse_and_map_x_rechnung
|
|
9
|
-
from . import
|
|
9
|
+
from . import TradeDocument
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class XRechnungEinfachTestCase(unittest.TestCase):
|
|
@@ -51,10 +51,10 @@ class XRechnungEinfachTestCase(unittest.TestCase):
|
|
|
51
51
|
else:
|
|
52
52
|
raise AssertionError(f'File type {file_type} not supported')
|
|
53
53
|
assert _parsed is not None
|
|
54
|
-
res_dict = _parsed.map_to_dict()
|
|
55
|
-
print(jsonpickle.dumps(
|
|
54
|
+
# res_dict = _parsed.map_to_dict()
|
|
55
|
+
print(jsonpickle.dumps(_parsed))
|
|
56
56
|
|
|
57
|
-
def _parse_xml(self, filepath) ->
|
|
57
|
+
def _parse_xml(self, filepath) -> TradeDocument:
|
|
58
58
|
_file_path, _exists, _is_dir = get_checked_file_path(filepath, __file__)
|
|
59
59
|
self.assertTrue(_exists)
|
|
60
60
|
print(f"\n_parse_xml: file_path={_file_path}")
|
|
@@ -64,7 +64,7 @@ class XRechnungEinfachTestCase(unittest.TestCase):
|
|
|
64
64
|
self.assertIsNotNone(res)
|
|
65
65
|
return res
|
|
66
66
|
|
|
67
|
-
def _parse_pdf(self, filepath) ->
|
|
67
|
+
def _parse_pdf(self, filepath) -> TradeDocument:
|
|
68
68
|
_file_path, _exists, _is_dir = get_checked_file_path(filepath, __file__)
|
|
69
69
|
self.assertTrue(_exists)
|
|
70
70
|
print(f"\n_parse_pdf: file_path={_file_path}")
|
|
@@ -7,34 +7,34 @@ from collections import deque
|
|
|
7
7
|
import xml.sax as sax
|
|
8
8
|
from decimal import Decimal
|
|
9
9
|
|
|
10
|
-
from ..model.
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
from ..model.trade_document_types import (TradeDocument, TradeParty, TradeCurrency, TradeLine, TradePaymentMeans,
|
|
11
|
+
AppliedTradeTax, TradePartyAddress, TradePartyContact, BankAccount)
|
|
12
|
+
|
|
13
13
|
from ..model import XMLAbstractXRechnungParser
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class UblSaxHandler(sax.ContentHandler):
|
|
17
17
|
def __init__(self):
|
|
18
|
-
self.x_rechnung:
|
|
18
|
+
self.x_rechnung: TradeDocument = TradeDocument()
|
|
19
19
|
self.content = ""
|
|
20
20
|
self.stack = deque()
|
|
21
21
|
self.current_attributes = None
|
|
22
|
-
self.current_trade_party =
|
|
23
|
-
self.current_trade_address:
|
|
24
|
-
self.current_trade_contact:
|
|
25
|
-
self.current_payment_means:
|
|
26
|
-
self.current_payment_means_list: [
|
|
27
|
-
self.current_currency:
|
|
28
|
-
self.current_trade_line:
|
|
29
|
-
self.trade_line_list: [
|
|
30
|
-
self.current_trade_tax:
|
|
31
|
-
self.applicable_trade_taxes: [
|
|
22
|
+
self.current_trade_party = TradeParty()
|
|
23
|
+
self.current_trade_address: TradePartyAddress = TradePartyAddress()
|
|
24
|
+
self.current_trade_contact: TradePartyContact = TradePartyContact()
|
|
25
|
+
self.current_payment_means: TradePaymentMeans = TradePaymentMeans()
|
|
26
|
+
self.current_payment_means_list: [TradePaymentMeans] = []
|
|
27
|
+
self.current_currency: TradeCurrency | None = None
|
|
28
|
+
self.current_trade_line: TradeLine | None = None
|
|
29
|
+
self.trade_line_list: [TradeLine] = []
|
|
30
|
+
self.current_trade_tax: AppliedTradeTax | None = None
|
|
31
|
+
self.applicable_trade_taxes: [AppliedTradeTax] = []
|
|
32
32
|
self.allowance_line_count = 99900
|
|
33
33
|
self.allowance_line_count_incr = 10
|
|
34
34
|
|
|
35
35
|
def startDocument(self):
|
|
36
36
|
print("------------------------------ Start ---------------------------------------------------")
|
|
37
|
-
self.x_rechnung =
|
|
37
|
+
self.x_rechnung = TradeDocument()
|
|
38
38
|
self.stack = deque()
|
|
39
39
|
self.allowance_line_count = 99900
|
|
40
40
|
|
|
@@ -53,38 +53,38 @@ class UblSaxHandler(sax.ContentHandler):
|
|
|
53
53
|
match _tag_name:
|
|
54
54
|
case "Party":
|
|
55
55
|
# init all sub entities, if one is missing, so not taking left over
|
|
56
|
-
self.current_trade_party =
|
|
57
|
-
self.current_trade_address =
|
|
58
|
-
self.current_trade_contact =
|
|
56
|
+
self.current_trade_party = TradeParty()
|
|
57
|
+
self.current_trade_address = TradePartyAddress()
|
|
58
|
+
self.current_trade_contact = TradePartyContact()
|
|
59
59
|
case "Delivery":
|
|
60
|
-
self.current_trade_party =
|
|
61
|
-
self.current_trade_address =
|
|
62
|
-
self.current_trade_contact =
|
|
60
|
+
self.current_trade_party = TradeParty()
|
|
61
|
+
self.current_trade_address = TradePartyAddress()
|
|
62
|
+
self.current_trade_contact = TradePartyContact()
|
|
63
63
|
case "PostalAddress":
|
|
64
|
-
self.current_trade_address =
|
|
64
|
+
self.current_trade_address = TradePartyAddress()
|
|
65
65
|
case "DeliveryLocation":
|
|
66
|
-
self.current_trade_address =
|
|
66
|
+
self.current_trade_address = TradePartyAddress()
|
|
67
67
|
case "Contact":
|
|
68
|
-
self.current_trade_contact =
|
|
68
|
+
self.current_trade_contact = TradePartyContact()
|
|
69
69
|
case "DeliveryParty":
|
|
70
|
-
self.current_trade_contact =
|
|
70
|
+
self.current_trade_contact = TradePartyContact()
|
|
71
71
|
case "PaymentMeans":
|
|
72
|
-
self.current_payment_means =
|
|
73
|
-
self.current_payment_means.payee_account =
|
|
72
|
+
self.current_payment_means = TradePaymentMeans()
|
|
73
|
+
self.current_payment_means.payee_account = BankAccount()
|
|
74
74
|
case "InvoiceLine":
|
|
75
|
-
self.current_trade_line =
|
|
75
|
+
self.current_trade_line = TradeLine()
|
|
76
76
|
self.current_trade_tax = None
|
|
77
77
|
case "TaxSubtotal":
|
|
78
|
-
self.current_trade_tax =
|
|
78
|
+
self.current_trade_tax = AppliedTradeTax()
|
|
79
79
|
case "ClassifiedTaxCategory":
|
|
80
|
-
self.current_trade_tax =
|
|
80
|
+
self.current_trade_tax = AppliedTradeTax()
|
|
81
81
|
case "AllowanceCharge":
|
|
82
82
|
if _path.endswith('Invoice/AllowanceCharge'):
|
|
83
|
-
self.current_trade_line =
|
|
83
|
+
self.current_trade_line = TradeLine()
|
|
84
84
|
self.current_trade_tax = None
|
|
85
85
|
|
|
86
86
|
if _path.endswith('/AllowanceCharge/TaxCategory'):
|
|
87
|
-
self.current_trade_tax =
|
|
87
|
+
self.current_trade_tax = AppliedTradeTax()
|
|
88
88
|
|
|
89
89
|
print(f">>>>>>>>>>>>>>> start: {_ns} {_tag_name} >>>>>>>>>>>>>>>>")
|
|
90
90
|
print(_path)
|
|
@@ -134,7 +134,7 @@ class UblSaxHandler(sax.ContentHandler):
|
|
|
134
134
|
self.allowance_line_count += self.allowance_line_count_incr
|
|
135
135
|
self.current_trade_line.seller_assigned_id = "AllowanceCharge"
|
|
136
136
|
self.current_trade_line.total_amount_net = (
|
|
137
|
-
self.current_trade_line.
|
|
137
|
+
self.current_trade_line.unit_price * self.current_trade_line.quantity)
|
|
138
138
|
self.trade_line_list.append(self.current_trade_line)
|
|
139
139
|
|
|
140
140
|
# invoice top level properties
|
|
@@ -181,14 +181,14 @@ class UblSaxHandler(sax.ContentHandler):
|
|
|
181
181
|
# case "Invoice/AllowanceCharge/Amount":
|
|
182
182
|
# self.x_rechnung.allowance_total_amount = Decimal(content)
|
|
183
183
|
case "Invoice/TaxTotal/TaxAmount":
|
|
184
|
-
self.x_rechnung.tax_total_amount = [
|
|
185
|
-
|
|
184
|
+
self.x_rechnung.tax_total_amount = [TradeCurrency(float(content),
|
|
185
|
+
attrs.get('currencyID', 'EUR'))]
|
|
186
186
|
case "Invoice/TaxTotal/TaxSubtotal/TaxableAmount":
|
|
187
|
-
self.x_rechnung.tax_basis_total_amount =
|
|
188
|
-
attrs.get('currencyID', 'EUR'))
|
|
189
|
-
case "Invoice/LegalMonetaryTotal/TaxInclusiveAmount":
|
|
190
|
-
self.x_rechnung.grand_total_amount = XRechnungCurrency(Decimal(content),
|
|
187
|
+
self.x_rechnung.tax_basis_total_amount = TradeCurrency(float(content),
|
|
191
188
|
attrs.get('currencyID', 'EUR'))
|
|
189
|
+
case "Invoice/LegalMonetaryTotal/TaxInclusiveAmount":
|
|
190
|
+
self.x_rechnung.grand_total_amount = TradeCurrency(float(content),
|
|
191
|
+
attrs.get('currencyID', 'EUR'))
|
|
192
192
|
case "Invoice/LegalMonetaryTotal/PrepaidAmount":
|
|
193
193
|
self.x_rechnung.total_prepaid_amount = Decimal(content)
|
|
194
194
|
case "Invoice/LegalMonetaryTotal/PayableAmount":
|
|
@@ -256,7 +256,7 @@ class UblSaxHandler(sax.ContentHandler):
|
|
|
256
256
|
elif path.endswith("/InvoiceLine/Note"):
|
|
257
257
|
self.current_trade_line.note = content
|
|
258
258
|
elif path.endswith("/InvoiceLine/InvoicedQuantity"):
|
|
259
|
-
self.current_trade_line.
|
|
259
|
+
self.current_trade_line.quantity = float(content)
|
|
260
260
|
self.current_trade_line.quantity_unit_code = attr.get((None, 'unitCode'), None) if attr else None
|
|
261
261
|
elif path.endswith("/InvoiceLine/LineExtensionAmount"):
|
|
262
262
|
self.current_trade_line.total_amount_net = float(content)
|
|
@@ -275,18 +275,18 @@ class UblSaxHandler(sax.ContentHandler):
|
|
|
275
275
|
elif path.endswith("/InvoiceLine/Item/ItemInstance/LotIdentification/ExpiryDate"):
|
|
276
276
|
self.current_trade_line.expiry_date = datetime.fromisoformat(content)
|
|
277
277
|
elif path.endswith("/InvoiceLine/Price/PriceAmount"):
|
|
278
|
-
self.current_trade_line.
|
|
278
|
+
self.current_trade_line.unit_price = float(content)
|
|
279
279
|
|
|
280
280
|
def handle_allowance_charge(self, path: str, tag: str, content: str, attr=None):
|
|
281
281
|
if path.endswith("/AllowanceCharge/AllowanceChargeReason"):
|
|
282
282
|
self.current_trade_line.name = content
|
|
283
283
|
elif path.endswith("/AllowanceCharge/Amount"):
|
|
284
|
-
self.current_trade_line.
|
|
284
|
+
self.current_trade_line.unit_price = float(content)
|
|
285
285
|
elif path.endswith("/AllowanceCharge/ChargeIndicator"):
|
|
286
286
|
if content.lower() == "true":
|
|
287
|
-
self.current_trade_line.
|
|
287
|
+
self.current_trade_line.quantity = 1.0
|
|
288
288
|
else:
|
|
289
|
-
self.current_trade_line.
|
|
289
|
+
self.current_trade_line.quantity = -1.0
|
|
290
290
|
|
|
291
291
|
def handle_trade_tax(self, path: str, tag: str, content: str, attr=None):
|
|
292
292
|
if path.endswith("/ClassifiedTaxCategory/ID") or path.endswith("/TaxCategory/ID"):
|
|
@@ -304,7 +304,7 @@ class UblSaxHandler(sax.ContentHandler):
|
|
|
304
304
|
|
|
305
305
|
class XRechnungUblXMLParser(XMLAbstractXRechnungParser):
|
|
306
306
|
@classmethod
|
|
307
|
-
def parse_and_map_x_rechnung(cls, _xml: bytes) ->
|
|
307
|
+
def parse_and_map_x_rechnung(cls, _xml: bytes) -> TradeDocument:
|
|
308
308
|
# create an XMLReader
|
|
309
309
|
parser = sax.make_parser()
|
|
310
310
|
# turn off namespaces
|
|
@@ -1,11 +1,10 @@
|
|
|
1
|
-
edi_invoice_parser/__init__.py,sha256=
|
|
2
|
-
edi_invoice_parser/cross_industry_invoice_mapper.py,sha256=
|
|
3
|
-
edi_invoice_parser/parse_plain_pdf_file.py,sha256=5CWHMHAlmqLzMLrPTLOFQ_liI6LcFFF0thpMF3j1SAk,282
|
|
1
|
+
edi_invoice_parser/__init__.py,sha256=o_oatupAKzFcuuRI1z6Epj8-rIAEsxVCHwmPF0SJgVs,667
|
|
2
|
+
edi_invoice_parser/cross_industry_invoice_mapper.py,sha256=PCByIKH0srVnXqKR3orcQsrAxcqAtp_ljcmhNIv2FS8,1402
|
|
4
3
|
edi_invoice_parser/cii_dom_parser/__init__.py,sha256=J9O0f-t1570oDgC_zWT6-MpjOYA6o3whazbe9ffQqjE,188
|
|
5
4
|
edi_invoice_parser/cii_dom_parser/dom_elements_helper.py,sha256=60_YyI85xWhIa92hHwIr_FLQvFuRcNrlCJjBi9S1s8o,3686
|
|
6
5
|
edi_invoice_parser/cii_dom_parser/pdf.py,sha256=1bbXWdqIGmrnYvkS284oc6ZVvH995vd9qB8-xdcDtf0,13270
|
|
7
6
|
edi_invoice_parser/cii_dom_parser/utils.py,sha256=_ZaPPb1YeDWid3GP-9ojYDwm35HaJyXGGT_nIoiGSMw,624
|
|
8
|
-
edi_invoice_parser/cii_dom_parser/xml_cii_dom_parser.py,sha256=
|
|
7
|
+
edi_invoice_parser/cii_dom_parser/xml_cii_dom_parser.py,sha256=dvO9zvpHK1bAvnBKBAbFhht_yd9XbRprGcHz9FXllzk,11328
|
|
9
8
|
edi_invoice_parser/cii_dom_parser/xmp_schema.py,sha256=UXQnZT3L2PQMendI_vyjemddhw209gMjH5woIK07TEM,4085
|
|
10
9
|
edi_invoice_parser/cii_dom_parser/models/__init__.py,sha256=LANSc9OLmHBP8iv5RKlvvv8ugBGJjWycantnRi5Lt0I,439
|
|
11
10
|
edi_invoice_parser/cii_dom_parser/models/accounting.py,sha256=_ngHDoBruw6v5QbLeA23QarAQ0fOascCz0ISpnD9aPs,6714
|
|
@@ -21,23 +20,19 @@ edi_invoice_parser/cii_dom_parser/models/product.py,sha256=WRX0m5u6UTFv6Bvow1pED
|
|
|
21
20
|
edi_invoice_parser/cii_dom_parser/models/references.py,sha256=2LYJGJU5KkJse-UsUcQitbPWYdlT_lLFL6BvvQmUIoo,4468
|
|
22
21
|
edi_invoice_parser/cii_dom_parser/models/trade.py,sha256=exlMrVMzj9aMb0qI3c610G3GuB6Z5ttM4qkFfK5tn3o,6968
|
|
23
22
|
edi_invoice_parser/cii_dom_parser/models/tradelines.py,sha256=262opVT4pWXKEc_-HTEqVjm4yqQhvz639QNckEQCthY,5650
|
|
24
|
-
edi_invoice_parser/model/__init__.py,sha256=
|
|
25
|
-
edi_invoice_parser/model/trade_document_types.py,sha256=
|
|
26
|
-
edi_invoice_parser/model/
|
|
27
|
-
edi_invoice_parser/
|
|
28
|
-
edi_invoice_parser/pdf_llm_parser/__init__.py,sha256=hJwV4HialAgphoAZ5SzklXgBrxA7KYfn1edKhUMxpwE,130
|
|
29
|
-
edi_invoice_parser/pdf_llm_parser/google_gemini_parser.py,sha256=Tqo3scK-vz8_r3DHuDmJbf3vLF5l9TGM8kH0f8nwXjw,6183
|
|
30
|
-
edi_invoice_parser/tests/__init__.py,sha256=gnkvp4ZsQ0g1L5r6fbyhvFNsSKp0PegdvVeQP_dVQSw,142
|
|
23
|
+
edi_invoice_parser/model/__init__.py,sha256=gdzehRQK27DRpc-datjXMSkcs5GvxPFtph02wLE1Kns,647
|
|
24
|
+
edi_invoice_parser/model/trade_document_types.py,sha256=nmNzSG0bK9FCUzF1KmR1SUvgEx8ACaXezhLV_a97eYQ,5060
|
|
25
|
+
edi_invoice_parser/model/xml_abstract_x_rechnung_parser.py,sha256=ufkXX4OP0_CjsB9pjF0-h_MJerkOx4JeY54nULi_BUA,691
|
|
26
|
+
edi_invoice_parser/tests/__init__.py,sha256=KhEmBFKreDn5PGQFEnBzTAI7IkxtRj_H0dMxk_tuo54,160
|
|
31
27
|
edi_invoice_parser/tests/test_iban_handling.py,sha256=suRaB9gxbNc2Dc7spjHmQyPBdXva98HF1js85wQWqPM,662
|
|
32
|
-
edi_invoice_parser/tests/
|
|
33
|
-
edi_invoice_parser/tests/test_parse_x_rechnung.py,sha256=K2g3jjfxh5gsTJtOvjWSzo-lPIdgnpMsuuewQa5BQq4,3591
|
|
28
|
+
edi_invoice_parser/tests/test_parse_x_rechnung.py,sha256=jZK_Gw1yMHV8glR2yc7saC12MkcR4Jk1OqrQlKEvvN4,3604
|
|
34
29
|
edi_invoice_parser/ubl_sax_parser/__init__.py,sha256=P3QhOExirTKDRre-ReGBVv_GFZniEj_kOnWtUSNJGq0,91
|
|
35
|
-
edi_invoice_parser/ubl_sax_parser/xml_ubl_sax_parser.py,sha256=
|
|
30
|
+
edi_invoice_parser/ubl_sax_parser/xml_ubl_sax_parser.py,sha256=eZETsQi0MZ7f0SvyRZBorMUTJkpET9Bi2FDWBFka_qA,16625
|
|
36
31
|
edi_invoice_parser/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
32
|
edi_invoice_parser/util/file_helper.py,sha256=4gdWbv8L9LSMraLKvGI1Z3NMcuGGy7JB1qFvNaW-yo4,767
|
|
38
33
|
edi_invoice_parser/util/timer_helper.py,sha256=X1XSV03iLZ4xfjELj_axlvNxzR2sOrJInXiv9HU2Fyg,284
|
|
39
|
-
mikrowerk_edi_invoicing-0.
|
|
40
|
-
mikrowerk_edi_invoicing-0.
|
|
41
|
-
mikrowerk_edi_invoicing-0.
|
|
42
|
-
mikrowerk_edi_invoicing-0.
|
|
43
|
-
mikrowerk_edi_invoicing-0.
|
|
34
|
+
mikrowerk_edi_invoicing-0.6.1.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
35
|
+
mikrowerk_edi_invoicing-0.6.1.dist-info/METADATA,sha256=EEEN0GCXPmhTvEFofOhbB8He2YY4UYP5q6_TdSOKJ9Q,1074
|
|
36
|
+
mikrowerk_edi_invoicing-0.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
37
|
+
mikrowerk_edi_invoicing-0.6.1.dist-info/top_level.txt,sha256=OyIJDXDBfR9f0EvTDTmEHdXEFHscjRqX1MxeOeT2VKM,19
|
|
38
|
+
mikrowerk_edi_invoicing-0.6.1.dist-info/RECORD,,
|
|
@@ -1,265 +0,0 @@
|
|
|
1
|
-
from datetime import datetime
|
|
2
|
-
from dataclasses import dataclass, asdict
|
|
3
|
-
from decimal import Decimal
|
|
4
|
-
|
|
5
|
-
__all__ = ['XRechnung', "XRechnungCurrency", "XRechnungTradeParty", "XRechnungTradeAddress", "XRechnungTradeContact",
|
|
6
|
-
"XRechnungPaymentMeans", "XRechnungBankAccount", "XRechnungAppliedTradeTax", "XRechnungTradeLine",
|
|
7
|
-
"XRechnungFinancialCard"]
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
@dataclass
|
|
11
|
-
class XRechnungCurrency:
|
|
12
|
-
amount: Decimal
|
|
13
|
-
currency_code: str
|
|
14
|
-
|
|
15
|
-
@staticmethod
|
|
16
|
-
def from_currency_tuple(currency_tuple: tuple) -> 'XRechnungCurrency':
|
|
17
|
-
return XRechnungCurrency(*currency_tuple)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
@dataclass
|
|
21
|
-
class XRechnungTradeAddress:
|
|
22
|
-
post_code: str = None # 'Post-Code'
|
|
23
|
-
city_name: str = None # 'City'
|
|
24
|
-
country_id: str = None # 'Country ID
|
|
25
|
-
country_subdivision_id: str = None # 'Country Subdivision ID'
|
|
26
|
-
address_line_1: str = None # 'Address Line 1'
|
|
27
|
-
address_line_2: str = None # 'Address Line 2'
|
|
28
|
-
address_line_3: str = None # 'Address Line 3'
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
@dataclass
|
|
32
|
-
class XRechnungTradeContact:
|
|
33
|
-
name: str = None # 'Person Name'
|
|
34
|
-
department_name: str = None # 'Department Name'
|
|
35
|
-
telephone: str = None # 'Telephone Number'
|
|
36
|
-
fax: str = None # 'Fax'
|
|
37
|
-
email: str = None # 'Email'
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@dataclass
|
|
41
|
-
class XRechnungTradeParty:
|
|
42
|
-
global_id: int = 0 # 'Global ID'
|
|
43
|
-
global_id_schema: str = None # 'Global Schema'
|
|
44
|
-
id: str = None # 'id'
|
|
45
|
-
name: str = None # 'Name'
|
|
46
|
-
description: str = None # 'Description'
|
|
47
|
-
postal_address: XRechnungTradeAddress = None
|
|
48
|
-
email: str = None # 'Email'
|
|
49
|
-
trade_contact: XRechnungTradeContact = None
|
|
50
|
-
vat_registration_number: str = None # 'VAT Registration Number'
|
|
51
|
-
fiscal_registration_number: str = None # 'Fiscal Registration Number'
|
|
52
|
-
legal_registration_number: str = None
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
# @dataclass
|
|
56
|
-
# class XRechnungSpecifiedTradeSettlementPaymentMeans:
|
|
57
|
-
# name: str = None # 'Name'
|
|
58
|
-
# type_code: str = None # 'Type Code'
|
|
59
|
-
# information: str = None # 'Information'
|
|
60
|
-
# iban: str = None # 'IBAN'
|
|
61
|
-
# bicid: str = None # 'BICID'
|
|
62
|
-
# account_name: str = None # 'Account Name'
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
@dataclass
|
|
66
|
-
class XRechnungAppliedTradeTax:
|
|
67
|
-
name: str = None # 'Name'
|
|
68
|
-
type_code: str = None
|
|
69
|
-
category_code: str = None
|
|
70
|
-
applicable_percent: float = 0.0
|
|
71
|
-
basis_amount: float = 0.0 # 'Basis Amount'
|
|
72
|
-
calculated_amount: float = 0.0 # 'Calculated Tax Amount'
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
@dataclass
|
|
76
|
-
class XRechnungTradeLine:
|
|
77
|
-
name: str = None # 'Name')
|
|
78
|
-
description: str = None
|
|
79
|
-
line_id: str = None # 'Line ID')
|
|
80
|
-
global_product_id: str = None # 'Global Product ID')
|
|
81
|
-
global_product_scheme_id: str = None # 'Global Product Scheme ID')
|
|
82
|
-
seller_assigned_id: str = None # 'Seller Assigned ID')
|
|
83
|
-
buyer_assigned_id: str = None # 'Buyer Assigned ID')
|
|
84
|
-
price_unit: float = 0.0 # 'Net Price')
|
|
85
|
-
quantity_billed: float = 0.0 # 'Billed Quantity')
|
|
86
|
-
quantity_unit_code: str = None # 'Quantity Code')
|
|
87
|
-
total_amount_net: float = 0.0 # 'Total Amount Net')
|
|
88
|
-
price_unit_gross: float = 0.0
|
|
89
|
-
total_allowance_charge: float = 0.0
|
|
90
|
-
trade_tax: any = None
|
|
91
|
-
note: str = None
|
|
92
|
-
lot_number_id: str = None
|
|
93
|
-
expiry_date: datetime = None
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
@dataclass
|
|
97
|
-
class XRechnungFinancialCard:
|
|
98
|
-
id: str | None = None
|
|
99
|
-
cardholder_name: str | None = None
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
@dataclass
|
|
103
|
-
class XRechnungBankAccount:
|
|
104
|
-
iban: str | None = None
|
|
105
|
-
bic: str | None = None
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
@dataclass
|
|
109
|
-
class XRechnungPaymentMeans:
|
|
110
|
-
id: str = None
|
|
111
|
-
type_code: str = None
|
|
112
|
-
information: str = None
|
|
113
|
-
financial_card: XRechnungFinancialCard = None
|
|
114
|
-
payee_account: XRechnungBankAccount = None
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
@dataclass
|
|
118
|
-
class XRechnung:
|
|
119
|
-
"""
|
|
120
|
-
Model an EDI invoice for mapping CII or UBL Invoices
|
|
121
|
-
"""
|
|
122
|
-
name: str = None # 'Name'
|
|
123
|
-
doc_id: str = None # 'Document ID'
|
|
124
|
-
doc_type_code: str = None # 'Subject Code'
|
|
125
|
-
issued_date_time: datetime = None # 'Date'
|
|
126
|
-
delivered_date_time: datetime = None # 'Delivered Date'
|
|
127
|
-
languages: str = None # 'Languages'
|
|
128
|
-
notes: str = None # 'Notes'
|
|
129
|
-
buyer_reference: str = None # 'Buyer Reference'
|
|
130
|
-
order_reference: str = None
|
|
131
|
-
dispatch_reference: str = None
|
|
132
|
-
sales_order_reference: str = None
|
|
133
|
-
seller: XRechnungTradeParty = None
|
|
134
|
-
payee: XRechnungTradeParty = None
|
|
135
|
-
buyer: XRechnungTradeParty = None
|
|
136
|
-
invoicee: XRechnungTradeParty = None
|
|
137
|
-
currency_code: str = None # 'Currency Code'
|
|
138
|
-
payment_means: XRechnungPaymentMeans = None
|
|
139
|
-
payment_terms: str = None # 'Payment Terms'
|
|
140
|
-
line_total_amount: Decimal = None # 'Line Total Amount'
|
|
141
|
-
charge_total_amount: Decimal = None # 'Charge Total Amount'
|
|
142
|
-
allowance_total_amount: Decimal = None # 'Allowance Total Amount'
|
|
143
|
-
tax_basis_total_amount: XRechnungCurrency = None
|
|
144
|
-
tax_total_amount: [XRechnungCurrency] = None # 'Tax Grand Total Amount'
|
|
145
|
-
grand_total_amount: XRechnungCurrency = None # 'Grand Total Amount'
|
|
146
|
-
total_prepaid_amount: Decimal = None # 'Total Prepaid Amount'
|
|
147
|
-
due_payable_amount: Decimal = None # 'Due Payable Amount'
|
|
148
|
-
trade_line_items: [XRechnungTradeLine] = None
|
|
149
|
-
applicable_trade_taxes: [XRechnungAppliedTradeTax] = None
|
|
150
|
-
|
|
151
|
-
def map_to_dict(self) -> dict:
|
|
152
|
-
"""
|
|
153
|
-
maps a XRechnung to a dict suited for generation odoo entities
|
|
154
|
-
Note: this is not a 1:1 mapping of the XRechnung model, some adjustments and simplifications made
|
|
155
|
-
:param self: XRechnung.XRechnung
|
|
156
|
-
:return: dict
|
|
157
|
-
"""
|
|
158
|
-
|
|
159
|
-
_dict = asdict(self)
|
|
160
|
-
_dict.update({
|
|
161
|
-
'line_total_amount': float(self.line_total_amount) if self.line_total_amount is not None else 0,
|
|
162
|
-
'charge_total_amount': float(self.charge_total_amount) if self.charge_total_amount else 0,
|
|
163
|
-
'allowance_total_amount': float(self.allowance_total_amount) if self.allowance_total_amount else 0,
|
|
164
|
-
'tax_basis_total_amount': float(
|
|
165
|
-
self.tax_basis_total_amount.amount) if self.tax_basis_total_amount else 0,
|
|
166
|
-
'tax_total_amount': self.sum_x_rechnung_currency(self.tax_total_amount) if self.tax_total_amount else 0,
|
|
167
|
-
'grand_total_amount': float(self.grand_total_amount.amount) if self.grand_total_amount else 0,
|
|
168
|
-
'total_prepaid_amount': float(self.total_prepaid_amount) if self.total_prepaid_amount else 0.0,
|
|
169
|
-
'due_payable_amount': float(self.due_payable_amount) if self.due_payable_amount else 0.0,
|
|
170
|
-
'seller': self.map_trade_party(self.seller) if self.seller else None,
|
|
171
|
-
'payee': self.map_trade_party(self.payee) if self.payee else None,
|
|
172
|
-
'buyer': self.map_trade_party(self.buyer) if self.buyer else None,
|
|
173
|
-
'invoicee': self.map_trade_party(self.invoicee) if self.invoicee else None,
|
|
174
|
-
'payment_means': self.map_payment_means(self.payment_means) if self.payment_means else None,
|
|
175
|
-
'trade_line_items': self.map_tradeline_items_to_dict(self.trade_line_items),
|
|
176
|
-
'applicable_trade_taxes': self.map_trade_taxes_to_dict(self.applicable_trade_taxes),
|
|
177
|
-
})
|
|
178
|
-
return _dict
|
|
179
|
-
|
|
180
|
-
@classmethod
|
|
181
|
-
def map_tradeline_to_dict(cls, x_trade_line: XRechnungTradeLine) -> dict:
|
|
182
|
-
_dict = asdict(x_trade_line)
|
|
183
|
-
_dict.update(
|
|
184
|
-
{'trade_tax': asdict(x_trade_line.trade_tax) if x_trade_line.trade_tax is not None else None,
|
|
185
|
-
'price_unit': float(x_trade_line.price_unit),
|
|
186
|
-
'quantity_billed': float(x_trade_line.quantity_billed),
|
|
187
|
-
'total_amount_net': float(x_trade_line.total_amount_net),
|
|
188
|
-
})
|
|
189
|
-
return _dict
|
|
190
|
-
|
|
191
|
-
@classmethod
|
|
192
|
-
def map_tradeline_items_to_dict(cls, tradeline_items: list) -> list:
|
|
193
|
-
res = []
|
|
194
|
-
if tradeline_items:
|
|
195
|
-
for tradeline_item in tradeline_items:
|
|
196
|
-
res.append(cls.map_tradeline_to_dict(tradeline_item))
|
|
197
|
-
return res
|
|
198
|
-
|
|
199
|
-
@classmethod
|
|
200
|
-
def map_trade_taxes_to_dict(cls, trade_taxes: list) -> list:
|
|
201
|
-
res = []
|
|
202
|
-
if trade_taxes:
|
|
203
|
-
for tax in trade_taxes:
|
|
204
|
-
res.append(asdict(tax))
|
|
205
|
-
return res
|
|
206
|
-
|
|
207
|
-
@classmethod
|
|
208
|
-
def map_trade_party(cls, trade_party: XRechnungTradeParty) -> dict:
|
|
209
|
-
_dict = asdict(trade_party)
|
|
210
|
-
_dict.update({
|
|
211
|
-
'postal_address': cls.map_trade_address(trade_party.postal_address),
|
|
212
|
-
'trade_contact': cls.map_trade_contact(trade_party.trade_contact, trade_party)
|
|
213
|
-
})
|
|
214
|
-
return _dict
|
|
215
|
-
|
|
216
|
-
@classmethod
|
|
217
|
-
def map_trade_address(cls, trade_address: XRechnungTradeAddress) -> dict | None:
|
|
218
|
-
if trade_address is None:
|
|
219
|
-
return None
|
|
220
|
-
_dict = asdict(trade_address)
|
|
221
|
-
return _dict
|
|
222
|
-
|
|
223
|
-
@classmethod
|
|
224
|
-
def map_trade_contact(cls, trade_contact: XRechnungTradeContact, trade_party: XRechnungTradeParty) -> dict | None:
|
|
225
|
-
if trade_contact is None:
|
|
226
|
-
return None
|
|
227
|
-
_dict = asdict(trade_contact)
|
|
228
|
-
if _dict.get('name', None) is None or _dict.get('name', None) == '':
|
|
229
|
-
_dict.update({'name': trade_party.name})
|
|
230
|
-
return _dict
|
|
231
|
-
|
|
232
|
-
@classmethod
|
|
233
|
-
def map_payment_means(cls, payment_means: XRechnungPaymentMeans) -> dict:
|
|
234
|
-
_dict = {
|
|
235
|
-
'type_code': payment_means.type_code,
|
|
236
|
-
'information': payment_means.information,
|
|
237
|
-
'financial_card': cls.map_financial_card(
|
|
238
|
-
payment_means.financial_card) if payment_means.financial_card else None,
|
|
239
|
-
'bank_account': cls.map_bank_account(payment_means.payee_account) if payment_means.payee_account else None,
|
|
240
|
-
}
|
|
241
|
-
return _dict
|
|
242
|
-
|
|
243
|
-
@classmethod
|
|
244
|
-
def map_financial_card(cls, card: XRechnungFinancialCard) -> dict:
|
|
245
|
-
_dict = {
|
|
246
|
-
'card_number': card.id,
|
|
247
|
-
'card_holder_name': card.cardholder_name
|
|
248
|
-
}
|
|
249
|
-
return _dict
|
|
250
|
-
|
|
251
|
-
@classmethod
|
|
252
|
-
def map_bank_account(cls, bank: XRechnungBankAccount) -> dict:
|
|
253
|
-
_dict = {
|
|
254
|
-
'iban': bank.iban,
|
|
255
|
-
'bic': bank.bic
|
|
256
|
-
}
|
|
257
|
-
return _dict
|
|
258
|
-
|
|
259
|
-
@classmethod
|
|
260
|
-
def sum_x_rechnung_currency(cls, x_currencies: [XRechnungCurrency]) -> float:
|
|
261
|
-
res = 0.0
|
|
262
|
-
if x_currencies:
|
|
263
|
-
for x_currency in x_currencies:
|
|
264
|
-
res += float(x_currency.amount)
|
|
265
|
-
return res
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
from .pdf_llm_parser.google_gemini_parser import analyze_document as google_analyze_document
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def analyze_document(pdf_binary: bytes, api_key=None, model: str = None, prompt=None) -> dict:
|
|
5
|
-
return google_analyze_document(pdf_binary, api_key=api_key, model=model, prompt=prompt)
|
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import logging
|
|
3
|
-
import os
|
|
4
|
-
|
|
5
|
-
from google import genai
|
|
6
|
-
from google.genai import types
|
|
7
|
-
|
|
8
|
-
DEFAULT_LLM_MODEL = "gemini-2.5-flash-lite"
|
|
9
|
-
|
|
10
|
-
_logger = logging.getLogger(__name__)
|
|
11
|
-
|
|
12
|
-
DEFAULT_PROMPT = """
|
|
13
|
-
Bitte analysiere das beigefügte Dokument und extrahiere alle relevanten Informationen als strukturiertes JSON.
|
|
14
|
-
Verwende exakt das folgende JSON-Schema und beachte die Anweisungen.
|
|
15
|
-
|
|
16
|
-
Wichtige Hinweise:
|
|
17
|
-
- Gib NUR valides JSON zurück. Kein umgebender Text oder Markdown-Formatierung.
|
|
18
|
-
- Bei fehlenden Werten verwende den JSON-Wert null.
|
|
19
|
-
- Gib alle Beträge als Zahlen (float oder integer), nicht als Strings.
|
|
20
|
-
- Formatiere alle Datumsangaben im Format YYYY-MM-DD.
|
|
21
|
-
- Gib das land als "ISO 3166-2" Code an, wenn nicht angegeben nehme "DE" an.
|
|
22
|
-
- Erkenne den Dokumententyp und klassifiziere ihn als: "Rechnung", "Gutschrift", "Auftrag", "Angebot", "Anfrage", "Auftragsbestätigung", "Bestellung"
|
|
23
|
-
- Erkenne für eine Gutschrift auch Varianten wie Credit Note, Rechnungsgutschrift, Erstattung
|
|
24
|
-
- Erkenne für eine Rechnung auch Varianten wie: "Rechnung","Proforma Rechnung","R-Nr.","Rechnung-Nr.","Rech.-Nr.","Invoice No.","Invoice Number","Kostenrechnung"
|
|
25
|
-
- Erkenne für eine Auftragsbestätigung auch Varianten wie: "Bestätigung", "Vertrag","Order Acknowledgement", "Order Confirmation"
|
|
26
|
-
- Erkenne für die Referenz auch Varianten wie: "Projektnummer", "Auftragsnummer", "Bestellnummer", "Referenznummer", "Kundenreferenz", "Reference"
|
|
27
|
-
- Ist der Dokumententyp nicht erkennbar klassifiziere ihn als: "sonstiges"
|
|
28
|
-
- Sind keine Positionen vorhanden erstelle eine zusammenfassung des Textes und die Summen oder Beträge, falls vorhanden
|
|
29
|
-
- Ist der Dokumententyp ist nicht erkennbar erstelle eine Zusammenfassung des Textes
|
|
30
|
-
- Erkenne auch Varianten wie: "MWST.", "VAT", "Ust.", etc. für die Umsatzsteuer.
|
|
31
|
-
- Erkenne für die Währung Varianten wie: "EUR", "€", "$", "USD", "CHF" oder sonstige Währungsbezeichnung oder Symbol
|
|
32
|
-
- Wenn keine Währung abgeben ist nimm "EUR" an
|
|
33
|
-
- Erkenne für das fälligkeitsdatum auch Varianten wie: "zahlbar bis", "fällig", "fällig bis", "zu liefern bis", "gültig bis"
|
|
34
|
-
- Erkenne ob eine Rechnung bereits bezahlt ist, ja oder nein, und vermerke die Zahlungsmethode wie: "Paypal", "Kreditkarte", "Überweisung", "Barzahlung"
|
|
35
|
-
- Eine Rechnung ist bezahlt "ja", wenn die Zahlungsmethode wie: "Paypal", "Kreditkarte" ist.
|
|
36
|
-
- Ernenne für die Steuernummer des Absenders auch: "Tax Number", "VAT Number", "VAT ID", "MWST ID", "UST ID", "Ust-id"
|
|
37
|
-
|
|
38
|
-
JSON-Schema:
|
|
39
|
-
{
|
|
40
|
-
"dokumenttyp": "string",
|
|
41
|
-
"dokumentnummer": "string",
|
|
42
|
-
"referenz": "string",
|
|
43
|
-
"dokumentendatum": "YYYY-MM-DD",
|
|
44
|
-
"fälligkeitsdatum": "YYYY-MM-DD",
|
|
45
|
-
"absender": {
|
|
46
|
-
"name": "string",
|
|
47
|
-
"adresse": "string",
|
|
48
|
-
"plz": "string",
|
|
49
|
-
"ort": "string",
|
|
50
|
-
"land": "string",
|
|
51
|
-
"steuernummer": "string",
|
|
52
|
-
},
|
|
53
|
-
"empfänger": {
|
|
54
|
-
"name": "string",
|
|
55
|
-
"adresse": "string",
|
|
56
|
-
"plz": "string",
|
|
57
|
-
"ort": "string",
|
|
58
|
-
"land": "string",
|
|
59
|
-
},
|
|
60
|
-
"positionen": [
|
|
61
|
-
{
|
|
62
|
-
"POS": "number",
|
|
63
|
-
"Bezeichnung": "string",
|
|
64
|
-
"Menge": "number",
|
|
65
|
-
"Einheit": "string",
|
|
66
|
-
"einzelpreis": "number",
|
|
67
|
-
"Betrag": "number",
|
|
68
|
-
"mwst_satz": "number"
|
|
69
|
-
}
|
|
70
|
-
],
|
|
71
|
-
"summen": {
|
|
72
|
-
"nettobetrag": "number",
|
|
73
|
-
"umsatzsteuer": "number",
|
|
74
|
-
"rechnungsbetrag": "number"
|
|
75
|
-
"währung": "string"
|
|
76
|
-
},
|
|
77
|
-
"zahlungshinweise": "string",
|
|
78
|
-
"zahlungsmethode": "string"
|
|
79
|
-
"bezahlt": "string",
|
|
80
|
-
"weitere_info": "string",
|
|
81
|
-
"bankverbindung": "string"
|
|
82
|
-
"zusammenfassung": "string",
|
|
83
|
-
}
|
|
84
|
-
"""
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def analyze_document(pdf_binary: bytes, api_key, model: str, prompt) -> dict:
|
|
88
|
-
"""
|
|
89
|
-
Analysiert eine PDF-Rechnung mit Gemini, extrahiert Informationen
|
|
90
|
-
und gibt sie als Python-Dictionary zurück.
|
|
91
|
-
"""
|
|
92
|
-
|
|
93
|
-
if not pdf_binary:
|
|
94
|
-
_logger.error("Error no binaries supplied")
|
|
95
|
-
raise ValueError("Error no binaries supplied")
|
|
96
|
-
|
|
97
|
-
if api_key:
|
|
98
|
-
if len(api_key) < 20:
|
|
99
|
-
raise RuntimeError("Der API Key scheint ungültig oder zu kurz zu sein.")
|
|
100
|
-
else:
|
|
101
|
-
api_key = os.environ.get("GEMINI_API_KEY", None)
|
|
102
|
-
if api_key is None:
|
|
103
|
-
raise RuntimeError("Die Umgebungsvariable 'GEMINI_API_KEY' wurde nicht gefunden.")
|
|
104
|
-
if len(api_key) < 20:
|
|
105
|
-
raise RuntimeError("Der API Key scheint ungültig oder zu kurz zu sein.")
|
|
106
|
-
|
|
107
|
-
api_key = api_key.strip()
|
|
108
|
-
client = genai.Client(api_key=api_key)
|
|
109
|
-
if not model:
|
|
110
|
-
model = DEFAULT_LLM_MODEL
|
|
111
|
-
if not prompt:
|
|
112
|
-
prompt = DEFAULT_PROMPT
|
|
113
|
-
|
|
114
|
-
_logger.info("\nSende Anfrage an die Gemini API...")
|
|
115
|
-
# Sende die Anfrage mit dem Prompt und der hochgeladenen Datei
|
|
116
|
-
_logger.info(f"use API-KEY: {api_key} length: {len(api_key)}")
|
|
117
|
-
try:
|
|
118
|
-
response = client.models.generate_content(
|
|
119
|
-
model=model,
|
|
120
|
-
contents=[
|
|
121
|
-
types.Part.from_bytes(
|
|
122
|
-
data=pdf_binary,
|
|
123
|
-
mime_type='application/pdf',
|
|
124
|
-
),
|
|
125
|
-
prompt])
|
|
126
|
-
except KeyError:
|
|
127
|
-
raise UserWarning("Fehler: Die Umgebungsvariable GOOGLE_API_KEY wurde nicht gefunden.\n"
|
|
128
|
-
"Bitte setzen Sie den Schlüssel, z.B. mit 'export GOOGLE_API_KEY=\"DEIN_API_SCHLÜSSEL\"'")
|
|
129
|
-
|
|
130
|
-
except Exception as e:
|
|
131
|
-
raise UserWarning(f"Ein unerwarteter Fehler ist aufgetreten: {e}")
|
|
132
|
-
|
|
133
|
-
# Bereinige und parse die Antwort
|
|
134
|
-
try:
|
|
135
|
-
# Manchmal gibt das Modell die Antwort in einem Markdown-Codeblock zurück.
|
|
136
|
-
# Dieser Code entfernt die Markierungen, um reines JSON zu erhalten.
|
|
137
|
-
cleaned_response = response.text.strip().replace("```json", "").replace("```", "").strip()
|
|
138
|
-
|
|
139
|
-
# Parse den JSON-String in ein Python-Dictionary
|
|
140
|
-
extracted_data = json.loads(cleaned_response)
|
|
141
|
-
return extracted_data
|
|
142
|
-
except json.JSONDecodeError:
|
|
143
|
-
raise RuntimeError("Fehler beim Parsen der Modell-Antwort:", response.text)
|
|
144
|
-
except Exception as e:
|
|
145
|
-
raise RuntimeError(f"Ein unerwarteter Fehler ist aufgetreten: {e}")
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
import unittest
|
|
2
|
-
import os
|
|
3
|
-
import pathlib
|
|
4
|
-
from parameterized import parameterized
|
|
5
|
-
import json
|
|
6
|
-
|
|
7
|
-
from . import get_checked_file_path
|
|
8
|
-
from edi_invoice_parser.parse_plain_pdf_file import analyze_document
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class TestPlainPdfInvoiceParser(unittest.TestCase):
|
|
12
|
-
@parameterized.expand([
|
|
13
|
-
('pdf', 'plain_pdf_invoices/25313 - Rechnung Konzepthausevent - 16-09-2025.pdf'),
|
|
14
|
-
('pdf', 'plain_pdf_invoices/57856 - 2509-7377.pdf'),
|
|
15
|
-
('pdf', 'plain_pdf_invoices/Bestätigung griffty GmbH VA 150919 SCCON_25.pdf'),
|
|
16
|
-
# ('pdf', 'plain_pdf_invoices/Invoice INV-1013.pdf'),
|
|
17
|
-
# ('pdf', 'plain_pdf_invoices/LGM-2509-784_Griffity_GmbH_10_09_2025_Rechnung.pdf'),
|
|
18
|
-
# ('pdf', 'plain_pdf_invoices/Order GO-0741243.pdf'),
|
|
19
|
-
# ('pdf', 'plain_pdf_invoices/Rechnung 30250628.pdf'),
|
|
20
|
-
# ('pdf', 'plain_pdf_invoices/Rechnung-202511899-11267.pdf'),
|
|
21
|
-
# ('pdf', 'plain_pdf_invoices/TS Rechnung TS2025-10586.pdf'),
|
|
22
|
-
# ('pdf', 'plain_pdf_invoices/Verkaufsrechnung 01-137334.pdf'),
|
|
23
|
-
])
|
|
24
|
-
def test_parse_pdf_invoice(self, file_type, file_path):
|
|
25
|
-
_file_path, _exists, _is_dir = get_checked_file_path(file_path, __file__)
|
|
26
|
-
self.assertEqual(file_type, 'pdf', "Only 'pdf' filetype is supported")
|
|
27
|
-
self.assertTrue(_exists, f"file does not exist: {_file_path}")
|
|
28
|
-
self.test_api_key_is_available()
|
|
29
|
-
api_key = os.environ.get("GEMINI_API_KEY")
|
|
30
|
-
|
|
31
|
-
filepath = pathlib.Path(_file_path)
|
|
32
|
-
binary = filepath.read_bytes()
|
|
33
|
-
invoice_data = analyze_document(binary, api_key=api_key)
|
|
34
|
-
self.assertIsNotNone(invoice_data, "No result retrieved")
|
|
35
|
-
print("\n------------------------------------------------------------")
|
|
36
|
-
print(f"\nDokument Date: {file_path}")
|
|
37
|
-
print("\n--- Extrahierte Dokumentdaten ---")
|
|
38
|
-
# Beispielhafter Zugriff auf einzelne Daten
|
|
39
|
-
print(f"\nDokumenttyp: {invoice_data.get('dokumenttyp', None)}")
|
|
40
|
-
print(f"\ndokumentnummer: {invoice_data.get('dokumentnummer', None)}")
|
|
41
|
-
print(f"\nAbsender: {invoice_data.get('absender', {}).get('name', None)}")
|
|
42
|
-
print(f"\nEmpfänger: {invoice_data.get('empfänger', {}).get('name', None)}")
|
|
43
|
-
print(f"Fälligkeitsdatum: {invoice_data.get('fälligkeitsdatum', None)}")
|
|
44
|
-
print(f"Gesamtbetrag: {invoice_data.get('summen', {}).get('rechnungsbetrag', 'None')} €")
|
|
45
|
-
print("\n------------------------------------------------------------")
|
|
46
|
-
# Gib das Dictionary als formatierten JSON-String aus
|
|
47
|
-
print(json.dumps(invoice_data, indent=2, ensure_ascii=False))
|
|
48
|
-
|
|
49
|
-
_out_file_path = _file_path.replace('.pdf', '.json')
|
|
50
|
-
with open(_out_file_path, "w") as f:
|
|
51
|
-
f.write(json.dumps(invoice_data, indent=2, ensure_ascii=False))
|
|
52
|
-
print(f"written result json to {_out_file_path}")
|
|
53
|
-
print("\n---------------------------------")
|
|
54
|
-
|
|
55
|
-
def test_api_key_is_available(self):
|
|
56
|
-
"""
|
|
57
|
-
Prüft, ob die Umgebungsvariable 'PROD_API_KEY' gesetzt ist.
|
|
58
|
-
|
|
59
|
-
WICHTIG: Gib niemals den Inhalt des Keys in Logs aus!
|
|
60
|
-
Prüfe nur, ob er existiert oder valide aussieht (z.B. Länge).
|
|
61
|
-
"""
|
|
62
|
-
|
|
63
|
-
# Hier liest Python die Umgebungsvariable
|
|
64
|
-
api_key = os.environ.get("GEMINI_API_KEY")
|
|
65
|
-
|
|
66
|
-
assert api_key is not None, "Die Umgebungsvariable 'GEMINI_API_KEY' wurde nicht gefunden."
|
|
67
|
-
assert len(api_key) > 20, "Der API Key scheint ungültig oder zu kurz zu sein."
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
if __name__ == '__main__':
|
|
71
|
-
unittest.main()
|
|
File without changes
|
{mikrowerk_edi_invoicing-0.5.0.dist-info → mikrowerk_edi_invoicing-0.6.1.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{mikrowerk_edi_invoicing-0.5.0.dist-info → mikrowerk_edi_invoicing-0.6.1.dist-info}/top_level.txt
RENAMED
|
File without changes
|