json2ubl-1.0.0-py3-none-any.whl → json2ubl-1.0.1-py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- json2ubl/__init__.py +27 -21
- json2ubl/config.py +1 -1
- json2ubl/converter.py +83 -30
- {json2ubl-1.0.0.dist-info → json2ubl-1.0.1.dist-info}/METADATA +13 -10
- {json2ubl-1.0.0.dist-info → json2ubl-1.0.1.dist-info}/RECORD +8 -8
- {json2ubl-1.0.0.dist-info → json2ubl-1.0.1.dist-info}/WHEEL +0 -0
- {json2ubl-1.0.0.dist-info → json2ubl-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {json2ubl-1.0.0.dist-info → json2ubl-1.0.1.dist-info}/top_level.txt +0 -0
json2ubl/__init__.py
CHANGED

@@ -4,6 +4,8 @@ from typing import Any, Dict, List
 from .config import UblConfig, get_logger
 from .converter import Json2UblConverter
 
+__version__ = "1.0.1"
+
 logger = get_logger(__name__)
 PACKAGE_DIR = Path(__file__).parent
 
@@ -80,42 +82,35 @@ def json_dict_to_ubl_xml(
     config.setup_logging()
 
     converter = Json2UblConverter(config)
+    merged_dicts = converter._group_and_merge_documents(list_of_dicts)
+
     documents = []
     document_types: Dict[str, int] = {}
+    first_error_response = None
 
-    for doc_dict in list_of_dicts:
+    for doc_dict in merged_dicts:
         response = converter.convert_json_dict_to_xml_dict(doc_dict)
 
         if response.get("error_response"):
+            if first_error_response is None:
+                first_error_response = response["error_response"]
             logger.error(f"Conversion failed: {response['error_response']}")
-
+            continue
 
         if not response.get("documents") or len(response["documents"]) == 0:
            error_msg = "No valid documents in conversion response"
+            if first_error_response is None:
+                first_error_response = error_msg
             logger.error(error_msg)
-            return {
-                "documents": [],
-                "summary": {
-                    "total_inputs": len(list_of_dicts),
-                    "files_created": 0,
-                    "document_types": {},
-                },
-                "error_response": error_msg,
-            }
+            continue
 
         doc_info = response["documents"][0]
         if not isinstance(doc_info, dict):
             error_msg = "Invalid document info format"
+            if first_error_response is None:
+                first_error_response = error_msg
             logger.error(error_msg)
-            return {
-                "documents": [],
-                "summary": {
-                    "total_inputs": len(list_of_dicts),
-                    "files_created": 0,
-                    "document_types": {},
-                },
-                "error_response": error_msg,
-            }
+            continue
 
         documents.append(doc_info)
 
@@ -123,10 +118,21 @@ def json_dict_to_ubl_xml(
         for dtype, count in doc_type.items():
             document_types[dtype] = document_types.get(dtype, 0) + count
 
+    if not documents and first_error_response:
+        return {
+            "documents": [],
+            "summary": {
+                "total_inputs": len(list_of_dicts),
+                "files_created": 0,
+                "document_types": {},
+            },
+            "error_response": first_error_response,
+        }
+
     return {
         "documents": documents,
         "summary": {
-            "total_inputs": len(list_of_dicts),
+            "total_inputs": len(merged_dicts),
             "files_created": 0,
             "document_types": document_types,
         },
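
Net effect of the `__init__.py` changes: in 1.0.0, `json_dict_to_ubl_xml` bailed out with an error payload as soon as a single input failed; in 1.0.1 the inputs are first merged by document ID, a failed document is skipped rather than aborting the batch, only the first error is remembered, and the error payload is returned only when nothing converted at all. A minimal standalone sketch of that control flow (`fake_convert` is a hypothetical stand-in for `converter.convert_json_dict_to_xml_dict`; only the batching logic mirrors the diff):

```python
from typing import Any, Dict, List

def fake_convert(doc: Dict[str, Any]) -> Dict[str, Any]:
    # Hypothetical stand-in converter: fails whenever a document has no "lines".
    if not doc.get("lines"):
        return {"documents": [], "error_response": f"no lines in {doc.get('id')}"}
    return {"documents": [{"id": doc["id"], "xml": "<Invoice/>"}], "error_response": None}

def convert_all(merged_dicts: List[Dict[str, Any]]) -> Dict[str, Any]:
    documents: List[Dict[str, Any]] = []
    first_error_response = None
    for doc_dict in merged_dicts:
        response = fake_convert(doc_dict)
        if response.get("error_response"):
            if first_error_response is None:      # remember only the first error
                first_error_response = response["error_response"]
            continue                              # 1.0.0 aborted the whole batch here
        documents.append(response["documents"][0])
    if not documents and first_error_response:    # error payload only if nothing converted
        return {"documents": [], "error_response": first_error_response}
    return {"documents": documents, "error_response": None}

print(convert_all([{"id": "A", "lines": [1]}, {"id": "B"}]))
# {'documents': [{'id': 'A', 'xml': '<Invoice/>'}], 'error_response': None}
```
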
json2ubl/config.py
CHANGED

@@ -33,7 +33,7 @@ class UblConfig:
         logs_dir = Path("logs")
         logs_dir.mkdir(parents=True, exist_ok=True)
 
-        timestamp = datetime.now().strftime("%Y%m%…
+        timestamp = datetime.now().strftime("%Y%m%d")
         log_filename = f"json2ubl_{timestamp}.log"
         log_path = logs_dir / log_filename
 
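
With the fixed `%Y%m%d` format string, log paths now resolve to one file per calendar day (the 1.0.0 format string is truncated in this diff, so only the new behavior can be shown). A quick sketch of the resulting path:

```python
from datetime import datetime
from pathlib import Path

# 1.0.1 behavior: one log file per calendar day.
timestamp = datetime.now().strftime("%Y%m%d")   # e.g. "20250114"
log_path = Path("logs") / f"json2ubl_{timestamp}.log"
print(log_path)                                 # logs/json2ubl_20250114.log
```
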
json2ubl/converter.py
CHANGED

@@ -353,30 +353,16 @@ class Json2UblConverter:
 
         logger.info(f"Found {len(data)} documents in file")
 
-        …
-        …
-        grouped: Dict[str, List[Dict[str, Any]]] = {}
-        skipped_count = 0
-        for page in data:
-            doc_id = page.get("id")
-            if not doc_id:
-                logger.warning("Skipping page without 'id' field")
-                skipped_count += 1
-                continue
-            grouped.setdefault(doc_id, []).append(page)
-
-        logger.info(
-            f"Grouped into {len(grouped)} unique documents (skipped {skipped_count} without id)"
-        )
+        merged_docs = self._group_and_merge_documents(data)
+        logger.info(f"Grouped into {len(merged_docs)} unique documents")
 
         documents = []
         document_types: Dict[str, int] = {}
         first_error_response = None
 
-        for doc_id, pages in grouped.items():
+        for merged in merged_docs:
             try:
-                …
-                merged = self._merge_pages(pages)
+                doc_id = merged.get("id", "UNKNOWN")
                 response = self.convert_json_dict_to_xml_dict(merged)
 
                 if response.get("error_response"):
@@ -598,28 +584,95 @@ class Json2UblConverter:
             "error_response": error_msg,
         }
 
+    def _group_and_merge_documents(self, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Group documents by ID and merge pages with same ID.
+
+        Args:
+            documents: List of document dictionaries (potentially with duplicate IDs)
+
+        Returns:
+            List of merged documents (one per unique ID)
+        """
+        if not documents:
+            return []
+
+        documents = [self._normalize_keys_recursive(doc) for doc in documents]
+
+        grouped: Dict[str, List[Dict[str, Any]]] = {}
+        for doc in documents:
+            doc_id = doc.get("id")
+            if not doc_id:
+                logger.warning("Skipping document without 'id' field")
+                continue
+            grouped.setdefault(doc_id, []).append(doc)
+
+        merged_documents = []
+        for doc_id, pages in grouped.items():
+            try:
+                doc_type_raw = pages[0].get("document_type")
+                document_type = NUMERIC_TYPE_TO_DOCUMENT_TYPE.get(str(doc_type_raw))
+
+                schema_cache = {}
+                if document_type:
+                    schema_cache = self._load_schema_cache(document_type)
+
+                merged = self._merge_pages(pages, schema_cache)
+                merged_documents.append(merged)
+            except Exception as e:
+                logger.error(f"Failed to merge document {doc_id}: {e}")
+                continue
+
+        return merged_documents
+
     @staticmethod
-    def _merge_pages(
-        …
+    def _merge_pages(
+        pages: List[Dict[str, Any]], schema_cache: Dict[str, Any] | None = None
+    ) -> Dict[str, Any]:
+        """Merge multi-page invoice into single object.
+
+        Args:
+            pages: List of document pages (dictionaries)
+            schema_cache: Schema cache to identify array fields dynamically
+
+        Returns:
+            Merged document dictionary
+        """
         if not pages:
             return {}
 
         merged = deepcopy(pages[0])
 
-        …
-        …
-            "…
-        …
-        …
-        }
+        array_fields = set()
+        if schema_cache and "elements" in schema_cache:
+            for field_lower, field_info in schema_cache["elements"].items():
+                if isinstance(field_info, dict) and field_info.get("maxOccurs") == "unbounded":
+                    array_fields.add(field_lower)
 
         for page in pages[1:]:
-            for …
-            …
-            …
+            page_keys_lower = {k.lower(): k for k in page.keys()}
+
+            for field_lower in array_fields:
+                original_key = page_keys_lower.get(field_lower)
+                if original_key and page.get(original_key):
+                    merged_keys_lower = {k.lower(): k for k in merged.keys()}
+                    merged_key_original = merged_keys_lower.get(field_lower)
+
+                    if merged_key_original:
+                        if not isinstance(merged[merged_key_original], list):
+                            merged[merged_key_original] = [merged[merged_key_original]]
+                        if isinstance(page[original_key], list):
+                            merged[merged_key_original].extend(page[original_key])
+                        else:
+                            merged[merged_key_original].append(page[original_key])
+                    else:
+                        if isinstance(page[original_key], list):
+                            merged[original_key] = page[original_key]
+                        else:
+                            merged[original_key] = [page[original_key]]
 
             for key, value in page.items():
-                …
+                key_lower = key.lower()
+                if key_lower not in array_fields and value is not None:
                     merged[key] = value
 
         return merged
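
Together, the new `_group_and_merge_documents` and the reworked `_merge_pages` replace what appears to have been a hardcoded field list (truncated in this diff) with schema-driven detection: any field whose schema entry carries `maxOccurs="unbounded"` is concatenated across pages, while every other non-null field is overwritten last-page-wins. A condensed standalone sketch of the merge (the `schema_cache` shape is taken from the diff; the sample data is invented, and the shipped code's cross-page case-insensitive key reconciliation is simplified to a `.lower()` lookup):

```python
from copy import deepcopy
from typing import Any, Dict, List

def merge_pages(pages: List[Dict[str, Any]], schema_cache: Dict[str, Any]) -> Dict[str, Any]:
    if not pages:
        return {}
    merged = deepcopy(pages[0])
    # Fields whose XSD entry says maxOccurs="unbounded" are array-valued.
    array_fields = {
        field for field, info in schema_cache.get("elements", {}).items()
        if isinstance(info, dict) and info.get("maxOccurs") == "unbounded"
    }
    for page in pages[1:]:
        for key, value in page.items():
            if key.lower() in array_fields and value:
                existing = merged.setdefault(key, [])
                if not isinstance(existing, list):
                    existing = merged[key] = [existing]
                existing.extend(value if isinstance(value, list) else [value])
            elif key.lower() not in array_fields and value is not None:
                merged[key] = value            # scalar fields: last page wins
    return merged

schema = {"elements": {"invoiceline": {"maxOccurs": "unbounded"}}}
pages = [
    {"id": "INV-1", "InvoiceLine": [{"qty": 1}], "note": "page 1"},
    {"id": "INV-1", "InvoiceLine": [{"qty": 2}], "note": "page 2"},
]
print(merge_pages(pages, schema))
# {'id': 'INV-1', 'InvoiceLine': [{'qty': 1}, {'qty': 2}], 'note': 'page 2'}
```
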
{json2ubl-1.0.0.dist-info → json2ubl-1.0.1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: json2ubl
-Version: 1.0.0
+Version: 1.0.1
 Summary: Production-grade JSON to UBL 2.1 XML converter with schema-driven mapping
 Author-email: SherozShaikh <shaikh.sheroz07@gmail.com>
 License: MIT
@@ -22,16 +22,16 @@ Classifier: Topic :: Office/Business :: Financial
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: lxml
-Requires-Dist: pydantic
-Requires-Dist: pyyaml
-Requires-Dist: loguru
+Requires-Dist: lxml~=6.0
+Requires-Dist: pydantic~=2.7
+Requires-Dist: pyyaml~=6.0
+Requires-Dist: loguru~=0.7
 Provides-Extra: dev
-Requires-Dist: pytest
-Requires-Dist: pytest-cov
-Requires-Dist: black
-Requires-Dist: ruff
-Requires-Dist: mypy
+Requires-Dist: pytest~=8.3; extra == "dev"
+Requires-Dist: pytest-cov~=5.0; extra == "dev"
+Requires-Dist: black~=24.10; extra == "dev"
+Requires-Dist: ruff~=0.8; extra == "dev"
+Requires-Dist: mypy~=1.13; extra == "dev"
 Dynamic: license-file
 
 # json2ubl
@@ -41,6 +41,7 @@ Dynamic: license-file
 [](https://badge.fury.io/py/json2ubl)
 [](https://pypi.org/project/json2ubl/)
 [](https://opensource.org/licenses/MIT)
+[](https://github.com/psf/black)
 
 [json2ubl](https://pypi.org/project/json2ubl/) is a production-ready converter that transforms JSON documents into UBL 2.1-compliant XML. It works with all 60+ UBL document types using automatic schema-driven mapping—no hardcoded field definitions required.
 
@@ -211,6 +212,8 @@ Convert JSON file and write XML files to disk.
 - Rolls back on partial failure
 - Atomic file operations with temp file staging
 
+**For detailed API documentation with input/output examples and error handling, see [API.md](docs/API.md)**
+
 ---
 
 ## 🛡️ Error Handling
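
The dependency changes pin the runtime requirements with compatible-release (`~=`) specifiers and attach explicit `extra == "dev"` markers to the dev tools (absent or truncated in the 1.0.0 lines above). How a `~=` pin resolves, checked with the third-party `packaging` library (used here only for illustration; it is not shown as a json2ubl dependency):

```python
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=6.0")     # compatible release: equivalent to >=6.0, ==6.*
print(spec.contains("6.2"))      # True  -- minor/patch upgrades still allowed
print(spec.contains("7.0"))      # False -- the next major version is excluded
```
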
{json2ubl-1.0.0.dist-info → json2ubl-1.0.1.dist-info}/RECORD
CHANGED

@@ -1,9 +1,9 @@
-json2ubl/__init__.py,sha256=…
+json2ubl/__init__.py,sha256=zZi4Ke0gn1NcFXygwzLyhAxTA1g9g54gjEVsaBzgLYw,7280
 json2ubl/benchmark.py,sha256=TVQfe6aifNNv6VkjlnQmV8yDQL2se1Wim8bbOJy0HxQ,2242
 json2ubl/benchmark_examples.py,sha256=jTjS1gpyAkZuYtfFjCHERQhfgqIAiIHl7td6DXkQp7A,2123
-json2ubl/config.py,sha256=…
+json2ubl/config.py,sha256=sjtxG9gqzMZYzBeDcz6bjDwRY3eklGWAoc88qoL1Wro,1703
 json2ubl/constants.py,sha256=VKyW-NcsuASKLgXjq4jMbsu8wK5eALlcjkqMxZ4GGek,2115
-json2ubl/converter.py,sha256=…
+json2ubl/converter.py,sha256=05wkyDqIJxK3Jb2anbVbHk1DRsi6_4dIECae1GsgIac,27078
 json2ubl/exceptions.py,sha256=SjuarBL3kFwTyXX8i3gI_XFS3F_jI9YdRwUJ9qkkJI8,1363
 json2ubl/core/__init__.py,sha256=ZeHxEGzJnvbGtvLzKtFiScSSz5aYgUtDXb_jHeR6crc,258
 json2ubl/core/mapper.py,sha256=rBXctsIPH92wWAHz6mRDNWC8S_w76K7DDQo-qI0hYqY,7099
@@ -95,8 +95,8 @@ json2ubl/schemas/ubl-2.1/maindoc/UBL-TransportationStatusRequest-2.1.xsd,sha256=
 json2ubl/schemas/ubl-2.1/maindoc/UBL-UnawardedNotification-2.1.xsd,sha256=zGmccnKSXad4709Ih3fn66175FtJUb7PLPuRRy50_wY,25282
 json2ubl/schemas/ubl-2.1/maindoc/UBL-UtilityStatement-2.1.xsd,sha256=Yajwexatg2kKRsooaQC0yfKIiKblUzpykcMaFD8Qy1A,29901
 json2ubl/schemas/ubl-2.1/maindoc/UBL-Waybill-2.1.xsd,sha256=dApHBjRdiqgrRkbLeN3US8LrkTQBh609zgcQH6RPRZ8,31171
-json2ubl-1.0.0.dist-info/licenses/LICENSE,sha256=…
-json2ubl-1.0.0.dist-info/METADATA,sha256=…
-json2ubl-1.0.0.dist-info/WHEEL,sha256=…
-json2ubl-1.0.0.dist-info/top_level.txt,sha256=…
-json2ubl-1.0.0.dist-info/RECORD,,
+json2ubl-1.0.1.dist-info/licenses/LICENSE,sha256=fud2PsdIMMFCTKje7U4wU1LruG0_xiLIl0K9EYFpp20,1069
+json2ubl-1.0.1.dist-info/METADATA,sha256=i_Go11equlGX76Yh9vmvQQT8_noWviSKqdeiUKKuWAU,9954
+json2ubl-1.0.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+json2ubl-1.0.1.dist-info/top_level.txt,sha256=ThPMfVebrZMMoA4l92KA8IaHwh76tYLpgORoyfM6s94,9
+json2ubl-1.0.1.dist-info/RECORD,,

{json2ubl-1.0.0.dist-info → json2ubl-1.0.1.dist-info}/WHEEL
File without changes

{json2ubl-1.0.0.dist-info → json2ubl-1.0.1.dist-info}/licenses/LICENSE
File without changes

{json2ubl-1.0.0.dist-info → json2ubl-1.0.1.dist-info}/top_level.txt
File without changes