json2ubl-1.0.0-py3-none-any.whl → json2ubl-1.0.1-py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
json2ubl/__init__.py CHANGED
@@ -4,6 +4,8 @@ from typing import Any, Dict, List
4
4
  from .config import UblConfig, get_logger
5
5
  from .converter import Json2UblConverter
6
6
 
7
+ __version__ = "1.0.1"
8
+
7
9
  logger = get_logger(__name__)
8
10
  PACKAGE_DIR = Path(__file__).parent
9
11
 
@@ -80,42 +82,35 @@ def json_dict_to_ubl_xml(
80
82
  config.setup_logging()
81
83
 
82
84
  converter = Json2UblConverter(config)
85
+ merged_dicts = converter._group_and_merge_documents(list_of_dicts)
86
+
83
87
  documents = []
84
88
  document_types: Dict[str, int] = {}
89
+ first_error_response = None
85
90
 
86
- for doc_dict in list_of_dicts:
91
+ for doc_dict in merged_dicts:
87
92
  response = converter.convert_json_dict_to_xml_dict(doc_dict)
88
93
 
89
94
  if response.get("error_response"):
95
+ if first_error_response is None:
96
+ first_error_response = response["error_response"]
90
97
  logger.error(f"Conversion failed: {response['error_response']}")
91
- return response
98
+ continue
92
99
 
93
100
  if not response.get("documents") or len(response["documents"]) == 0:
94
101
  error_msg = "No valid documents in conversion response"
102
+ if first_error_response is None:
103
+ first_error_response = error_msg
95
104
  logger.error(error_msg)
96
- return {
97
- "documents": [],
98
- "summary": {
99
- "total_inputs": len(list_of_dicts),
100
- "files_created": 0,
101
- "document_types": {},
102
- },
103
- "error_response": error_msg,
104
- }
105
+ continue
105
106
 
106
107
  doc_info = response["documents"][0]
107
108
  if not isinstance(doc_info, dict):
108
109
  error_msg = "Invalid document info format"
110
+ if first_error_response is None:
111
+ first_error_response = error_msg
109
112
  logger.error(error_msg)
110
- return {
111
- "documents": [],
112
- "summary": {
113
- "total_inputs": len(list_of_dicts),
114
- "files_created": 0,
115
- "document_types": {},
116
- },
117
- "error_response": error_msg,
118
- }
113
+ continue
119
114
 
120
115
  documents.append(doc_info)
121
116
 
@@ -123,10 +118,21 @@ def json_dict_to_ubl_xml(
123
118
  for dtype, count in doc_type.items():
124
119
  document_types[dtype] = document_types.get(dtype, 0) + count
125
120
 
121
+ if not documents and first_error_response:
122
+ return {
123
+ "documents": [],
124
+ "summary": {
125
+ "total_inputs": len(list_of_dicts),
126
+ "files_created": 0,
127
+ "document_types": {},
128
+ },
129
+ "error_response": first_error_response,
130
+ }
131
+
126
132
  return {
127
133
  "documents": documents,
128
134
  "summary": {
129
- "total_inputs": len(list_of_dicts),
135
+ "total_inputs": len(merged_dicts),
130
136
  "files_created": 0,
131
137
  "document_types": document_types,
132
138
  },
json2ubl/config.py CHANGED
@@ -33,7 +33,7 @@ class UblConfig:
33
33
  logs_dir = Path("logs")
34
34
  logs_dir.mkdir(parents=True, exist_ok=True)
35
35
 
36
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
36
+ timestamp = datetime.now().strftime("%Y%m%d")
37
37
  log_filename = f"json2ubl_{timestamp}.log"
38
38
  log_path = logs_dir / log_filename
39
39
 
json2ubl/converter.py CHANGED
@@ -353,30 +353,16 @@ class Json2UblConverter:
353
353
 
354
354
  logger.info(f"Found {len(data)} documents in file")
355
355
 
356
- data = [self._normalize_keys_recursive(page) for page in data]
357
-
358
- grouped: Dict[str, List[Dict[str, Any]]] = {}
359
- skipped_count = 0
360
- for page in data:
361
- doc_id = page.get("id")
362
- if not doc_id:
363
- logger.warning("Skipping page without 'id' field")
364
- skipped_count += 1
365
- continue
366
- grouped.setdefault(doc_id, []).append(page)
367
-
368
- logger.info(
369
- f"Grouped into {len(grouped)} unique documents (skipped {skipped_count} without id)"
370
- )
356
+ merged_docs = self._group_and_merge_documents(data)
357
+ logger.info(f"Grouped into {len(merged_docs)} unique documents")
371
358
 
372
359
  documents = []
373
360
  document_types: Dict[str, int] = {}
374
361
  first_error_response = None
375
362
 
376
- for doc_id, pages in grouped.items():
363
+ for merged in merged_docs:
377
364
  try:
378
- merged = self._merge_pages(pages)
379
-
365
+ doc_id = merged.get("id", "UNKNOWN")
380
366
  response = self.convert_json_dict_to_xml_dict(merged)
381
367
 
382
368
  if response.get("error_response"):
@@ -598,28 +584,95 @@ class Json2UblConverter:
598
584
  "error_response": error_msg,
599
585
  }
600
586
 
587
+ def _group_and_merge_documents(self, documents: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
588
+ """Group documents by ID and merge pages with same ID.
589
+
590
+ Args:
591
+ documents: List of document dictionaries (potentially with duplicate IDs)
592
+
593
+ Returns:
594
+ List of merged documents (one per unique ID)
595
+ """
596
+ if not documents:
597
+ return []
598
+
599
+ documents = [self._normalize_keys_recursive(doc) for doc in documents]
600
+
601
+ grouped: Dict[str, List[Dict[str, Any]]] = {}
602
+ for doc in documents:
603
+ doc_id = doc.get("id")
604
+ if not doc_id:
605
+ logger.warning("Skipping document without 'id' field")
606
+ continue
607
+ grouped.setdefault(doc_id, []).append(doc)
608
+
609
+ merged_documents = []
610
+ for doc_id, pages in grouped.items():
611
+ try:
612
+ doc_type_raw = pages[0].get("document_type")
613
+ document_type = NUMERIC_TYPE_TO_DOCUMENT_TYPE.get(str(doc_type_raw))
614
+
615
+ schema_cache = {}
616
+ if document_type:
617
+ schema_cache = self._load_schema_cache(document_type)
618
+
619
+ merged = self._merge_pages(pages, schema_cache)
620
+ merged_documents.append(merged)
621
+ except Exception as e:
622
+ logger.error(f"Failed to merge document {doc_id}: {e}")
623
+ continue
624
+
625
+ return merged_documents
626
+
601
627
  @staticmethod
602
- def _merge_pages(pages: List[Dict[str, Any]]) -> Dict[str, Any]:
603
- """Merge multi-page invoice into single object."""
628
+ def _merge_pages(
629
+ pages: List[Dict[str, Any]], schema_cache: Dict[str, Any] | None = None
630
+ ) -> Dict[str, Any]:
631
+ """Merge multi-page invoice into single object.
632
+
633
+ Args:
634
+ pages: List of document pages (dictionaries)
635
+ schema_cache: Schema cache to identify array fields dynamically
636
+
637
+ Returns:
638
+ Merged document dictionary
639
+ """
604
640
  if not pages:
605
641
  return {}
606
642
 
607
643
  merged = deepcopy(pages[0])
608
644
 
609
- list_fields = {
610
- "invoiceLines",
611
- "additionalDocumentReferences",
612
- "globalAllowanceCharges",
613
- "taxTotal",
614
- }
645
+ array_fields = set()
646
+ if schema_cache and "elements" in schema_cache:
647
+ for field_lower, field_info in schema_cache["elements"].items():
648
+ if isinstance(field_info, dict) and field_info.get("maxOccurs") == "unbounded":
649
+ array_fields.add(field_lower)
615
650
 
616
651
  for page in pages[1:]:
617
- for field in list_fields:
618
- if field in page and page[field]:
619
- merged.setdefault(field, []).extend(page[field])
652
+ page_keys_lower = {k.lower(): k for k in page.keys()}
653
+
654
+ for field_lower in array_fields:
655
+ original_key = page_keys_lower.get(field_lower)
656
+ if original_key and page.get(original_key):
657
+ merged_keys_lower = {k.lower(): k for k in merged.keys()}
658
+ merged_key_original = merged_keys_lower.get(field_lower)
659
+
660
+ if merged_key_original:
661
+ if not isinstance(merged[merged_key_original], list):
662
+ merged[merged_key_original] = [merged[merged_key_original]]
663
+ if isinstance(page[original_key], list):
664
+ merged[merged_key_original].extend(page[original_key])
665
+ else:
666
+ merged[merged_key_original].append(page[original_key])
667
+ else:
668
+ if isinstance(page[original_key], list):
669
+ merged[original_key] = page[original_key]
670
+ else:
671
+ merged[original_key] = [page[original_key]]
620
672
 
621
673
  for key, value in page.items():
622
- if key not in list_fields and value is not None:
674
+ key_lower = key.lower()
675
+ if key_lower not in array_fields and value is not None:
623
676
  merged[key] = value
624
677
 
625
678
  return merged
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json2ubl
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: Production-grade JSON to UBL 2.1 XML converter with schema-driven mapping
5
5
  Author-email: SherozShaikh <shaikh.sheroz07@gmail.com>
6
6
  License: MIT
@@ -22,16 +22,16 @@ Classifier: Topic :: Office/Business :: Financial
22
22
  Requires-Python: >=3.10
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
- Requires-Dist: lxml<5.0,>=4.9.4
26
- Requires-Dist: pydantic<3.0,>=2.7.0
27
- Requires-Dist: pyyaml<7.0,>=6.0.1
28
- Requires-Dist: loguru<1.0,>=0.7.2
25
+ Requires-Dist: lxml~=6.0
26
+ Requires-Dist: pydantic~=2.7
27
+ Requires-Dist: pyyaml~=6.0
28
+ Requires-Dist: loguru~=0.7
29
29
  Provides-Extra: dev
30
- Requires-Dist: pytest<9.0,>=8.3.0; extra == "dev"
31
- Requires-Dist: pytest-cov<6.0,>=5.0.0; extra == "dev"
32
- Requires-Dist: black<25.0,>=24.10.0; extra == "dev"
33
- Requires-Dist: ruff<1.0,>=0.8.0; extra == "dev"
34
- Requires-Dist: mypy<2.0,>=1.13.0; extra == "dev"
30
+ Requires-Dist: pytest~=8.3; extra == "dev"
31
+ Requires-Dist: pytest-cov~=5.0; extra == "dev"
32
+ Requires-Dist: black~=24.10; extra == "dev"
33
+ Requires-Dist: ruff~=0.8; extra == "dev"
34
+ Requires-Dist: mypy~=1.13; extra == "dev"
35
35
  Dynamic: license-file
36
36
 
37
37
  # json2ubl
@@ -41,6 +41,7 @@ Dynamic: license-file
41
41
  [![PyPI version](https://badge.fury.io/py/json2ubl.svg)](https://badge.fury.io/py/json2ubl)
42
42
  [![Python Versions](https://img.shields.io/pypi/pyversions/json2ubl.svg)](https://pypi.org/project/json2ubl/)
43
43
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
44
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
44
45
 
45
46
  [json2ubl](https://pypi.org/project/json2ubl/) is a production-ready converter that transforms JSON documents into UBL 2.1-compliant XML. It works with all 60+ UBL document types using automatic schema-driven mapping—no hardcoded field definitions required.
46
47
 
@@ -211,6 +212,8 @@ Convert JSON file and write XML files to disk.
211
212
  - Rolls back on partial failure
212
213
  - Atomic file operations with temp file staging
213
214
 
215
+ **For detailed API documentation with input/output examples and error handling, see [API.md](docs/API.md)**
216
+
214
217
  ---
215
218
 
216
219
  ## 🛡️ Error Handling
@@ -1,9 +1,9 @@
1
- json2ubl/__init__.py,sha256=QL7iCApi-wboHu2z7XvZIg96AReFnPUpLGKj8CN_btU,7097
1
+ json2ubl/__init__.py,sha256=zZi4Ke0gn1NcFXygwzLyhAxTA1g9g54gjEVsaBzgLYw,7280
2
2
  json2ubl/benchmark.py,sha256=TVQfe6aifNNv6VkjlnQmV8yDQL2se1Wim8bbOJy0HxQ,2242
3
3
  json2ubl/benchmark_examples.py,sha256=jTjS1gpyAkZuYtfFjCHERQhfgqIAiIHl7td6DXkQp7A,2123
4
- json2ubl/config.py,sha256=_l_hHOTx6RzEjNvvg4Gx67JB6l7x4kePHnWSW2BDXL0,1710
4
+ json2ubl/config.py,sha256=sjtxG9gqzMZYzBeDcz6bjDwRY3eklGWAoc88qoL1Wro,1703
5
5
  json2ubl/constants.py,sha256=VKyW-NcsuASKLgXjq4jMbsu8wK5eALlcjkqMxZ4GGek,2115
6
- json2ubl/converter.py,sha256=cKHnqjPAzg3sxY5qvPQRTE5D6XWmkRRdGHRr-pr36gM,24685
6
+ json2ubl/converter.py,sha256=05wkyDqIJxK3Jb2anbVbHk1DRsi6_4dIECae1GsgIac,27078
7
7
  json2ubl/exceptions.py,sha256=SjuarBL3kFwTyXX8i3gI_XFS3F_jI9YdRwUJ9qkkJI8,1363
8
8
  json2ubl/core/__init__.py,sha256=ZeHxEGzJnvbGtvLzKtFiScSSz5aYgUtDXb_jHeR6crc,258
9
9
  json2ubl/core/mapper.py,sha256=rBXctsIPH92wWAHz6mRDNWC8S_w76K7DDQo-qI0hYqY,7099
@@ -95,8 +95,8 @@ json2ubl/schemas/ubl-2.1/maindoc/UBL-TransportationStatusRequest-2.1.xsd,sha256=
95
95
  json2ubl/schemas/ubl-2.1/maindoc/UBL-UnawardedNotification-2.1.xsd,sha256=zGmccnKSXad4709Ih3fn66175FtJUb7PLPuRRy50_wY,25282
96
96
  json2ubl/schemas/ubl-2.1/maindoc/UBL-UtilityStatement-2.1.xsd,sha256=Yajwexatg2kKRsooaQC0yfKIiKblUzpykcMaFD8Qy1A,29901
97
97
  json2ubl/schemas/ubl-2.1/maindoc/UBL-Waybill-2.1.xsd,sha256=dApHBjRdiqgrRkbLeN3US8LrkTQBh609zgcQH6RPRZ8,31171
98
- json2ubl-1.0.0.dist-info/licenses/LICENSE,sha256=fud2PsdIMMFCTKje7U4wU1LruG0_xiLIl0K9EYFpp20,1069
99
- json2ubl-1.0.0.dist-info/METADATA,sha256=ePN-oA1AzbTQYjFV9o_pc7VoBu6LRyDpFKyG69tPVlA,9796
100
- json2ubl-1.0.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
101
- json2ubl-1.0.0.dist-info/top_level.txt,sha256=ThPMfVebrZMMoA4l92KA8IaHwh76tYLpgORoyfM6s94,9
102
- json2ubl-1.0.0.dist-info/RECORD,,
98
+ json2ubl-1.0.1.dist-info/licenses/LICENSE,sha256=fud2PsdIMMFCTKje7U4wU1LruG0_xiLIl0K9EYFpp20,1069
99
+ json2ubl-1.0.1.dist-info/METADATA,sha256=i_Go11equlGX76Yh9vmvQQT8_noWviSKqdeiUKKuWAU,9954
100
+ json2ubl-1.0.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
101
+ json2ubl-1.0.1.dist-info/top_level.txt,sha256=ThPMfVebrZMMoA4l92KA8IaHwh76tYLpgORoyfM6s94,9
102
+ json2ubl-1.0.1.dist-info/RECORD,,