jsonunwrap 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jsonunwrap
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: A small python package that unpacks data from a JSON url and converts it into a csv file.
5
5
  Author-email: njuedominic <njuemugodominic@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/njuedominic/json-unwrap
@@ -0,0 +1,99 @@
1
+ """
2
+ This module contains the core functionality for the json_unwrap package.
3
+ The main function is `json_to_csv`, which fetches JSON from a URL, flattens it, and writes the result to a CSV file.
4
+ """
5
+ import os
6
+ from typing import Any, Dict, List, Union
7
+ import pandas as pd
8
+ import requests
9
+
10
def unwrap_data(data: Union[Dict[str, Any], List[Any]]) -> pd.DataFrame:
    """
    Normalizes and deeply flattens semi-structured JSON data into a pandas DataFrame.

    Args:
        data: Parsed JSON. A list is used as the record list directly. A dict
            with at most four keys and at least one list-valued key is treated
            as a wrapper (like {"products": [...]}): the first list-valued key
            becomes the record list and the other keys are discarded. Any
            other dict is treated as a single record.

    Returns:
        A DataFrame with a fresh 0..n-1 index in which list-valued columns have
        been exploded into one row per element and dict-valued columns have
        been replaced by one column per nested key.
    """
    # 1. Ensure we start with a clean record list
    if isinstance(data, dict):
        list_keys = [key for key, value in data.items() if isinstance(value, list)]
        if list_keys and len(data) <= 4:
            records = data[list_keys[0]]
        else:
            records = [data]
    else:
        records = data

    # 2. Base normalization
    df = pd.json_normalize(records)

    # 3. Work queue of column names; a cursor is used instead of pop(0) so
    #    consuming the queue stays linear.
    pending = list(df.columns)
    cursor = 0
    while cursor < len(pending):
        col = pending[cursor]
        cursor += 1

        # The column may have been replaced by its sub-columns already.
        if col not in df.columns:
            continue

        present = df[col].dropna()
        if present.empty:
            continue

        if any(isinstance(value, dict) for value in present):
            nested = pd.json_normalize(present.tolist())
            nested.index = present.index

            remaining = df.drop(columns=[col])
            known = set(remaining.columns)
            df = remaining.join(nested, rsuffix=f"_{col}")

            # Queue the names that actually exist after the join: rsuffix is
            # only applied to names that collide with an existing column.
            pending.extend(name for name in df.columns if name not in known)

        elif any(isinstance(value, list) for value in present):
            # explode() repeats the index label of the source row; reset it so
            # a later index-based join matches rows one-to-one instead of
            # multiplying them.
            df = df.explode(col).reset_index(drop=True)

            # Revisit the column only if exploding exposed dictionaries;
            # plain scalars (and inner lists) are left as they are.
            if any(isinstance(value, dict) for value in df[col].dropna()):
                pending.append(col)

    return df
62
+
63
+
64
def fetch_json(url: str, **kwargs: Any) -> Union[Dict[str, Any], List[Any]]:
    """
    Fetches raw JSON data from a URL.

    Args:
        url: The endpoint web target.
        **kwargs: Additional arguments passed directly to requests.get (e.g., headers, auth).
            If no ``timeout`` is given, 30 seconds is used; pass ``timeout=None``
            explicitly to wait without limit.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the response status indicates an error
            (raised by ``raise_for_status``).
    """
    # requests waits indefinitely unless a timeout is supplied, so give the
    # call a finite default that callers can still override.
    kwargs.setdefault("timeout", 30)
    response = requests.get(url, **kwargs)
    response.raise_for_status()
    return response.json()
75
+
76
+
77
def json_to_csv(url: str, output_path: str, **kwargs: Any) -> pd.DataFrame:
    """
    Fetches JSON from a URL, deeply flattens it, and saves it directly to a CSV file.

    Args:
        url: The endpoint URL containing the target JSON data.
        output_path: Target filesystem path where the CSV will be written.
        **kwargs: Optional extra arguments forwarded to fetch_json
            (e.g., headers, auth).

    Returns:
        The generated pandas DataFrame.
    """
    # Fetch and flatten first so a failed request or parse does not leave an
    # empty output directory behind.
    raw_data = fetch_json(url, **kwargs)
    df = unwrap_data(raw_data)

    # Ensure the parent output directory exists safely
    directory = os.path.dirname(output_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    df.to_csv(output_path, index=False)
    return df
98
+
99
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jsonunwrap
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: A small python package that unpacks data from a JSON url and converts it into a csv file.
5
5
  Author-email: njuedominic <njuemugodominic@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/njuedominic/json-unwrap
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "jsonunwrap"
7
- version = "0.2.1"
7
+ version = "0.2.2"
8
8
  authors = [
9
9
  { name = "njuedominic", email="njuemugodominic@gmail.com"}
10
10
  ]
@@ -1,80 +0,0 @@
1
- """
2
- This module contains the core functionality for the json_unwrap package.
3
- The main function is `json_to_csv`, which fetches JSON from a URL, flattens it, and writes the result to a CSV file.
4
- """
5
- import os
6
- from typing import Any, Dict, List, Union
7
- import pandas as pd
8
- import requests
9
-
10
def unwrap_data(data: Union[Dict[str, Any], List[Any]]) -> pd.DataFrame:
    """
    Normalizes and deeply flattens semi-structured JSON data into a pandas DataFrame.

    Args:
        data: Parsed JSON. A dict is treated as a single record; a list is
            used as the record list directly.

    Returns:
        A DataFrame with a fresh 0..n-1 index in which list-valued columns have
        been exploded (once per column name) into one row per element and
        dict-valued columns have been replaced by one column per nested key.
    """
    records = [data] if isinstance(data, dict) else data

    df = pd.json_normalize(records)

    # Column names that were already exploded; each name is exploded at most
    # once so a column of plain values can never be revisited forever.
    exploded_columns = set()

    changed = True
    while changed:
        changed = False
        for col in list(df.columns):
            present = df[col].dropna()

            # 1. Explode lists (only if this exact column name was not exploded yet)
            if col not in exploded_columns and any(isinstance(val, list) for val in present):
                # explode() repeats the index label of the source row; reset
                # it so the index-based join below matches rows one-to-one
                # instead of multiplying them.
                df = df.explode(col).reset_index(drop=True)
                exploded_columns.add(col)
                changed = True
                break  # Column set/rows changed: restart the scan

            # 2. Normalize and merge nested dictionaries
            if any(isinstance(val, dict) for val in present):
                nested_df = pd.json_normalize(present.tolist())
                nested_df.index = present.index
                df = df.drop(columns=[col]).join(nested_df, rsuffix=f"_{col}")
                changed = True
                break

    return df
43
-
44
-
45
def fetch_json(url: str, **kwargs: Any) -> Union[Dict[str, Any], List[Any]]:
    """
    Fetches raw JSON data from a URL.

    Args:
        url: The endpoint web target.
        **kwargs: Additional arguments passed directly to requests.get (e.g., headers, auth).
            If no ``timeout`` is given, 30 seconds is used; pass ``timeout=None``
            explicitly to wait without limit.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the response status indicates an error
            (raised by ``raise_for_status``).
    """
    # requests waits indefinitely unless a timeout is supplied, so give the
    # call a finite default that callers can still override.
    kwargs.setdefault("timeout", 30)
    response = requests.get(url, **kwargs)
    response.raise_for_status()
    return response.json()
56
-
57
-
58
def json_to_csv(url: str, output_path: str, **kwargs: Any) -> pd.DataFrame:
    """
    Fetches JSON from a URL, deeply flattens it, and saves it directly to a CSV file.

    Args:
        url: The endpoint URL containing the target JSON data.
        output_path: Target filesystem path where the CSV will be written.
        **kwargs: Optional extra arguments forwarded to fetch_json
            (e.g., headers, auth).

    Returns:
        The generated pandas DataFrame.
    """
    # Fetch and flatten first so a failed request or parse does not leave an
    # empty output directory behind.
    raw_data = fetch_json(url, **kwargs)
    df = unwrap_data(raw_data)

    # Ensure the parent output directory exists safely
    directory = os.path.dirname(output_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    df.to_csv(output_path, index=False)
    return df
79
-
80
-
File without changes
File without changes