jsonunwrap 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
json_unwrap/__init__.py
ADDED
json_unwrap/core.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains the core functionality for the json_unwrap package.
|
|
3
|
+
The main function is `json_to_csv`, which fetches JSON from a URL, flattens it, and writes the result to a CSV file.
|
|
4
|
+
"""
|
|
5
|
+
import os
|
|
6
|
+
from typing import Any, Dict, List, Union
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
def unwrap_data(data: Union[Dict[str, Any], List[Any]]) -> pd.DataFrame:
    """
    Normalizes and deeply flattens semi-structured JSON data into a pandas DataFrame.

    Args:
        data: A single JSON object (dict) or a list of records.

    Returns:
        A DataFrame in which list-valued cells have been exploded into one row
        per element and dict-valued cells have been expanded into columns.
        Nested keys that clash with an existing column name are suffixed with
        ``_<parent column>``.
    """
    # A single dictionary is treated as a one-record list.
    main_data = [data] if isinstance(data, dict) else data

    # Perform the initial normalization
    df = pd.json_normalize(main_data)

    # Repeatedly scan the columns, flattening one nested column per pass, until
    # a complete pass finds nothing left to flatten.
    changed = True
    while changed:
        changed = False
        for col in list(df.columns):
            values = df[col].dropna()

            # Explode lists
            if any(isinstance(val, list) for val in values):
                # ignore_index=True gives every exploded row its own label.
                # Without it the new rows share their parent's label, and the
                # index-based join below would pair each element with every
                # sibling, multiplying the rows (n list items -> n*n rows).
                df = df.explode(col, ignore_index=True)
                changed = True
                break  # Refresh columns list after structural changes

            # Normalize and merge nested dictionaries
            if any(isinstance(val, dict) for val in values):
                # Align the expanded columns with df row-for-row before joining.
                nested_df = pd.json_normalize(df[col]).set_index(df.index)
                df = df.drop(columns=[col]).join(nested_df, rsuffix=f"_{col}")
                changed = True
                break

    return df
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def fetch_json(url: str, **kwargs: Any) -> Union[Dict[str, Any], List[Any]]:
    """
    Fetches raw JSON data from a URL.

    Args:
        url: The endpoint web target.
        **kwargs: Additional arguments passed directly to requests.get (e.g., headers, auth).
            If ``timeout`` is not supplied, a default of 30 seconds is used.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the response carries an error status code.
    """
    # requests has no default timeout, so a stalled server would block the
    # caller forever; apply one unless the caller chose their own.
    kwargs.setdefault("timeout", 30)
    response = requests.get(url, **kwargs)
    response.raise_for_status()
    return response.json()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def json_to_csv(url: str, output_path: str) -> pd.DataFrame:
    """
    Downloads JSON from a URL, flattens it, and writes the result to a CSV file.

    Args:
        url: Address of the JSON resource to fetch.
        output_path: Destination path of the CSV; a missing parent directory
            is created before the download starts.

    Returns:
        The flattened DataFrame that was written to disk.
    """
    # Create the destination folder up front; a bare file name has no
    # directory part, in which case there is nothing to create.
    parent_dir, _ = os.path.split(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    flattened = unwrap_data(fetch_json(url))

    # The row index is not written to the file.
    flattened.to_csv(output_path, index=False)
    return flattened
|
|
78
|
+
|
|
79
|
+
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: jsonunwrap
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A small python package that unpacks data from a JSON url and converts it into a csv file.
|
|
5
|
+
Author-email: njuedominic <njuemugodominic@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/njuedominic/json-unwrap
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/njuedominic/json-unwrap/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: requests>=2.28.0
|
|
14
|
+
Requires-Dist: pandas>=2.0.0
|
|
15
|
+
|
|
16
|
+
### JSON Unwrap
|
|
17
|
+
A small Python package that fetches JSON data from a URL, flattens it, and converts it into a CSV file.
|
|
18
|
+
|
|
19
|
+
### Current features
|
|
20
|
+
* Convert the JSON from one URL into a CSV file
|
|
21
|
+
* Automatically creates the output directory if it does not exist
|
|
22
|
+
|
|
23
|
+
### Roadmap
|
|
24
|
+
* Custom file names for output csv
|
|
25
|
+
* Convert into a dataframe ready for use in a notebook environment
|
|
26
|
+
* Error handling for a failed url
|
|
27
|
+
* Tests
|
|
28
|
+
* Documentation examples
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
json_unwrap/__init__.py,sha256=evLhuFoy_b7rs4tvySmpzTweSKQhduX_yRmIYYGsb2I,195
|
|
2
|
+
json_unwrap/core.py,sha256=aHKMkPqfWM-vK5E8JxBIAMN6B-A2jWSPnZgrYPX5A2w,2525
|
|
3
|
+
jsonunwrap-0.1.0.dist-info/METADATA,sha256=sL3qwKF8BTtZwFPaOa_Nc-jlMjpAxoeijDmXHyzdsNc,1020
|
|
4
|
+
jsonunwrap-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
5
|
+
jsonunwrap-0.1.0.dist-info/top_level.txt,sha256=tHU37K_W_2K-VIavGdYByt5ftkgRBMnPHuNxq8GnAUU,12
|
|
6
|
+
jsonunwrap-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
json_unwrap
|