jsonunwrap 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jsonunwrap-0.1.0/PKG-INFO +28 -0
- jsonunwrap-0.1.0/README.md +13 -0
- jsonunwrap-0.1.0/json_unwrap/__init__.py +8 -0
- jsonunwrap-0.1.0/json_unwrap/core.py +79 -0
- jsonunwrap-0.1.0/jsonunwrap.egg-info/PKG-INFO +28 -0
- jsonunwrap-0.1.0/jsonunwrap.egg-info/SOURCES.txt +10 -0
- jsonunwrap-0.1.0/jsonunwrap.egg-info/dependency_links.txt +1 -0
- jsonunwrap-0.1.0/jsonunwrap.egg-info/requires.txt +2 -0
- jsonunwrap-0.1.0/jsonunwrap.egg-info/top_level.txt +1 -0
- jsonunwrap-0.1.0/pyproject.toml +28 -0
- jsonunwrap-0.1.0/setup.cfg +4 -0
- jsonunwrap-0.1.0/tests/test_core.py +21 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: jsonunwrap
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A small python package that unpacks data from a JSON url and converts it into a csv file.
|
|
5
|
+
Author-email: njuedominic <njuemugodominic@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/njuedominic/json-unwrap
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/njuedominic/json-unwrap/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: requests>=2.28.0
|
|
14
|
+
Requires-Dist: pandas>=2.0.0
|
|
15
|
+
|
|
16
|
+
### JSON Unwrap
|
|
17
|
+
A small python package that unpacks data from a JSON url and converts it into a csv file.
|
|
18
|
+
|
|
19
|
+
### Current features
|
|
20
|
+
* Convert one JSON url to a csv
|
|
21
|
+
* Automatically creates a data folder if it does not exist
|
|
22
|
+
|
|
23
|
+
### Roadmap
|
|
24
|
+
* Custom file names for output csv
|
|
25
|
+
* Convert into a dataframe ready for use in a notebook environment
|
|
26
|
+
* Error handling for a failed url
|
|
27
|
+
* Tests
|
|
28
|
+
* Documentation examples
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
### JSON Unwrap
|
|
2
|
+
A small Python package that unpacks data from a JSON URL and converts it into a CSV file.
|
|
3
|
+
|
|
4
|
+
### Current features
|
|
5
|
+
* Convert one JSON url to a csv
|
|
6
|
+
* Automatically creates the output file's parent folder if it does not exist
|
|
7
|
+
|
|
8
|
+
### Roadmap
|
|
9
|
+
* Custom file names for output csv
|
|
10
|
+
* Convert into a dataframe ready for use in a notebook environment
|
|
11
|
+
* Error handling for a failed url
|
|
12
|
+
* Tests
|
|
13
|
+
* Documentation examples
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains the core functionality for the json_unwrap package.
|
|
3
|
+
The main function is `json_to_csv`, which fetches JSON from a URL and writes it to a CSV file.
|
|
4
|
+
"""
|
|
5
|
+
import os
|
|
6
|
+
from typing import Any, Dict, List, Union
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
def unwrap_data(data: Union[Dict[str, Any], List[Any]]) -> pd.DataFrame:
    """
    Normalize and deeply flatten semi-structured JSON data into a pandas DataFrame.

    List values are exploded into one row per element and nested dictionaries
    are expanded into columns, repeating until no column holds a list or dict.

    Args:
        data: Decoded JSON, either a single object (dict) or a list of records.

    Returns:
        The flattened DataFrame. Whenever a list column is exploded the index
        is rebuilt as 0..n-1, so every output row has a unique label.
    """
    # A single dictionary is treated as a one-record list.
    main_data = [data] if isinstance(data, dict) else data

    # Perform the initial normalization.
    df = pd.json_normalize(main_data)

    # Flatten one column per pass, then rescan: each structural change alters
    # the column set (and possibly the row count), so the scan must restart.
    changed = True
    while changed:
        changed = False
        for col in list(df.columns):
            present = df[col].dropna()

            # Explode lists into one row per element.
            if any(isinstance(val, list) for val in present):
                # ignore_index=True is essential: explode() otherwise repeats
                # the parent row's index label, and the index-based join below
                # would then pair every exploded row with every sibling,
                # multiplying rows.
                df = df.explode(col, ignore_index=True)
                changed = True
                break

            # Normalize nested dictionaries and merge them back as columns.
            if any(isinstance(val, dict) for val in present):
                # Pass a plain list so the result is positional, then align it
                # to df's (unique) index for the join.
                nested_df = pd.json_normalize(df[col].tolist()).set_index(df.index)
                # On a name collision the existing column keeps its name and
                # the newly expanded one gets the "_<col>" suffix.
                df = df.drop(columns=[col]).join(nested_df, rsuffix=f"_{col}")
                changed = True
                break

    return df
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def fetch_json(url: str, **kwargs: Any) -> Union[Dict[str, Any], List[Any]]:
    """
    Fetches raw JSON data from a URL with an HTTP GET request.

    Args:
        url: The endpoint web target.
        **kwargs: Additional arguments passed directly to requests.get
            (e.g., headers, auth). A ``timeout`` of 30 seconds is applied
            unless the caller provides one.

    Returns:
        The decoded JSON body, as returned by ``response.json()``.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``response.raise_for_status()``).
    """
    # requests applies no timeout by default, so an unresponsive server would
    # block the caller indefinitely; callers can still override this value.
    kwargs.setdefault("timeout", 30)

    response = requests.get(url, **kwargs)
    response.raise_for_status()
    return response.json()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def json_to_csv(url: str, output_path: str, **kwargs: Any) -> pd.DataFrame:
    """
    Fetches JSON from a URL, deeply flattens it, and saves it directly to a CSV file.

    Args:
        url: The endpoint URL containing the target JSON data.
        output_path: Target filesystem path where the CSV will be written.
            Missing parent directories are created.
        **kwargs: Optional extra arguments forwarded to ``fetch_json``
            (e.g., headers, auth).

    Returns:
        The generated pandas DataFrame (the same data that was written to disk).
    """
    # Fetch and flatten first so a failed request or malformed payload does
    # not leave an empty output directory behind.
    raw_data = fetch_json(url, **kwargs)
    df = unwrap_data(raw_data)

    # Ensure the parent output directory exists; a bare file name has no
    # directory component, in which case there is nothing to create.
    directory = os.path.dirname(output_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # The row index is not written to the CSV.
    df.to_csv(output_path, index=False)
    return df
|
|
78
|
+
|
|
79
|
+
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: jsonunwrap
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A small python package that unpacks data from a JSON url and converts it into a csv file.
|
|
5
|
+
Author-email: njuedominic <njuemugodominic@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/njuedominic/json-unwrap
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/njuedominic/json-unwrap/issues
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: requests>=2.28.0
|
|
14
|
+
Requires-Dist: pandas>=2.0.0
|
|
15
|
+
|
|
16
|
+
### JSON Unwrap
|
|
17
|
+
A small python package that unpacks data from a JSON url and converts it into a csv file.
|
|
18
|
+
|
|
19
|
+
### Current features
|
|
20
|
+
* Convert one JSON url to a csv
|
|
21
|
+
* Automatically creates a data folder if it does not exist
|
|
22
|
+
|
|
23
|
+
### Roadmap
|
|
24
|
+
* Custom file names for output csv
|
|
25
|
+
* Convert into a dataframe ready for use in a notebook environment
|
|
26
|
+
* Error handling for a failed url
|
|
27
|
+
* Tests
|
|
28
|
+
* Documentation examples
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
json_unwrap/__init__.py
|
|
4
|
+
json_unwrap/core.py
|
|
5
|
+
jsonunwrap.egg-info/PKG-INFO
|
|
6
|
+
jsonunwrap.egg-info/SOURCES.txt
|
|
7
|
+
jsonunwrap.egg-info/dependency_links.txt
|
|
8
|
+
jsonunwrap.egg-info/requires.txt
|
|
9
|
+
jsonunwrap.egg-info/top_level.txt
|
|
10
|
+
tests/test_core.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
json_unwrap
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "jsonunwrap"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "njuedominic", email="njuemugodominic@gmail.com"}
|
|
10
|
+
]
|
|
11
|
+
description = "A small python package that unpacks data from a JSON url and converts it into a csv file."
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">3.8"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"requests>=2.28.0",
|
|
21
|
+
"pandas>=2.0.0",
|
|
22
|
+
]
|
|
23
|
+
[project.urls]
|
|
24
|
+
"Homepage" = "https://github.com/njuedominic/json-unwrap"
|
|
25
|
+
"Bug Tracker" = "https://github.com/njuedominic/json-unwrap/issues"
|
|
26
|
+
|
|
27
|
+
[tool.setuptools]
|
|
28
|
+
packages = ["json_unwrap"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import pytest
|
|
3
|
+
import json_unwrap as ju
|
|
4
|
+
|
|
5
|
+
def test_unwrap_data_basic_flattening():
    """A nested dictionary should surface as dot-separated column names."""
    record = {"id": 1, "user": {"name": "Alice", "role": "Admin"}}

    result = ju.unwrap_data(record)

    assert isinstance(result, pd.DataFrame)
    # The nested "user" object becomes "user.<key>" columns.
    assert "user.name" in result.columns
    assert result.loc[0, "user.name"] == "Alice"
|
|
13
|
+
|
|
14
|
+
def test_unwrap_data_list_explosion():
    """A list value inside a record should become one row per element."""
    expected_hobbies = ["reading", "coding"]
    record = {"id": 101, "hobbies": list(expected_hobbies)}

    result = ju.unwrap_data(record)

    # Two list items in, two rows out.
    assert len(result) == 2
    assert list(result["hobbies"]) == expected_hobbies
|