json-razor 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_razor-0.1.0/PKG-INFO +89 -0
- json_razor-0.1.0/README.md +80 -0
- json_razor-0.1.0/json_razor/__init__.py +3 -0
- json_razor-0.1.0/json_razor/cli.py +88 -0
- json_razor-0.1.0/json_razor/core.py +51 -0
- json_razor-0.1.0/json_razor/formats.py +27 -0
- json_razor-0.1.0/json_razor.egg-info/PKG-INFO +89 -0
- json_razor-0.1.0/json_razor.egg-info/SOURCES.txt +13 -0
- json_razor-0.1.0/json_razor.egg-info/dependency_links.txt +1 -0
- json_razor-0.1.0/json_razor.egg-info/entry_points.txt +2 -0
- json_razor-0.1.0/json_razor.egg-info/requires.txt +1 -0
- json_razor-0.1.0/json_razor.egg-info/top_level.txt +1 -0
- json_razor-0.1.0/pyproject.toml +19 -0
- json_razor-0.1.0/setup.cfg +4 -0
- json_razor-0.1.0/tests/test_core.py +82 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: json-razor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Reduces JSON, YAML, and NDJSON volume by collapsing repeated structures while preserving the schema.
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: PyYAML
|
|
9
|
+
|
|
10
|
+
# JSON's Razor — Cut the fat
|
|
11
|
+
|
|
12
|
+
[![Tests](https://github.com/rick-does/json-razor/actions/workflows/tests.yml/badge.svg)](https://github.com/rick-does/json-razor/actions/workflows/tests.yml)
|
|
13
|
+
|
|
14
|
+
Reduces JSON, YAML, and NDJSON volume by collapsing repeated structures while preserving the schema.
|
|
15
|
+
|
|
16
|
+
Large structured data files are hard to parse — not because the structure is complex, but because repetition obscures it. A list of 10,000 objects with identical shape tells you nothing more than a list of 1. JSON's Razor collapses that repetition to its minimum essential form: one representative example of each repeated structure, at every level of nesting.
|
|
17
|
+
|
|
18
|
+
The output is valid, parseable data in the same format as input — not a summary, not a schema definition. It just has far less volume.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Install
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install json-razor
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Usage
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
cat big.json | json-razor # stdin → stdout
|
|
34
|
+
json-razor big.json # file input → stdout
|
|
35
|
+
json-razor big.json -o small.json # file input → file output
|
|
36
|
+
json-razor big.yaml # auto-detected as YAML
|
|
37
|
+
json-razor app.log --format ndjson # NDJSON log file
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Options
|
|
41
|
+
|
|
42
|
+
| Flag | Default | Description |
|
|
43
|
+
|------|---------|-------------|
|
|
44
|
+
| `--keep N` | 1 | Number of examples to keep per repeated structure |
|
|
45
|
+
| `--depth N` | unlimited | Stop collapsing below this nesting depth |
|
|
46
|
+
| `--format` | auto | Force format: `json`, `yaml`, or `ndjson` |
|
|
47
|
+
| `--truncate N` | 100 | Max string length before truncating |
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## How it works
|
|
52
|
+
|
|
53
|
+
**Arrays** — collapsed to one item. Mixed-type arrays keep one of each distinct type.
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
// input
|
|
57
|
+
[{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}, {"id": 3, "name": "carol"}]
|
|
58
|
+
|
|
59
|
+
// output
|
|
60
|
+
[{"id": 1, "name": "alice"}]
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Mixed types** — one representative per JSON type (null, bool, number, string, array, object).
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
// input
|
|
67
|
+
[1, "hello", {"id": 1}, null, true, [1, 2, 3]]
|
|
68
|
+
|
|
69
|
+
// output
|
|
70
|
+
[1, "hello", {"id": 1}, null, true, [1]]
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
**Nested structures** — collapsed recursively at every level.
|
|
74
|
+
|
|
75
|
+
**NDJSON** — collapsed across lines; one representative line kept.
|
|
76
|
+
|
|
77
|
+
**Nulls and empty values** — always preserved (`null`, `[]`, `{}`).
|
|
78
|
+
|
|
79
|
+
**Long strings** — truncated to a configurable preview.
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Supported formats
|
|
84
|
+
|
|
85
|
+
| Format | Auto-detected from |
|
|
86
|
+
|--------|--------------------|
|
|
87
|
+
| JSON | `.json` |
|
|
88
|
+
| YAML | `.yaml`, `.yml` |
|
|
89
|
+
| NDJSON | `.ndjson` |
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# JSON's Razor — Cut the fat
|
|
2
|
+
|
|
3
|
+
[![Tests](https://github.com/rick-does/json-razor/actions/workflows/tests.yml/badge.svg)](https://github.com/rick-does/json-razor/actions/workflows/tests.yml)
|
|
4
|
+
|
|
5
|
+
Reduces JSON, YAML, and NDJSON volume by collapsing repeated structures while preserving the schema.
|
|
6
|
+
|
|
7
|
+
Large structured data files are hard to parse — not because the structure is complex, but because repetition obscures it. A list of 10,000 objects with identical shape tells you nothing more than a list of 1. JSON's Razor collapses that repetition to its minimum essential form: one representative example of each repeated structure, at every level of nesting.
|
|
8
|
+
|
|
9
|
+
The output is valid, parseable data in the same format as input — not a summary, not a schema definition. It just has far less volume.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Install
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install json-razor
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
cat big.json | json-razor # stdin → stdout
|
|
25
|
+
json-razor big.json # file input → stdout
|
|
26
|
+
json-razor big.json -o small.json # file input → file output
|
|
27
|
+
json-razor big.yaml # auto-detected as YAML
|
|
28
|
+
json-razor app.log --format ndjson # NDJSON log file
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### Options
|
|
32
|
+
|
|
33
|
+
| Flag | Default | Description |
|
|
34
|
+
|------|---------|-------------|
|
|
35
|
+
| `--keep N` | 1 | Number of examples to keep per repeated structure |
|
|
36
|
+
| `--depth N` | unlimited | Stop collapsing below this nesting depth |
|
|
37
|
+
| `--format` | auto | Force format: `json`, `yaml`, or `ndjson` |
|
|
38
|
+
| `--truncate N` | 100 | Max string length before truncating |
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## How it works
|
|
43
|
+
|
|
44
|
+
**Arrays** — collapsed to one item. Mixed-type arrays keep one of each distinct type.
|
|
45
|
+
|
|
46
|
+
```json
|
|
47
|
+
// input
|
|
48
|
+
[{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}, {"id": 3, "name": "carol"}]
|
|
49
|
+
|
|
50
|
+
// output
|
|
51
|
+
[{"id": 1, "name": "alice"}]
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Mixed types** — one representative per JSON type (null, bool, number, string, array, object).
|
|
55
|
+
|
|
56
|
+
```json
|
|
57
|
+
// input
|
|
58
|
+
[1, "hello", {"id": 1}, null, true, [1, 2, 3]]
|
|
59
|
+
|
|
60
|
+
// output
|
|
61
|
+
[1, "hello", {"id": 1}, null, true, [1]]
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**Nested structures** — collapsed recursively at every level.
|
|
65
|
+
|
|
66
|
+
**NDJSON** — collapsed across lines; one representative line kept.
|
|
67
|
+
|
|
68
|
+
**Nulls and empty values** — always preserved (`null`, `[]`, `{}`).
|
|
69
|
+
|
|
70
|
+
**Long strings** — truncated to a configurable preview.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Supported formats
|
|
75
|
+
|
|
76
|
+
| Format | Auto-detected from |
|
|
77
|
+
|--------|--------------------|
|
|
78
|
+
| JSON | `.json` |
|
|
79
|
+
| YAML | `.yaml`, `.yml` |
|
|
80
|
+
| NDJSON | `.ndjson` |
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from .core import collapse
|
|
6
|
+
from .formats import dump, dump_ndjson, load, load_ndjson
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def detect_format(path):
    """Guess the data format name from a file path's extension.

    Args:
        path: Input file path, or ``None`` when no file was given.

    Returns:
        ``"yaml"`` for ``.yaml``/``.yml``, ``"ndjson"`` for ``.ndjson``,
        and ``"json"`` for ``.json``, for any other extension, and for
        ``None``. The extension is compared case-insensitively.
    """
    if path is None:
        return "json"

    known_suffixes = {
        ".json": "json",
        ".yaml": "yaml",
        ".yml": "yaml",
        ".ndjson": "ndjson",
    }
    suffix = os.path.splitext(path)[1].lower()
    # Unrecognised (or missing) extensions fall back to JSON.
    return known_suffixes.get(suffix, "json")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def main():
    """Command-line entry point for ``json-razor``.

    Parses the command line, reads the input text from the given file or
    from stdin, collapses it with ``collapse`` and writes the result to the
    ``--output`` file or to stdout. When no input file is given and stdin
    is a terminal, the help text is printed and the process exits with
    status 0.
    """
    cli = argparse.ArgumentParser(
        prog="json-razor",
        description="Cut the fat. Collapse repeated structures in JSON, YAML, and NDJSON.",
    )
    cli.add_argument("input", nargs="?", help="Input file (default: stdin)")
    cli.add_argument("-o", "--output", help="Output file (default: stdout)")
    cli.add_argument(
        "--keep",
        type=int,
        default=1,
        metavar="N",
        help="Examples to keep per repeated structure (default: 1)",
    )
    cli.add_argument(
        "--depth",
        type=int,
        default=None,
        metavar="N",
        help="Stop collapsing below this nesting depth",
    )
    cli.add_argument(
        "--format",
        choices=["json", "yaml", "ndjson"],
        help="Input format (default: auto-detect from extension)",
    )
    cli.add_argument(
        "--truncate",
        type=int,
        default=100,
        metavar="N",
        help="Max string length before truncating (default: 100)",
    )
    options = cli.parse_args()

    source_path = options.input
    # Nothing piped in and no file named: show usage instead of blocking on stdin.
    if source_path is None and sys.stdin.isatty():
        cli.print_help()
        sys.exit(0)

    # An explicit --format wins over extension-based detection.
    fmt = options.format or detect_format(source_path)

    if not source_path:
        raw = sys.stdin.read()
    else:
        with open(source_path, "r", encoding="utf-8") as src:
            raw = src.read()

    settings = {
        "keep": options.keep,
        "depth": options.depth,
        "truncate": options.truncate,
    }
    if fmt != "ndjson":
        rendered = dump(collapse(load(raw, fmt), **settings), fmt)
    else:
        # NDJSON is handled as a list of per-line records.
        rendered = dump_ndjson(collapse(load_ndjson(raw), **settings))

    if not options.output:
        print(rendered)
        return

    with open(options.output, "w", encoding="utf-8") as sink:
        # Make sure the written file ends with a newline.
        sink.write(rendered if rendered.endswith("\n") else rendered + "\n")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
def collapse(data, keep=1, depth=None, truncate=100, _depth=0):
|
|
2
|
+
if depth is not None and _depth >= depth:
|
|
3
|
+
return data
|
|
4
|
+
|
|
5
|
+
if isinstance(data, str):
|
|
6
|
+
if truncate is not None and len(data) > truncate:
|
|
7
|
+
return data[:truncate] + "..."
|
|
8
|
+
return data
|
|
9
|
+
|
|
10
|
+
if isinstance(data, bool) or data is None or isinstance(data, (int, float)):
|
|
11
|
+
return data
|
|
12
|
+
|
|
13
|
+
if isinstance(data, list):
|
|
14
|
+
if len(data) == 0:
|
|
15
|
+
return []
|
|
16
|
+
|
|
17
|
+
buckets = {}
|
|
18
|
+
for item in data:
|
|
19
|
+
key = _type_key(item)
|
|
20
|
+
if key not in buckets:
|
|
21
|
+
buckets[key] = []
|
|
22
|
+
buckets[key].append(item)
|
|
23
|
+
|
|
24
|
+
result = []
|
|
25
|
+
for items in buckets.values():
|
|
26
|
+
for item in items[:keep]:
|
|
27
|
+
result.append(collapse(item, keep, depth, truncate, _depth + 1))
|
|
28
|
+
return result
|
|
29
|
+
|
|
30
|
+
if isinstance(data, dict):
|
|
31
|
+
if len(data) == 0:
|
|
32
|
+
return {}
|
|
33
|
+
return {k: collapse(v, keep, depth, truncate, _depth + 1) for k, v in data.items()}
|
|
34
|
+
|
|
35
|
+
return data
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _type_key(value):
|
|
39
|
+
if value is None:
|
|
40
|
+
return "null"
|
|
41
|
+
if isinstance(value, bool):
|
|
42
|
+
return "bool"
|
|
43
|
+
if isinstance(value, (int, float)):
|
|
44
|
+
return "number"
|
|
45
|
+
if isinstance(value, str):
|
|
46
|
+
return "str"
|
|
47
|
+
if isinstance(value, list):
|
|
48
|
+
return "list"
|
|
49
|
+
if isinstance(value, dict):
|
|
50
|
+
return "dict"
|
|
51
|
+
return type(value).__name__
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import yaml
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def load(text, fmt):
    """Parse *text* as the format named by *fmt*.

    Args:
        text: The serialized document.
        fmt: ``"json"`` or ``"yaml"``.

    Returns:
        The parsed data (``json.loads`` or ``yaml.safe_load`` result).

    Raises:
        ValueError: If *fmt* is neither ``"json"`` nor ``"yaml"``.
    """
    if fmt not in ("json", "yaml"):
        raise ValueError(f"Unknown format: {fmt}")
    parsed = yaml.safe_load(text) if fmt == "yaml" else json.loads(text)
    return parsed
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def dump(data, fmt):
    """Serialize *data* to a string in the format named by *fmt*.

    Args:
        data: The value to serialize.
        fmt: ``"json"`` (2-space indented) or ``"yaml"`` (block style,
            unicode allowed).

    Returns:
        The serialized text.

    Raises:
        ValueError: If *fmt* is neither ``"json"`` nor ``"yaml"``.
    """
    if fmt not in ("json", "yaml"):
        raise ValueError(f"Unknown format: {fmt}")
    if fmt == "yaml":
        return yaml.dump(data, default_flow_style=False, allow_unicode=True)
    return json.dumps(data, indent=2)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def load_ndjson(text):
    """Parse newline-delimited JSON into a list of records.

    Records are separated by LF, CRLF or CR only; blank and
    whitespace-only lines are skipped. ``str.splitlines`` is deliberately
    not used: it also breaks on characters such as U+2028, U+2029 and
    U+0085, which may appear unescaped inside a JSON string and would cut
    a valid record in two.

    Args:
        text: The NDJSON document.

    Returns:
        A list with one parsed value per non-blank line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")
    return [json.loads(line) for line in normalized.split("\n") if line.strip()]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def dump_ndjson(records):
    """Serialize *records* as NDJSON: one compact JSON document per line.

    Lines are joined with ``"\\n"``; no trailing newline is added.
    """
    lines = []
    for record in records:
        lines.append(json.dumps(record))
    return "\n".join(lines)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: json-razor
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Reduces JSON, YAML, and NDJSON volume by collapsing repeated structures while preserving the schema.
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: PyYAML
|
|
9
|
+
|
|
10
|
+
# JSON's Razor — Cut the fat
|
|
11
|
+
|
|
12
|
+
[![Tests](https://github.com/rick-does/json-razor/actions/workflows/tests.yml/badge.svg)](https://github.com/rick-does/json-razor/actions/workflows/tests.yml)
|
|
13
|
+
|
|
14
|
+
Reduces JSON, YAML, and NDJSON volume by collapsing repeated structures while preserving the schema.
|
|
15
|
+
|
|
16
|
+
Large structured data files are hard to parse — not because the structure is complex, but because repetition obscures it. A list of 10,000 objects with identical shape tells you nothing more than a list of 1. JSON's Razor collapses that repetition to its minimum essential form: one representative example of each repeated structure, at every level of nesting.
|
|
17
|
+
|
|
18
|
+
The output is valid, parseable data in the same format as input — not a summary, not a schema definition. It just has far less volume.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Install
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install json-razor
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Usage
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
cat big.json | json-razor # stdin → stdout
|
|
34
|
+
json-razor big.json # file input → stdout
|
|
35
|
+
json-razor big.json -o small.json # file input → file output
|
|
36
|
+
json-razor big.yaml # auto-detected as YAML
|
|
37
|
+
json-razor app.log --format ndjson # NDJSON log file
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Options
|
|
41
|
+
|
|
42
|
+
| Flag | Default | Description |
|
|
43
|
+
|------|---------|-------------|
|
|
44
|
+
| `--keep N` | 1 | Number of examples to keep per repeated structure |
|
|
45
|
+
| `--depth N` | unlimited | Stop collapsing below this nesting depth |
|
|
46
|
+
| `--format` | auto | Force format: `json`, `yaml`, or `ndjson` |
|
|
47
|
+
| `--truncate N` | 100 | Max string length before truncating |
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## How it works
|
|
52
|
+
|
|
53
|
+
**Arrays** — collapsed to one item. Mixed-type arrays keep one of each distinct type.
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
// input
|
|
57
|
+
[{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}, {"id": 3, "name": "carol"}]
|
|
58
|
+
|
|
59
|
+
// output
|
|
60
|
+
[{"id": 1, "name": "alice"}]
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
**Mixed types** — one representative per JSON type (null, bool, number, string, array, object).
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
// input
|
|
67
|
+
[1, "hello", {"id": 1}, null, true, [1, 2, 3]]
|
|
68
|
+
|
|
69
|
+
// output
|
|
70
|
+
[1, "hello", {"id": 1}, null, true, [1]]
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
**Nested structures** — collapsed recursively at every level.
|
|
74
|
+
|
|
75
|
+
**NDJSON** — collapsed across lines; one representative line kept.
|
|
76
|
+
|
|
77
|
+
**Nulls and empty values** — always preserved (`null`, `[]`, `{}`).
|
|
78
|
+
|
|
79
|
+
**Long strings** — truncated to a configurable preview.
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Supported formats
|
|
84
|
+
|
|
85
|
+
| Format | Auto-detected from |
|
|
86
|
+
|--------|--------------------|
|
|
87
|
+
| JSON | `.json` |
|
|
88
|
+
| YAML | `.yaml`, `.yml` |
|
|
89
|
+
| NDJSON | `.ndjson` |
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
json_razor/__init__.py
|
|
4
|
+
json_razor/cli.py
|
|
5
|
+
json_razor/core.py
|
|
6
|
+
json_razor/formats.py
|
|
7
|
+
json_razor.egg-info/PKG-INFO
|
|
8
|
+
json_razor.egg-info/SOURCES.txt
|
|
9
|
+
json_razor.egg-info/dependency_links.txt
|
|
10
|
+
json_razor.egg-info/entry_points.txt
|
|
11
|
+
json_razor.egg-info/requires.txt
|
|
12
|
+
json_razor.egg-info/top_level.txt
|
|
13
|
+
tests/test_core.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
PyYAML
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
json_razor
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "json-razor"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Reduces JSON, YAML, and NDJSON volume by collapsing repeated structures while preserving the schema."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
dependencies = ["PyYAML"]
|
|
13
|
+
|
|
14
|
+
[project.scripts]
|
|
15
|
+
json-razor = "json_razor.cli:main"
|
|
16
|
+
|
|
17
|
+
[tool.setuptools.packages.find]
|
|
18
|
+
where = ["."]
|
|
19
|
+
include = ["json_razor*"]
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from json_razor.core import collapse
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def test_array_collapse():
    """A list of same-typed objects collapses to its first element."""
    records = [{"id": 1}, {"id": 2}, {"id": 3}]
    collapsed = collapse(records)
    assert collapsed == [{"id": 1}]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_array_keep_n():
    """``keep=2`` retains the first two items of the group."""
    records = [{"id": 1}, {"id": 2}, {"id": 3}]
    collapsed = collapse(records, keep=2)
    assert collapsed == [{"id": 1}, {"id": 2}]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_mixed_type_array():
    """Each distinct type in a mixed list keeps one representative."""
    collapsed = collapse([1, "hello", {"id": 1}, None])
    seen_types = {type(entry) for entry in collapsed}
    assert seen_types == {int, str, dict, type(None)}
    assert len(collapsed) == 4
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_single_item_array_unchanged():
    """A one-element list comes back equal to the input."""
    only_record = [{"id": 1}]
    collapsed = collapse(only_record)
    assert collapsed == [{"id": 1}]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_empty_array_preserved():
    """An empty list stays an empty list."""
    collapsed = collapse([])
    assert collapsed == []
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_empty_object_preserved():
    """An empty dict stays an empty dict."""
    collapsed = collapse({})
    assert collapsed == {}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_null_preserved():
    """``None`` passes through as ``None``."""
    collapsed = collapse(None)
    assert collapsed is None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_nested_array_collapse():
    """Lists nested inside a dict are collapsed too."""
    document = {"items": [{"a": 1}, {"a": 2}, {"a": 3}]}
    collapsed = collapse(document)
    assert collapsed == {"items": [{"a": 1}]}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_string_truncation():
    """A 200-character string is cut to 100 characters plus an ellipsis."""
    oversized = "x" * 200
    shortened = collapse(oversized, truncate=100)
    expected = "x" * 100 + "..."
    assert shortened == expected
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_string_no_truncation_when_short():
    """A string within the limit is returned as-is."""
    collapsed = collapse("hello", truncate=100)
    assert collapsed == "hello"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_depth_limit():
    """With ``depth=1`` the list one level down is left uncollapsed."""
    document = {"a": [1, 2, 3]}
    collapsed = collapse(document, depth=1)
    assert collapsed == {"a": [1, 2, 3]}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_bool_not_confused_with_int():
    """Booleans and integers land in separate groups."""
    collapsed = collapse([True, False, 1, 0])
    seen_types = {type(entry) for entry in collapsed}
    assert bool in seen_types
    assert int in seen_types
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_object_keys_preserved():
    """Dict keys survive while a nested list is collapsed."""
    document = {"name": "foo", "age": 30, "items": [1, 2, 3]}
    collapsed = collapse(document)
    assert "name" in collapsed
    assert "age" in collapsed
    assert collapsed["items"] == [1]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_ndjson_style():
    """A hundred same-shaped records collapse to the first one."""
    records = [{"id": n, "val": n * 2} for n in range(100)]
    collapsed = collapse(records)
    assert len(collapsed) == 1
    assert collapsed[0] == {"id": 0, "val": 0}
|