headson 0.6.2__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of headson might be problematic. Click here for more details.
- {headson-0.6.2 → headson-0.6.3}/Cargo.lock +1 -1
- {headson-0.6.2 → headson-0.6.3}/Cargo.toml +1 -1
- {headson-0.6.2 → headson-0.6.3}/PKG-INFO +29 -7
- {headson-0.6.2 → headson-0.6.3}/README.md +28 -6
- headson-0.6.3/docs/assets/tapes/demo.gif +0 -0
- {headson-0.6.2 → headson-0.6.3}/pyproject.toml +1 -1
- {headson-0.6.2 → headson-0.6.3}/python/Cargo.lock +2 -2
- {headson-0.6.2 → headson-0.6.3}/python/Cargo.toml +1 -1
- {headson-0.6.2 → headson-0.6.3}/python/README.md +6 -2
- {headson-0.6.2 → headson-0.6.3}/python/src/lib.rs +9 -7
- headson-0.6.3/src/ingest/json.rs +37 -0
- headson-0.6.3/src/ingest/mod.rs +64 -0
- headson-0.6.3/src/ingest/text.rs +45 -0
- headson-0.6.3/src/ingest/yaml.rs +39 -0
- {headson-0.6.2 → headson-0.6.3}/src/lib.rs +27 -0
- {headson-0.6.2 → headson-0.6.3}/src/main.rs +37 -9
- {headson-0.6.2 → headson-0.6.3}/src/serialization/fileset.rs +2 -1
- {headson-0.6.2 → headson-0.6.3}/src/serialization/mod.rs +63 -3
- {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/mod.rs +3 -0
- headson-0.6.3/src/serialization/templates/text.rs +43 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/types.rs +1 -0
- headson-0.6.3/src/text_ingest/mod.rs +192 -0
- headson-0.6.2/docs/assets/tapes/demo.gif +0 -0
- headson-0.6.2/src/ingest/mod.rs +0 -120
- {headson-0.6.2 → headson-0.6.3}/docs/assets/algorithm.svg +0 -0
- {headson-0.6.2 → headson-0.6.3}/docs/assets/logo.png +0 -0
- {headson-0.6.2 → headson-0.6.3}/docs/assets/logo.svg +0 -0
- {headson-0.6.2 → headson-0.6.3}/python/headson/__init__.py +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/format.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/json_ingest/builder.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/json_ingest/mod.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/json_ingest/samplers/default.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/json_ingest/samplers/head.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/json_ingest/samplers/mod.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/json_ingest/samplers/tail.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/order/build.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/order/mod.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/order/scoring.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/order/snapshots/headson__order__build__tests__order_empty_array_order.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/order/snapshots/headson__order__build__tests__order_single_string_array_order.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/order/types.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/color.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/output.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty_yaml.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__arena_render_single.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__arena_render_single_yaml.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_internal_gaps_yaml.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_head.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_tail.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_head.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_tail.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_head.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_tail.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_json.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_yaml.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/core.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/js.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/json.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/pseudo.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/yaml.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/snapshots/headson__order__tests__order_empty_array_order.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/snapshots/headson__order__tests__order_single_string_array_order.snap +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/utils/graph.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/utils/json.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/utils/mod.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/utils/search.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/utils/text.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/utils/tree_arena.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/src/yaml_ingest/mod.rs +0 -0
- {headson-0.6.2 → headson-0.6.3}/tests/fixtures/json/JSONTestSuite/LICENSE +0 -0
- {headson-0.6.2 → headson-0.6.3}/tests/fixtures/json/JSONTestSuite/README.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: headson
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.6.3
|
|
4
4
|
Classifier: Programming Language :: Python
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Programming Language :: Rust
|
|
@@ -20,7 +20,7 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
|
20
20
|
<br/>
|
|
21
21
|
</p>
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
`head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
|
|
24
24
|
|
|
25
25
|
Available as:
|
|
26
26
|
- CLI (see [Usage](#usage))
|
|
@@ -41,10 +41,11 @@ From source:
|
|
|
41
41
|
## Features
|
|
42
42
|
|
|
43
43
|
- Budgeted output: specify exactly how much you want to see
|
|
44
|
-
- Output formats: `auto | json | yaml`
|
|
44
|
+
- Output formats: `auto | json | yaml | text`
|
|
45
45
|
- Styles: `strict | default | detailed`
|
|
46
46
|
- JSON family: `strict` → strict JSON, `default` → human‑friendly Pseudo, `detailed` → JS with inline comments
|
|
47
47
|
- YAML: always YAML; `strict` has no comments, `default` uses “# …”, `detailed` uses “# N more …”
|
|
48
|
+
- Text: prints raw lines. In `default` style, omissions are shown as a single line `…`; in `detailed`, as `… N more lines …`. `strict` omits array‑level summaries.
|
|
48
49
|
- Multiple inputs: preview many files at once with a shared or per‑file budget
|
|
49
50
|
- Fast: processes gigabyte‑scale files in seconds (mostly disk‑bound)
|
|
50
51
|
- Available as a CLI app and as a Python library
|
|
@@ -68,12 +69,12 @@ Common flags:
|
|
|
68
69
|
|
|
69
70
|
- `-n, --budget <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
|
|
70
71
|
- `-N, --global-budget <BYTES>`: total output budget across all inputs. With `--budget`, the effective total is the smaller of the two.
|
|
71
|
-
- `-f, --format <auto|json|yaml>`: output format (default: `auto`).
|
|
72
|
-
- Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML).
|
|
72
|
+
- `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
|
|
73
|
+
- Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
|
|
73
74
|
- `-t, --template <strict|default|detailed>`: output style (default: `default`).
|
|
74
75
|
- JSON family: `strict` → strict JSON; `default` → Pseudo; `detailed` → JS with inline comments.
|
|
75
76
|
- YAML: always YAML; style only affects comments (`strict` none, `default` “# …”, `detailed` “# N more …”).
|
|
76
|
-
- `-i, --input-format <json|yaml>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
|
|
77
|
+
- `-i, --input-format <json|yaml|text>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
|
|
77
78
|
- `-m, --compact`: no indentation, no spaces, no newlines
|
|
78
79
|
- `--no-newline`: single line output
|
|
79
80
|
- `--no-space`: no space after `:` in objects
|
|
@@ -86,7 +87,8 @@ Notes:
|
|
|
86
87
|
|
|
87
88
|
- Multiple inputs:
|
|
88
89
|
- With newlines enabled, file sections are rendered with human‑readable headers. In compact/single‑line modes, headers are omitted.
|
|
89
|
-
|
|
90
|
+
- In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
|
|
91
|
+
- Unknown extensions are treated as Text (raw lines) — safe for logs and `.txt` files.
|
|
90
92
|
- `--global-budget` may truncate or omit entire files to respect the total budget.
|
|
91
93
|
- The tool finds the largest preview that fits the budget; even if extremely tight, you still get a minimal, valid preview.
|
|
92
94
|
- Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
|
|
@@ -110,6 +112,25 @@ Quick one‑liners:
|
|
|
110
112
|
|
|
111
113
|
headson -n 400 -f yaml -t detailed config.yaml
|
|
112
114
|
|
|
115
|
+
### Text mode
|
|
116
|
+
|
|
117
|
+
- Single file (auto):
|
|
118
|
+
|
|
119
|
+
headson -n 200 notes.txt
|
|
120
|
+
|
|
121
|
+
- Force Text ingest/output (useful when mixing with other extensions):
|
|
122
|
+
|
|
123
|
+
headson -n 200 -i text -f text notes.txt
|
|
124
|
+
|
|
125
|
+
- Many text files (fileset):
|
|
126
|
+
|
|
127
|
+
headson -n 800 -i text -f text logs/*.txt
|
|
128
|
+
|
|
129
|
+
- Styles on Text:
|
|
130
|
+
- default: omission as a standalone `…` line.
|
|
131
|
+
- detailed: omission as `… N more lines …`.
|
|
132
|
+
- strict: no array‑level omission line (individual long lines may still truncate with `…`).
|
|
133
|
+
|
|
113
134
|
Show help:
|
|
114
135
|
|
|
115
136
|
headson --help
|
|
@@ -157,6 +178,7 @@ Regenerate locally:
|
|
|
157
178
|
- Run: cargo make tapes
|
|
158
179
|
- Outputs are written to docs/assets/tapes
|
|
159
180
|
|
|
181
|
+
|
|
160
182
|
## Python Bindings
|
|
161
183
|
|
|
162
184
|
A thin Python extension module is available on PyPI as `headson`.
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
<br/>
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
`head`/`tail` for JSON, YAML - but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
|
|
10
10
|
|
|
11
11
|
Available as:
|
|
12
12
|
- CLI (see [Usage](#usage))
|
|
@@ -27,10 +27,11 @@ From source:
|
|
|
27
27
|
## Features
|
|
28
28
|
|
|
29
29
|
- Budgeted output: specify exactly how much you want to see
|
|
30
|
-
- Output formats: `auto | json | yaml`
|
|
30
|
+
- Output formats: `auto | json | yaml | text`
|
|
31
31
|
- Styles: `strict | default | detailed`
|
|
32
32
|
- JSON family: `strict` → strict JSON, `default` → human‑friendly Pseudo, `detailed` → JS with inline comments
|
|
33
33
|
- YAML: always YAML; `strict` has no comments, `default` uses “# …”, `detailed` uses “# N more …”
|
|
34
|
+
- Text: prints raw lines. In `default` style, omissions are shown as a single line `…`; in `detailed`, as `… N more lines …`. `strict` omits array‑level summaries.
|
|
34
35
|
- Multiple inputs: preview many files at once with a shared or per‑file budget
|
|
35
36
|
- Fast: processes gigabyte‑scale files in seconds (mostly disk‑bound)
|
|
36
37
|
- Available as a CLI app and as a Python library
|
|
@@ -54,12 +55,12 @@ Common flags:
|
|
|
54
55
|
|
|
55
56
|
- `-n, --budget <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
|
|
56
57
|
- `-N, --global-budget <BYTES>`: total output budget across all inputs. With `--budget`, the effective total is the smaller of the two.
|
|
57
|
-
- `-f, --format <auto|json|yaml>`: output format (default: `auto`).
|
|
58
|
-
- Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML).
|
|
58
|
+
- `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
|
|
59
|
+
- Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
|
|
59
60
|
- `-t, --template <strict|default|detailed>`: output style (default: `default`).
|
|
60
61
|
- JSON family: `strict` → strict JSON; `default` → Pseudo; `detailed` → JS with inline comments.
|
|
61
62
|
- YAML: always YAML; style only affects comments (`strict` none, `default` “# …”, `detailed` “# N more …”).
|
|
62
|
-
- `-i, --input-format <json|yaml>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
|
|
63
|
+
- `-i, --input-format <json|yaml|text>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
|
|
63
64
|
- `-m, --compact`: no indentation, no spaces, no newlines
|
|
64
65
|
- `--no-newline`: single line output
|
|
65
66
|
- `--no-space`: no space after `:` in objects
|
|
@@ -72,7 +73,8 @@ Notes:
|
|
|
72
73
|
|
|
73
74
|
- Multiple inputs:
|
|
74
75
|
- With newlines enabled, file sections are rendered with human‑readable headers. In compact/single‑line modes, headers are omitted.
|
|
75
|
-
|
|
76
|
+
- In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
|
|
77
|
+
- Unknown extensions are treated as Text (raw lines) — safe for logs and `.txt` files.
|
|
76
78
|
- `--global-budget` may truncate or omit entire files to respect the total budget.
|
|
77
79
|
- The tool finds the largest preview that fits the budget; even if extremely tight, you still get a minimal, valid preview.
|
|
78
80
|
- Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
|
|
@@ -96,6 +98,25 @@ Quick one‑liners:
|
|
|
96
98
|
|
|
97
99
|
headson -n 400 -f yaml -t detailed config.yaml
|
|
98
100
|
|
|
101
|
+
### Text mode
|
|
102
|
+
|
|
103
|
+
- Single file (auto):
|
|
104
|
+
|
|
105
|
+
headson -n 200 notes.txt
|
|
106
|
+
|
|
107
|
+
- Force Text ingest/output (useful when mixing with other extensions):
|
|
108
|
+
|
|
109
|
+
headson -n 200 -i text -f text notes.txt
|
|
110
|
+
|
|
111
|
+
- Many text files (fileset):
|
|
112
|
+
|
|
113
|
+
headson -n 800 -i text -f text logs/*.txt
|
|
114
|
+
|
|
115
|
+
- Styles on Text:
|
|
116
|
+
- default: omission as a standalone `…` line.
|
|
117
|
+
- detailed: omission as `… N more lines …`.
|
|
118
|
+
- strict: no array‑level omission line (individual long lines may still truncate with `…`).
|
|
119
|
+
|
|
99
120
|
Show help:
|
|
100
121
|
|
|
101
122
|
headson --help
|
|
@@ -143,6 +164,7 @@ Regenerate locally:
|
|
|
143
164
|
- Run: cargo make tapes
|
|
144
165
|
- Outputs are written to docs/assets/tapes
|
|
145
166
|
|
|
167
|
+
|
|
146
168
|
## Python Bindings
|
|
147
169
|
|
|
148
170
|
A thin Python extension module is available on PyPI as `headson`.
|
|
Binary file
|
|
@@ -214,7 +214,7 @@ dependencies = [
|
|
|
214
214
|
|
|
215
215
|
[[package]]
|
|
216
216
|
name = "headson"
|
|
217
|
-
version = "0.6.2"
|
|
217
|
+
version = "0.6.3"
|
|
218
218
|
dependencies = [
|
|
219
219
|
"anyhow",
|
|
220
220
|
"clap",
|
|
@@ -228,7 +228,7 @@ dependencies = [
|
|
|
228
228
|
|
|
229
229
|
[[package]]
|
|
230
230
|
name = "headson-python"
|
|
231
|
-
version = "0.6.2"
|
|
231
|
+
version = "0.6.3"
|
|
232
232
|
dependencies = [
|
|
233
233
|
"anyhow",
|
|
234
234
|
"headson",
|
|
@@ -5,9 +5,9 @@ Minimal Python API for the `headson` preview renderer.
|
|
|
5
5
|
API
|
|
6
6
|
|
|
7
7
|
- `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", character_budget: int | None = None, skew: str = "balanced") -> str`
|
|
8
|
-
- `format`: output format — `"auto" | "json" | "yaml"`.
|
|
8
|
+
- `format`: output format — `"auto" | "json" | "yaml" | "text"`.
|
|
9
9
|
- `style`: output style — `"strict" | "default" | "detailed"`.
|
|
10
|
-
- `input_format`: ingestion format — `"json" | "yaml"`.
|
|
10
|
+
- `input_format`: ingestion format — `"json" | "yaml" | "text"`.
|
|
11
11
|
- `character_budget`: maximum output size in characters (defaults to 500 if not set).
|
|
12
12
|
- `skew`: one of `"balanced" | "head" | "tail"`.
|
|
13
13
|
- `balanced` (default), `head` keeps first N, `tail` keeps last N. Display styles place omission markers accordingly; strict JSON remains unannotated.
|
|
@@ -36,6 +36,10 @@ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml"
|
|
|
36
36
|
print(headson.summarize(doc, format="yaml", style="detailed", input_format="yaml", character_budget=60))
|
|
37
37
|
|
|
38
38
|
# Note: tail mode affects only display styles; strict JSON stays strict.
|
|
39
|
+
|
|
40
|
+
# Text: render raw lines with omission markers depending on style
|
|
41
|
+
text = "one\ntwo\nthree\n"
|
|
42
|
+
print(headson.summarize(text, format="text", style="default", input_format="text", character_budget=10))
|
|
39
43
|
```
|
|
40
44
|
|
|
41
45
|
Install for development:
|
|
@@ -31,7 +31,11 @@ fn map_output_template(format: &str, style: Style) -> Result<OutputTemplate> {
|
|
|
31
31
|
"auto" => Ok(map_json_template_for_style(style)), // stdin => JSON family
|
|
32
32
|
"json" => Ok(map_json_template_for_style(style)),
|
|
33
33
|
"yaml" | "yml" => Ok(OutputTemplate::Yaml),
|
|
34
|
-
|
|
34
|
+
"text" => Ok(OutputTemplate::Text),
|
|
35
|
+
other => bail!(
|
|
36
|
+
"unknown format: {} (expected 'auto' | 'json' | 'yaml' | 'text')",
|
|
37
|
+
other
|
|
38
|
+
),
|
|
35
39
|
}
|
|
36
40
|
}
|
|
37
41
|
|
|
@@ -107,14 +111,12 @@ fn summarize(
|
|
|
107
111
|
let input = text.as_bytes().to_vec();
|
|
108
112
|
py.detach(|| {
|
|
109
113
|
match input_format.to_ascii_lowercase().as_str() {
|
|
110
|
-
"json" => headson_core::headson(input, &cfg, &prio, budget)
|
|
114
|
+
"json" => headson_core::headson(input, &cfg, &prio, budget).map_err(to_pyerr),
|
|
115
|
+
"yaml" | "yml" => headson_core::headson_yaml(input, &cfg, &prio, budget)
|
|
111
116
|
.map_err(to_pyerr),
|
|
112
|
-
"
|
|
113
|
-
headson_core::headson_yaml(input, &cfg, &prio, budget)
|
|
114
|
-
.map_err(to_pyerr)
|
|
115
|
-
}
|
|
117
|
+
"text" => headson_core::headson_text(input, &cfg, &prio, budget).map_err(to_pyerr),
|
|
116
118
|
other => Err(to_pyerr(anyhow::anyhow!(
|
|
117
|
-
"unknown input_format: {} (expected 'json' | 'yaml')",
|
|
119
|
+
"unknown input_format: {} (expected 'json' | 'yaml' | 'text')",
|
|
118
120
|
other
|
|
119
121
|
))),
|
|
120
122
|
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
|
|
3
|
+
use super::Ingest;
|
|
4
|
+
use crate::order::PriorityConfig;
|
|
5
|
+
use crate::utils::tree_arena::JsonTreeArena as TreeArena;
|
|
6
|
+
|
|
7
|
+
/// JSON adapter for the ingest boundary. Delegates to the existing
|
|
8
|
+
/// JSON builder to produce the neutral `TreeArena`.
|
|
9
|
+
pub struct JsonIngest;
|
|
10
|
+
|
|
11
|
+
impl Ingest for JsonIngest {
|
|
12
|
+
fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena> {
|
|
13
|
+
crate::json_ingest::build_json_tree_arena_from_bytes(bytes, cfg)
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
fn parse_many(
|
|
17
|
+
inputs: Vec<(String, Vec<u8>)>,
|
|
18
|
+
cfg: &PriorityConfig,
|
|
19
|
+
) -> Result<TreeArena> {
|
|
20
|
+
crate::json_ingest::build_json_tree_arena_from_many(inputs, cfg)
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/// Convenience functions for the JSON ingest path.
|
|
25
|
+
pub fn parse_json_one(
|
|
26
|
+
bytes: Vec<u8>,
|
|
27
|
+
cfg: &PriorityConfig,
|
|
28
|
+
) -> Result<TreeArena> {
|
|
29
|
+
JsonIngest::parse_one(bytes, cfg)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
pub fn parse_json_many(
|
|
33
|
+
inputs: Vec<(String, Vec<u8>)>,
|
|
34
|
+
cfg: &PriorityConfig,
|
|
35
|
+
) -> Result<TreeArena> {
|
|
36
|
+
JsonIngest::parse_many(inputs, cfg)
|
|
37
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
|
|
3
|
+
use crate::order::PriorityConfig;
|
|
4
|
+
use crate::utils::tree_arena::JsonTreeArena as TreeArena;
|
|
5
|
+
|
|
6
|
+
/// Format-agnostic ingest boundary. Other formats can implement this trait
|
|
7
|
+
/// to produce the neutral TreeArena without going through JSON first.
|
|
8
|
+
pub trait Ingest {
|
|
9
|
+
fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena>;
|
|
10
|
+
fn parse_many(
|
|
11
|
+
inputs: Vec<(String, Vec<u8>)>,
|
|
12
|
+
cfg: &PriorityConfig,
|
|
13
|
+
) -> Result<TreeArena>;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
// Submodules for per-format adapters
|
|
17
|
+
pub mod json;
|
|
18
|
+
pub mod text;
|
|
19
|
+
pub mod yaml;
|
|
20
|
+
|
|
21
|
+
// Re-export commonly used helpers for convenience (keep adapter types private)
|
|
22
|
+
pub use json::{parse_json_many, parse_json_one};
|
|
23
|
+
pub use text::{parse_text_many, parse_text_one};
|
|
24
|
+
pub use yaml::{parse_yaml_many, parse_yaml_one};
|
|
25
|
+
|
|
26
|
+
#[cfg(test)]
|
|
27
|
+
mod tests {
|
|
28
|
+
use super::*;
|
|
29
|
+
use crate::order::NodeKind;
|
|
30
|
+
|
|
31
|
+
#[test]
|
|
32
|
+
fn parse_one_basic_shape() {
|
|
33
|
+
let arena = parse_json_one(
|
|
34
|
+
b"{\"a\":1}".to_vec(),
|
|
35
|
+
&PriorityConfig::new(usize::MAX, usize::MAX),
|
|
36
|
+
)
|
|
37
|
+
.unwrap();
|
|
38
|
+
assert!(
|
|
39
|
+
!arena.is_fileset,
|
|
40
|
+
"single input should not be marked fileset"
|
|
41
|
+
);
|
|
42
|
+
let root = arena.root_id;
|
|
43
|
+
assert_eq!(arena.nodes[root].kind, NodeKind::Object);
|
|
44
|
+
assert_eq!(arena.nodes[root].object_len.unwrap_or(1), 1);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
#[test]
|
|
48
|
+
fn parse_many_sets_fileset_root() {
|
|
49
|
+
let inputs = vec![
|
|
50
|
+
("a.json".to_string(), b"{}".to_vec()),
|
|
51
|
+
("b.json".to_string(), b"[]".to_vec()),
|
|
52
|
+
];
|
|
53
|
+
let arena = parse_json_many(
|
|
54
|
+
inputs,
|
|
55
|
+
&PriorityConfig::new(usize::MAX, usize::MAX),
|
|
56
|
+
)
|
|
57
|
+
.unwrap();
|
|
58
|
+
assert!(arena.is_fileset, "multi input should be marked fileset");
|
|
59
|
+
let root = arena.root_id;
|
|
60
|
+
assert_eq!(arena.nodes[root].kind, NodeKind::Object);
|
|
61
|
+
// Expect two top-level entries
|
|
62
|
+
assert_eq!(arena.nodes[root].object_len.unwrap_or(0), 2);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
|
|
3
|
+
use super::Ingest;
|
|
4
|
+
use crate::order::PriorityConfig;
|
|
5
|
+
use crate::utils::tree_arena::JsonTreeArena as TreeArena;
|
|
6
|
+
|
|
7
|
+
/// Text adapter for the ingest boundary.
|
|
8
|
+
///
|
|
9
|
+
/// Behavior:
|
|
10
|
+
/// - Decodes input as UTF-8 using `String::from_utf8_lossy`, which replaces
|
|
11
|
+
/// any invalid UTF-8 byte sequences with the Unicode replacement character
|
|
12
|
+
/// U+FFFD (this is what “lossy” means here).
|
|
13
|
+
/// - Normalizes newlines to `\n`, splits into logical lines, and builds an
|
|
14
|
+
/// array of string nodes.
|
|
15
|
+
/// - For multi-file inputs, produces a fileset object where each value is the
|
|
16
|
+
/// corresponding file’s array of lines.
|
|
17
|
+
pub struct TextIngest;
|
|
18
|
+
|
|
19
|
+
impl Ingest for TextIngest {
|
|
20
|
+
fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena> {
|
|
21
|
+
crate::text_ingest::build_text_tree_arena_from_bytes(bytes, cfg)
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
fn parse_many(
|
|
25
|
+
inputs: Vec<(String, Vec<u8>)>,
|
|
26
|
+
cfg: &PriorityConfig,
|
|
27
|
+
) -> Result<TreeArena> {
|
|
28
|
+
crate::text_ingest::build_text_tree_arena_from_many(inputs, cfg)
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/// Convenience functions for the Text ingest path.
|
|
33
|
+
pub fn parse_text_one(
|
|
34
|
+
bytes: Vec<u8>,
|
|
35
|
+
cfg: &PriorityConfig,
|
|
36
|
+
) -> Result<TreeArena> {
|
|
37
|
+
TextIngest::parse_one(bytes, cfg)
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
pub fn parse_text_many(
|
|
41
|
+
inputs: Vec<(String, Vec<u8>)>,
|
|
42
|
+
cfg: &PriorityConfig,
|
|
43
|
+
) -> Result<TreeArena> {
|
|
44
|
+
TextIngest::parse_many(inputs, cfg)
|
|
45
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
|
|
3
|
+
use super::Ingest;
|
|
4
|
+
use crate::order::PriorityConfig;
|
|
5
|
+
use crate::utils::tree_arena::JsonTreeArena as TreeArena;
|
|
6
|
+
|
|
7
|
+
/// YAML adapter for the ingest boundary. Parses YAML using `yaml-rust2`
|
|
8
|
+
/// and builds the neutral `TreeArena`. Multi-document YAML in a single
|
|
9
|
+
/// input is wrapped in an array; multi-file inputs produce a fileset
|
|
10
|
+
/// object whose values may be arrays when a file contains multiple docs.
|
|
11
|
+
pub struct YamlIngest;
|
|
12
|
+
|
|
13
|
+
impl Ingest for YamlIngest {
|
|
14
|
+
fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena> {
|
|
15
|
+
crate::yaml_ingest::build_yaml_tree_arena_from_bytes(bytes, cfg)
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
fn parse_many(
|
|
19
|
+
inputs: Vec<(String, Vec<u8>)>,
|
|
20
|
+
cfg: &PriorityConfig,
|
|
21
|
+
) -> Result<TreeArena> {
|
|
22
|
+
crate::yaml_ingest::build_yaml_tree_arena_from_many(inputs, cfg)
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/// Convenience functions for the YAML ingest path.
|
|
27
|
+
pub fn parse_yaml_one(
|
|
28
|
+
bytes: Vec<u8>,
|
|
29
|
+
cfg: &PriorityConfig,
|
|
30
|
+
) -> Result<TreeArena> {
|
|
31
|
+
YamlIngest::parse_one(bytes, cfg)
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
pub fn parse_yaml_many(
|
|
35
|
+
inputs: Vec<(String, Vec<u8>)>,
|
|
36
|
+
cfg: &PriorityConfig,
|
|
37
|
+
) -> Result<TreeArena> {
|
|
38
|
+
YamlIngest::parse_many(inputs, cfg)
|
|
39
|
+
}
|
|
@@ -25,6 +25,7 @@ mod ingest;
|
|
|
25
25
|
mod json_ingest;
|
|
26
26
|
mod order;
|
|
27
27
|
mod serialization;
|
|
28
|
+
mod text_ingest;
|
|
28
29
|
mod utils;
|
|
29
30
|
mod yaml_ingest;
|
|
30
31
|
pub use order::types::{ArrayBias, ArraySamplerStrategy};
|
|
@@ -87,6 +88,32 @@ pub fn headson_many_yaml(
|
|
|
87
88
|
Ok(out)
|
|
88
89
|
}
|
|
89
90
|
|
|
91
|
+
/// Same as `headson` but using the Text ingest path.
|
|
92
|
+
pub fn headson_text(
|
|
93
|
+
input: Vec<u8>,
|
|
94
|
+
config: &RenderConfig,
|
|
95
|
+
priority_cfg: &PriorityConfig,
|
|
96
|
+
budget: usize,
|
|
97
|
+
) -> Result<String> {
|
|
98
|
+
let arena = crate::ingest::parse_text_one(input, priority_cfg)?;
|
|
99
|
+
let order_build = order::build_order(&arena, priority_cfg)?;
|
|
100
|
+
let out = find_largest_render_under_budget(&order_build, config, budget);
|
|
101
|
+
Ok(out)
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/// Same as `headson_many` but using the Text ingest path.
|
|
105
|
+
pub fn headson_many_text(
|
|
106
|
+
inputs: Vec<(String, Vec<u8>)>,
|
|
107
|
+
config: &RenderConfig,
|
|
108
|
+
priority_cfg: &PriorityConfig,
|
|
109
|
+
budget: usize,
|
|
110
|
+
) -> Result<String> {
|
|
111
|
+
let arena = crate::ingest::parse_text_many(inputs, priority_cfg)?;
|
|
112
|
+
let order_build = order::build_order(&arena, priority_cfg)?;
|
|
113
|
+
let out = find_largest_render_under_budget(&order_build, config, budget);
|
|
114
|
+
Ok(out)
|
|
115
|
+
}
|
|
116
|
+
|
|
90
117
|
fn find_largest_render_under_budget(
|
|
91
118
|
order_build: &PriorityOrder,
|
|
92
119
|
config: &RenderConfig,
|
|
@@ -29,7 +29,7 @@ struct Cli {
|
|
|
29
29
|
long = "format",
|
|
30
30
|
value_enum,
|
|
31
31
|
default_value_t = OutputFormat::Auto,
|
|
32
|
-
help = "Output format: auto|json|yaml (filesets: auto is per-file)."
|
|
32
|
+
help = "Output format: auto|json|yaml|text (filesets: auto is per-file)."
|
|
33
33
|
)]
|
|
34
34
|
format: OutputFormat,
|
|
35
35
|
#[arg(
|
|
@@ -110,7 +110,7 @@ struct Cli {
|
|
|
110
110
|
long = "input-format",
|
|
111
111
|
value_enum,
|
|
112
112
|
default_value_t = InputFormat::Json,
|
|
113
|
-
help = "Input ingestion format: json|yaml."
|
|
113
|
+
help = "Input ingestion format: json|yaml|text."
|
|
114
114
|
)]
|
|
115
115
|
input_format: InputFormat,
|
|
116
116
|
}
|
|
@@ -120,6 +120,7 @@ enum OutputFormat {
|
|
|
120
120
|
Auto,
|
|
121
121
|
Json,
|
|
122
122
|
Yaml,
|
|
123
|
+
Text,
|
|
123
124
|
}
|
|
124
125
|
|
|
125
126
|
#[derive(Copy, Clone, Debug, ValueEnum)]
|
|
@@ -133,6 +134,7 @@ enum StyleArg {
|
|
|
133
134
|
enum InputFormat {
|
|
134
135
|
Json,
|
|
135
136
|
Yaml,
|
|
137
|
+
Text,
|
|
136
138
|
}
|
|
137
139
|
|
|
138
140
|
fn main() -> Result<()> {
|
|
@@ -198,6 +200,9 @@ fn run_from_stdin(
|
|
|
198
200
|
InputFormat::Yaml => {
|
|
199
201
|
headson::headson_yaml(input_bytes, &cfg, &prio, eff)
|
|
200
202
|
}
|
|
203
|
+
InputFormat::Text => {
|
|
204
|
+
headson::headson_text(input_bytes, &cfg, &prio, eff)
|
|
205
|
+
}
|
|
201
206
|
}
|
|
202
207
|
}
|
|
203
208
|
|
|
@@ -222,11 +227,22 @@ fn run_from_paths(
|
|
|
222
227
|
lower.ends_with(".yaml") || lower.ends_with(".yml")
|
|
223
228
|
})
|
|
224
229
|
}
|
|
230
|
+
fn all_json_ext(entries: &InputEntries) -> bool {
|
|
231
|
+
entries.iter().all(|(name, _)| {
|
|
232
|
+
let lower = name.to_ascii_lowercase();
|
|
233
|
+
lower.ends_with(".json")
|
|
234
|
+
})
|
|
235
|
+
}
|
|
225
236
|
if cli.inputs.len() > 1 {
|
|
226
|
-
let chosen_input = if matches!(cli.format, OutputFormat::Auto)
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
237
|
+
let chosen_input = if matches!(cli.format, OutputFormat::Auto) {
|
|
238
|
+
if any_yaml_ext(&entries) {
|
|
239
|
+
InputFormat::Yaml
|
|
240
|
+
} else if all_json_ext(&entries) {
|
|
241
|
+
InputFormat::Json
|
|
242
|
+
} else {
|
|
243
|
+
// Mixed or unknown extensions: treat as text to avoid JSON/YAML parse errors
|
|
244
|
+
InputFormat::Text
|
|
245
|
+
}
|
|
230
246
|
} else {
|
|
231
247
|
cli.input_format
|
|
232
248
|
};
|
|
@@ -236,6 +252,7 @@ fn run_from_paths(
|
|
|
236
252
|
OutputFormat::Auto => headson::OutputTemplate::Auto,
|
|
237
253
|
OutputFormat::Json => map_json_template_for_style(cfg.style),
|
|
238
254
|
OutputFormat::Yaml => headson::OutputTemplate::Yaml,
|
|
255
|
+
OutputFormat::Text => headson::OutputTemplate::Text,
|
|
239
256
|
};
|
|
240
257
|
let out = match chosen_input {
|
|
241
258
|
InputFormat::Json => {
|
|
@@ -244,6 +261,9 @@ fn run_from_paths(
|
|
|
244
261
|
InputFormat::Yaml => {
|
|
245
262
|
headson::headson_many_yaml(entries, &cfg, &prio, eff)?
|
|
246
263
|
}
|
|
264
|
+
InputFormat::Text => {
|
|
265
|
+
headson::headson_many_text(entries, &cfg, &prio, eff)?
|
|
266
|
+
}
|
|
247
267
|
};
|
|
248
268
|
Ok((out, ignored))
|
|
249
269
|
} else if included == 0 {
|
|
@@ -257,8 +277,10 @@ fn run_from_paths(
|
|
|
257
277
|
OutputFormat::Auto => {
|
|
258
278
|
if is_yaml_ext {
|
|
259
279
|
InputFormat::Yaml
|
|
260
|
-
} else {
|
|
280
|
+
} else if lower.ends_with(".json") {
|
|
261
281
|
InputFormat::Json
|
|
282
|
+
} else {
|
|
283
|
+
InputFormat::Text
|
|
262
284
|
}
|
|
263
285
|
}
|
|
264
286
|
_ => cli.input_format,
|
|
@@ -272,6 +294,9 @@ fn run_from_paths(
|
|
|
272
294
|
InputFormat::Yaml => {
|
|
273
295
|
headson::headson_yaml(bytes, &cfg, &prio, eff)?
|
|
274
296
|
}
|
|
297
|
+
InputFormat::Text => {
|
|
298
|
+
headson::headson_text(bytes, &cfg, &prio, eff)?
|
|
299
|
+
}
|
|
275
300
|
};
|
|
276
301
|
Ok((out, ignored))
|
|
277
302
|
}
|
|
@@ -359,6 +384,7 @@ fn get_render_config_from(cli: &Cli) -> headson::RenderConfig {
|
|
|
359
384
|
map_json_template_for_style(map_style(cli.style))
|
|
360
385
|
}
|
|
361
386
|
OutputFormat::Yaml => headson::OutputTemplate::Yaml,
|
|
387
|
+
OutputFormat::Text => headson::OutputTemplate::Text,
|
|
362
388
|
};
|
|
363
389
|
let space = if cli.compact || cli.no_space { "" } else { " " }.to_string();
|
|
364
390
|
let newline = if cli.compact || cli.no_newline {
|
|
@@ -433,6 +459,7 @@ fn resolve_effective_template_for_stdin(
|
|
|
433
459
|
map_json_template_for_style(style)
|
|
434
460
|
}
|
|
435
461
|
OutputFormat::Yaml => headson::OutputTemplate::Yaml,
|
|
462
|
+
OutputFormat::Text => headson::OutputTemplate::Text,
|
|
436
463
|
}
|
|
437
464
|
}
|
|
438
465
|
|
|
@@ -444,14 +471,15 @@ fn resolve_effective_template_for_single(
|
|
|
444
471
|
match fmt {
|
|
445
472
|
OutputFormat::Json => map_json_template_for_style(style),
|
|
446
473
|
OutputFormat::Yaml => headson::OutputTemplate::Yaml,
|
|
474
|
+
OutputFormat::Text => headson::OutputTemplate::Text,
|
|
447
475
|
OutputFormat::Auto => {
|
|
448
476
|
if lower_name.ends_with(".yaml") || lower_name.ends_with(".yml") {
|
|
449
477
|
headson::OutputTemplate::Yaml
|
|
450
478
|
} else if lower_name.ends_with(".json") {
|
|
451
479
|
map_json_template_for_style(style)
|
|
452
480
|
} else {
|
|
453
|
-
// Unknown:
|
|
454
|
-
|
|
481
|
+
// Unknown extension: prefer text template.
|
|
482
|
+
headson::OutputTemplate::Text
|
|
455
483
|
}
|
|
456
484
|
}
|
|
457
485
|
}
|
|
@@ -85,7 +85,7 @@ impl<'a> RenderScope<'a> {
|
|
|
85
85
|
let fmt = Format::from_filename(raw_key);
|
|
86
86
|
let template = match fmt {
|
|
87
87
|
Format::Yaml => OutputTemplate::Yaml,
|
|
88
|
-
Format::Json
|
|
88
|
+
Format::Json => match self.config.style {
|
|
89
89
|
crate::serialization::types::Style::Strict => {
|
|
90
90
|
OutputTemplate::Json
|
|
91
91
|
}
|
|
@@ -96,6 +96,7 @@ impl<'a> RenderScope<'a> {
|
|
|
96
96
|
OutputTemplate::Js
|
|
97
97
|
}
|
|
98
98
|
},
|
|
99
|
+
Format::Unknown => OutputTemplate::Text,
|
|
99
100
|
};
|
|
100
101
|
return self.render_node_to_string_with_template(
|
|
101
102
|
child_id, depth, false, template,
|