headson 0.6.2__tar.gz → 0.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of headson might be problematic. Click here for more details.

Files changed (72) hide show
  1. {headson-0.6.2 → headson-0.6.3}/Cargo.lock +1 -1
  2. {headson-0.6.2 → headson-0.6.3}/Cargo.toml +1 -1
  3. {headson-0.6.2 → headson-0.6.3}/PKG-INFO +29 -7
  4. {headson-0.6.2 → headson-0.6.3}/README.md +28 -6
  5. headson-0.6.3/docs/assets/tapes/demo.gif +0 -0
  6. {headson-0.6.2 → headson-0.6.3}/pyproject.toml +1 -1
  7. {headson-0.6.2 → headson-0.6.3}/python/Cargo.lock +2 -2
  8. {headson-0.6.2 → headson-0.6.3}/python/Cargo.toml +1 -1
  9. {headson-0.6.2 → headson-0.6.3}/python/README.md +6 -2
  10. {headson-0.6.2 → headson-0.6.3}/python/src/lib.rs +9 -7
  11. headson-0.6.3/src/ingest/json.rs +37 -0
  12. headson-0.6.3/src/ingest/mod.rs +64 -0
  13. headson-0.6.3/src/ingest/text.rs +45 -0
  14. headson-0.6.3/src/ingest/yaml.rs +39 -0
  15. {headson-0.6.2 → headson-0.6.3}/src/lib.rs +27 -0
  16. {headson-0.6.2 → headson-0.6.3}/src/main.rs +37 -9
  17. {headson-0.6.2 → headson-0.6.3}/src/serialization/fileset.rs +2 -1
  18. {headson-0.6.2 → headson-0.6.3}/src/serialization/mod.rs +63 -3
  19. {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/mod.rs +3 -0
  20. headson-0.6.3/src/serialization/templates/text.rs +43 -0
  21. {headson-0.6.2 → headson-0.6.3}/src/serialization/types.rs +1 -0
  22. headson-0.6.3/src/text_ingest/mod.rs +192 -0
  23. headson-0.6.2/docs/assets/tapes/demo.gif +0 -0
  24. headson-0.6.2/src/ingest/mod.rs +0 -120
  25. {headson-0.6.2 → headson-0.6.3}/docs/assets/algorithm.svg +0 -0
  26. {headson-0.6.2 → headson-0.6.3}/docs/assets/logo.png +0 -0
  27. {headson-0.6.2 → headson-0.6.3}/docs/assets/logo.svg +0 -0
  28. {headson-0.6.2 → headson-0.6.3}/python/headson/__init__.py +0 -0
  29. {headson-0.6.2 → headson-0.6.3}/src/format.rs +0 -0
  30. {headson-0.6.2 → headson-0.6.3}/src/json_ingest/builder.rs +0 -0
  31. {headson-0.6.2 → headson-0.6.3}/src/json_ingest/mod.rs +0 -0
  32. {headson-0.6.2 → headson-0.6.3}/src/json_ingest/samplers/default.rs +0 -0
  33. {headson-0.6.2 → headson-0.6.3}/src/json_ingest/samplers/head.rs +0 -0
  34. {headson-0.6.2 → headson-0.6.3}/src/json_ingest/samplers/mod.rs +0 -0
  35. {headson-0.6.2 → headson-0.6.3}/src/json_ingest/samplers/tail.rs +0 -0
  36. {headson-0.6.2 → headson-0.6.3}/src/order/build.rs +0 -0
  37. {headson-0.6.2 → headson-0.6.3}/src/order/mod.rs +0 -0
  38. {headson-0.6.2 → headson-0.6.3}/src/order/scoring.rs +0 -0
  39. {headson-0.6.2 → headson-0.6.3}/src/order/snapshots/headson__order__build__tests__order_empty_array_order.snap +0 -0
  40. {headson-0.6.2 → headson-0.6.3}/src/order/snapshots/headson__order__build__tests__order_single_string_array_order.snap +0 -0
  41. {headson-0.6.2 → headson-0.6.3}/src/order/types.rs +0 -0
  42. {headson-0.6.2 → headson-0.6.3}/src/serialization/color.rs +0 -0
  43. {headson-0.6.2 → headson-0.6.3}/src/serialization/output.rs +0 -0
  44. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty.snap +0 -0
  45. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__arena_render_empty_yaml.snap +0 -0
  46. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__arena_render_single.snap +0 -0
  47. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__arena_render_single_yaml.snap +0 -0
  48. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_internal_gaps_yaml.snap +0 -0
  49. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_head.snap +0 -0
  50. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_js_tail.snap +0 -0
  51. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_head.snap +0 -0
  52. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_pseudo_tail.snap +0 -0
  53. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_head.snap +0 -0
  54. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__array_omitted_yaml_tail.snap +0 -0
  55. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_json.snap +0 -0
  56. {headson-0.6.2 → headson-0.6.3}/src/serialization/snapshots/headson__serialization__tests__inline_open_array_in_object_yaml.snap +0 -0
  57. {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/core.rs +0 -0
  58. {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/js.rs +0 -0
  59. {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/json.rs +0 -0
  60. {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/pseudo.rs +0 -0
  61. {headson-0.6.2 → headson-0.6.3}/src/serialization/templates/yaml.rs +0 -0
  62. {headson-0.6.2 → headson-0.6.3}/src/snapshots/headson__order__tests__order_empty_array_order.snap +0 -0
  63. {headson-0.6.2 → headson-0.6.3}/src/snapshots/headson__order__tests__order_single_string_array_order.snap +0 -0
  64. {headson-0.6.2 → headson-0.6.3}/src/utils/graph.rs +0 -0
  65. {headson-0.6.2 → headson-0.6.3}/src/utils/json.rs +0 -0
  66. {headson-0.6.2 → headson-0.6.3}/src/utils/mod.rs +0 -0
  67. {headson-0.6.2 → headson-0.6.3}/src/utils/search.rs +0 -0
  68. {headson-0.6.2 → headson-0.6.3}/src/utils/text.rs +0 -0
  69. {headson-0.6.2 → headson-0.6.3}/src/utils/tree_arena.rs +0 -0
  70. {headson-0.6.2 → headson-0.6.3}/src/yaml_ingest/mod.rs +0 -0
  71. {headson-0.6.2 → headson-0.6.3}/tests/fixtures/json/JSONTestSuite/LICENSE +0 -0
  72. {headson-0.6.2 → headson-0.6.3}/tests/fixtures/json/JSONTestSuite/README.md +0 -0
@@ -298,7 +298,7 @@ dependencies = [
298
298
 
299
299
  [[package]]
300
300
  name = "headson"
301
- version = "0.6.2"
301
+ version = "0.6.3"
302
302
  dependencies = [
303
303
  "anyhow",
304
304
  "assert_cmd",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "headson"
3
- version = "0.6.2"
3
+ version = "0.6.3"
4
4
  edition = "2024"
5
5
  description = "Budget‑constrained JSON preview renderer"
6
6
  readme = "README.md"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: headson
3
- Version: 0.6.2
3
+ Version: 0.6.3
4
4
  Classifier: Programming Language :: Python
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Rust
@@ -20,7 +20,7 @@ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
20
20
  <br/>
21
21
  </p>
22
22
 
23
- Head/tail for JSON and YAML but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget.
23
+ `head`/`tail` for JSON and YAML — but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
24
24
 
25
25
  Available as:
26
26
  - CLI (see [Usage](#usage))
@@ -41,10 +41,11 @@ From source:
41
41
  ## Features
42
42
 
43
43
  - Budgeted output: specify exactly how much you want to see
44
- - Output formats: `auto | json | yaml`
44
+ - Output formats: `auto | json | yaml | text`
45
45
  - Styles: `strict | default | detailed`
46
46
  - JSON family: `strict` → strict JSON, `default` → human‑friendly Pseudo, `detailed` → JS with inline comments
47
47
  - YAML: always YAML; `strict` has no comments, `default` uses “# …”, `detailed` uses “# N more …”
48
+ - Text: prints raw lines. In `default` style, omissions are shown as a single line `…`; in `detailed`, as `… N more lines …`. `strict` omits array‑level summaries.
48
49
  - Multiple inputs: preview many files at once with a shared or per‑file budget
49
50
  - Fast: processes gigabyte‑scale files in seconds (mostly disk‑bound)
50
51
  - Available as a CLI app and as a Python library
@@ -68,12 +69,12 @@ Common flags:
68
69
 
69
70
  - `-n, --budget <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
70
71
  - `-N, --global-budget <BYTES>`: total output budget across all inputs. With `--budget`, the effective total is the smaller of the two.
71
- - `-f, --format <auto|json|yaml>`: output format (default: `auto`).
72
- - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML).
72
+ - `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
73
+ - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
73
74
  - `-t, --template <strict|default|detailed>`: output style (default: `default`).
74
75
  - JSON family: `strict` → strict JSON; `default` → Pseudo; `detailed` → JS with inline comments.
75
76
  - YAML: always YAML; style only affects comments (`strict` none, `default` “# …”, `detailed` “# N more …”).
76
- - `-i, --input-format <json|yaml>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
77
+ - `-i, --input-format <json|yaml|text>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
77
78
  - `-m, --compact`: no indentation, no spaces, no newlines
78
79
  - `--no-newline`: single line output
79
80
  - `--no-space`: no space after `:` in objects
@@ -86,7 +87,8 @@ Notes:
86
87
 
87
88
  - Multiple inputs:
88
89
  - With newlines enabled, file sections are rendered with human‑readable headers. In compact/single‑line modes, headers are omitted.
89
- - In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
90
+ - In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
91
+ - Unknown extensions are treated as Text (raw lines) — safe for logs and `.txt` files.
90
92
  - `--global-budget` may truncate or omit entire files to respect the total budget.
91
93
  - The tool finds the largest preview that fits the budget; even if extremely tight, you still get a minimal, valid preview.
92
94
  - Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
@@ -110,6 +112,25 @@ Quick one‑liners:
110
112
 
111
113
  headson -n 400 -f yaml -t detailed config.yaml
112
114
 
115
+ ### Text mode
116
+
117
+ - Single file (auto):
118
+
119
+ headson -n 200 notes.txt
120
+
121
+ - Force Text ingest/output (useful when mixing with other extensions):
122
+
123
+ headson -n 200 -i text -f text notes.txt
124
+
125
+ - Many text files (fileset):
126
+
127
+ headson -n 800 -i text -f text logs/*.txt
128
+
129
+ - Styles on Text:
130
+ - default: omission as a standalone `…` line.
131
+ - detailed: omission as `… N more lines …`.
132
+ - strict: no array‑level omission line (individual long lines may still truncate with `…`).
133
+
113
134
  Show help:
114
135
 
115
136
  headson --help
@@ -157,6 +178,7 @@ Regenerate locally:
157
178
  - Run: cargo make tapes
158
179
  - Outputs are written to docs/assets/tapes
159
180
 
181
+
160
182
  ## Python Bindings
161
183
 
162
184
  A thin Python extension module is available on PyPI as `headson`.
@@ -6,7 +6,7 @@
6
6
  <br/>
7
7
  </p>
8
8
 
9
- Head/tail for JSON and YAML but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget.
9
+ `head`/`tail` for JSON and YAML — but structure‑aware. Get a compact preview that shows both the shape and representative values of your data, all within a strict character budget. (Just like `head`/`tail`, `headson` can also work with unstructured text files.)
10
10
 
11
11
  Available as:
12
12
  - CLI (see [Usage](#usage))
@@ -27,10 +27,11 @@ From source:
27
27
  ## Features
28
28
 
29
29
  - Budgeted output: specify exactly how much you want to see
30
- - Output formats: `auto | json | yaml`
30
+ - Output formats: `auto | json | yaml | text`
31
31
  - Styles: `strict | default | detailed`
32
32
  - JSON family: `strict` → strict JSON, `default` → human‑friendly Pseudo, `detailed` → JS with inline comments
33
33
  - YAML: always YAML; `strict` has no comments, `default` uses “# …”, `detailed` uses “# N more …”
34
+ - Text: prints raw lines. In `default` style, omissions are shown as a single line `…`; in `detailed`, as `… N more lines …`. `strict` omits array‑level summaries.
34
35
  - Multiple inputs: preview many files at once with a shared or per‑file budget
35
36
  - Fast: processes gigabyte‑scale files in seconds (mostly disk‑bound)
36
37
  - Available as a CLI app and as a Python library
@@ -54,12 +55,12 @@ Common flags:
54
55
 
55
56
  - `-n, --budget <BYTES>`: per‑file output budget. For multiple inputs, default total budget is `<BYTES> * number_of_inputs`.
56
57
  - `-N, --global-budget <BYTES>`: total output budget across all inputs. With `--budget`, the effective total is the smaller of the two.
57
- - `-f, --format <auto|json|yaml>`: output format (default: `auto`).
58
- - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML).
58
+ - `-f, --format <auto|json|yaml|text>`: output format (default: `auto`).
59
+ - Auto: stdin → JSON family; filesets → per‑file based on extension (`.json` → JSON family, `.yaml`/`.yml` → YAML, unknown → Text).
59
60
  - `-t, --template <strict|default|detailed>`: output style (default: `default`).
60
61
  - JSON family: `strict` → strict JSON; `default` → Pseudo; `detailed` → JS with inline comments.
61
62
  - YAML: always YAML; style only affects comments (`strict` none, `default` “# …”, `detailed` “# N more …”).
62
- - `-i, --input-format <json|yaml>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
63
+ - `-i, --input-format <json|yaml|text>`: ingestion format (default: `json`). For filesets in `auto` format, ingestion is chosen by extensions.
63
64
  - `-m, --compact`: no indentation, no spaces, no newlines
64
65
  - `--no-newline`: single line output
65
66
  - `--no-space`: no space after `:` in objects
@@ -72,7 +73,8 @@ Notes:
72
73
 
73
74
  - Multiple inputs:
74
75
  - With newlines enabled, file sections are rendered with human‑readable headers. In compact/single‑line modes, headers are omitted.
75
- - In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
76
+ - In `--format auto`, each file uses its own best format: JSON family for `.json`, YAML for `.yaml`/`.yml`.
77
+ - Unknown extensions are treated as Text (raw lines) — safe for logs and `.txt` files.
76
78
  - `--global-budget` may truncate or omit entire files to respect the total budget.
77
79
  - The tool finds the largest preview that fits the budget; even if extremely tight, you still get a minimal, valid preview.
78
80
  - Directories and binary files are ignored; a notice is printed to stderr for each. Stdin reads the stream as‑is.
@@ -96,6 +98,25 @@ Quick one‑liners:
96
98
 
97
99
  headson -n 400 -f yaml -t detailed config.yaml
98
100
 
101
+ ### Text mode
102
+
103
+ - Single file (auto):
104
+
105
+ headson -n 200 notes.txt
106
+
107
+ - Force Text ingest/output (useful when mixing with other extensions):
108
+
109
+ headson -n 200 -i text -f text notes.txt
110
+
111
+ - Many text files (fileset):
112
+
113
+ headson -n 800 -i text -f text logs/*.txt
114
+
115
+ - Styles on Text:
116
+ - default: omission as a standalone `…` line.
117
+ - detailed: omission as `… N more lines …`.
118
+ - strict: no array‑level omission line (individual long lines may still truncate with `…`).
119
+
99
120
  Show help:
100
121
 
101
122
  headson --help
@@ -143,6 +164,7 @@ Regenerate locally:
143
164
  - Run: cargo make tapes
144
165
  - Outputs are written to docs/assets/tapes
145
166
 
167
+
146
168
  ## Python Bindings
147
169
 
148
170
  A thin Python extension module is available on PyPI as `headson`.
Binary file
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "headson"
7
- version = "0.6.2"
7
+ version = "0.6.3"
8
8
  description = "Budget‑constrained JSON preview renderer (Python bindings)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -214,7 +214,7 @@ dependencies = [
214
214
 
215
215
  [[package]]
216
216
  name = "headson"
217
- version = "0.6.2"
217
+ version = "0.6.3"
218
218
  dependencies = [
219
219
  "anyhow",
220
220
  "clap",
@@ -228,7 +228,7 @@ dependencies = [
228
228
 
229
229
  [[package]]
230
230
  name = "headson-python"
231
- version = "0.6.2"
231
+ version = "0.6.3"
232
232
  dependencies = [
233
233
  "anyhow",
234
234
  "headson",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "headson-python"
3
- version = "0.6.2"
3
+ version = "0.6.3"
4
4
  edition = "2021"
5
5
  publish = false
6
6
  readme = "README.md"
@@ -5,9 +5,9 @@ Minimal Python API for the `headson` preview renderer.
5
5
  API
6
6
 
7
7
  - `headson.summarize(text: str, *, format: str = "auto", style: str = "default", input_format: str = "json", character_budget: int | None = None, skew: str = "balanced") -> str`
8
- - `format`: output format — `"auto" | "json" | "yaml"`.
8
+ - `format`: output format — `"auto" | "json" | "yaml" | "text"`.
9
9
  - `style`: output style — `"strict" | "default" | "detailed"`.
10
- - `input_format`: ingestion format — `"json" | "yaml"`.
10
+ - `input_format`: ingestion format — `"json" | "yaml" | "text"`.
11
11
  - `character_budget`: maximum output size in characters (defaults to 500 if not set).
12
12
  - `skew`: one of `"balanced" | "head" | "tail"`.
13
13
  - `balanced` (default), `head` keeps first N, `tail` keeps last N. Display styles place omission markers accordingly; strict JSON remains unannotated.
@@ -36,6 +36,10 @@ print(headson.summarize(doc, format="yaml", style="default", input_format="yaml"
36
36
  print(headson.summarize(doc, format="yaml", style="detailed", input_format="yaml", character_budget=60))
37
37
 
38
38
  # Note: tail mode affects only display styles; strict JSON stays strict.
39
+
40
+ # Text: render raw lines with omission markers depending on style
41
+ text = "one\ntwo\nthree\n"
42
+ print(headson.summarize(text, format="text", style="default", input_format="text", character_budget=10))
39
43
  ```
40
44
 
41
45
  Install for development:
@@ -31,7 +31,11 @@ fn map_output_template(format: &str, style: Style) -> Result<OutputTemplate> {
31
31
  "auto" => Ok(map_json_template_for_style(style)), // stdin => JSON family
32
32
  "json" => Ok(map_json_template_for_style(style)),
33
33
  "yaml" | "yml" => Ok(OutputTemplate::Yaml),
34
- other => bail!("unknown format: {} (expected 'auto' | 'json' | 'yaml')", other),
34
+ "text" => Ok(OutputTemplate::Text),
35
+ other => bail!(
36
+ "unknown format: {} (expected 'auto' | 'json' | 'yaml' | 'text')",
37
+ other
38
+ ),
35
39
  }
36
40
  }
37
41
 
@@ -107,14 +111,12 @@ fn summarize(
107
111
  let input = text.as_bytes().to_vec();
108
112
  py.detach(|| {
109
113
  match input_format.to_ascii_lowercase().as_str() {
110
- "json" => headson_core::headson(input, &cfg, &prio, budget)
114
+ "json" => headson_core::headson(input, &cfg, &prio, budget).map_err(to_pyerr),
115
+ "yaml" | "yml" => headson_core::headson_yaml(input, &cfg, &prio, budget)
111
116
  .map_err(to_pyerr),
112
- "yaml" | "yml" => {
113
- headson_core::headson_yaml(input, &cfg, &prio, budget)
114
- .map_err(to_pyerr)
115
- }
117
+ "text" => headson_core::headson_text(input, &cfg, &prio, budget).map_err(to_pyerr),
116
118
  other => Err(to_pyerr(anyhow::anyhow!(
117
- "unknown input_format: {} (expected 'json' | 'yaml')",
119
+ "unknown input_format: {} (expected 'json' | 'yaml' | 'text')",
118
120
  other
119
121
  ))),
120
122
  }
@@ -0,0 +1,37 @@
1
+ use anyhow::Result;
2
+
3
+ use super::Ingest;
4
+ use crate::order::PriorityConfig;
5
+ use crate::utils::tree_arena::JsonTreeArena as TreeArena;
6
+
7
+ /// JSON adapter for the ingest boundary. Delegates to the existing
8
+ /// JSON builder to produce the neutral `TreeArena`.
9
+ pub struct JsonIngest;
10
+
11
+ impl Ingest for JsonIngest {
12
+ fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena> {
13
+ crate::json_ingest::build_json_tree_arena_from_bytes(bytes, cfg)
14
+ }
15
+
16
+ fn parse_many(
17
+ inputs: Vec<(String, Vec<u8>)>,
18
+ cfg: &PriorityConfig,
19
+ ) -> Result<TreeArena> {
20
+ crate::json_ingest::build_json_tree_arena_from_many(inputs, cfg)
21
+ }
22
+ }
23
+
24
+ /// Convenience functions for the JSON ingest path.
25
+ pub fn parse_json_one(
26
+ bytes: Vec<u8>,
27
+ cfg: &PriorityConfig,
28
+ ) -> Result<TreeArena> {
29
+ JsonIngest::parse_one(bytes, cfg)
30
+ }
31
+
32
+ pub fn parse_json_many(
33
+ inputs: Vec<(String, Vec<u8>)>,
34
+ cfg: &PriorityConfig,
35
+ ) -> Result<TreeArena> {
36
+ JsonIngest::parse_many(inputs, cfg)
37
+ }
@@ -0,0 +1,64 @@
1
+ use anyhow::Result;
2
+
3
+ use crate::order::PriorityConfig;
4
+ use crate::utils::tree_arena::JsonTreeArena as TreeArena;
5
+
6
+ /// Format-agnostic ingest boundary. Other formats can implement this trait
7
+ /// to produce the neutral TreeArena without going through JSON first.
8
+ pub trait Ingest {
9
+ fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena>;
10
+ fn parse_many(
11
+ inputs: Vec<(String, Vec<u8>)>,
12
+ cfg: &PriorityConfig,
13
+ ) -> Result<TreeArena>;
14
+ }
15
+
16
+ // Submodules for per-format adapters
17
+ pub mod json;
18
+ pub mod text;
19
+ pub mod yaml;
20
+
21
+ // Re-export commonly used helpers for convenience (keep adapter types private)
22
+ pub use json::{parse_json_many, parse_json_one};
23
+ pub use text::{parse_text_many, parse_text_one};
24
+ pub use yaml::{parse_yaml_many, parse_yaml_one};
25
+
26
+ #[cfg(test)]
27
+ mod tests {
28
+ use super::*;
29
+ use crate::order::NodeKind;
30
+
31
+ #[test]
32
+ fn parse_one_basic_shape() {
33
+ let arena = parse_json_one(
34
+ b"{\"a\":1}".to_vec(),
35
+ &PriorityConfig::new(usize::MAX, usize::MAX),
36
+ )
37
+ .unwrap();
38
+ assert!(
39
+ !arena.is_fileset,
40
+ "single input should not be marked fileset"
41
+ );
42
+ let root = arena.root_id;
43
+ assert_eq!(arena.nodes[root].kind, NodeKind::Object);
44
+ assert_eq!(arena.nodes[root].object_len.unwrap_or(1), 1);
45
+ }
46
+
47
+ #[test]
48
+ fn parse_many_sets_fileset_root() {
49
+ let inputs = vec![
50
+ ("a.json".to_string(), b"{}".to_vec()),
51
+ ("b.json".to_string(), b"[]".to_vec()),
52
+ ];
53
+ let arena = parse_json_many(
54
+ inputs,
55
+ &PriorityConfig::new(usize::MAX, usize::MAX),
56
+ )
57
+ .unwrap();
58
+ assert!(arena.is_fileset, "multi input should be marked fileset");
59
+ let root = arena.root_id;
60
+ assert_eq!(arena.nodes[root].kind, NodeKind::Object);
61
+ // Expect two top-level entries
62
+ assert_eq!(arena.nodes[root].object_len.unwrap_or(0), 2);
63
+ }
64
+ }
@@ -0,0 +1,45 @@
1
+ use anyhow::Result;
2
+
3
+ use super::Ingest;
4
+ use crate::order::PriorityConfig;
5
+ use crate::utils::tree_arena::JsonTreeArena as TreeArena;
6
+
7
+ /// Text adapter for the ingest boundary.
8
+ ///
9
+ /// Behavior:
10
+ /// - Decodes input as UTF-8 using `String::from_utf8_lossy`, which replaces
11
+ /// any invalid UTF-8 byte sequences with the Unicode replacement character
12
+ /// U+FFFD (this is what “lossy” means here).
13
+ /// - Normalizes newlines to `\n`, splits into logical lines, and builds an
14
+ /// array of string nodes.
15
+ /// - For multi-file inputs, produces a fileset object where each value is the
16
+ /// corresponding file’s array of lines.
17
+ pub struct TextIngest;
18
+
19
+ impl Ingest for TextIngest {
20
+ fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena> {
21
+ crate::text_ingest::build_text_tree_arena_from_bytes(bytes, cfg)
22
+ }
23
+
24
+ fn parse_many(
25
+ inputs: Vec<(String, Vec<u8>)>,
26
+ cfg: &PriorityConfig,
27
+ ) -> Result<TreeArena> {
28
+ crate::text_ingest::build_text_tree_arena_from_many(inputs, cfg)
29
+ }
30
+ }
31
+
32
+ /// Convenience functions for the Text ingest path.
33
+ pub fn parse_text_one(
34
+ bytes: Vec<u8>,
35
+ cfg: &PriorityConfig,
36
+ ) -> Result<TreeArena> {
37
+ TextIngest::parse_one(bytes, cfg)
38
+ }
39
+
40
+ pub fn parse_text_many(
41
+ inputs: Vec<(String, Vec<u8>)>,
42
+ cfg: &PriorityConfig,
43
+ ) -> Result<TreeArena> {
44
+ TextIngest::parse_many(inputs, cfg)
45
+ }
@@ -0,0 +1,39 @@
1
+ use anyhow::Result;
2
+
3
+ use super::Ingest;
4
+ use crate::order::PriorityConfig;
5
+ use crate::utils::tree_arena::JsonTreeArena as TreeArena;
6
+
7
+ /// YAML adapter for the ingest boundary. Parses YAML using `yaml-rust2`
8
+ /// and builds the neutral `TreeArena`. Multi-document YAML in a single
9
+ /// input is wrapped in an array; multi-file inputs produce a fileset
10
+ /// object whose values may be arrays when a file contains multiple docs.
11
+ pub struct YamlIngest;
12
+
13
+ impl Ingest for YamlIngest {
14
+ fn parse_one(bytes: Vec<u8>, cfg: &PriorityConfig) -> Result<TreeArena> {
15
+ crate::yaml_ingest::build_yaml_tree_arena_from_bytes(bytes, cfg)
16
+ }
17
+
18
+ fn parse_many(
19
+ inputs: Vec<(String, Vec<u8>)>,
20
+ cfg: &PriorityConfig,
21
+ ) -> Result<TreeArena> {
22
+ crate::yaml_ingest::build_yaml_tree_arena_from_many(inputs, cfg)
23
+ }
24
+ }
25
+
26
+ /// Convenience functions for the YAML ingest path.
27
+ pub fn parse_yaml_one(
28
+ bytes: Vec<u8>,
29
+ cfg: &PriorityConfig,
30
+ ) -> Result<TreeArena> {
31
+ YamlIngest::parse_one(bytes, cfg)
32
+ }
33
+
34
+ pub fn parse_yaml_many(
35
+ inputs: Vec<(String, Vec<u8>)>,
36
+ cfg: &PriorityConfig,
37
+ ) -> Result<TreeArena> {
38
+ YamlIngest::parse_many(inputs, cfg)
39
+ }
@@ -25,6 +25,7 @@ mod ingest;
25
25
  mod json_ingest;
26
26
  mod order;
27
27
  mod serialization;
28
+ mod text_ingest;
28
29
  mod utils;
29
30
  mod yaml_ingest;
30
31
  pub use order::types::{ArrayBias, ArraySamplerStrategy};
@@ -87,6 +88,32 @@ pub fn headson_many_yaml(
87
88
  Ok(out)
88
89
  }
89
90
 
91
+ /// Same as `headson` but using the Text ingest path.
92
+ pub fn headson_text(
93
+ input: Vec<u8>,
94
+ config: &RenderConfig,
95
+ priority_cfg: &PriorityConfig,
96
+ budget: usize,
97
+ ) -> Result<String> {
98
+ let arena = crate::ingest::parse_text_one(input, priority_cfg)?;
99
+ let order_build = order::build_order(&arena, priority_cfg)?;
100
+ let out = find_largest_render_under_budget(&order_build, config, budget);
101
+ Ok(out)
102
+ }
103
+
104
+ /// Same as `headson_many` but using the Text ingest path.
105
+ pub fn headson_many_text(
106
+ inputs: Vec<(String, Vec<u8>)>,
107
+ config: &RenderConfig,
108
+ priority_cfg: &PriorityConfig,
109
+ budget: usize,
110
+ ) -> Result<String> {
111
+ let arena = crate::ingest::parse_text_many(inputs, priority_cfg)?;
112
+ let order_build = order::build_order(&arena, priority_cfg)?;
113
+ let out = find_largest_render_under_budget(&order_build, config, budget);
114
+ Ok(out)
115
+ }
116
+
90
117
  fn find_largest_render_under_budget(
91
118
  order_build: &PriorityOrder,
92
119
  config: &RenderConfig,
@@ -29,7 +29,7 @@ struct Cli {
29
29
  long = "format",
30
30
  value_enum,
31
31
  default_value_t = OutputFormat::Auto,
32
- help = "Output format: auto|json|yaml (filesets: auto is per-file)."
32
+ help = "Output format: auto|json|yaml|text (filesets: auto is per-file)."
33
33
  )]
34
34
  format: OutputFormat,
35
35
  #[arg(
@@ -110,7 +110,7 @@ struct Cli {
110
110
  long = "input-format",
111
111
  value_enum,
112
112
  default_value_t = InputFormat::Json,
113
- help = "Input ingestion format: json or yaml."
113
+ help = "Input ingestion format: json|yaml|text."
114
114
  )]
115
115
  input_format: InputFormat,
116
116
  }
@@ -120,6 +120,7 @@ enum OutputFormat {
120
120
  Auto,
121
121
  Json,
122
122
  Yaml,
123
+ Text,
123
124
  }
124
125
 
125
126
  #[derive(Copy, Clone, Debug, ValueEnum)]
@@ -133,6 +134,7 @@ enum StyleArg {
133
134
  enum InputFormat {
134
135
  Json,
135
136
  Yaml,
137
+ Text,
136
138
  }
137
139
 
138
140
  fn main() -> Result<()> {
@@ -198,6 +200,9 @@ fn run_from_stdin(
198
200
  InputFormat::Yaml => {
199
201
  headson::headson_yaml(input_bytes, &cfg, &prio, eff)
200
202
  }
203
+ InputFormat::Text => {
204
+ headson::headson_text(input_bytes, &cfg, &prio, eff)
205
+ }
201
206
  }
202
207
  }
203
208
 
@@ -222,11 +227,22 @@ fn run_from_paths(
222
227
  lower.ends_with(".yaml") || lower.ends_with(".yml")
223
228
  })
224
229
  }
230
+ fn all_json_ext(entries: &InputEntries) -> bool {
231
+ entries.iter().all(|(name, _)| {
232
+ let lower = name.to_ascii_lowercase();
233
+ lower.ends_with(".json")
234
+ })
235
+ }
225
236
  if cli.inputs.len() > 1 {
226
- let chosen_input = if matches!(cli.format, OutputFormat::Auto)
227
- && any_yaml_ext(&entries)
228
- {
229
- InputFormat::Yaml
237
+ let chosen_input = if matches!(cli.format, OutputFormat::Auto) {
238
+ if any_yaml_ext(&entries) {
239
+ InputFormat::Yaml
240
+ } else if all_json_ext(&entries) {
241
+ InputFormat::Json
242
+ } else {
243
+ // Mixed or unknown extensions: treat as text to avoid JSON/YAML parse errors
244
+ InputFormat::Text
245
+ }
230
246
  } else {
231
247
  cli.input_format
232
248
  };
@@ -236,6 +252,7 @@ fn run_from_paths(
236
252
  OutputFormat::Auto => headson::OutputTemplate::Auto,
237
253
  OutputFormat::Json => map_json_template_for_style(cfg.style),
238
254
  OutputFormat::Yaml => headson::OutputTemplate::Yaml,
255
+ OutputFormat::Text => headson::OutputTemplate::Text,
239
256
  };
240
257
  let out = match chosen_input {
241
258
  InputFormat::Json => {
@@ -244,6 +261,9 @@ fn run_from_paths(
244
261
  InputFormat::Yaml => {
245
262
  headson::headson_many_yaml(entries, &cfg, &prio, eff)?
246
263
  }
264
+ InputFormat::Text => {
265
+ headson::headson_many_text(entries, &cfg, &prio, eff)?
266
+ }
247
267
  };
248
268
  Ok((out, ignored))
249
269
  } else if included == 0 {
@@ -257,8 +277,10 @@ fn run_from_paths(
257
277
  OutputFormat::Auto => {
258
278
  if is_yaml_ext {
259
279
  InputFormat::Yaml
260
- } else {
280
+ } else if lower.ends_with(".json") {
261
281
  InputFormat::Json
282
+ } else {
283
+ InputFormat::Text
262
284
  }
263
285
  }
264
286
  _ => cli.input_format,
@@ -272,6 +294,9 @@ fn run_from_paths(
272
294
  InputFormat::Yaml => {
273
295
  headson::headson_yaml(bytes, &cfg, &prio, eff)?
274
296
  }
297
+ InputFormat::Text => {
298
+ headson::headson_text(bytes, &cfg, &prio, eff)?
299
+ }
275
300
  };
276
301
  Ok((out, ignored))
277
302
  }
@@ -359,6 +384,7 @@ fn get_render_config_from(cli: &Cli) -> headson::RenderConfig {
359
384
  map_json_template_for_style(map_style(cli.style))
360
385
  }
361
386
  OutputFormat::Yaml => headson::OutputTemplate::Yaml,
387
+ OutputFormat::Text => headson::OutputTemplate::Text,
362
388
  };
363
389
  let space = if cli.compact || cli.no_space { "" } else { " " }.to_string();
364
390
  let newline = if cli.compact || cli.no_newline {
@@ -433,6 +459,7 @@ fn resolve_effective_template_for_stdin(
433
459
  map_json_template_for_style(style)
434
460
  }
435
461
  OutputFormat::Yaml => headson::OutputTemplate::Yaml,
462
+ OutputFormat::Text => headson::OutputTemplate::Text,
436
463
  }
437
464
  }
438
465
 
@@ -444,14 +471,15 @@ fn resolve_effective_template_for_single(
444
471
  match fmt {
445
472
  OutputFormat::Json => map_json_template_for_style(style),
446
473
  OutputFormat::Yaml => headson::OutputTemplate::Yaml,
474
+ OutputFormat::Text => headson::OutputTemplate::Text,
447
475
  OutputFormat::Auto => {
448
476
  if lower_name.ends_with(".yaml") || lower_name.ends_with(".yml") {
449
477
  headson::OutputTemplate::Yaml
450
478
  } else if lower_name.ends_with(".json") {
451
479
  map_json_template_for_style(style)
452
480
  } else {
453
- // Unknown: pick based on style for JSON family.
454
- map_json_template_for_style(style)
481
+ // Unknown extension: prefer text template.
482
+ headson::OutputTemplate::Text
455
483
  }
456
484
  }
457
485
  }
@@ -85,7 +85,7 @@ impl<'a> RenderScope<'a> {
85
85
  let fmt = Format::from_filename(raw_key);
86
86
  let template = match fmt {
87
87
  Format::Yaml => OutputTemplate::Yaml,
88
- Format::Json | Format::Unknown => match self.config.style {
88
+ Format::Json => match self.config.style {
89
89
  crate::serialization::types::Style::Strict => {
90
90
  OutputTemplate::Json
91
91
  }
@@ -96,6 +96,7 @@ impl<'a> RenderScope<'a> {
96
96
  OutputTemplate::Js
97
97
  }
98
98
  },
99
+ Format::Unknown => OutputTemplate::Text,
99
100
  };
100
101
  return self.render_node_to_string_with_template(
101
102
  child_id, depth, false, template,