rsyntaxtree 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/docs/json-schema-spec.md +521 -0
  3. data/.gitignore +1 -0
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +17 -3
  6. data/README.md +3 -2
  7. data/bin/rsyntaxtree +9 -5
  8. data/dev/generate_examples.rb +2 -0
  9. data/docs/_examples/010.md +1 -0
  10. data/docs/_examples/056.md +1 -3
  11. data/docs/_examples/057.md +34 -0
  12. data/docs/_examples/058.md +47 -0
  13. data/docs/_examples/059.md +54 -0
  14. data/docs/_examples/060.md +72 -0
  15. data/docs/_examples/061.md +97 -0
  16. data/docs/_examples/062.md +96 -0
  17. data/docs/_examples/063.md +53 -0
  18. data/docs/assets/img/000.png +0 -0
  19. data/docs/assets/img/001.png +0 -0
  20. data/docs/assets/img/002.png +0 -0
  21. data/docs/assets/img/003.png +0 -0
  22. data/docs/assets/img/004.png +0 -0
  23. data/docs/assets/img/005.png +0 -0
  24. data/docs/assets/img/006.png +0 -0
  25. data/docs/assets/img/007.png +0 -0
  26. data/docs/assets/img/008.png +0 -0
  27. data/docs/assets/img/009.png +0 -0
  28. data/docs/assets/img/010.png +0 -0
  29. data/docs/assets/img/011.png +0 -0
  30. data/docs/assets/img/012.png +0 -0
  31. data/docs/assets/img/013.png +0 -0
  32. data/docs/assets/img/014.png +0 -0
  33. data/docs/assets/img/015.png +0 -0
  34. data/docs/assets/img/016.png +0 -0
  35. data/docs/assets/img/017.png +0 -0
  36. data/docs/assets/img/018.png +0 -0
  37. data/docs/assets/img/019.png +0 -0
  38. data/docs/assets/img/020.png +0 -0
  39. data/docs/assets/img/021.png +0 -0
  40. data/docs/assets/img/022.png +0 -0
  41. data/docs/assets/img/023.png +0 -0
  42. data/docs/assets/img/024.png +0 -0
  43. data/docs/assets/img/025.png +0 -0
  44. data/docs/assets/img/026.png +0 -0
  45. data/docs/assets/img/027.png +0 -0
  46. data/docs/assets/img/028.png +0 -0
  47. data/docs/assets/img/029.png +0 -0
  48. data/docs/assets/img/030.png +0 -0
  49. data/docs/assets/img/031.png +0 -0
  50. data/docs/assets/img/032.png +0 -0
  51. data/docs/assets/img/033.png +0 -0
  52. data/docs/assets/img/034.png +0 -0
  53. data/docs/assets/img/035.png +0 -0
  54. data/docs/assets/img/036.png +0 -0
  55. data/docs/assets/img/037.png +0 -0
  56. data/docs/assets/img/038.png +0 -0
  57. data/docs/assets/img/039.png +0 -0
  58. data/docs/assets/img/040.png +0 -0
  59. data/docs/assets/img/041.png +0 -0
  60. data/docs/assets/img/042.png +0 -0
  61. data/docs/assets/img/043.png +0 -0
  62. data/docs/assets/img/044.png +0 -0
  63. data/docs/assets/img/045.png +0 -0
  64. data/docs/assets/img/046.png +0 -0
  65. data/docs/assets/img/047.png +0 -0
  66. data/docs/assets/img/048.png +0 -0
  67. data/docs/assets/img/049.png +0 -0
  68. data/docs/assets/img/050.png +0 -0
  69. data/docs/assets/img/051.png +0 -0
  70. data/docs/assets/img/052.png +0 -0
  71. data/docs/assets/img/053.png +0 -0
  72. data/docs/assets/img/054.png +0 -0
  73. data/docs/assets/img/055.png +0 -0
  74. data/docs/assets/img/056.png +0 -0
  75. data/docs/assets/img/057.png +0 -0
  76. data/docs/assets/img/058.png +0 -0
  77. data/docs/assets/img/059.png +0 -0
  78. data/docs/assets/img/060.png +0 -0
  79. data/docs/assets/img/061.png +0 -0
  80. data/docs/assets/img/062.png +0 -0
  81. data/docs/assets/img/063.png +0 -0
  82. data/docs/assets/svg/000.svg +26 -26
  83. data/docs/assets/svg/001.svg +19 -19
  84. data/docs/assets/svg/002.svg +37 -37
  85. data/docs/assets/svg/003.svg +29 -29
  86. data/docs/assets/svg/004.svg +41 -41
  87. data/docs/assets/svg/005.svg +21 -21
  88. data/docs/assets/svg/006.svg +25 -25
  89. data/docs/assets/svg/007.svg +31 -31
  90. data/docs/assets/svg/008.svg +35 -35
  91. data/docs/assets/svg/009.svg +35 -35
  92. data/docs/assets/svg/010.svg +50 -52
  93. data/docs/assets/svg/011.svg +30 -30
  94. data/docs/assets/svg/012.svg +35 -35
  95. data/docs/assets/svg/013.svg +104 -104
  96. data/docs/assets/svg/014.svg +80 -80
  97. data/docs/assets/svg/015.svg +28 -28
  98. data/docs/assets/svg/016.svg +54 -54
  99. data/docs/assets/svg/017.svg +35 -35
  100. data/docs/assets/svg/018.svg +37 -37
  101. data/docs/assets/svg/019.svg +95 -95
  102. data/docs/assets/svg/020.svg +53 -53
  103. data/docs/assets/svg/021.svg +42 -42
  104. data/docs/assets/svg/022.svg +68 -68
  105. data/docs/assets/svg/023.svg +32 -32
  106. data/docs/assets/svg/024.svg +23 -23
  107. data/docs/assets/svg/025.svg +99 -99
  108. data/docs/assets/svg/026.svg +19 -19
  109. data/docs/assets/svg/027.svg +50 -50
  110. data/docs/assets/svg/028.svg +21 -21
  111. data/docs/assets/svg/029.svg +69 -69
  112. data/docs/assets/svg/030.svg +35 -35
  113. data/docs/assets/svg/031.svg +15 -15
  114. data/docs/assets/svg/032.svg +40 -41
  115. data/docs/assets/svg/033.svg +38 -38
  116. data/docs/assets/svg/034.svg +40 -40
  117. data/docs/assets/svg/035.svg +39 -39
  118. data/docs/assets/svg/036.svg +17 -17
  119. data/docs/assets/svg/037.svg +26 -26
  120. data/docs/assets/svg/038.svg +39 -39
  121. data/docs/assets/svg/039.svg +7 -7
  122. data/docs/assets/svg/040.svg +67 -67
  123. data/docs/assets/svg/041.svg +49 -49
  124. data/docs/assets/svg/042.svg +15 -15
  125. data/docs/assets/svg/043.svg +127 -127
  126. data/docs/assets/svg/044.svg +2 -2
  127. data/docs/assets/svg/045.svg +56 -56
  128. data/docs/assets/svg/046.svg +647 -647
  129. data/docs/assets/svg/047.svg +26 -26
  130. data/docs/assets/svg/048.svg +69 -69
  131. data/docs/assets/svg/049.svg +44 -44
  132. data/docs/assets/svg/050.svg +100 -100
  133. data/docs/assets/svg/051.svg +96 -96
  134. data/docs/assets/svg/052.svg +50 -50
  135. data/docs/assets/svg/053.svg +41 -41
  136. data/docs/assets/svg/054.svg +16 -15
  137. data/docs/assets/svg/055.svg +10 -9
  138. data/docs/assets/svg/056.svg +27 -29
  139. data/docs/assets/svg/057.svg +91 -0
  140. data/docs/assets/svg/058.svg +95 -0
  141. data/docs/assets/svg/059.svg +111 -0
  142. data/docs/assets/svg/060.svg +134 -0
  143. data/docs/assets/svg/061.svg +193 -0
  144. data/docs/assets/svg/062.svg +179 -0
  145. data/docs/assets/svg/063.svg +94 -0
  146. data/docs/documentation.md +9 -0
  147. data/docs/documentation_ja.md +9 -0
  148. data/lib/rsyntaxtree/base_graph.rb +94 -4
  149. data/lib/rsyntaxtree/element.rb +4 -3
  150. data/lib/rsyntaxtree/lsif_graph.rb +298 -0
  151. data/lib/rsyntaxtree/string_parser.rb +5 -5
  152. data/lib/rsyntaxtree/svg_graph.rb +201 -93
  153. data/lib/rsyntaxtree/version.rb +1 -1
  154. data/lib/rsyntaxtree.rb +10 -1
  155. data/test/example_verify_test.rb +2 -0
  156. data/test/lsif_test.rb +290 -0
  157. data/test/overlap_test.rb +122 -0
  158. metadata +27 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0e09c9414509d88c6bb4d1bc9b8acdb5384094e642a1fb4765c62a4ffe6a4ba0
4
- data.tar.gz: c4f617a630db5edf5373ceeeb4c03b889cc84d548201a4746f448386db2efebe
3
+ metadata.gz: e76c90ef8a999398071a513ebb2252ed2ef4db01c2fcfa1873c0b905a87c8e56
4
+ data.tar.gz: 81b4e5e68126540111ee2533f4c8d807dac86a31eece4595718634edd40c989f
5
5
  SHA512:
6
- metadata.gz: 3d1e620f2662c43e3c1c66b55750ae252fa90e978ddfebed0fc5937cc8265014f3fc02aed3ee3627b4a2de264709c58e2e076884c1f0c869c8868db9dcf42237
7
- data.tar.gz: 7f6f5d8ccb8b0f3f50363e5e1809ec32461e677458ec4b53bfbe383ef48889ed6ff8f2fda1edfc47fafcf4747857fcf260a1007d083733e20aeb4eefaf987b00
6
+ metadata.gz: 54ae414b09823cf07637535896d47f0a639cf63d3142997bf13a3d08bd6dc09dee156df1c1cccf48d3acafa3fe8138c58fccfc0d411787843fbd6f4667a85cf9
7
+ data.tar.gz: 6f407aa8d12d08d6e1a75b0d10ced8a6cdbcf42739ef7fbf2a1a4d71e34d94ce36c00711ccb6a0c57022e5eb0763aae2d3d50a6ac3f0f3779aa9b42c90a37dd3
@@ -0,0 +1,521 @@
1
+ # LSIF: Linguistic Structure Interchange Format
2
+
3
+ Version: 0.2.0 (Draft)
4
+ Date: 2026-03-30
5
+
6
+ ## Overview
7
+
8
+ LSIF (Linguistic Structure Interchange Format) is a JSON-based interchange format for linguistic structural data. It is designed to be:
9
+
10
+ - **Self-contained**: consumers can fully reconstruct and render structures without parsing source notation
11
+ - **Tool-agnostic**: any tool — including LLMs — can produce or consume this format
12
+ - **Extensible**: new relationship types, data layers, and metadata can be added without breaking existing consumers
13
+ - **Structure-neutral**: not limited to trees or to syntax; can represent any linguistic structure (semantic networks, discourse structures, phonological representations, dependency graphs, etc.)
14
+ - **Layer-composable**: multiple independent information layers can coexist within a single document
15
+
16
+ ## File extension
17
+
18
+ `.lsif.json`
19
+
20
+ ## Primary use cases
21
+
22
+ 1. Multi-plane 3D visualization (shear-transformed parallel/perpendicular planes)
23
+ 2. Programmatic analysis of linguistic structures
24
+ 3. Interoperability between linguistic tools and LLMs
25
+ 4. Multi-layer structural composition (syntax, semantics, prosody, discourse, etc.)
26
+
27
+ ## Conformance levels
28
+
29
+ LSIF defines three conformance levels. **Core** is the base level; **Rendered** and **Layered** each extend it. The two extensions are orthogonal: either one can be combined with **Core** independently of the other, and **Layered** can also be applied on top of **Rendered**.
30
+
31
+ ### Core
32
+
33
+ The minimal structural representation. Contains only node identities, labels, and relationships.
34
+
35
+ **Required fields**: `lsif`, `nodes` (with `id` and `label`), `edges`, `paths`
36
+
37
+ **Use cases**: LLM-generated structures, partial structure descriptions, structural exchange between tools without visual rendering.
38
+
39
+ A Core-level LSIF represents the abstract structure itself — not necessarily a tree, not necessarily syntactic. It could be a semantic network, a discourse graph, or any set of labeled nodes and typed relationships.
40
+
41
+ ### Rendered
42
+
43
+ Core plus visual layout information. Adds geometry, position, style, and tree-convenience fields.
44
+
45
+ **Additional fields**: `geometry`, `nodes[].position`, `nodes[].style`, `nodes[].type`, `nodes[].level`, `nodes[].parent`, `nodes[].children`
46
+
47
+ **Use cases**: output from rendering tools (e.g. RSyntaxTree), input to visualization tools.
48
+
49
+ ### Layered
50
+
51
+ Core or Rendered plus additional information layers. Each layer is an independent, self-describing data plane that adds structured information to existing nodes and edges.
52
+
53
+ **Additional fields**: `layers`
54
+
55
+ **Use cases**: multi-stratal linguistic descriptions, LLM-assisted structure enrichment, multi-plane visualization.
56
+
57
+ ## Schema
58
+
59
+ ### Top-level structure
60
+
61
+ ```json
62
+ {
63
+ "lsif": { ... },
64
+ "meta": { ... },
65
+ "geometry": { ... },
66
+ "nodes": [ ... ],
67
+ "edges": [ ... ],
68
+ "paths": [ ... ],
69
+ "layers": [ ... ]
70
+ }
71
+ ```
72
+
73
+ ### `lsif` (required)
74
+
75
+ Format identification and version.
76
+
77
+ ```json
78
+ {
79
+ "version": "0.2.0",
80
+ "generator": "rsyntaxtree 1.4.0",
81
+ "level": "rendered"
82
+ }
83
+ ```
84
+
85
+ | Field | Type | Required | Description |
86
+ |-------|------|----------|-------------|
87
+ | `version` | string | Yes | LSIF schema version (semver) |
88
+ | `generator` | string or null | Yes | Tool/agent that produced this file. `null` if hand-authored |
89
+ | `level` | string | No | Conformance level: `"core"`, `"rendered"`, or `"layered"`. Defaults to `"core"` if omitted |
90
+
91
+ ### `meta` (optional)
92
+
93
+ Provenance information. Consumers MUST NOT depend on this section for rendering or analysis. It exists solely for round-trip editing and debugging.
94
+
95
+ ```json
96
+ {
97
+ "source": {
98
+ "format": "rsyntaxtree-bracket",
99
+ "input": "[TP [DP_i_ John] [T' [T__0__ pres] [VP ...]]]",
100
+ "params": {
101
+ "font_style": "serif",
102
+ "font_size": 16,
103
+ "color": "modern",
104
+ "connector": "auto",
105
+ "connector_height": 2.0,
106
+ "line_width": 1,
107
+ "symmetrize": false,
108
+ "polyline": false,
109
+ "hide_default_connectors": false,
110
+ "transparent": false
111
+ }
112
+ }
113
+ }
114
+ ```
115
+
116
+ | Field | Type | Description |
117
+ |-------|------|-------------|
118
+ | `source` | object or null | Original input data. `null` if not from a specific tool |
119
+ | `source.format` | string | Input format identifier (e.g. `"rsyntaxtree-bracket"`, `"penn-treebank"`, `"llm-generated"`) |
120
+ | `source.input` | string | Raw input text |
121
+ | `source.params` | object | Rendering parameters as passed to the generator |
122
+
123
+ ### `geometry` (Rendered level)
124
+
125
+ Bounding box and layout direction of the rendered structure. Required at Rendered level; absent at Core level.
126
+
127
+ ```json
128
+ {
129
+ "width": 540.0,
130
+ "height": 480.0,
131
+ "direction": "ttb"
132
+ }
133
+ ```
134
+
135
+ | Field | Type | Description |
136
+ |-------|------|-------------|
137
+ | `width` | number | Total width of the rendered structure (px) |
138
+ | `height` | number | Total height of the rendered structure (px) |
139
+ | `direction` | string | Layout direction: `"ttb"` (top-to-bottom, default) or `"ltr"` (left-to-right). `"rtl"` and `"btt"` are reserved for future use. Omit the field or set it to `"ttb"` for the default |
140
+
141
+ ### `nodes` (required)
142
+
143
+ Array of all nodes. At minimum, each node must have `id` and `label`.
144
+
145
+ #### Core-level node (minimal)
146
+
147
+ ```json
148
+ {
149
+ "id": 1,
150
+ "label": {
151
+ "raw": "VP",
152
+ "lines": [
153
+ { "segments": [{ "text": "VP", "decorations": [] }] }
154
+ ]
155
+ }
156
+ }
157
+ ```
158
+
159
+ #### Rendered-level node (full)
160
+
161
+ ```json
162
+ {
163
+ "id": 1,
164
+ "type": "node",
165
+ "level": 0,
166
+ "label": {
167
+ "raw": "DP_i_",
168
+ "lines": [
169
+ {
170
+ "segments": [
171
+ { "text": "DP", "decorations": [] },
172
+ { "text": "i", "decorations": ["subscript"] }
173
+ ]
174
+ }
175
+ ]
176
+ },
177
+ "position": {
178
+ "x": 50.0,
179
+ "y": 80.0,
180
+ "content_width": 35.0,
181
+ "content_height": 22.0,
182
+ "subtree_width": 100.0
183
+ },
184
+ "style": {
185
+ "color": "#0072B2",
186
+ "enclosure": "none",
187
+ "triangle": false
188
+ },
189
+ "parent": null,
190
+ "children": [2, 3]
191
+ }
192
+ ```
193
+
194
+ #### Node fields
195
+
196
+ | Field | Type | Level | Description |
197
+ |-------|------|-------|-------------|
198
+ | `id` | integer | Core | Unique node identifier (1-based) |
199
+ | `label` | object | Core | Node label content |
200
+ | `type` | string | Rendered | `"node"` (internal) or `"leaf"` (terminal). Tree-specific |
201
+ | `level` | integer | Rendered | Depth in structure (0 = root). Tree-specific |
202
+ | `position` | object | Rendered | Layout coordinates and dimensions |
203
+ | `style` | object | Rendered | Resolved visual properties |
204
+ | `parent` | integer or null | Rendered | Parent node ID. Convenience field derived from `edges` |
205
+ | `children` | array of integer | Rendered | Child node IDs. Convenience field derived from `edges` |
206
+
207
+ #### `label` object
208
+
209
+ | Field | Type | Description |
210
+ |-------|------|-------------|
211
+ | `raw` | string | Original markup text (e.g. `"DP_i_"`) |
212
+ | `lines` | array | Array of line objects (for multi-line labels) |
213
+
214
+ #### `label.lines[]` element
215
+
216
+ | Field | Type | Description |
217
+ |-------|------|-------------|
218
+ | `segments` | array | Array of text segments within a single line |
219
+
220
+ #### `label.lines[].segments[]` element
221
+
222
+ | Field | Type | Description |
223
+ |-------|------|-------------|
224
+ | `text` | string | Plain text content |
225
+ | `decorations` | array of string | Applied decorations (see below) |
226
+
227
+ **Decoration values:**
228
+
229
+ | Value | Description |
230
+ |-------|-------------|
231
+ | `"bold"` | Bold text |
232
+ | `"italic"` | Italic text |
233
+ | `"bolditalic"` | Bold + italic |
234
+ | `"subscript"` | Subscript (smaller, lowered) |
235
+ | `"superscript"` | Superscript (smaller, raised) |
236
+ | `"small"` | Small text |
237
+ | `"overline"` | Line above text |
238
+ | `"underline"` | Line below text |
239
+ | `"linethrough"` | Strikethrough |
240
+ | `"box"` | Rectangular border around text |
241
+ | `"circle"` | Circular border around text |
242
+ | `"hatched"` | Hatched fill (combined with box/circle) |
243
+ | `"bar"` | Horizontal bar |
244
+ | `"bstroke"` | Bold stroke variant |
245
+ | `"arrow_to_l"` | Left-pointing arrow |
246
+ | `"arrow_to_r"` | Right-pointing arrow |
247
+
248
+ Multiple decorations can be combined (e.g. `["bold", "subscript"]`).
249
+
250
+ #### `position` object (Rendered level)
251
+
252
+ All coordinates are in the rendering coordinate space (origin at top-left, y increases downward).
253
+
254
+ | Field | Type | Description |
255
+ |-------|------|-------------|
256
+ | `x` | number | Horizontal position of the node label (left edge) |
257
+ | `y` | number | Vertical position of the node label (top edge) |
258
+ | `content_width` | number | Width of the label text/enclosure |
259
+ | `content_height` | number | Height of the label text/enclosure |
260
+ | `subtree_width` | number | Width of the subtree rooted at this node |
261
+
262
+ #### `style` object (Rendered level)
263
+
264
+ All values are resolved (not symbolic). Colors are hex strings or named CSS colors.
265
+
266
+ | Field | Type | Description |
267
+ |-------|------|-------------|
268
+ | `color` | string or null | Resolved node color (e.g. `"#0072B2"`, `"red"`). `null` for default |
269
+ | `enclosure` | string | `"none"`, `"brackets"`, `"rectangle"`, `"bold_rectangle"` |
270
+ | `triangle` | boolean | Whether this node uses a triangle connector to its parent |
271
+
272
+ ### `edges` (required)
273
+
274
+ Array of structural relationships. One entry per relationship. May be empty.
275
+
276
+ ```json
277
+ {
278
+ "from": 1,
279
+ "to": 2,
280
+ "type": "dominance",
281
+ "connector": "line"
282
+ }
283
+ ```
284
+
285
+ | Field | Type | Level | Description |
286
+ |-------|------|-------|-------------|
287
+ | `from` | integer | Core | Source node ID |
288
+ | `to` | integer | Core | Target node ID |
289
+ | `type` | string | Core | Relationship type |
290
+ | `connector` | string or null | Rendered | Visual connector type. `null` or absent at Core level |
291
+
292
+ **Edge type values (current and planned):**
293
+
294
+ | Value | Description |
295
+ |-------|-------------|
296
+ | `"dominance"` | Tree edge (parent governs child) |
297
+ | `"dependency"` | Dependency relation |
298
+ | `"correspondence"` | Inter-layer mapping |
299
+ | `"binding"` | Binding/coreference relation |
300
+ | `"agreement"` | Agreement relation |
301
+ | `"semantic_role"` | Semantic role assignment |
302
+ | `"discourse"` | Discourse relation |
303
+
304
+ The `type` field is open-ended: any string value is valid. The values above are conventionally defined; tools should use them where applicable.
305
+
306
+ ### `paths` (required)
307
+
308
+ Array of non-structural, directional relationships (movement arrows, etc.). May be empty.
309
+
310
+ ```json
311
+ {
312
+ "from": 5,
313
+ "to": 2,
314
+ "direction": "forward",
315
+ "type": "movement"
316
+ }
317
+ ```
318
+
319
+ | Field | Type | Description |
320
+ |-------|------|-------------|
321
+ | `from` | integer | Source node ID |
322
+ | `to` | integer | Target node ID |
323
+ | `direction` | string | `"forward"` (->), `"backward"` (<-), or `"bidirectional"` (<->) |
324
+ | `type` | string | Relationship type (open-ended) |
325
+
326
+ ### `layers` (Layered level)
327
+
328
+ Array of independent information layers. Each layer adds structured data to existing nodes and/or edges without modifying the Core structure. Absent at Core and Rendered levels.
329
+
330
+ ```json
331
+ {
332
+ "layers": [
333
+ {
334
+ "id": "semantics",
335
+ "label": "Semantic roles",
336
+ "description": "Thematic role assignments for each predicate-argument relation",
337
+ "node_data": {
338
+ "3": { "theta_role": "agent", "animacy": "animate" },
339
+ "9": { "theta_role": "theme", "animacy": "inanimate" }
340
+ },
341
+ "edge_data": {
342
+ "0": { "semantic_type": "predication" }
343
+ }
344
+ },
345
+ {
346
+ "id": "prosody",
347
+ "label": "Prosodic structure",
348
+ "description": "Prosodic phrasing and prominence",
349
+ "node_data": {
350
+ "3": { "prosodic_word": "PWd1", "prominence": "primary" }
351
+ }
352
+ }
353
+ ]
354
+ }
355
+ ```
356
+
357
+ #### Layer fields
358
+
359
+ | Field | Type | Description |
360
+ |-------|------|-------------|
361
+ | `id` | string | Unique layer identifier |
362
+ | `label` | string | Human-readable layer name |
363
+ | `description` | string or null | Optional description of the layer's content and purpose |
364
+ | `node_data` | object or null | Keyed by node ID (as string). Values are arbitrary objects |
365
+ | `edge_data` | object or null | Keyed by edge index (as string). Values are arbitrary objects |
366
+
367
+ **Design notes:**
368
+
369
+ - Layer data is keyed by node ID (`node_data`) or edge index (`edge_data`), not embedded within node/edge objects, so that the Core structure is left untouched.
370
+ - The value objects within `node_data` and `edge_data` are schema-free; each layer defines its own semantics.
371
+ - Layers can reference each other via edge `type: "correspondence"` in the main `edges` array, or via shared node IDs.
372
+ - In a future version, a layer may also introduce its own nodes and edges (e.g., a semantic structure with different topology), stored in separate `nodes` and `edges` fields within the layer. This capability is reserved for future specification and is not yet defined.
373
+
374
+ ## Design principles
375
+
376
+ ### Self-containedness
377
+
378
+ The `nodes`, `edges`, and `paths` sections contain all information needed to render or analyze the structure. No consumer should need to parse `meta.source.input` or understand any source notation.
379
+
380
+ ### Resolved values
381
+
382
+ Style properties (Rendered level) are stored as resolved values, not symbolic references:
383
+ - Colors: `"#0072B2"` not `"modern"`
384
+ - Enclosures: `"brackets"` not `"#"`
385
+ - Decorations: `["bold", "subscript"]` not `"**_text_**"`
386
+
387
+ ### Extensibility
388
+
389
+ - New fields can be added to any object without breaking existing consumers
390
+ - New `type` values for `edges` and `paths` can be introduced freely
391
+ - The `layers` mechanism provides open-ended data composition without schema changes
392
+ - The `lsif.version` field tracks breaking changes
393
+
394
+ ### Graceful degradation
395
+
396
+ Consumers should process only the fields and levels they understand, and ignore the rest. A Rendered-level consumer receiving a Core-level document should be able to compute its own layout. A Core-level consumer receiving a Rendered-level document should ignore `position`, `style`, and other Rendered fields.
397
+
398
+ ### Versioning policy
399
+
400
+ - **Patch** (0.x.y): documentation clarifications, new optional fields
401
+ - **Minor** (0.x.0): new conformance-level features, new conventional type values
402
+ - **Major** (x.0.0): breaking changes to existing field semantics or removal of fields
403
+
404
+ ## RSyntaxTree output
405
+
406
+ RSyntaxTree always produces **Rendered**-level LSIF with `lsif.level` set to `"rendered"`. All Core and Rendered fields are populated. The `layers` section is not included.
407
+
408
+ ## Complete example (Rendered level)
409
+
410
+ ```json
411
+ {
412
+ "lsif": {
413
+ "version": "0.2.0",
414
+ "generator": "rsyntaxtree 1.4.0",
415
+ "level": "rendered"
416
+ },
417
+ "meta": {
418
+ "source": {
419
+ "format": "rsyntaxtree-bracket",
420
+ "input": "[TP [DP_i_ John] [VP [V sleeps]]]",
421
+ "params": {
422
+ "font_style": "sans",
423
+ "font_size": 16,
424
+ "color": "modern"
425
+ }
426
+ }
427
+ },
428
+ "geometry": {
429
+ "width": 280.0,
430
+ "height": 240.0
431
+ },
432
+ "nodes": [
433
+ {
434
+ "id": 1,
435
+ "type": "node",
436
+ "level": 0,
437
+ "label": {
438
+ "raw": "TP",
439
+ "lines": [{ "segments": [{ "text": "TP", "decorations": [] }] }]
440
+ },
441
+ "position": { "x": 100.0, "y": 20.0, "content_width": 30.0, "content_height": 22.0, "subtree_width": 280.0 },
442
+ "style": { "color": "#0072B2", "enclosure": "none", "triangle": false },
443
+ "parent": null,
444
+ "children": [2, 4]
445
+ },
446
+ {
447
+ "id": 2,
448
+ "type": "node",
449
+ "level": 1,
450
+ "label": {
451
+ "raw": "DP_i_",
452
+ "lines": [{ "segments": [{ "text": "DP", "decorations": [] }, { "text": "i", "decorations": ["subscript"] }] }]
453
+ },
454
+ "position": { "x": 30.0, "y": 80.0, "content_width": 35.0, "content_height": 22.0, "subtree_width": 100.0 },
455
+ "style": { "color": "#0072B2", "enclosure": "none", "triangle": false },
456
+ "parent": 1,
457
+ "children": [3]
458
+ },
459
+ {
460
+ "id": 3,
461
+ "type": "leaf",
462
+ "level": 2,
463
+ "label": {
464
+ "raw": "John",
465
+ "lines": [{ "segments": [{ "text": "John", "decorations": [] }] }]
466
+ },
467
+ "position": { "x": 30.0, "y": 140.0, "content_width": 40.0, "content_height": 22.0, "subtree_width": 100.0 },
468
+ "style": { "color": "#009E73", "enclosure": "none", "triangle": false },
469
+ "parent": 2,
470
+ "children": []
471
+ },
472
+ {
473
+ "id": 4,
474
+ "type": "node",
475
+ "level": 1,
476
+ "label": {
477
+ "raw": "VP",
478
+ "lines": [{ "segments": [{ "text": "VP", "decorations": [] }] }]
479
+ },
480
+ "position": { "x": 170.0, "y": 80.0, "content_width": 30.0, "content_height": 22.0, "subtree_width": 180.0 },
481
+ "style": { "color": "#0072B2", "enclosure": "none", "triangle": false },
482
+ "parent": 1,
483
+ "children": [5]
484
+ },
485
+ {
486
+ "id": 5,
487
+ "type": "node",
488
+ "level": 2,
489
+ "label": {
490
+ "raw": "V",
491
+ "lines": [{ "segments": [{ "text": "V", "decorations": [] }] }]
492
+ },
493
+ "position": { "x": 175.0, "y": 140.0, "content_width": 20.0, "content_height": 22.0, "subtree_width": 100.0 },
494
+ "style": { "color": "#0072B2", "enclosure": "none", "triangle": false },
495
+ "parent": 4,
496
+ "children": [6]
497
+ },
498
+ {
499
+ "id": 6,
500
+ "type": "leaf",
501
+ "level": 3,
502
+ "label": {
503
+ "raw": "sleeps",
504
+ "lines": [{ "segments": [{ "text": "sleeps", "decorations": [] }] }]
505
+ },
506
+ "position": { "x": 155.0, "y": 200.0, "content_width": 60.0, "content_height": 22.0, "subtree_width": 100.0 },
507
+ "style": { "color": "#009E73", "enclosure": "none", "triangle": false },
508
+ "parent": 5,
509
+ "children": []
510
+ }
511
+ ],
512
+ "edges": [
513
+ { "from": 1, "to": 2, "type": "dominance", "connector": "line" },
514
+ { "from": 1, "to": 4, "type": "dominance", "connector": "line" },
515
+ { "from": 2, "to": 3, "type": "dominance", "connector": "line" },
516
+ { "from": 4, "to": 5, "type": "dominance", "connector": "line" },
517
+ { "from": 5, "to": 6, "type": "dominance", "connector": "line" }
518
+ ],
519
+ "paths": []
520
+ }
521
+ ```
data/.gitignore CHANGED
@@ -7,6 +7,7 @@ Gemfile.lock
7
7
  *.bak
8
8
  *.~
9
9
  *.log
10
+ *.gem
10
11
  log/
11
12
  logs/
12
13
 
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 4.0.0
1
+ 4.0.1
data/CHANGELOG.md CHANGED
@@ -1,21 +1,35 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.5.0] - 2026-04
4
+
5
+ ### Added
6
+ - Left-to-right tree layout (`-d ltr` / `--direction ltr`)
7
+ - LSIF `geometry.direction` field for layout direction
8
+ - LTR-aware path drawing (movement arrows route rightward in ⊃ shape)
9
+ - LTR-aware line-type connections (direct lines between nodes)
10
+ - Examples 058-063: LTR versions of classification trees and vP-shell with paths
11
+
12
+ ### Fixed
13
+ - Node label overlap when adjacent subtrees have long labels
14
+
15
+ ### Improved
16
+ - TTB path bulge proportional to endpoint distance (reduced excess)
17
+
3
18
  ## [1.4.0] - 2026-01
4
19
 
5
20
  ### Added
21
+ - LSIF (Linguistic Structure Interchange Format) JSON output (`-f lsif`)
6
22
  - Per-node coloring with `@color:` syntax (named colors and hex colors)
7
23
  - Penn Treebank format support with escaped parentheses (`\(`, `\)`)
8
24
  - Standard input support for piping tree data
9
25
  - Configuration file support (`.rsyntaxtreerc`)
10
26
  - Config file validation with helpful error messages
11
27
 
12
- ### Fixed
13
- - Triangle connector now correctly draws from parent to child (not child to grandchild)
14
-
15
28
  ### Documentation
16
29
  - Added TikZ output limitations section
17
30
  - Improved README with Features section
18
31
  - Added examples for per-node coloring (054, 055, 056)
32
+ - Added example 057: Subscript and superscript demo
19
33
 
20
34
  ## [1.3.2] - 2024
21
35
 
data/README.md CHANGED
@@ -58,7 +58,7 @@ Connect nodes with lines or arrows:
58
58
 
59
59
  ### Multiple Output Formats
60
60
 
61
- Generate trees in PNG, SVG, PDF, JPG, or GIF format.
61
+ Generate trees in PNG, SVG, PDF, JPG, GIF, or LSIF (JSON) format.
62
62
 
63
63
  ## Web Interface
64
64
 
@@ -143,13 +143,14 @@ Usage:
143
143
  | Option | Description | Default |
144
144
  |--------|-------------|---------|
145
145
  | `-o, --outdir` | Output directory | `./` |
146
- | `-f, --format` | Output format: png, gif, jpg, pdf, svg | `png` |
146
+ | `-f, --format` | Output format: png, gif, jpg, pdf, svg, lsif | `png` |
147
147
  | `-l, --leafstyle` | Leaf style: auto, triangle, bar, nothing | `auto` |
148
148
  | `-n, --fontstyle` | Font style: sans, serif, cjk, mono | `sans` |
149
149
  | `-s, --fontsize` | Font size: 8-26 | `16` |
150
150
  | `-c, --color` | Color mode: modern, traditional, off | `modern` |
151
151
  | `-y, --symmetrize` | Symmetrical tree: on, off | `off` |
152
152
  | `-p, --polyline` | Polyline connectors: on, off | `off` |
153
+ | `-d, --direction` | Tree layout direction: ttb, ltr | `ttb` |
153
154
 
154
155
  Run `rsyntaxtree -h` for the full list of options.
155
156
 
data/bin/rsyntaxtree CHANGED
@@ -12,7 +12,7 @@ require_relative '../lib/rsyntaxtree/format_converter'
12
12
  CONFIG_VALIDATORS = {
13
13
  outdir: ->(v) { FileTest.directory?(v) ? nil : "must be an existing directory path" },
14
14
  outfilename: ->(v) { v.is_a?(String) ? nil : "must be a string" },
15
- format: ->(v) { /\A(png|jpg|gif|pdf|svg)\z/ =~ v.to_s ? nil : "must be png, jpg, gif, pdf, or svg" },
15
+ format: ->(v) { /\A(png|jpg|gif|pdf|svg|lsif)\z/ =~ v.to_s ? nil : "must be png, jpg, gif, pdf, svg, or lsif" },
16
16
  leafstyle: ->(v) { /\A(auto|triangle|bar|nothing)\z/ =~ v.to_s ? nil : "must be auto, triangle, bar, or nothing" },
17
17
  fontstyle: ->(v) { /\A(sans|serif|cjk|mono)\z/ =~ v.to_s ? nil : "must be sans, serif, cjk, or mono" },
18
18
  font: ->(v) { v.nil? || File.exist?(v) ? nil : "must be path to an existing ttf font" },
@@ -23,7 +23,8 @@ CONFIG_VALIDATORS = {
23
23
  symmetrize: ->(v) { /\A(on|off|true|false)\z/ =~ v.to_s ? nil : "must be on or off" },
24
24
  transparent: ->(v) { /\A(on|off|true|false)\z/ =~ v.to_s ? nil : "must be on or off" },
25
25
  polyline: ->(v) { /\A(on|off|true|false)\z/ =~ v.to_s ? nil : "must be on or off" },
26
- hide_default_connectors: ->(v) { /\A(on|off|true|false)\z/ =~ v.to_s ? nil : "must be on or off" }
26
+ hide_default_connectors: ->(v) { /\A(on|off|true|false)\z/ =~ v.to_s ? nil : "must be on or off" },
27
+ direction: ->(v) { /\A(ttb|ltr)\z/ =~ v.to_s ? nil : "must be ttb or ltr" }
27
28
  }.freeze
28
29
 
29
30
  # Validate configuration and return errors/warnings
@@ -97,7 +98,7 @@ opts = Optimist.options do
97
98
 
98
99
  opt :outdir, "Output directory", default: "./"
99
100
  opt :outfilename, "Output file base name", default: "syntree"
100
- opt :format, "Output format: png, jpg, gif, pdf, or svg", default: "png"
101
+ opt :format, "Output format: png, jpg, gif, pdf, svg, or lsif", default: "png"
101
102
  opt :leafstyle, "visual style of tree leaves: auto, triangle, bar, or nothing", default: "auto"
102
103
  opt :fontstyle, "Font style (available when ttf font is specified): sans, serif, cjk, mono", default: "sans"
103
104
  opt :font, "Path to a ttf font used to generate tree (optional)", type: String
@@ -109,11 +110,13 @@ opts = Optimist.options do
109
110
  opt :transparent, "Make background transparent: on or off", default: "off"
110
111
  opt :polyline, "draw polyline connectors: on or off", default: "off"
111
112
  opt :hide_default_connectors, "make default connectors transparent: on or off", default: "off"
113
+ opt :direction, "Tree layout direction: ttb (top-to-bottom) or ltr (left-to-right)", default: "ttb"
112
114
  opt :help, "This is a custom help message", short: :h
113
115
  end
114
116
 
115
117
  Optimist.die :outdir, "must be an exsting directory path" unless FileTest.directory?(opts[:outdir])
116
- Optimist.die :format, "must be png, pdf, or svg" unless /\A(png|jpg|gif|pdf|svg)\z/ =~ opts[:format]
118
+ Optimist.die :direction, "must be ttb or ltr" unless /\A(ttb|ltr)\z/ =~ opts[:direction]
119
+ Optimist.die :format, "must be png, pdf, svg, or lsif" unless /\A(png|jpg|gif|pdf|svg|lsif)\z/ =~ opts[:format]
117
120
  Optimist.die :leafstyle, "must be auto, triangle, bar, or nothing" unless /\A(auto|triangle|bar|nothing)\z/ =~ opts[:leafstyle]
118
121
  Optimist.die :fontstyle, "must be sans, serif, cjk, or mono" unless /\A(sans|serif|cjk|mono)\z/ =~ opts[:fontstyle]
119
122
  Optimist.die :font, "must be path to an existing ttf font" if opts[:font] && !File.exist?(opts[:font])
@@ -170,7 +173,8 @@ begin
170
173
  rsg = RSyntaxTree::RSGenerator.new(string_opts)
171
174
  ext = string_opts[:format]
172
175
  outfilename = string_opts[:outfilename] || "syntree"
173
- filepath = File.expand_path(string_opts[:outdir]) + "/#{outfilename}." + ext
176
+ file_ext = ext == "lsif" ? "lsif.json" : ext
177
+ filepath = File.expand_path(string_opts[:outdir]) + "/#{outfilename}.#{file_ext}"
174
178
  draw_method = "draw_#{ext}"
175
179
  output = rsg.send(draw_method)
176
180
  File.binwrite(filepath, output)
@@ -54,6 +54,8 @@ Dir.glob("*.md", base: examples_dir).map do |md|
54
54
  opts[:symmetrize] = value
55
55
  when "connector"
56
56
  opts[:leafstyle] = value
57
+ when "direction"
58
+ opts[:direction] = value
57
59
  when "font"
58
60
  opts[:fontstyle] = case value
59
61
  when /mono/i
@@ -1,5 +1,6 @@
1
1
  ---
2
2
  name: "010"
3
+ caption: "vP-shell with movement paths"
3
4
  category: "Generative Grammar"
4
5
  polyline: "off"
5
6
  color: "modern"