rsyntaxtree 1.3.2 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/docs/json-schema-spec.md +521 -0
  3. data/.github/FUNDING.yml +6 -0
  4. data/.gitignore +2 -0
  5. data/.ruby-version +1 -0
  6. data/CHANGELOG.md +48 -0
  7. data/Gemfile +6 -0
  8. data/LICENSE +21 -0
  9. data/README.md +116 -52
  10. data/bin/rsyntaxtree +113 -21
  11. data/dev/generate_examples.rb +2 -0
  12. data/docs/_examples/010.md +1 -0
  13. data/docs/_examples/054.md +22 -0
  14. data/docs/_examples/055.md +24 -0
  15. data/docs/_examples/056.md +25 -0
  16. data/docs/_examples/057.md +34 -0
  17. data/docs/_examples/058.md +47 -0
  18. data/docs/_examples/059.md +54 -0
  19. data/docs/_examples/060.md +72 -0
  20. data/docs/_examples/061.md +97 -0
  21. data/docs/_examples/062.md +96 -0
  22. data/docs/_examples/063.md +53 -0
  23. data/docs/assets/img/000.png +0 -0
  24. data/docs/assets/img/001.png +0 -0
  25. data/docs/assets/img/002.png +0 -0
  26. data/docs/assets/img/003.png +0 -0
  27. data/docs/assets/img/004.png +0 -0
  28. data/docs/assets/img/005.png +0 -0
  29. data/docs/assets/img/006.png +0 -0
  30. data/docs/assets/img/007.png +0 -0
  31. data/docs/assets/img/008.png +0 -0
  32. data/docs/assets/img/009.png +0 -0
  33. data/docs/assets/img/010.png +0 -0
  34. data/docs/assets/img/011.png +0 -0
  35. data/docs/assets/img/012.png +0 -0
  36. data/docs/assets/img/013.png +0 -0
  37. data/docs/assets/img/014.png +0 -0
  38. data/docs/assets/img/015.png +0 -0
  39. data/docs/assets/img/016.png +0 -0
  40. data/docs/assets/img/017.png +0 -0
  41. data/docs/assets/img/018.png +0 -0
  42. data/docs/assets/img/019.png +0 -0
  43. data/docs/assets/img/020.png +0 -0
  44. data/docs/assets/img/021.png +0 -0
  45. data/docs/assets/img/022.png +0 -0
  46. data/docs/assets/img/023.png +0 -0
  47. data/docs/assets/img/024.png +0 -0
  48. data/docs/assets/img/025.png +0 -0
  49. data/docs/assets/img/026.png +0 -0
  50. data/docs/assets/img/027.png +0 -0
  51. data/docs/assets/img/028.png +0 -0
  52. data/docs/assets/img/029.png +0 -0
  53. data/docs/assets/img/030.png +0 -0
  54. data/docs/assets/img/031.png +0 -0
  55. data/docs/assets/img/032.png +0 -0
  56. data/docs/assets/img/033.png +0 -0
  57. data/docs/assets/img/034.png +0 -0
  58. data/docs/assets/img/035.png +0 -0
  59. data/docs/assets/img/036.png +0 -0
  60. data/docs/assets/img/037.png +0 -0
  61. data/docs/assets/img/038.png +0 -0
  62. data/docs/assets/img/039.png +0 -0
  63. data/docs/assets/img/040.png +0 -0
  64. data/docs/assets/img/041.png +0 -0
  65. data/docs/assets/img/042.png +0 -0
  66. data/docs/assets/img/043.png +0 -0
  67. data/docs/assets/img/044.png +0 -0
  68. data/docs/assets/img/045.png +0 -0
  69. data/docs/assets/img/046.png +0 -0
  70. data/docs/assets/img/047.png +0 -0
  71. data/docs/assets/img/048.png +0 -0
  72. data/docs/assets/img/049.png +0 -0
  73. data/docs/assets/img/050.png +0 -0
  74. data/docs/assets/img/051.png +0 -0
  75. data/docs/assets/img/052.png +0 -0
  76. data/docs/assets/img/053.png +0 -0
  77. data/docs/assets/img/054.png +0 -0
  78. data/docs/assets/img/055.png +0 -0
  79. data/docs/assets/img/056.png +0 -0
  80. data/docs/assets/img/057.png +0 -0
  81. data/docs/assets/img/058.png +0 -0
  82. data/docs/assets/img/059.png +0 -0
  83. data/docs/assets/img/060.png +0 -0
  84. data/docs/assets/img/061.png +0 -0
  85. data/docs/assets/img/062.png +0 -0
  86. data/docs/assets/img/063.png +0 -0
  87. data/docs/assets/svg/000.svg +26 -26
  88. data/docs/assets/svg/001.svg +19 -19
  89. data/docs/assets/svg/002.svg +37 -37
  90. data/docs/assets/svg/003.svg +29 -29
  91. data/docs/assets/svg/004.svg +41 -41
  92. data/docs/assets/svg/005.svg +21 -21
  93. data/docs/assets/svg/006.svg +25 -25
  94. data/docs/assets/svg/007.svg +31 -31
  95. data/docs/assets/svg/008.svg +35 -35
  96. data/docs/assets/svg/009.svg +35 -35
  97. data/docs/assets/svg/010.svg +50 -52
  98. data/docs/assets/svg/011.svg +30 -30
  99. data/docs/assets/svg/012.svg +35 -35
  100. data/docs/assets/svg/013.svg +104 -104
  101. data/docs/assets/svg/014.svg +80 -80
  102. data/docs/assets/svg/015.svg +28 -28
  103. data/docs/assets/svg/016.svg +54 -54
  104. data/docs/assets/svg/017.svg +35 -35
  105. data/docs/assets/svg/018.svg +37 -37
  106. data/docs/assets/svg/019.svg +95 -95
  107. data/docs/assets/svg/020.svg +53 -53
  108. data/docs/assets/svg/021.svg +42 -42
  109. data/docs/assets/svg/022.svg +68 -68
  110. data/docs/assets/svg/023.svg +32 -32
  111. data/docs/assets/svg/024.svg +23 -23
  112. data/docs/assets/svg/025.svg +99 -99
  113. data/docs/assets/svg/026.svg +19 -19
  114. data/docs/assets/svg/027.svg +50 -50
  115. data/docs/assets/svg/028.svg +21 -21
  116. data/docs/assets/svg/029.svg +69 -69
  117. data/docs/assets/svg/030.svg +35 -35
  118. data/docs/assets/svg/031.svg +15 -15
  119. data/docs/assets/svg/032.svg +40 -41
  120. data/docs/assets/svg/033.svg +38 -38
  121. data/docs/assets/svg/034.svg +40 -40
  122. data/docs/assets/svg/035.svg +39 -39
  123. data/docs/assets/svg/036.svg +17 -17
  124. data/docs/assets/svg/037.svg +26 -26
  125. data/docs/assets/svg/038.svg +39 -39
  126. data/docs/assets/svg/039.svg +7 -7
  127. data/docs/assets/svg/040.svg +67 -67
  128. data/docs/assets/svg/041.svg +49 -49
  129. data/docs/assets/svg/042.svg +15 -15
  130. data/docs/assets/svg/043.svg +127 -127
  131. data/docs/assets/svg/044.svg +2 -2
  132. data/docs/assets/svg/045.svg +56 -56
  133. data/docs/assets/svg/046.svg +647 -647
  134. data/docs/assets/svg/047.svg +26 -26
  135. data/docs/assets/svg/048.svg +69 -69
  136. data/docs/assets/svg/049.svg +44 -44
  137. data/docs/assets/svg/050.svg +100 -100
  138. data/docs/assets/svg/051.svg +96 -96
  139. data/docs/assets/svg/052.svg +50 -50
  140. data/docs/assets/svg/053.svg +41 -41
  141. data/docs/assets/svg/054.svg +51 -0
  142. data/docs/assets/svg/055.svg +54 -0
  143. data/docs/assets/svg/056.svg +71 -0
  144. data/docs/assets/svg/057.svg +91 -0
  145. data/docs/assets/svg/058.svg +95 -0
  146. data/docs/assets/svg/059.svg +111 -0
  147. data/docs/assets/svg/060.svg +134 -0
  148. data/docs/assets/svg/061.svg +193 -0
  149. data/docs/assets/svg/062.svg +179 -0
  150. data/docs/assets/svg/063.svg +94 -0
  151. data/docs/documentation.md +98 -0
  152. data/docs/documentation_ja.md +99 -1
  153. data/lib/rsyntaxtree/base_graph.rb +99 -9
  154. data/lib/rsyntaxtree/element.rb +6 -4
  155. data/lib/rsyntaxtree/elementlist.rb +3 -5
  156. data/lib/rsyntaxtree/format_converter.rb +65 -0
  157. data/lib/rsyntaxtree/lsif_graph.rb +298 -0
  158. data/lib/rsyntaxtree/markup_parser.rb +13 -2
  159. data/lib/rsyntaxtree/string_parser.rb +6 -6
  160. data/lib/rsyntaxtree/svg_graph.rb +210 -95
  161. data/lib/rsyntaxtree/tikz_generator.rb +131 -0
  162. data/lib/rsyntaxtree/utils.rb +1 -1
  163. data/lib/rsyntaxtree/version.rb +1 -1
  164. data/lib/rsyntaxtree.rb +52 -25
  165. data/rsyntaxtree.gemspec +2 -0
  166. data/syntree.svg +41 -0
  167. data/test/cli_test.rb +262 -0
  168. data/test/example_verify_test.rb +2 -0
  169. data/test/format_converter_test.rb +129 -0
  170. data/test/lsif_test.rb +290 -0
  171. data/test/node_styling_test.rb +239 -0
  172. data/test/overlap_test.rb +122 -0
  173. data/test/tikz_test.rb +89 -0
  174. metadata +76 -6
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b1d293f1051661467a4b39531bb7868073256fc6a40e60affa4dee8daa45187f
4
- data.tar.gz: 79ececb16b1b9394f5027ccec1f4dcba9d4cd541fe8829276eb9c18d5a377c24
3
+ metadata.gz: e76c90ef8a999398071a513ebb2252ed2ef4db01c2fcfa1873c0b905a87c8e56
4
+ data.tar.gz: 81b4e5e68126540111ee2533f4c8d807dac86a31eece4595718634edd40c989f
5
5
  SHA512:
6
- metadata.gz: cd4e3ceb0efc58a58b87b4273525aa9456e6050c5b7dfe41bcc0b790018240d2e1c1a0f34a42c45f78faf6913441f41a57aef30f60daab8ce7cfc24182a8b3ce
7
- data.tar.gz: 76b44cc2c343a43714ab9cfe52c3a04c524fc81296e901796e0b2aa7b8205b73de4b2a88001d269a5fa834a9267e62e3d2a034b2fbde7d7ca471be90d41524ae
6
+ metadata.gz: 54ae414b09823cf07637535896d47f0a639cf63d3142997bf13a3d08bd6dc09dee156df1c1cccf48d3acafa3fe8138c58fccfc0d411787843fbd6f4667a85cf9
7
+ data.tar.gz: 6f407aa8d12d08d6e1a75b0d10ced8a6cdbcf42739ef7fbf2a1a4d71e34d94ce36c00711ccb6a0c57022e5eb0763aae2d3d50a6ac3f0f3779aa9b42c90a37dd3
@@ -0,0 +1,521 @@
1
+ # LSIF: Linguistic Structure Interchange Format
2
+
3
+ Version: 0.2.0 (Draft)
4
+ Date: 2026-03-30
5
+
6
+ ## Overview
7
+
8
+ LSIF (Linguistic Structure Interchange Format) is a JSON-based interchange format for linguistic structural data. It is designed to be:
9
+
10
+ - **Self-contained**: consumers can fully reconstruct and render structures without parsing source notation
11
+ - **Tool-agnostic**: any tool — including LLMs — can produce or consume this format
12
+ - **Extensible**: new relationship types, data layers, and metadata can be added without breaking existing consumers
13
+ - **Structure-neutral**: not limited to trees or to syntax; can represent any linguistic structure (semantic networks, discourse structures, phonological representations, dependency graphs, etc.)
14
+ - **Layer-composable**: multiple independent information layers can coexist within a single document
15
+
16
+ ## File extension
17
+
18
+ `.lsif.json`
19
+
20
+ ## Primary use cases
21
+
22
+ 1. Multi-plane 3D visualization (shear-transformed parallel/perpendicular planes)
23
+ 2. Programmatic analysis of linguistic structures
24
+ 3. Interoperability between linguistic tools and LLMs
25
+ 4. Multi-layer structural composition (syntax, semantics, prosody, discourse, etc.)
26
+
27
+ ## Conformance levels
28
+
29
+ LSIF defines three conformance levels. **Core** is the baseline. **Rendered** and **Layered** each extend it, and the two are orthogonal: layers can be added to either a Core or a Rendered document.
30
+
31
+ ### Core
32
+
33
+ The minimal structural representation. Contains only node identities, labels, and relationships.
34
+
35
+ **Required fields**: `lsif`, `nodes` (with `id` and `label`), `edges`, `paths`
36
+
37
+ **Use cases**: LLM-generated structures, partial structure descriptions, structural exchange between tools without visual rendering.
38
+
39
+ A Core-level LSIF represents the abstract structure itself — not necessarily a tree, not necessarily syntactic. It could be a semantic network, a discourse graph, or any set of labeled nodes and typed relationships.
40
+
41
+ ### Rendered
42
+
43
+ Core plus visual layout information. Adds geometry, position, style, and tree-convenience fields.
44
+
45
+ **Additional fields**: `geometry`, `nodes[].position`, `nodes[].style`, `nodes[].type`, `nodes[].level`, `nodes[].parent`, `nodes[].children`
46
+
47
+ **Use cases**: output from rendering tools (e.g. RSyntaxTree), input to visualization tools.
48
+
49
+ ### Layered
50
+
51
+ Core or Rendered plus additional information layers. Each layer is an independent, self-describing data plane that adds structured information to existing nodes and edges.
52
+
53
+ **Additional fields**: `layers`
54
+
55
+ **Use cases**: multi-stratal linguistic descriptions, LLM-assisted structure enrichment, multi-plane visualization.
56
+
57
+ ## Schema
58
+
59
+ ### Top-level structure
60
+
61
+ ```json
62
+ {
63
+ "lsif": { ... },
64
+ "meta": { ... },
65
+ "geometry": { ... },
66
+ "nodes": [ ... ],
67
+ "edges": [ ... ],
68
+ "paths": [ ... ],
69
+ "layers": [ ... ]
70
+ }
71
+ ```
72
+
73
+ ### `lsif` (required)
74
+
75
+ Format identification and version.
76
+
77
+ ```json
78
+ {
79
+ "version": "0.2.0",
80
+ "generator": "rsyntaxtree 1.4.0",
81
+ "level": "rendered"
82
+ }
83
+ ```
84
+
85
+ | Field | Type | Required | Description |
86
+ |-------|------|----------|-------------|
87
+ | `version` | string | Yes | LSIF schema version (semver) |
88
+ | `generator` | string or null | Yes | Tool/agent that produced this file. `null` if hand-authored |
89
+ | `level` | string | No | Conformance level: `"core"`, `"rendered"`, or `"layered"`. Defaults to `"core"` if omitted |
90
+
91
+ ### `meta` (optional)
92
+
93
+ Provenance information. Consumers MUST NOT depend on this section for rendering or analysis. It exists solely for round-trip editing and debugging.
94
+
95
+ ```json
96
+ {
97
+ "source": {
98
+ "format": "rsyntaxtree-bracket",
99
+ "input": "[TP [DP_i_ John] [T' [T__0__ pres] [VP ...]]]",
100
+ "params": {
101
+ "font_style": "serif",
102
+ "font_size": 16,
103
+ "color": "modern",
104
+ "connector": "auto",
105
+ "connector_height": 2.0,
106
+ "line_width": 1,
107
+ "symmetrize": false,
108
+ "polyline": false,
109
+ "hide_default_connectors": false,
110
+ "transparent": false
111
+ }
112
+ }
113
+ }
114
+ ```
115
+
116
+ | Field | Type | Description |
117
+ |-------|------|-------------|
118
+ | `source` | object or null | Original input data. `null` if not from a specific tool |
119
+ | `source.format` | string | Input format identifier (e.g. `"rsyntaxtree-bracket"`, `"penn-treebank"`, `"llm-generated"`) |
120
+ | `source.input` | string | Raw input text |
121
+ | `source.params` | object | Rendering parameters as passed to the generator |
122
+
123
+ ### `geometry` (Rendered level)
124
+
125
+ Bounding box of the rendered structure. Required at Rendered level; absent at Core level.
126
+
127
+ ```json
128
+ {
129
+ "width": 540.0,
130
+ "height": 480.0,
131
+ "direction": "ttb"
132
+ }
133
+ ```
134
+
135
+ | Field | Type | Description |
136
+ |-------|------|-------------|
137
+ | `width` | number | Total width of the rendered structure (px) |
138
+ | `height` | number | Total height of the rendered structure (px) |
139
+ | `direction` | string | Layout direction: `"ttb"` (top-to-bottom, default) or `"ltr"` (left-to-right). `"rtl"` and `"btt"` are reserved for future use. If the field is omitted, `"ttb"` is assumed |
140
+
141
+ ### `nodes` (required)
142
+
143
+ Array of all nodes. At minimum, each node must have `id` and `label`.
144
+
145
+ #### Core-level node (minimal)
146
+
147
+ ```json
148
+ {
149
+ "id": 1,
150
+ "label": {
151
+ "raw": "VP",
152
+ "lines": [
153
+ { "segments": [{ "text": "VP", "decorations": [] }] }
154
+ ]
155
+ }
156
+ }
157
+ ```
158
+
159
+ #### Rendered-level node (full)
160
+
161
+ ```json
162
+ {
163
+ "id": 1,
164
+ "type": "node",
165
+ "level": 0,
166
+ "label": {
167
+ "raw": "DP_i_",
168
+ "lines": [
169
+ {
170
+ "segments": [
171
+ { "text": "DP", "decorations": [] },
172
+ { "text": "i", "decorations": ["subscript"] }
173
+ ]
174
+ }
175
+ ]
176
+ },
177
+ "position": {
178
+ "x": 50.0,
179
+ "y": 80.0,
180
+ "content_width": 35.0,
181
+ "content_height": 22.0,
182
+ "subtree_width": 100.0
183
+ },
184
+ "style": {
185
+ "color": "#0072B2",
186
+ "enclosure": "none",
187
+ "triangle": false
188
+ },
189
+ "parent": null,
190
+ "children": [2, 3]
191
+ }
192
+ ```
193
+
194
+ #### Node fields
195
+
196
+ | Field | Type | Level | Description |
197
+ |-------|------|-------|-------------|
198
+ | `id` | integer | Core | Unique node identifier (1-based) |
199
+ | `label` | object | Core | Node label content |
200
+ | `type` | string | Rendered | `"node"` (internal) or `"leaf"` (terminal). Tree-specific |
201
+ | `level` | integer | Rendered | Depth in structure (0 = root). Tree-specific |
202
+ | `position` | object | Rendered | Layout coordinates and dimensions |
203
+ | `style` | object | Rendered | Resolved visual properties |
204
+ | `parent` | integer or null | Rendered | Parent node ID. Convenience field derived from `edges` |
205
+ | `children` | array of integer | Rendered | Child node IDs. Convenience field derived from `edges` |
206
+
207
+ #### `label` object
208
+
209
+ | Field | Type | Description |
210
+ |-------|------|-------------|
211
+ | `raw` | string | Original markup text (e.g. `"DP_i_"`) |
212
+ | `lines` | array | Array of line objects (for multi-line labels) |
213
+
214
+ #### `label.lines[]` element
215
+
216
+ | Field | Type | Description |
217
+ |-------|------|-------------|
218
+ | `segments` | array | Array of text segments within a single line |
219
+
220
+ #### `label.lines[].segments[]` element
221
+
222
+ | Field | Type | Description |
223
+ |-------|------|-------------|
224
+ | `text` | string | Plain text content |
225
+ | `decorations` | array of string | Applied decorations (see below) |
226
+
227
+ **Decoration values:**
228
+
229
+ | Value | Description |
230
+ |-------|-------------|
231
+ | `"bold"` | Bold text |
232
+ | `"italic"` | Italic text |
233
+ | `"bolditalic"` | Bold + italic |
234
+ | `"subscript"` | Subscript (smaller, lowered) |
235
+ | `"superscript"` | Superscript (smaller, raised) |
236
+ | `"small"` | Small text |
237
+ | `"overline"` | Line above text |
238
+ | `"underline"` | Line below text |
239
+ | `"linethrough"` | Strikethrough |
240
+ | `"box"` | Rectangular border around text |
241
+ | `"circle"` | Circular border around text |
242
+ | `"hatched"` | Hatched fill (combined with box/circle) |
243
+ | `"bar"` | Horizontal bar |
244
+ | `"bstroke"` | Bold stroke variant |
245
+ | `"arrow_to_l"` | Left-pointing arrow |
246
+ | `"arrow_to_r"` | Right-pointing arrow |
247
+
248
+ Multiple decorations can be combined (e.g. `["bold", "subscript"]`).
249
+
250
+ #### `position` object (Rendered level)
251
+
252
+ All coordinates are in the rendering coordinate space (origin at top-left, y increases downward).
253
+
254
+ | Field | Type | Description |
255
+ |-------|------|-------------|
256
+ | `x` | number | Horizontal position of the node label (left edge) |
257
+ | `y` | number | Vertical position of the node label (top edge) |
258
+ | `content_width` | number | Width of the label text/enclosure |
259
+ | `content_height` | number | Height of the label text/enclosure |
260
+ | `subtree_width` | number | Width of the subtree rooted at this node |
261
+
262
+ #### `style` object (Rendered level)
263
+
264
+ All values are resolved (not symbolic). Colors are hex strings or named CSS colors.
265
+
266
+ | Field | Type | Description |
267
+ |-------|------|-------------|
268
+ | `color` | string or null | Resolved node color (e.g. `"#0072B2"`, `"red"`). `null` for default |
269
+ | `enclosure` | string | `"none"`, `"brackets"`, `"rectangle"`, `"bold_rectangle"` |
270
+ | `triangle` | boolean | Whether this node uses a triangle connector to its parent |
271
+
272
+ ### `edges` (required)
273
+
274
+ Array of structural relationships. One entry per relationship. May be empty.
275
+
276
+ ```json
277
+ {
278
+ "from": 1,
279
+ "to": 2,
280
+ "type": "dominance",
281
+ "connector": "line"
282
+ }
283
+ ```
284
+
285
+ | Field | Type | Level | Description |
286
+ |-------|------|-------|-------------|
287
+ | `from` | integer | Core | Source node ID |
288
+ | `to` | integer | Core | Target node ID |
289
+ | `type` | string | Core | Relationship type |
290
+ | `connector` | string or null | Rendered | Visual connector type. `null` or absent at Core level |
291
+
292
+ **Edge type values (current and planned):**
293
+
294
+ | Value | Description |
295
+ |-------|-------------|
296
+ | `"dominance"` | Tree edge (parent dominates child) |
297
+ | `"dependency"` | Dependency relation |
298
+ | `"correspondence"` | Inter-layer mapping |
299
+ | `"binding"` | Binding/coreference relation |
300
+ | `"agreement"` | Agreement relation |
301
+ | `"semantic_role"` | Semantic role assignment |
302
+ | `"discourse"` | Discourse relation |
303
+
304
+ The `type` field is open-ended: any string value is valid. The values above are conventionally defined; tools should use them where applicable.
305
+
306
+ ### `paths` (required)
307
+
308
+ Array of non-structural, directional relationships (movement arrows, etc.). May be empty.
309
+
310
+ ```json
311
+ {
312
+ "from": 5,
313
+ "to": 2,
314
+ "direction": "forward",
315
+ "type": "movement"
316
+ }
317
+ ```
318
+
319
+ | Field | Type | Description |
320
+ |-------|------|-------------|
321
+ | `from` | integer | Source node ID |
322
+ | `to` | integer | Target node ID |
323
+ | `direction` | string | `"forward"` (->), `"backward"` (<-), or `"bidirectional"` (<->) |
324
+ | `type` | string | Relationship type (open-ended) |
325
+
326
+ ### `layers` (Layered level)
327
+
328
+ Array of independent information layers. Each layer adds structured data to existing nodes and/or edges without modifying the Core structure. Absent at Core and Rendered levels.
329
+
330
+ ```json
331
+ {
332
+ "layers": [
333
+ {
334
+ "id": "semantics",
335
+ "label": "Semantic roles",
336
+ "description": "Thematic role assignments for each predicate-argument relation",
337
+ "node_data": {
338
+ "3": { "theta_role": "agent", "animacy": "animate" },
339
+ "9": { "theta_role": "theme", "animacy": "inanimate" }
340
+ },
341
+ "edge_data": {
342
+ "0": { "semantic_type": "predication" }
343
+ }
344
+ },
345
+ {
346
+ "id": "prosody",
347
+ "label": "Prosodic structure",
348
+ "description": "Prosodic phrasing and prominence",
349
+ "node_data": {
350
+ "3": { "prosodic_word": "PWd1", "prominence": "primary" }
351
+ }
352
+ }
353
+ ]
354
+ }
355
+ ```
356
+
357
+ #### Layer fields
358
+
359
+ | Field | Type | Description |
360
+ |-------|------|-------------|
361
+ | `id` | string | Unique layer identifier |
362
+ | `label` | string | Human-readable layer name |
363
+ | `description` | string or null | Optional description of the layer's content and purpose |
364
+ | `node_data` | object or null | Keyed by node ID (as string). Values are arbitrary objects |
365
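+ | `edge_data` | object or null | Keyed by edge index (as string), i.e. the edge's 0-based position in the top-level `edges` array, as in the `"0"` key of the example above. Values are arbitrary objects |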
366
+
367
+ **Design notes:**
368
+
369
+ - Layer data is keyed by node ID (for `node_data`) or edge index (for `edge_data`) rather than embedded within node/edge objects, so that the Core structure is left untouched.
370
+ - The value objects within `node_data` and `edge_data` are schema-free; each layer defines its own semantics.
371
+ - Layers can reference each other via edge `type: "correspondence"` in the main `edges` array, or via shared node IDs.
372
+ - In a future version, a layer may also introduce its own nodes and edges (e.g., a semantic structure with a different topology), stored in separate `nodes` and `edges` fields within the layer. This is reserved for future specification and is not defined in this version.
373
+
374
+ ## Design principles
375
+
376
+ ### Self-containedness
377
+
378
+ The `nodes`, `edges`, and `paths` sections contain all information needed to render or analyze the structure. No consumer should need to parse `meta.source.input` or understand any source notation.
379
+
380
+ ### Resolved values
381
+
382
+ Style properties (Rendered level) are stored as resolved values, not symbolic references:
383
+ - Colors: `"#0072B2"` not `"modern"`
384
+ - Enclosures: `"brackets"` not `"#"`
385
+ - Decorations: `["bold", "subscript"]` not `"**_text_**"`
386
+
387
+ ### Extensibility
388
+
389
+ - New fields can be added to any object without breaking existing consumers
390
+ - New `type` values for `edges` and `paths` can be introduced freely
391
+ - The `layers` mechanism provides open-ended data composition without schema changes
392
+ - The `lsif.version` field tracks breaking changes
393
+
394
+ ### Graceful degradation
395
+
396
+ Consumers should process only the fields and levels they understand, and ignore the rest. A Rendered-level consumer receiving a Core-level document should be able to compute its own layout. A Core-level consumer receiving a Rendered-level document should ignore `position`, `style`, and other Rendered fields.
397
+
398
+ ### Versioning policy
399
+
400
+ - **Patch** (0.x.y): documentation clarifications, new optional fields
401
+ - **Minor** (0.x.0): new conformance-level features, new conventional type values
402
+ - **Major** (x.0.0): breaking changes to existing field semantics or removal of fields
403
+
404
+ ## RSyntaxTree output
405
+
406
+ RSyntaxTree always produces **Rendered**-level LSIF with `lsif.level` set to `"rendered"`. All Core and Rendered fields are populated. The `layers` section is not included.
407
+
408
+ ## Complete example (Rendered level)
409
+
410
+ ```json
411
+ {
412
+ "lsif": {
413
+ "version": "0.2.0",
414
+ "generator": "rsyntaxtree 1.4.0",
415
+ "level": "rendered"
416
+ },
417
+ "meta": {
418
+ "source": {
419
+ "format": "rsyntaxtree-bracket",
420
+ "input": "[TP [DP_i_ John] [VP [V sleeps]]]",
421
+ "params": {
422
+ "font_style": "sans",
423
+ "font_size": 16,
424
+ "color": "modern"
425
+ }
426
+ }
427
+ },
428
+ "geometry": {
429
+ "width": 280.0,
430
+ "height": 240.0
431
+ },
432
+ "nodes": [
433
+ {
434
+ "id": 1,
435
+ "type": "node",
436
+ "level": 0,
437
+ "label": {
438
+ "raw": "TP",
439
+ "lines": [{ "segments": [{ "text": "TP", "decorations": [] }] }]
440
+ },
441
+ "position": { "x": 100.0, "y": 20.0, "content_width": 30.0, "content_height": 22.0, "subtree_width": 280.0 },
442
+ "style": { "color": "#0072B2", "enclosure": "none", "triangle": false },
443
+ "parent": null,
444
+ "children": [2, 4]
445
+ },
446
+ {
447
+ "id": 2,
448
+ "type": "node",
449
+ "level": 1,
450
+ "label": {
451
+ "raw": "DP_i_",
452
+ "lines": [{ "segments": [{ "text": "DP", "decorations": [] }, { "text": "i", "decorations": ["subscript"] }] }]
453
+ },
454
+ "position": { "x": 30.0, "y": 80.0, "content_width": 35.0, "content_height": 22.0, "subtree_width": 100.0 },
455
+ "style": { "color": "#0072B2", "enclosure": "none", "triangle": false },
456
+ "parent": 1,
457
+ "children": [3]
458
+ },
459
+ {
460
+ "id": 3,
461
+ "type": "leaf",
462
+ "level": 2,
463
+ "label": {
464
+ "raw": "John",
465
+ "lines": [{ "segments": [{ "text": "John", "decorations": [] }] }]
466
+ },
467
+ "position": { "x": 30.0, "y": 140.0, "content_width": 40.0, "content_height": 22.0, "subtree_width": 100.0 },
468
+ "style": { "color": "#009E73", "enclosure": "none", "triangle": false },
469
+ "parent": 2,
470
+ "children": []
471
+ },
472
+ {
473
+ "id": 4,
474
+ "type": "node",
475
+ "level": 1,
476
+ "label": {
477
+ "raw": "VP",
478
+ "lines": [{ "segments": [{ "text": "VP", "decorations": [] }] }]
479
+ },
480
+ "position": { "x": 170.0, "y": 80.0, "content_width": 30.0, "content_height": 22.0, "subtree_width": 180.0 },
481
+ "style": { "color": "#0072B2", "enclosure": "none", "triangle": false },
482
+ "parent": 1,
483
+ "children": [5]
484
+ },
485
+ {
486
+ "id": 5,
487
+ "type": "node",
488
+ "level": 2,
489
+ "label": {
490
+ "raw": "V",
491
+ "lines": [{ "segments": [{ "text": "V", "decorations": [] }] }]
492
+ },
493
+ "position": { "x": 175.0, "y": 140.0, "content_width": 20.0, "content_height": 22.0, "subtree_width": 100.0 },
494
+ "style": { "color": "#0072B2", "enclosure": "none", "triangle": false },
495
+ "parent": 4,
496
+ "children": [6]
497
+ },
498
+ {
499
+ "id": 6,
500
+ "type": "leaf",
501
+ "level": 3,
502
+ "label": {
503
+ "raw": "sleeps",
504
+ "lines": [{ "segments": [{ "text": "sleeps", "decorations": [] }] }]
505
+ },
506
+ "position": { "x": 155.0, "y": 200.0, "content_width": 60.0, "content_height": 22.0, "subtree_width": 100.0 },
507
+ "style": { "color": "#009E73", "enclosure": "none", "triangle": false },
508
+ "parent": 5,
509
+ "children": []
510
+ }
511
+ ],
512
+ "edges": [
513
+ { "from": 1, "to": 2, "type": "dominance", "connector": "line" },
514
+ { "from": 1, "to": 4, "type": "dominance", "connector": "line" },
515
+ { "from": 2, "to": 3, "type": "dominance", "connector": "line" },
516
+ { "from": 4, "to": 5, "type": "dominance", "connector": "line" },
517
+ { "from": 5, "to": 6, "type": "dominance", "connector": "line" }
518
+ ],
519
+ "paths": []
520
+ }
521
+ ```
@@ -0,0 +1,6 @@
1
+ # These are supported funding model platforms
2
+
3
+ github: [yohasebe]
4
+ ko_fi: yohasebe
5
+ buy_me_a_coffee: yohasebe
6
+ # custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
data/.gitignore CHANGED
@@ -7,6 +7,7 @@ Gemfile.lock
7
7
  *.bak
8
8
  *.~
9
9
  *.log
10
+ *.gem
10
11
  log/
11
12
  logs/
12
13
 
@@ -18,3 +19,4 @@ tags
18
19
  .rubocop_todo.yml
19
20
  .solargraph.yml
20
21
 
22
+ CLAUDE.md
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 4.0.1
data/CHANGELOG.md ADDED
@@ -0,0 +1,48 @@
1
+ # Changelog
2
+
3
+ ## [1.5.0] - 2026-04
4
+
5
+ ### Added
6
+ - Left-to-right tree layout (`-d ltr` / `--direction ltr`)
7
+ - LSIF `geometry.direction` field for layout direction
8
+ - LTR-aware path drawing (movement arrows route rightward in ⊃ shape)
9
+ - LTR-aware line-type connections (direct lines between nodes)
10
+ - Examples 058-063: LTR versions of classification trees and vP-shell with paths
11
+
12
+ ### Fixed
13
+ - Node label overlap when adjacent subtrees have long labels
14
+
15
+ ### Improved
16
+ - TTB path bulge proportional to endpoint distance (reduced excess)
17
+
18
+ ## [1.4.0] - 2026-01
19
+
20
+ ### Added
21
+ - LSIF (Linguistic Structure Interchange Format) JSON output (`-f lsif`)
22
+ - Per-node coloring with `@color:` syntax (named colors and hex colors)
23
+ - Penn Treebank format support with escaped parentheses (`\(`, `\)`)
24
+ - Standard input support for piping tree data
25
+ - Configuration file support (`.rsyntaxtreerc`)
26
+ - Config file validation with helpful error messages
27
+
28
+ ### Documentation
29
+ - Added TikZ output limitations section
30
+ - Improved README with Features section
31
+ - Added examples for per-node coloring (054, 055, 056)
32
+ - Added example 057: Subscript and superscript demo
33
+
34
+ ## [1.3.2] - 2024
35
+
36
+ - Garbage-collection-friendly implementation
37
+
38
+ ## [1.3.1] - 2024
39
+
40
+ - Bug fixes and improvements
41
+
42
+ ## [1.3.0] - 2024
43
+
44
+ - TikZ/forest LaTeX output support
45
+
46
+ ## Previous versions
47
+
48
+ See commit history for earlier changes.
data/Gemfile CHANGED
@@ -6,3 +6,9 @@ gem "optimist"
6
6
  gem "parslet"
7
7
  gem "rmagick"
8
8
  gem "rsvg2"
9
+
10
+ group :development, :test do
11
+ gem "minitest"
12
+ gem "nokogiri"
13
+ gem "rake"
14
+ end
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2007-2026 Yoichiro Hasebe
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.