sorbet-toon 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +45 -0
- data/lib/sorbet/toon/README.md +168 -0
- data/lib/sorbet/toon/codec.rb +55 -0
- data/lib/sorbet/toon/config.rb +50 -0
- data/lib/sorbet/toon/constants.rb +39 -0
- data/lib/sorbet/toon/decode/decoders.rb +226 -0
- data/lib/sorbet/toon/decode/parser.rb +267 -0
- data/lib/sorbet/toon/decode/scanner.rb +118 -0
- data/lib/sorbet/toon/decode/validation.rb +66 -0
- data/lib/sorbet/toon/decoder.rb +42 -0
- data/lib/sorbet/toon/encode/encoders.rb +246 -0
- data/lib/sorbet/toon/encode/normalize.rb +108 -0
- data/lib/sorbet/toon/encode/primitives.rb +86 -0
- data/lib/sorbet/toon/encode/writer.rb +28 -0
- data/lib/sorbet/toon/encoder.rb +43 -0
- data/lib/sorbet/toon/enum_extensions.rb +28 -0
- data/lib/sorbet/toon/errors.rb +10 -0
- data/lib/sorbet/toon/normalizer.rb +93 -0
- data/lib/sorbet/toon/reconstructor.rb +181 -0
- data/lib/sorbet/toon/shared/literal_utils.rb +35 -0
- data/lib/sorbet/toon/shared/string_utils.rb +95 -0
- data/lib/sorbet/toon/shared/validation.rb +41 -0
- data/lib/sorbet/toon/signature_formatter.rb +172 -0
- data/lib/sorbet/toon/struct_extensions.rb +21 -0
- data/lib/sorbet/toon/version.rb +7 -0
- data/lib/sorbet/toon.rb +56 -0
- metadata +84 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: fb0c6792c80943334b08014b87d6ee1b48a9341c69b58880a6b9a9f5f2eaf3ff
|
|
4
|
+
data.tar.gz: 86d67e78304e6afc6f0d5902171dbec0cc046e9cbba737e57335cb34a0da47c7
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: abf1f4ffe87f7e25d417563bcf1a28ef2f788552b5f9204a727af75c3cbaa81a3c43f5b306c6897740fcea2517fe165937a0a0cf45943e5aee0e0282aee6d441
|
|
7
|
+
data.tar.gz: 238e0569ff48361b6adbc273c5ca340d09099cd16be3947da828c57ccfb13fd4e53bf6b27e4910acd40d4fcc3e9b7426a259867cd691724c4e96f7c2ede18b55
|
data/LICENSE
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Vicente Services SL
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
23
|
+
This project is a Ruby port of the original Python [DSPy library](https://github.com/stanfordnlp/dspy), which is licensed under the MIT License:
|
|
24
|
+
|
|
25
|
+
MIT License
|
|
26
|
+
|
|
27
|
+
Copyright (c) 2023 Stanford Future Data Systems
|
|
28
|
+
|
|
29
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
30
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
31
|
+
in the Software without restriction, including without limitation the rights
|
|
32
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
33
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
34
|
+
furnished to do so, subject to the following conditions:
|
|
35
|
+
|
|
36
|
+
The above copyright notice and this permission notice shall be included in all
|
|
37
|
+
copies or substantial portions of the Software.
|
|
38
|
+
|
|
39
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
40
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
41
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
42
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
43
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
44
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
45
|
+
SOFTWARE.
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# Sorbet::Toon (alpha)
|
|
2
|
+
|
|
3
|
+
Sorbet::Toon brings the official [TOON data format](https://github.com/toon-format/toon) to Ruby. It lets DSPy.rb—and any Sorbet-powered code—render compact prompt contracts and parse LLM responses using the same `DSPy::Signature` metadata that already powers JSON/BAML. Think of it as “typed Markdown tables for LLMs”: humans stay oriented, tokens stay low, and structs survive the round-trip.
|
|
4
|
+
|
|
5
|
+
> **Status:** Codec + normalization + reconstruction + TOON mixins are merged. DSPy adapter + docs/tools are landing now; expect sharp edges until v0.1.0.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Install & Load
|
|
10
|
+
|
|
11
|
+
Until the gem ships on RubyGems, point Bundler at the repo:
|
|
12
|
+
|
|
13
|
+
```ruby
|
|
14
|
+
# Gemfile
|
|
15
|
+
gem 'sorbet-toon', path: '.' # replace with "~> 0.1" once published
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
```ruby
|
|
19
|
+
require 'sorbet-runtime'
|
|
20
|
+
require 'sorbet/toon'
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Optional convenience:
|
|
24
|
+
|
|
25
|
+
```ruby
|
|
26
|
+
# Extensions are enabled automatically when sorbet/toon is required.
|
|
27
|
+
# Structs already have #to_toon / .from_toon and enums gain helpers.
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Encode + Decode Walkthrough
|
|
33
|
+
|
|
34
|
+
```ruby
|
|
35
|
+
class Source < T::Struct
|
|
36
|
+
prop :name, String
|
|
37
|
+
prop :url, String
|
|
38
|
+
prop :notes, T.nilable(String)
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
class ResearchSignature < DSPy::Signature
|
|
42
|
+
input { const :query, String }
|
|
43
|
+
output do
|
|
44
|
+
const :summary, String
|
|
45
|
+
const :sources, T::Array[Source]
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
payload = ResearchSignature.output_struct_class.new(
|
|
50
|
+
summary: 'Recent AI papers',
|
|
51
|
+
sources: [
|
|
52
|
+
Source.new(name: 'Anthropic', url: 'https://anthropic.com', notes: nil),
|
|
53
|
+
Source.new(name: 'OpenAI', url: 'https://openai.com', notes: 'top pick')
|
|
54
|
+
]
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
toon = Sorbet::Toon.encode(payload, signature: ResearchSignature, role: :output)
|
|
58
|
+
# summary: Recent AI papers
|
|
59
|
+
# sources[2]{name,url,notes}:
|
|
60
|
+
# Anthropic,https://anthropic.com,
|
|
61
|
+
# OpenAI,https://openai.com,"top pick"
|
|
62
|
+
|
|
63
|
+
rehydrated = Sorbet::Toon.decode(toon, signature: ResearchSignature, role: :output)
|
|
64
|
+
rehydrated.summary # => "Recent AI papers"
|
|
65
|
+
rehydrated.sources.last.notes # => "top pick"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Mixins (optional)
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
toon_blob = payload.to_toon(include_type_metadata: true)
|
|
72
|
+
# => struct instances gain #to_toon automatically
|
|
73
|
+
|
|
74
|
+
decoded = Source.from_toon(%(- name: Ada\n url: https://example.com))
|
|
75
|
+
# => class methods .from_toon / enums too
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Config knobs
|
|
79
|
+
|
|
80
|
+
```ruby
|
|
81
|
+
Sorbet::Toon.configure do |config|
|
|
82
|
+
config.indent = 4
|
|
83
|
+
config.delimiter = Sorbet::Toon::Constants::PIPE
|
|
84
|
+
config.include_type_metadata = true
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
Sorbet::Toon.encode(value, delimiter: Sorbet::Toon::Constants::TAB) # per-call override
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Signature Formatter & DSPy Adapter
|
|
93
|
+
|
|
94
|
+
`Sorbet::Toon::SignatureFormatter` inspects `DSPy::Signature` definitions to produce human-friendly TOON guidance (field order, optional notes, tabular hints). `DSPy::Schema::SorbetToonAdapter` wires that into prompts:
|
|
95
|
+
|
|
96
|
+
```ruby
|
|
97
|
+
DSPy::Prompt.from_signature(
|
|
98
|
+
ResearchSignature,
|
|
99
|
+
schema_format: :toon, # show TOON-friendly schema guidance
|
|
100
|
+
data_format: :toon # render inputs/outputs as TOON
|
|
101
|
+
)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
System prompts now read:
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
Your input schema fields (TOON order) are:
|
|
108
|
+
- query (String) — Research question
|
|
109
|
+
|
|
110
|
+
Your output schema fields (TOON order) are:
|
|
111
|
+
- summary (String) — Key findings
|
|
112
|
+
- sources (Array<Source>)
|
|
113
|
+
• Tabular columns: name, url, notes
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
User prompts embed real TOON values:
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
## Input Values
|
|
120
|
+
```toon
|
|
121
|
+
query: recent diffusion models
|
|
122
|
+
```
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
LLMs reply with:
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
## Output values
|
|
129
|
+
```toon
|
|
130
|
+
summary: Diffusion models are accelerating image synthesis...
|
|
131
|
+
sources[2]{name,url,notes}:
|
|
132
|
+
Anthropic,https://anthropic.com,
|
|
133
|
+
OpenAI,https://openai.com,"top pick"
|
|
134
|
+
```
```
|
|
135
|
+
|
|
136
|
+
Decoding uses the same adapter:
|
|
137
|
+
|
|
138
|
+
```ruby
|
|
139
|
+
DSPy::Schema::SorbetToonAdapter.parse_output(ResearchSignature, toon_blob)
|
|
140
|
+
# => Hash with string keys (Predict converts to structs automatically)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
`DSPy::LM.new(..., data_format: :toon)` enables automatic TOON parsing inside `DSPy::LM#chat`, so `Predict` keeps receiving hashes/structs without custom code.
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## Troubleshooting
|
|
148
|
+
|
|
149
|
+
| Symptom | Fix |
|
|
150
|
+
| --- | --- |
|
|
151
|
+
| `LoadError: cannot load such file -- sorbet/toon` | Ensure the gem is installed (via repo path or RubyGems) *before* loading DSPy. |
|
|
152
|
+
| LLM responds with narrative text instead of TOON | Double-check the system prompt includes the TOON instructions. `data_format: :toon` forces `Prompt#render_user_prompt` to embed a ```toon``` block and explicitly asks for a TOON reply. |
|
|
153
|
+
| `Sorbet::Toon::DecodeError` complaining about columns | Arrays of structs become tabular if every row shares the same keys. Make sure your struct props/names align and optional fields are either present in every row or omitted entirely. |
|
|
154
|
+
| Need to include `_type` discriminator | `Sorbet::Toon.configure { |c| c.include_type_metadata = true }` or per-call option `Sorbet::Toon.encode(value, include_type_metadata: true)`. Useful for union/variant decoding. |
|
|
155
|
+
| Optional fields showing as `null` | Normalizer already elides fully optional Sorbet props when nil. If you see `null`, the prop likely isn’t declared `T.nilable` or lacks a default. |
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Roadmap
|
|
160
|
+
|
|
161
|
+
- [x] Codec + fixtures ported from the TypeScript reference.
|
|
162
|
+
- [x] Sorbet-aware normalizer, decoder reconstruction, struct/enum mixins.
|
|
163
|
+
- [x] Gem scaffold (`sorbet-toon.gemspec`, version file, README).
|
|
164
|
+
- [ ] Signature formatter + DSPy adapter (data_format: :toon) **← in progress**
|
|
165
|
+
- [ ] Full llms-full README (copy-pastable recipes + troubleshooting deep dive).
|
|
166
|
+
- [ ] Integration specs mirroring the BAML suite.
|
|
167
|
+
|
|
168
|
+
Feedback / bugs → issues or `hey@vicente.services`. Thank you!
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'errors'
|
|
4
|
+
require_relative 'constants'
|
|
5
|
+
require_relative 'encode/normalize'
|
|
6
|
+
require_relative 'encode/encoders'
|
|
7
|
+
require_relative 'decode/scanner'
|
|
8
|
+
require_relative 'decode/decoders'
|
|
9
|
+
|
|
10
|
+
module Sorbet
  module Toon
    # Entry points that turn Ruby values into TOON text and TOON text back
    # into Ruby values, delegating to the Encode::* and Decode::* helpers.
    module Codec
      class << self
        # Fallbacks used when a caller does not pass the matching option.
        # They are defined inside `class << self`, so they live on the
        # singleton class.
        DEFAULT_INDENT = 2
        DEFAULT_DELIMITER = Constants::DEFAULT_DELIMITER
        DEFAULT_LENGTH_MARKER = false

        # Normalizes +input+ and hands the result to the encoder.
        #
        # @param input the value to encode
        # @param indent forwarded to Encode::Encoders.encode_value
        # @param delimiter forwarded to Encode::Encoders.encode_value
        # @param length_marker forwarded to Encode::Encoders.encode_value
        # @return whatever Encode::Encoders.encode_value returns
        def encode(input, indent: DEFAULT_INDENT, delimiter: DEFAULT_DELIMITER, length_marker: DEFAULT_LENGTH_MARKER)
          Encode::Encoders.encode_value(
            Encode::Normalize.normalize(input),
            indent: indent,
            delimiter: delimiter,
            length_marker: length_marker
          )
        end

        # Scans +input+ into lines and decodes them.
        #
        # Recognized options (symbol or string keys): +indent+ (defaults to
        # DEFAULT_INDENT) and +strict+ (defaults to true). Other keys are
        # ignored.
        #
        # @param input [String] TOON text
        # @return the decoded value, or an empty Hash when the scanner
        #   reports no lines
        # @raise [ArgumentError] when +input+ is not a String
        def decode(input, **opts)
          raise ArgumentError, 'Input must be a string' unless input.is_a?(String)

          indent = extract_option(opts, :indent, DEFAULT_INDENT)
          strict = extract_option(opts, :strict, true)

          scanned = Decode::Scanner.to_parsed_lines(input, indent, strict)
          lines = scanned[:lines]
          return {} if lines.empty?

          Decode::Decoders.decode_value_from_lines(
            Decode::LineCursor.new(lines, scanned[:blank_lines]),
            strict: strict
          )
        end

        private

        # Looks +key+ up as given, then as a String, then falls back to
        # +default+. A key that is present wins even when its value is nil.
        def extract_option(options, key, default)
          options.fetch(key) { options.fetch(key.to_s, default) }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'constants'
|
|
4
|
+
|
|
5
|
+
module Sorbet
  module Toon
    # Mutable set of TOON encode/decode settings.
    #
    # Attributes: +include_type_metadata+, +indent+, +delimiter+,
    # +length_marker+, +strict+.
    class Config
      attr_accessor :include_type_metadata, :indent, :delimiter, :length_marker, :strict

      # Builds a config populated with the defaults from #reset!.
      def initialize
        reset!
      end

      # Restores every setting to its default value.
      def reset!
        @include_type_metadata = false
        @indent = 2
        @delimiter = Constants::DEFAULT_DELIMITER
        @length_marker = false
        @strict = true
      end

      # @return [Config] a new instance of the same class carrying this
      #   instance's current settings
      def copy
        duplicate = self.class.new
        duplicate.include_type_metadata = include_type_metadata
        duplicate.indent = indent
        duplicate.delimiter = delimiter
        duplicate.length_marker = length_marker
        duplicate.strict = strict
        duplicate
      end

      # Returns a copy with +overrides+ applied; the receiver is not modified.
      #
      # @param overrides [Hash, nil] see #apply
      # @return [Config]
      def resolve(overrides = {})
        copy.apply(overrides)
      end

      # Applies +overrides+ to this instance in place.
      #
      # Entries whose value is nil are skipped, so the current setting is
      # kept. A nil +overrides+ argument is treated as "no overrides" rather
      # than raising NoMethodError.
      #
      # @param overrides [Hash, nil] option name (Symbol or String) => value
      # @return [self]
      # @raise [ArgumentError] when a key has no matching setter
      def apply(overrides = {})
        (overrides || {}).each do |key, value|
          next if value.nil?

          setter = "#{key}="
          raise ArgumentError, "Unknown config option: #{key}" unless respond_to?(setter)

          public_send(setter, value)
        end

        self
      end
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sorbet
  module Toon
    # Literal tokens shared across the TOON encoder and decoder.
    module Constants
      # List item markers.
      LIST_ITEM_MARKER = "-"
      LIST_ITEM_PREFIX = "- "

      # Single-character punctuation.
      COMMA = ","
      COLON = ":"
      SPACE = " "
      PIPE = "|"
      HASH = "#"
      TAB = "\t"

      # Bracket and brace pairs.
      OPEN_BRACKET = "["
      CLOSE_BRACKET = "]"
      OPEN_BRACE = "{"
      CLOSE_BRACE = "}"

      # Keyword literals.
      NULL_LITERAL = "null"
      TRUE_LITERAL = "true"
      FALSE_LITERAL = "false"

      # Characters relevant to quoting and escaping.
      BACKSLASH = "\\"
      DOUBLE_QUOTE = '"'
      NEWLINE = "\n"
      CARRIAGE_RETURN = "\r"

      # Named delimiters.
      DELIMITERS = { comma: COMMA, tab: TAB, pipe: PIPE }.freeze

      # Delimiter used when none is specified.
      DEFAULT_DELIMITER = DELIMITERS.fetch(:comma)
    end
  end
end
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../constants'
|
|
4
|
+
require_relative '../errors'
|
|
5
|
+
require_relative '../shared/string_utils'
|
|
6
|
+
require_relative '../shared/literal_utils'
|
|
7
|
+
require_relative 'scanner'
|
|
8
|
+
require_relative 'parser'
|
|
9
|
+
require_relative 'validation'
|
|
10
|
+
|
|
11
|
+
module Sorbet
|
|
12
|
+
module Toon
|
|
13
|
+
module Decode
|
|
14
|
+
module Decoders
|
|
15
|
+
module_function
|
|
16
|
+
|
|
17
|
+
# Decodes the document held by +cursor+, choosing between a root array,
# a single primitive, and an object.
#
# @param cursor line cursor positioned on the first line
# @param options keyword options, passed on as a Hash to the helpers
# @return the decoded value
# @raise [Sorbet::Toon::DecodeError] when the cursor has no line to peek
def decode_value_from_lines(cursor, **options)
  head = cursor.peek
  raise Sorbet::Toon::DecodeError, 'No content to decode' unless head

  # Root array: the first line is itself an array header.
  root_array = Parser.array_header_after_hyphen?(head.content) &&
               Parser.parse_array_header_line(head.content, Constants::DEFAULT_DELIMITER)
  if root_array
    cursor.advance
    return decode_array_from_header(root_array[:header], root_array[:inline_values], cursor, 0, options)
  end

  # A lone line that is not a key/value pair is a bare primitive.
  lone_primitive = cursor.length == 1 && !key_value_line?(head)
  return Parser.parse_primitive_token(head.content.strip) if lone_primitive

  decode_object(cursor, 0, options)
end
|
|
35
|
+
|
|
36
|
+
# Tells whether +line+ contains a colon outside a leading quoted token.
#
# When the content starts with a double quote, only the text after the
# closing quote is searched; an unterminated quote yields false.
#
# @param line an object responding to #content
# @return [Boolean]
def key_value_line?(line)
  text = line.content
  return text.include?(Constants::COLON) unless text.start_with?(Constants::DOUBLE_QUOTE)

  quote_end = Shared::StringUtils.find_closing_quote(text, 0)
  return false if quote_end == -1

  text[(quote_end + 1)..]&.include?(Constants::COLON)
end
|
|
48
|
+
private_class_method :key_value_line?
|
|
49
|
+
|
|
50
|
+
# Collects consecutive key/value lines of equal depth into a Hash.
#
# The depth of the first line seen (which must be at least +base_depth+)
# becomes the object's depth; reading stops at the first line whose depth
# differs from it.
#
# @param cursor line cursor
# @param base_depth [Integer] minimum depth a line must have
# @param options [Hash] decode options passed to the helpers
# @return [Hash] decoded key => value pairs (possibly empty)
def decode_object(cursor, base_depth, options)
  fields = {}
  object_depth = nil

  until cursor.at_end?
    current = cursor.peek
    break if !current || current.depth < base_depth

    object_depth = current.depth if object_depth.nil?
    break if current.depth != object_depth

    name, value = decode_key_value_pair(current, cursor, object_depth, options)
    fields[name] = value
  end

  fields
end
|
|
68
|
+
|
|
69
|
+
# Consumes +line+ from the cursor and decodes it as a key/value entry.
#
# @return [Array] two-element array of key and value
def decode_key_value_pair(line, cursor, base_depth, options)
  cursor.advance
  decode_key_value(line.content, cursor, base_depth, options).values_at(:key, :value)
end
|
|
74
|
+
|
|
75
|
+
# Decodes one "key: ..." entry whose line has already been consumed.
#
# Keyed array headers are handed to decode_array_from_header. Otherwise the
# key token is parsed; an empty remainder means a nested object (when the
# next line is deeper than +base_depth+) or an empty Hash, and a non-empty
# remainder is parsed as a primitive.
#
# @param content [String] line content starting at the key
# @return [Hash] with :key, :value and :follow_depth (always base_depth + 1)
def decode_key_value(content, cursor, base_depth, options)
  child_depth = base_depth + 1

  header_info = Parser.parse_array_header_line(content, Constants::DEFAULT_DELIMITER)
  array_key = header_info && header_info[:header][:key]
  if array_key
    items = decode_array_from_header(header_info[:header], header_info[:inline_values], cursor, base_depth, options)
    return { key: array_key, value: items, follow_depth: child_depth }
  end

  key_info = Parser.parse_key_token(content, 0)
  remainder = content[key_info[:end]..]&.strip

  value =
    if remainder.nil? || remainder.empty?
      upcoming = cursor.peek
      upcoming && upcoming.depth > base_depth ? decode_object(cursor, child_depth, options) : {}
    else
      Parser.parse_primitive_token(remainder)
    end

  { key: key_info[:key], value: value, follow_depth: child_depth }
end
|
|
98
|
+
|
|
99
|
+
# Decodes the array described by +header+.
#
# Inline values are split, converted and count-checked right here. With no
# inline values, a header that lists fields is decoded as a tabular array
# and anything else as a list array.
#
# @param header [Hash] reads :delimiter, :length and :fields
# @param inline_values values found on the header line, or nil/false
# @return [Array]
def decode_array_from_header(header, inline_values, cursor, base_depth, options)
  unless inline_values
    tabular = header[:fields] && !header[:fields].empty?
    return decode_tabular_array(header, cursor, base_depth, options) if tabular

    return decode_list_array(header, cursor, base_depth, options)
  end

  cells = Parser.parse_delimited_values(inline_values, header[:delimiter])
  Parser.map_row_values_to_primitives(cells).tap do |primitives|
    Validation.assert_expected_count(primitives.length, header[:length], 'inline array items', strict: options[:strict])
  end
end
|
|
113
|
+
|
|
114
|
+
# Decodes up to header[:length] "- " list items found one level below
# +base_depth+.
#
# Reading stops at the first line that is shallower than the item depth or
# that is not a list item. The count and blank-line validators receive the
# +strict+ option; the surplus-item check now runs only when
# options[:strict] is truthy, so a lenient decode no longer goes through a
# validator that takes no +strict+ argument.
#
# @param header [Hash] reads :length
# @param cursor line cursor positioned after the header line
# @param base_depth [Integer] depth of the header line
# @param options [Hash] reads :strict
# @return [Array] decoded items
def decode_list_array(header, cursor, base_depth, options)
  items = []
  item_depth = base_depth + 1
  start_line = cursor.current&.line_number || 0
  end_line = start_line

  while !cursor.at_end? && items.length < header[:length]
    line = cursor.peek
    break unless line
    break if line.depth < item_depth
    break unless line.content == Constants::LIST_ITEM_MARKER || line.content.start_with?(Constants::LIST_ITEM_PREFIX)

    items << decode_list_item(cursor, item_depth, options)
    end_line = cursor.current&.line_number || end_line
  end

  Validation.assert_expected_count(items.length, header[:length], 'list array items', strict: options[:strict])
  Validation.validate_no_blank_lines_in_range(start_line, end_line, cursor.blank_lines, strict: options[:strict], context: 'list array')
  Validation.validate_no_extra_list_items(cursor, item_depth, header[:length]) if options[:strict]
  items
end
|
|
139
|
+
|
|
140
|
+
# Decodes up to header[:length] delimited rows found one level below
# +base_depth+ and pairs each row's values with header[:fields].
#
# Reading stops at the first line that is not at the row depth or that
# starts with the list-item prefix. The count and blank-line validators
# receive the +strict+ option; the surplus-row check now runs only when
# options[:strict] is truthy, so a lenient decode no longer goes through a
# validator that takes no +strict+ argument.
#
# @param header [Hash] reads :length, :fields and :delimiter
# @param cursor line cursor positioned after the header line
# @param base_depth [Integer] depth of the header line
# @param options [Hash] reads :strict
# @return [Array<Hash>] one Hash per row, keyed by field name
def decode_tabular_array(header, cursor, base_depth, options)
  rows = []
  row_depth = base_depth + 1
  start_line = cursor.current&.line_number || 0
  end_line = start_line

  while cursor.has_more_at_depth?(row_depth) && rows.length < header[:length]
    line = cursor.peek
    break unless line
    break if line.depth != row_depth
    break if line.content.start_with?(Constants::LIST_ITEM_PREFIX)

    values = Parser.parse_delimited_values(line.content, header[:delimiter])
    Validation.assert_expected_count(
      values.length,
      header[:fields].length,
      'tabular row values',
      strict: options[:strict]
    )
    rows << Parser.map_row_values_to_primitives(values)
    cursor.advance
    end_line = line.line_number
  end

  Validation.assert_expected_count(rows.length, header[:length], 'tabular rows', strict: options[:strict])
  Validation.validate_no_blank_lines_in_range(start_line, end_line, cursor.blank_lines, strict: options[:strict], context: 'tabular array')
  Validation.validate_no_extra_tabular_rows(cursor, row_depth, header) if options[:strict]

  rows.map { |primitives| header[:fields].zip(primitives).to_h }
end
|
|
173
|
+
|
|
174
|
+
# Consumes the next line from +cursor+ and decodes it as a single list item.
#
# A bare marker, or a prefix followed only by whitespace, yields an empty
# Hash. Otherwise the text after the prefix is tried as an array header,
# then as the first field of an object, and finally as a primitive.
#
# @return the decoded item
# @raise [Sorbet::Toon::DecodeError] when there is no next line or it does
#   not start with the list-item prefix
def decode_list_item(cursor, base_depth, options)
  item_line = cursor.next
  raise Sorbet::Toon::DecodeError, 'Expected list item' unless item_line

  text = item_line.content
  return {} if text == Constants::LIST_ITEM_MARKER

  prefix = Constants::LIST_ITEM_PREFIX
  unless text.start_with?(prefix)
    raise Sorbet::Toon::DecodeError, "Expected list item to start with \"#{Constants::LIST_ITEM_PREFIX}\""
  end

  payload = text[prefix.length..] || ''
  return {} if payload.strip.empty?

  nested_array = Parser.array_header_after_hyphen?(payload) &&
                 Parser.parse_array_header_line(payload, Constants::DEFAULT_DELIMITER)
  if nested_array
    return decode_array_from_header(nested_array[:header], nested_array[:inline_values], cursor, base_depth, options)
  end

  if Parser.object_first_field_after_hyphen?(payload)
    return decode_object_from_list_item(item_line, cursor, base_depth, options)
  end

  Parser.parse_primitive_token(payload)
end
|
|
200
|
+
|
|
201
|
+
# Builds an object whose first field sits on the list-item line itself.
#
# The text after the list-item prefix on +first_line+ supplies the first
# key/value pair; following lines at exactly the returned :follow_depth
# that do not start with the list-item prefix supply the remaining fields.
#
# @param first_line the already-consumed list-item line
# @return [Hash] decoded fields
def decode_object_from_list_item(first_line, cursor, base_depth, options)
  inline_field = first_line.content[Constants::LIST_ITEM_PREFIX.length..] || ''
  head = decode_key_value(inline_field, cursor, base_depth, options)

  sibling_depth = head[:follow_depth]
  fields = { head[:key] => head[:value] }

  until cursor.at_end?
    candidate = cursor.peek
    break unless candidate
    break if candidate.depth < sibling_depth
    break unless candidate.depth == sibling_depth
    break if candidate.content.start_with?(Constants::LIST_ITEM_PREFIX)

    name, value = decode_key_value_pair(candidate, cursor, sibling_depth, options)
    fields[name] = value
  end

  fields
end
|
|
223
|
+
end
|
|
224
|
+
end
|
|
225
|
+
end
|
|
226
|
+
end
|