lexer_kit 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +157 -0
  4. data/exe/lexer_kit +7 -0
  5. data/ext/lexer_kit_rust/Cargo.toml +17 -0
  6. data/ext/lexer_kit_rust/extconf.rb +6 -0
  7. data/ext/lexer_kit_rust/src/deserializer.rs +213 -0
  8. data/ext/lexer_kit_rust/src/dfa.rs +217 -0
  9. data/ext/lexer_kit_rust/src/fast_stream.rs +468 -0
  10. data/ext/lexer_kit_rust/src/lib.rs +248 -0
  11. data/ext/lexer_kit_rust/src/opcodes.rs +718 -0
  12. data/ext/lexer_kit_rust/src/safety_test.rs +498 -0
  13. data/ext/lexer_kit_rust/src/trie.rs +206 -0
  14. data/ext/lexer_kit_rust/src/types.rs +319 -0
  15. data/ext/lexer_kit_rust/src/vm.rs +258 -0
  16. data/lib/lexer_kit/builder/compiler.rb +596 -0
  17. data/lib/lexer_kit/builder/conflict_detector.rb +209 -0
  18. data/lib/lexer_kit/builder/mode_def.rb +36 -0
  19. data/lib/lexer_kit/builder/token_def.rb +65 -0
  20. data/lib/lexer_kit/builder/validator.rb +84 -0
  21. data/lib/lexer_kit/builder.rb +230 -0
  22. data/lib/lexer_kit/cli/commands.rb +389 -0
  23. data/lib/lexer_kit/cli.rb +88 -0
  24. data/lib/lexer_kit/core/diagnostic.rb +103 -0
  25. data/lib/lexer_kit/core/source.rb +154 -0
  26. data/lib/lexer_kit/core/span.rb +80 -0
  27. data/lib/lexer_kit/core/token.rb +120 -0
  28. data/lib/lexer_kit/core.rb +13 -0
  29. data/lib/lexer_kit/debug/disassembler.rb +143 -0
  30. data/lib/lexer_kit/debug/visualizer.rb +203 -0
  31. data/lib/lexer_kit/debug.rb +11 -0
  32. data/lib/lexer_kit/dfa/byte_class_builder.rb +69 -0
  33. data/lib/lexer_kit/dfa/case_folding.rb +45 -0
  34. data/lib/lexer_kit/dfa/char_class_collector.rb +81 -0
  35. data/lib/lexer_kit/dfa/dfa_builder.rb +95 -0
  36. data/lib/lexer_kit/dfa/dfa_minimizer.rb +158 -0
  37. data/lib/lexer_kit/dfa/nfa.rb +304 -0
  38. data/lib/lexer_kit/dfa/regex_ast.rb +64 -0
  39. data/lib/lexer_kit/dfa/regex_parser.rb +385 -0
  40. data/lib/lexer_kit/dfa/utf8_range.rb +175 -0
  41. data/lib/lexer_kit/dfa/utf8_range_pattern.rb +17 -0
  42. data/lib/lexer_kit/dfa.rb +37 -0
  43. data/lib/lexer_kit/errors.rb +76 -0
  44. data/lib/lexer_kit/format/lkb1/decoder.rb +126 -0
  45. data/lib/lexer_kit/format/lkb1.rb +199 -0
  46. data/lib/lexer_kit/format/lkt1.rb +111 -0
  47. data/lib/lexer_kit/format.rb +19 -0
  48. data/lib/lexer_kit/ir/compiled_program.rb +228 -0
  49. data/lib/lexer_kit/ir/constant_pool.rb +107 -0
  50. data/lib/lexer_kit/ir/dfa_table.rb +125 -0
  51. data/lib/lexer_kit/ir/instruction.rb +50 -0
  52. data/lib/lexer_kit/ir/jump_table.rb +94 -0
  53. data/lib/lexer_kit/ir/keyword_table.rb +168 -0
  54. data/lib/lexer_kit/ir/opcode.rb +96 -0
  55. data/lib/lexer_kit/ir/serializer.rb +249 -0
  56. data/lib/lexer_kit/ir.rb +16 -0
  57. data/lib/lexer_kit/runner.rb +114 -0
  58. data/lib/lexer_kit/trie.rb +170 -0
  59. data/lib/lexer_kit/version.rb +5 -0
  60. data/lib/lexer_kit.rb +155 -0
  61. metadata +119 -0
@@ -0,0 +1,319 @@
1
//! Core data structures for the LexerKit Rust VM
//!
//! These structures mirror the C implementation for binary compatibility.

/// DFA special values.
/// State 0 is reserved as the dead (failure) state; an accept-table entry of
/// 0xFFFF marks a state that accepts no token.
pub const DFA_DEAD_STATE: u16 = 0;
pub const DFA_NO_ACCEPT: u16 = 0xFFFF;

/// Maximum mode stack depth (size of `VmState::mode_stack`)
pub const MAX_MODE_STACK: usize = 64;

/// Maximum instructions executed per token (prevents infinite loops)
pub const MAX_STEPS_PER_TOKEN: u32 = 10_000;

/// Maximum consecutive zero-length tokens before halting
pub const MAX_ZERO_PROGRESS_TOKENS: u16 = 100;
18
/// VM return codes
///
/// Returned by opcode handlers to tell the main execution loop what to do
/// next (see `vm::step`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VmResult {
    /// Keep executing the next instruction
    Continue,
    /// A token is ready to be emitted
    Emit,
    /// Stop execution (EOF or error)
    Halt,
}
26
/// Emit result structure (filled by emit handlers)
#[derive(Debug, Clone, Copy, Default)]
pub struct EmitResult {
    /// ID of the emitted token
    pub token_id: u16,
    /// Byte offset where the token starts in the input
    pub start: usize,
    /// Token length in bytes
    pub length: usize,
}
34
/// Instruction: 4 bytes packed (1-byte opcode + 24-bit big-endian argument)
#[derive(Debug, Clone, Copy)]
pub struct Instruction {
    pub opcode: u8,
    pub arg_hi: u8,
    pub arg_mid: u8,
    pub arg_lo: u8,
}

impl Instruction {
    /// Extract the 24-bit argument from the instruction
    #[inline]
    pub fn arg(&self) -> u32 {
        ((self.arg_hi as u32) << 16) | ((self.arg_mid as u32) << 8) | (self.arg_lo as u32)
    }

    /// Create instruction from the first 4 bytes of `bytes`
    ///
    /// # Panics
    /// Panics if `bytes` holds fewer than 4 bytes. The original indexed the
    /// slice directly, which also panicked on short input but with an opaque
    /// out-of-bounds message; the explicit assert states the contract.
    pub fn from_bytes(bytes: &[u8]) -> Self {
        assert!(
            bytes.len() >= 4,
            "Instruction::from_bytes requires at least 4 bytes, got {}",
            bytes.len()
        );
        Self {
            opcode: bytes[0],
            arg_hi: bytes[1],
            arg_mid: bytes[2],
            arg_lo: bytes[3],
        }
    }
}
61
/// DFA table
#[derive(Debug, Clone)]
pub struct DfaTable {
    pub state_count: u16,
    pub class_count: u16,
    pub byte_class: Vec<u8>,     // 256 bytes
    pub transitions: Vec<u16>,   // state_count * class_count
    pub accept_tokens: Vec<u16>, // state_count entries, 0xFFFF = not accepting
}

impl DfaTable {
    /// Create a new empty DFA table: no states or classes yet, with an
    /// all-zero byte-class map covering every possible input byte.
    pub fn new() -> Self {
        DfaTable {
            state_count: 0,
            class_count: 0,
            byte_class: vec![0u8; 256],
            transitions: vec![],
            accept_tokens: vec![],
        }
    }
}

impl Default for DfaTable {
    fn default() -> Self {
        DfaTable::new()
    }
}
90
/// Jump table - dense 256-entry lookup for O(1) access
#[derive(Debug, Clone)]
pub struct JumpTable {
    pub lookup: [u32; 256],  // Direct byte -> offset mapping
    pub default_offset: u32, // Offset for EOF case
}

impl JumpTable {
    /// Build an all-zero jump table: every byte, and the EOF case, maps to
    /// instruction offset 0 until filled in by the deserializer.
    pub fn new() -> Self {
        JumpTable {
            lookup: [0u32; 256],
            default_offset: 0,
        }
    }
}

impl Default for JumpTable {
    fn default() -> Self {
        JumpTable::new()
    }
}
112
/// Constant pool entry
#[derive(Debug, Clone)]
pub struct ConstantEntry {
    pub data: Vec<u8>,
}

/// Constant pool
#[derive(Debug, Clone, Default)]
pub struct ConstantPool {
    pub entries: Vec<ConstantEntry>,
}

impl ConstantPool {
    /// Create an empty pool.
    pub fn new() -> Self {
        ConstantPool { entries: vec![] }
    }

    /// Get constant data by index; `None` when the index is out of range.
    #[inline]
    pub fn get(&self, index: usize) -> Option<&[u8]> {
        match self.entries.get(index) {
            Some(entry) => Some(entry.data.as_slice()),
            None => None,
        }
    }
}
138
/// Keyword table entry
#[derive(Debug, Clone)]
pub struct KeywordEntry {
    pub keyword: Vec<u8>,
    pub token_id: u16,
}

/// Keyword table
#[derive(Debug, Clone)]
pub struct KeywordTable {
    pub base_token_id: u16, // Token ID when no keyword matches
    pub entries: Vec<KeywordEntry>,
}

impl KeywordTable {
    /// Create an empty table that falls back to `base_token_id` when no
    /// keyword matches.
    pub fn new(base_token_id: u16) -> Self {
        KeywordTable {
            base_token_id,
            entries: vec![],
        }
    }
}
161
/// Mode entry - only offset needed at runtime
///
/// `start_offset` is the instruction index where this lexer mode's program
/// begins; `CompiledProgram::find_mode_offset` resolves a mode index to it.
#[derive(Debug, Clone, Copy)]
pub struct Mode {
    pub start_offset: u32,
}
167
+ /// Compiled program
168
+ #[derive(Debug, Clone)]
169
+ pub struct CompiledProgram {
170
+ pub instructions: Vec<Instruction>,
171
+ pub dfa_tables: Vec<DfaTable>,
172
+ pub jump_tables: Vec<JumpTable>,
173
+ pub keyword_tables: Vec<KeywordTable>,
174
+ pub constant_pool: ConstantPool,
175
+ pub modes: Vec<Mode>,
176
+ pub default_mode_offset: u32,
177
+ }
178
+
179
+ impl CompiledProgram {
180
+ pub fn new() -> Self {
181
+ Self {
182
+ instructions: Vec::new(),
183
+ dfa_tables: Vec::new(),
184
+ jump_tables: Vec::new(),
185
+ keyword_tables: Vec::new(),
186
+ constant_pool: ConstantPool::new(),
187
+ modes: Vec::new(),
188
+ default_mode_offset: 0,
189
+ }
190
+ }
191
+
192
+ /// Find mode offset by index
193
+ #[inline]
194
+ pub fn find_mode_offset(&self, mode_idx: u16) -> u32 {
195
+ let offset = if (mode_idx as usize) < self.modes.len() {
196
+ self.modes[mode_idx as usize].start_offset
197
+ } else {
198
+ self.default_mode_offset
199
+ };
200
+ // Ensure offset is within bounds
201
+ if (offset as usize) < self.instructions.len() {
202
+ offset
203
+ } else {
204
+ 0
205
+ }
206
+ }
207
+ }
208
+
209
+ impl Default for CompiledProgram {
210
+ fn default() -> Self {
211
+ Self::new()
212
+ }
213
+ }
214
+
215
/// VM execution state
///
/// Borrows the compiled program and the input bytes for the duration of a
/// tokenization run; all other fields are mutable scan state.
#[derive(Debug)]
pub struct VmState<'a> {
    pub prog: &'a CompiledProgram,
    pub bytes: &'a [u8],
    /// Current scan position (byte offset into `bytes`)
    pub pos: usize,
    /// Token start position (restored by `reset_pos_to_mark`)
    pub mark: usize,
    /// Most recent match recorded during the current token scan
    pub last_match_len: usize,
    pub last_match_order: u16,
    pub last_match_ip: u32,
    /// Best match so far; longer wins, ties broken by lower order
    /// (see `update_best_if_better`)
    pub best_match_len: usize,
    pub best_match_order: u16,
    pub best_match_ip: u32,
    pub ip: u32, // instruction pointer
    pub mode: u16,
    /// Pushed/popped mode indices; fixed-capacity (MAX_MODE_STACK)
    pub mode_stack: [u16; MAX_MODE_STACK],
    pub mode_sp: u8,
    pub failed: bool,
    pub pending_token_id: u16,
    pub has_pending_token: bool,
    // Zero-progress detection for infinite loop prevention
    pub last_emit_pos: usize,
    pub zero_progress_count: u16,
    // Error message (set on runtime errors like stack overflow)
    pub error: Option<&'static str>,
}
242
+ impl<'a> VmState<'a> {
243
+ /// Initialize VM state
244
+ pub fn new(prog: &'a CompiledProgram, bytes: &'a [u8]) -> Self {
245
+ let ip = if (prog.default_mode_offset as usize) < prog.instructions.len() {
246
+ prog.default_mode_offset
247
+ } else {
248
+ 0
249
+ };
250
+
251
+ Self {
252
+ prog,
253
+ bytes,
254
+ pos: 0,
255
+ mark: 0,
256
+ last_match_len: 0,
257
+ last_match_order: 0xFFFF,
258
+ last_match_ip: 0,
259
+ best_match_len: 0,
260
+ best_match_order: 0xFFFF,
261
+ best_match_ip: 0,
262
+ ip,
263
+ mode: 0,
264
+ mode_stack: [0; MAX_MODE_STACK],
265
+ mode_sp: 0,
266
+ failed: false,
267
+ pending_token_id: 0,
268
+ has_pending_token: false,
269
+ last_emit_pos: 0,
270
+ zero_progress_count: 0,
271
+ error: None,
272
+ }
273
+ }
274
+
275
+ /// Get remaining bytes from current position
276
+ #[inline]
277
+ pub fn remaining_bytes(&self) -> &'a [u8] {
278
+ &self.bytes[self.pos..]
279
+ }
280
+
281
+ /// Check if at end of input
282
+ #[inline]
283
+ pub fn is_eof(&self) -> bool {
284
+ self.pos >= self.bytes.len()
285
+ }
286
+
287
+ /// Get current byte (if available)
288
+ #[inline]
289
+ pub fn current_byte(&self) -> Option<u8> {
290
+ self.bytes.get(self.pos).copied()
291
+ }
292
+
293
+ /// Update best match if current last_match is better (longer, or same length with earlier order)
294
+ #[inline]
295
+ pub fn update_best_if_better(&mut self) {
296
+ if self.last_match_len > self.best_match_len
297
+ || (self.last_match_len == self.best_match_len
298
+ && self.last_match_order < self.best_match_order)
299
+ {
300
+ self.best_match_len = self.last_match_len;
301
+ self.best_match_order = self.last_match_order;
302
+ self.best_match_ip = self.last_match_ip;
303
+ }
304
+ }
305
+
306
+ /// Clear best match tracking state
307
+ #[inline]
308
+ pub fn clear_best(&mut self) {
309
+ self.best_match_len = 0;
310
+ self.best_match_order = 0xFFFF;
311
+ self.best_match_ip = 0;
312
+ }
313
+
314
+ /// Reset position to mark
315
+ #[inline]
316
+ pub fn reset_pos_to_mark(&mut self) {
317
+ self.pos = self.mark;
318
+ }
319
+ }
@@ -0,0 +1,258 @@
1
+ //! VM execution loop
2
+ //!
3
+ //! This module implements the main VM execution logic.
4
+
5
+ use crate::opcodes::{self, OP_HALT};
6
+ use crate::types::{CompiledProgram, EmitResult, VmResult, VmState, MAX_STEPS_PER_TOKEN};
7
+
8
+ /// Reserved token IDs (must match lib/lexer_kit.rb)
9
+ /// - 0: Internal sentinel (never emitted)
10
+ /// - 1: INVALID (error token)
11
+ /// - 2-7: Reserved for future use
12
+ /// - 8+: User-defined tokens
13
+ const INVALID_TOKEN_ID: u16 = 1;
14
+ const FIRST_USER_TOKEN_ID: u16 = 8;
15
+
16
+ /// Check if a token_id is valid for emission
17
+ /// Valid tokens are: INVALID (1) for error tokens, or user tokens (>= 8)
18
+ #[inline]
19
+ fn is_valid_token_id(token_id: u16) -> bool {
20
+ token_id == INVALID_TOKEN_ID || token_id >= FIRST_USER_TOKEN_ID
21
+ }
22
+
23
+ /// VM step result
24
+ #[derive(Debug, Clone, Copy)]
25
+ pub enum StepResult {
26
+ /// Token was emitted
27
+ Emit(EmitResult),
28
+ /// Execution halted (EOF or error)
29
+ Halt,
30
+ }
31
+
32
+ /// Execute one token step of the VM
33
+ ///
34
+ /// This function executes instructions until a token is emitted or the VM halts.
35
+ pub fn step(vm: &mut VmState) -> StepResult {
36
+ let mut step_count = 0u32;
37
+
38
+ while (vm.ip as usize) < vm.prog.instructions.len() {
39
+ let instr = &vm.prog.instructions[vm.ip as usize];
40
+ let arg = instr.arg();
41
+ let opcode = instr.opcode;
42
+
43
+ step_count += 1;
44
+ if step_count > MAX_STEPS_PER_TOKEN {
45
+ return StepResult::Halt;
46
+ }
47
+
48
+ // Handle OP_HALT specially
49
+ if opcode == OP_HALT {
50
+ return StepResult::Halt;
51
+ }
52
+
53
+ let (result, emit) = opcodes::execute(vm, opcode, arg);
54
+
55
+ match result {
56
+ VmResult::Emit => {
57
+ if let Some(emit_result) = emit {
58
+ return StepResult::Emit(emit_result);
59
+ }
60
+ }
61
+ VmResult::Halt => {
62
+ return StepResult::Halt;
63
+ }
64
+ VmResult::Continue => {}
65
+ }
66
+ }
67
+
68
+ StepResult::Halt
69
+ }
70
+
71
+ /// Collect all tokens from input
72
+ ///
73
+ /// This is the batch collection function that avoids Ruby boundary crossings.
74
+ /// Returns Err with error message if VM encounters a runtime error.
75
+ pub fn collect_tokens(
76
+ prog: &CompiledProgram,
77
+ bytes: &[u8],
78
+ ) -> Result<Vec<(u16, usize, usize)>, &'static str> {
79
+ let mut vm = VmState::new(prog, bytes);
80
+ let mut tokens = Vec::new();
81
+
82
+ loop {
83
+ match step(&mut vm) {
84
+ StepResult::Emit(emit) => {
85
+ // Filter tokens:
86
+ // - Must have length > 0 (skip zero-length tokens)
87
+ // - Must have valid token_id (INVALID=1 or user tokens >= 8)
88
+ // This guards against malformed serialized data with reserved token IDs
89
+ if emit.length > 0 && is_valid_token_id(emit.token_id) {
90
+ tokens.push((emit.token_id, emit.start, emit.length));
91
+ }
92
+ }
93
+ StepResult::Halt => {
94
+ // Check for error
95
+ if let Some(err) = vm.error {
96
+ return Err(err);
97
+ }
98
+ break;
99
+ }
100
+ }
101
+ }
102
+
103
+ Ok(tokens)
104
+ }
105
+
106
+ /// Token iterator for streaming tokenization
107
+ pub struct TokenIterator<'a> {
108
+ vm: VmState<'a>,
109
+ done: bool,
110
+ }
111
+
112
+ impl<'a> TokenIterator<'a> {
113
+ #[allow(dead_code)] // Used in tests
114
+ pub fn new(prog: &'a CompiledProgram, bytes: &'a [u8]) -> Self {
115
+ Self {
116
+ vm: VmState::new(prog, bytes),
117
+ done: false,
118
+ }
119
+ }
120
+ }
121
+
122
+ impl<'a> Iterator for TokenIterator<'a> {
123
+ type Item = (u16, usize, usize);
124
+
125
+ fn next(&mut self) -> Option<Self::Item> {
126
+ if self.done {
127
+ return None;
128
+ }
129
+
130
+ loop {
131
+ match step(&mut self.vm) {
132
+ StepResult::Emit(emit) => {
133
+ // Filter tokens:
134
+ // - Must have length > 0 (skip zero-length tokens)
135
+ // - Must have valid token_id (INVALID=1 or user tokens >= 8)
136
+ // This guards against malformed serialized data with reserved token IDs
137
+ if emit.length > 0 && is_valid_token_id(emit.token_id) {
138
+ return Some((emit.token_id, emit.start, emit.length));
139
+ }
140
+ }
141
+ StepResult::Halt => {
142
+ self.done = true;
143
+ return None;
144
+ }
145
+ }
146
+ }
147
+ }
148
+ }
149
+
150
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{DfaTable, Instruction, Mode};

    /// Pack an opcode and a 24-bit argument into an Instruction.
    fn make_instr(opcode: u8, arg: u32) -> Instruction {
        Instruction {
            opcode,
            arg_hi: ((arg >> 16) & 0xFF) as u8,
            arg_mid: ((arg >> 8) & 0xFF) as u8,
            arg_lo: (arg & 0xFF) as u8,
        }
    }

    /// Build a minimal program that repeatedly matches "a" and emits token 1.
    fn create_simple_program() -> CompiledProgram {
        let mut prog = CompiledProgram::new();

        // DFA that matches exactly one 'a'.
        let mut dfa = DfaTable::new();
        dfa.state_count = 3;
        dfa.class_count = 2;
        dfa.byte_class = vec![0; 256];
        dfa.byte_class[b'a' as usize] = 1; // 'a' is class 1
        // transitions[state * class_count + class]:
        // state 0 is dead, state 1 is start, state 2 is accept.
        dfa.transitions = vec![
            0, 0, // state 0 (dead)
            0, 2, // state 1: class 0 -> dead, class 1 ('a') -> state 2
            0, 0, // state 2 (accept): all -> dead
        ];
        dfa.accept_tokens = vec![
            0xFFFF, // state 0: not accepting
            0xFFFF, // state 1: not accepting
            1,      // state 2: token 1
        ];
        prog.dfa_tables.push(dfa);

        // Program:
        //   0: MARK
        //   1: DFA_RUN_IF_MATCH (dfa=0, fail -> 4)
        //   2: EMIT 1
        //   3: JUMP 0 (loop)
        //   4: HALT
        prog.instructions = vec![
            make_instr(opcodes::OP_MARK, 0),
            make_instr(opcodes::OP_DFA_RUN_IF_MATCH, 4),
            make_instr(opcodes::OP_EMIT, 1),
            make_instr(opcodes::OP_JUMP, 0),
            make_instr(opcodes::OP_HALT, 0),
        ];

        prog.modes.push(Mode { start_offset: 0 });
        prog.default_mode_offset = 0;

        prog
    }

    #[test]
    fn test_vm_single_token() {
        let prog = create_simple_program();
        let tokens = collect_tokens(&prog, b"a").unwrap();
        assert_eq!(tokens, vec![(1, 0, 1)]);
    }

    #[test]
    fn test_vm_multiple_tokens() {
        let prog = create_simple_program();
        let tokens = collect_tokens(&prog, b"aaa").unwrap();
        assert_eq!(tokens, vec![(1, 0, 1), (1, 1, 1), (1, 2, 1)]);
    }

    #[test]
    fn test_vm_no_match() {
        let prog = create_simple_program();
        assert!(collect_tokens(&prog, b"b").unwrap().is_empty());
    }

    #[test]
    fn test_token_iterator() {
        let prog = create_simple_program();
        let tokens: Vec<_> = TokenIterator::new(&prog, b"aa").collect();
        assert_eq!(tokens, vec![(1, 0, 1), (1, 1, 1)]);
    }
}