rich-ruby 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1145 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "style"
4
+ require_relative "segment"
5
+ require_relative "text"
6
+ require_relative "panel"
7
+
8
+ module Rich
9
+ # Syntax highlighting for source code.
10
+ # Provides token-based syntax highlighting for multiple programming languages.
11
+ class Syntax
12
+ # Default theme for syntax highlighting
13
+ DEFAULT_THEME = {
14
+ # Keywords
15
+ keyword: Style.new(color: Color.parse("magenta"), bold: true),
16
+ keyword_constant: Style.new(color: Color.parse("cyan"), bold: true),
17
+ keyword_declaration: Style.new(color: Color.parse("magenta"), bold: true),
18
+ keyword_namespace: Style.new(color: Color.parse("magenta"), bold: true),
19
+ keyword_type: Style.new(color: Color.parse("cyan")),
20
+
21
+ # Names
22
+ name: Style.new(color: Color.parse("white")),
23
+ name_builtin: Style.new(color: Color.parse("cyan")),
24
+ name_class: Style.new(color: Color.parse("green"), bold: true),
25
+ name_constant: Style.new(color: Color.parse("cyan")),
26
+ name_decorator: Style.new(color: Color.parse("bright_magenta")),
27
+ name_exception: Style.new(color: Color.parse("green"), bold: true),
28
+ name_function: Style.new(color: Color.parse("green")),
29
+ name_variable: Style.new(color: Color.parse("white")),
30
+ name_tag: Style.new(color: Color.parse("bright_magenta")),
31
+ name_attribute: Style.new(color: Color.parse("yellow")),
32
+
33
+ # Literals
34
+ string: Style.new(color: Color.parse("yellow")),
35
+ string_doc: Style.new(color: Color.parse("yellow"), italic: true),
36
+ string_escape: Style.new(color: Color.parse("bright_magenta")),
37
+ string_interpol: Style.new(color: Color.parse("bright_magenta")),
38
+ string_regex: Style.new(color: Color.parse("bright_yellow")),
39
+ string_symbol: Style.new(color: Color.parse("bright_green")),
40
+
41
+ number: Style.new(color: Color.parse("cyan")),
42
+ number_float: Style.new(color: Color.parse("cyan")),
43
+ number_hex: Style.new(color: Color.parse("cyan")),
44
+
45
+ # Operators and Punctuation
46
+ operator: Style.new(color: Color.parse("bright_magenta")),
47
+ punctuation: Style.new(color: Color.parse("white")),
48
+
49
+ # Comments
50
+ comment: Style.new(color: Color.parse("bright_black"), italic: true),
51
+ comment_doc: Style.new(color: Color.parse("bright_black"), italic: true),
52
+ comment_preproc: Style.new(color: Color.parse("bright_magenta")),
53
+
54
+ # Generic
55
+ generic_deleted: Style.new(color: Color.parse("red")),
56
+ generic_inserted: Style.new(color: Color.parse("green")),
57
+ generic_heading: Style.new(color: Color.parse("bright_blue"), bold: true),
58
+ generic_subheading: Style.new(color: Color.parse("bright_blue")),
59
+ generic_error: Style.new(color: Color.parse("bright_red")),
60
+
61
+ # Other
62
+ text: Style.new,
63
+ error: Style.new(color: Color.parse("bright_red"), bold: true)
64
+ }.freeze
65
+
66
+ # Monokai theme
67
+ MONOKAI_THEME = {
68
+ keyword: Style.new(color: Color.parse("#f92672"), bold: true),
69
+ keyword_constant: Style.new(color: Color.parse("#ae81ff")),
70
+ keyword_type: Style.new(color: Color.parse("#66d9ef"), italic: true),
71
+ name: Style.new(color: Color.parse("#f8f8f2")),
72
+ name_builtin: Style.new(color: Color.parse("#66d9ef")),
73
+ name_class: Style.new(color: Color.parse("#a6e22e")),
74
+ name_function: Style.new(color: Color.parse("#a6e22e")),
75
+ name_decorator: Style.new(color: Color.parse("#a6e22e")),
76
+ string: Style.new(color: Color.parse("#e6db74")),
77
+ string_doc: Style.new(color: Color.parse("#e6db74")),
78
+ number: Style.new(color: Color.parse("#ae81ff")),
79
+ operator: Style.new(color: Color.parse("#f92672")),
80
+ comment: Style.new(color: Color.parse("#75715e"), italic: true),
81
+ punctuation: Style.new(color: Color.parse("#f8f8f2")),
82
+ text: Style.new(color: Color.parse("#f8f8f2")),
83
+ error: Style.new(color: Color.parse("#f92672"), bold: true)
84
+ }.freeze
85
+
86
+ # Dracula theme
87
+ DRACULA_THEME = {
88
+ keyword: Style.new(color: Color.parse("#ff79c6"), bold: true),
89
+ keyword_constant: Style.new(color: Color.parse("#bd93f9")),
90
+ keyword_type: Style.new(color: Color.parse("#8be9fd"), italic: true),
91
+ name: Style.new(color: Color.parse("#f8f8f2")),
92
+ name_builtin: Style.new(color: Color.parse("#8be9fd")),
93
+ name_class: Style.new(color: Color.parse("#50fa7b")),
94
+ name_function: Style.new(color: Color.parse("#50fa7b")),
95
+ name_decorator: Style.new(color: Color.parse("#50fa7b")),
96
+ string: Style.new(color: Color.parse("#f1fa8c")),
97
+ string_doc: Style.new(color: Color.parse("#6272a4")),
98
+ number: Style.new(color: Color.parse("#bd93f9")),
99
+ operator: Style.new(color: Color.parse("#ff79c6")),
100
+ comment: Style.new(color: Color.parse("#6272a4"), italic: true),
101
+ punctuation: Style.new(color: Color.parse("#f8f8f2")),
102
+ text: Style.new(color: Color.parse("#f8f8f2")),
103
+ error: Style.new(color: Color.parse("#ff5555"), bold: true)
104
+ }.freeze
105
+
106
+ THEMES = {
107
+ default: DEFAULT_THEME,
108
+ monokai: MONOKAI_THEME,
109
+ dracula: DRACULA_THEME
110
+ }.freeze
111
+
112
+ # @return [String] Source code
113
+ attr_reader :code
114
+
115
+ # @return [String] Language name
116
+ attr_reader :language
117
+
118
+ # @return [Hash] Theme styles
119
+ attr_reader :theme
120
+
121
+ # @return [Boolean] Show line numbers
122
+ attr_reader :line_numbers
123
+
124
+ # @return [Integer, nil] Starting line number
125
+ attr_reader :start_line
126
+
127
+ # @return [Array<Integer>, nil] Lines to highlight
128
+ attr_reader :highlight_lines
129
+
130
+ # @return [Boolean] Word wrap
131
+ attr_reader :word_wrap
132
+
133
+ # @return [Style, nil] Background style
134
+ attr_reader :background_style
135
+
136
+ # @return [Integer] Tab size
137
+ attr_reader :tab_size
138
+
139
# Create a syntax renderable for a piece of source code.
#
# @param code [#to_s] Source code to highlight
# @param language [#to_s] Language name; stored lower-cased
# @param theme [Symbol, String, Hash] Name of a built-in theme (a key of THEMES,
#   given as Symbol or String, case-insensitive) or a custom Hash of
#   token type => Style. Unknown names fall back to DEFAULT_THEME.
# @param line_numbers [Boolean] Show a line-number gutter
# @param start_line [Integer] Number given to the first line
# @param highlight_lines [Array<Integer>, nil] Line numbers to emphasize
# @param word_wrap [Boolean] Word-wrap flag (only stored here)
# @param background_style [Style, nil] Background style (only stored here)
# @param tab_size [Integer] Number of spaces each tab is expanded to
def initialize(
  code,
  language: "text",
  theme: :default,
  line_numbers: false,
  start_line: 1,
  highlight_lines: nil,
  word_wrap: false,
  background_style: nil,
  tab_size: 4
)
  @code = code.to_s
  @language = language.to_s.downcase
  @theme = if theme.is_a?(Hash)
             theme
           else
             # THEMES is keyed by lower-case Symbols; normalizing the name lets
             # "monokai" and :Monokai select the same theme as :monokai.
             THEMES[theme.to_s.downcase.to_sym] || DEFAULT_THEME
           end
  @line_numbers = line_numbers
  @start_line = start_line
  @highlight_lines = highlight_lines
  @word_wrap = word_wrap
  @background_style = background_style
  @tab_size = tab_size
end
160
+
161
# Highlight the code and return it as a flat list of segments.
#
# Tabs are expanded to @tab_size spaces and the code is split on "\n"
# (keeping trailing empty lines). Every line optionally starts with a
# right-aligned line number and a " │ " separator; lines listed in
# @highlight_lines get a background style merged into each of their
# segments. A "\n" segment is emitted between lines, not after the last.
#
# @return [Array<Segment>]
def to_segments
  lines = @code.gsub("\t", " " * @tab_size).split("\n", -1)

  # The start_line reader is documented as "Integer, nil"; treat nil as 1
  # instead of failing on nil + Integer.
  first_number = @start_line || 1
  gutter_width = (first_number + lines.length - 1).to_s.length
  last_index = lines.length - 1

  # These styles are the same for every line, so build them once per call
  # rather than once per line.
  if @line_numbers
    gutter_style = Style.new(color: Color.parse("bright_black"))
    marked_gutter_style = Style.new(color: Color.parse("yellow"), bold: true)
  end
  marked_bg_style = nil

  segments = []
  lines.each_with_index do |line, index|
    line_num = first_number + index
    is_highlighted = @highlight_lines&.include?(line_num)

    if @line_numbers
      num_style = is_highlighted ? marked_gutter_style : gutter_style
      segments << Segment.new(line_num.to_s.rjust(gutter_width), style: num_style)
      segments << Segment.new(" │ ", style: gutter_style)
    end

    if is_highlighted
      # Built lazily: only needed when at least one line is highlighted.
      marked_bg_style ||= Style.new(bgcolor: Color.parse("color(237)"))
      highlight_line(line).each do |seg|
        combined_style = seg.style ? seg.style + marked_bg_style : marked_bg_style
        segments << Segment.new(seg.text, style: combined_style)
      end
    else
      segments.concat(highlight_line(line))
    end

    segments << Segment.new("\n") if index < last_index
  end

  segments
end
197
+
198
# Tokenize one line with the lexer selected for this instance's language.
#
# @param line [String] Line to highlight
# @return [Array<Segment>] Whatever the lexer's #tokenize returns for the line
def highlight_line(line)
  get_lexer(@language).tokenize(line, @theme)
end
205
+
206
# Produce the highlighted code by passing #to_segments to Segment.render.
#
# @param color_system [Symbol] Color system handed through to Segment.render
# @return [String]
def render(color_system: ColorSystem::TRUECOLOR)
  highlighted = to_segments
  Segment.render(highlighted, color_system: color_system)
end
212
+
213
# Render the highlighted code inside a Panel.
#
# @param title [String, nil] Panel title; defaults to the capitalized language name
# @param max_width [Integer] Passed to Panel#render as max_width
# @return [String] Result of Panel#render
def to_panel(title: nil, max_width: 80)
  heading = title || @language.capitalize
  Panel
    .new(render, title: heading, border_style: "dim", padding: 0)
    .render(max_width: max_width)
end
226
+
227
class << self
  # Create a syntax object from a file on disk.
  #
  # @param path [String] File path
  # @param kwargs [Hash] Options forwarded to .new; an explicit :language
  #   takes precedence over detection from the file name
  # @return [Syntax]
  def from_file(path, **kwargs)
    code = File.read(path)
    language = kwargs.delete(:language) || detect_language(path)
    new(code, language: language, **kwargs)
  end

  # Detect the language from a file name.
  #
  # The lower-cased extension is looked up in EXTENSION_MAP. A file with no
  # extension (for example "Dockerfile") is looked up by its whole lower-cased
  # name instead, so entries such as "dockerfile" can match. Dotfiles are not
  # looked up by name.
  #
  # @param path [String] File path
  # @return [String] Language name, or "text" when nothing matches
  def detect_language(path)
    file_name = File.basename(path).downcase
    lookup_key = File.extname(file_name).delete(".")
    lookup_key = file_name if lookup_key.empty? && !file_name.start_with?(".")
    EXTENSION_MAP[lookup_key] || "text"
  end

  # List the language names registered in LEXERS.
  #
  # @return [Array<String>] Sorted language names
  def supported_languages
    LEXERS.keys.sort
  end
end
252
+
253
+ private
254
+
255
# Look up the lexer registered for a language name.
#
# @param language [String] Language name as stored in LEXERS
# @return [BaseLexer] The registered lexer, or the "text" lexer when none is registered
def get_lexer(language)
  registered = LEXERS[language]
  return registered if registered

  LEXERS["text"]
end
258
+
259
+ # File extension to language mapping
260
+ EXTENSION_MAP = {
261
+ "rb" => "ruby",
262
+ "py" => "python",
263
+ "js" => "javascript",
264
+ "ts" => "typescript",
265
+ "jsx" => "javascript",
266
+ "tsx" => "typescript",
267
+ "json" => "json",
268
+ "yml" => "yaml",
269
+ "yaml" => "yaml",
270
+ "xml" => "xml",
271
+ "html" => "html",
272
+ "htm" => "html",
273
+ "css" => "css",
274
+ "scss" => "scss",
275
+ "sass" => "sass",
276
+ "sql" => "sql",
277
+ "sh" => "bash",
278
+ "bash" => "bash",
279
+ "zsh" => "bash",
280
+ "ps1" => "powershell",
281
+ "c" => "c",
282
+ "h" => "c",
283
+ "cpp" => "cpp",
284
+ "hpp" => "cpp",
285
+ "cc" => "cpp",
286
+ "go" => "go",
287
+ "rs" => "rust",
288
+ "java" => "java",
289
+ "kt" => "kotlin",
290
+ "swift" => "swift",
291
+ "md" => "markdown",
292
+ "markdown" => "markdown",
293
+ "dockerfile" => "dockerfile",
294
+ "toml" => "toml",
295
+ "ini" => "ini",
296
+ "conf" => "ini",
297
+ "txt" => "text"
298
+ }.freeze
299
+ end
300
+
301
# Base class of the language lexers. On its own it performs no tokenization.
class BaseLexer
  # Wrap the whole line in one segment styled with the theme's :text entry.
  #
  # @param line [String] Line of source text
  # @param theme [Hash] Token type => Style mapping
  # @return [Array<Segment>] Single-element array
  def tokenize(line, theme)
    whole_line = Segment.new(line, style: theme[:text])
    [whole_line]
  end
end
307
+
308
+ # Ruby lexer
309
+ class RubyLexer < BaseLexer
310
+ KEYWORDS = %w[
311
+ def class module end if else elsif unless case when then
312
+ begin rescue ensure raise return yield do while until for
313
+ break next redo retry in and or not alias defined? super
314
+ self nil true false __FILE__ __LINE__ __ENCODING__
315
+ require require_relative include extend prepend attr_reader
316
+ attr_writer attr_accessor private protected public
317
+ lambda proc loop catch throw
318
+ ].freeze
319
+
320
+ BUILTINS = %w[
321
+ puts print p pp gets chomp to_s to_i to_f to_a to_h length
322
+ size each map select reject find reduce inject sort sort_by
323
+ uniq compact flatten reverse join split push pop shift unshift
324
+ first last min max sum count empty? nil? is_a? kind_of?
325
+ respond_to? send __send__ method methods instance_variables
326
+ class superclass ancestors included_modules freeze frozen?
327
+ dup clone tap then yield_self itself inspect
328
+ ].freeze
329
+
330
# Split one line of Ruby source into styled segments.
#
# At each position the branches below are tried in order; any character that
# matches none of them is emitted as an unstyled one-character segment. No
# state is carried between lines, so constructs spanning several lines are
# not recognized.
#
# @param line [String] One line of source (without the trailing newline)
# @param theme [Hash] Token type => Style mapping
# @return [Array<Segment>]
def tokenize(line, theme)
  segments = []
  pos = 0

  while pos < line.length
    char = line[pos]

    # Whitespace run, emitted unstyled
    if char.match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # Comment: everything from "#" to the end of the line
    if char == "#"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # String (double or single quoted)
    if char == '"' || char == "'"
      str_end = find_string_end(line, pos, char)
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # Regex literal: a "/" at line start or after whitespace, "=", "(", "{" or ","
    if char == "/" && (pos == 0 || line[pos - 1].match?(/[\s=({,]/))
      regex_end = find_string_end(line, pos, "/")
      # find_string_end returns the last index of the line when there is no
      # closing delimiter. Only a real closing "/" makes this a regex;
      # otherwise (e.g. "a / b") fall through so "/" is lexed as an operator
      # instead of swallowing the rest of the line.
      if regex_end > pos && line[regex_end] == "/"
        segments << Segment.new(line[pos..regex_end], style: theme[:string_regex])
        pos = regex_end + 1
        next
      end
    end

    # Symbol (":" followed by an identifier)
    if char == ":" && pos + 1 < line.length && line[pos + 1].match?(/[a-zA-Z_]/)
      sym_end = pos + 1
      sym_end += 1 while sym_end < line.length && line[sym_end].match?(/\w/)
      segments << Segment.new(line[pos...sym_end], style: theme[:string_symbol] || theme[:string])
      pos = sym_end
      next
    end

    # Number
    if char.match?(/\d/)
      num_end = pos
      if line[pos, 2].match?(/\A0[xX]\z/)
        # Hex literal: consume every hex digit so 0xFF stays one token.
        num_end += 2
        num_end += 1 while num_end < line.length && line[num_end].match?(/[\h_]/)
        num_style = theme[:number_hex] || theme[:number]
      else
        num_end += 1 while num_end < line.length && line[num_end].match?(/[\d._xXoObB]/)
        num_style = theme[:number]
      end
      segments << Segment.new(line[pos...num_end], style: num_style)
      pos = num_end
      next
    end

    # Instance variable (@x) or class variable (@@x)
    if char == "@"
      var_end = pos + 1
      var_end += 1 if var_end < line.length && line[var_end] == "@"
      var_end += 1 while var_end < line.length && line[var_end].match?(/\w/)
      segments << Segment.new(line[pos...var_end], style: theme[:name_variable] || theme[:name])
      pos = var_end
      next
    end

    # Global variable
    if char == "$"
      var_end = pos + 1
      var_end += 1 while var_end < line.length && line[var_end].match?(/\w/)
      segments << Segment.new(line[pos...var_end], style: theme[:name_variable] || theme[:name])
      pos = var_end
      next
    end

    # Constant/Class name (starts with an upper-case ASCII letter)
    if char.match?(/[A-Z]/)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/\w/)
      word = line[pos...word_end]
      style = if %w[true false nil].include?(word.downcase)
                theme[:keyword_constant] || theme[:keyword]
              else
                theme[:name_class] || theme[:name]
              end
      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Identifier/Keyword (upper-case starts were handled above)
    if char.match?(/[a-z_]/)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/[\w?!]/)
      word = line[pos...word_end]

      style = if KEYWORDS.include?(word)
                theme[:keyword]
              elsif BUILTINS.include?(word)
                theme[:name_builtin] || theme[:name]
              else
                theme[:name]
              end

      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Operator run
    if char.match?(/[+\-*\/%&|^~<>=!?:]/)
      op_end = pos + 1
      op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%&|^~<>=!?:]/)
      segments << Segment.new(line[pos...op_end], style: theme[:operator])
      pos = op_end
      next
    end

    # Punctuation (one character per segment)
    if char.match?(/[(){}\[\].,;]/)
      segments << Segment.new(char, style: theme[:punctuation])
      pos += 1
      next
    end

    # Default: unstyled single character
    segments << Segment.new(char)
    pos += 1
  end

  segments
end
471
+
472
+ private
473
+
474
# Find the index of the delimiter that closes a literal opened at +start+.
#
# A backslash escapes the character that follows it, so an escaped
# delimiter (\") does not close the literal, while a delimiter preceded by
# an escaped backslash (\\") does.
#
# @param line [String] Line being scanned
# @param start [Integer] Index of the opening delimiter
# @param delimiter [String] Closing delimiter character
# @return [Integer] Index of the closing delimiter, or the last index of
#   the line when the literal is not closed on this line
def find_string_end(line, start, delimiter)
  pos = start + 1
  while pos < line.length
    if line[pos] == "\\"
      pos += 2 # skip the backslash and the character it escapes
      next
    end
    return pos if line[pos] == delimiter

    pos += 1
  end
  line.length - 1
end
483
+ end
484
+
485
+ # Python lexer
486
+ class PythonLexer < BaseLexer
487
+ KEYWORDS = %w[
488
+ and as assert async await break class continue def del elif else
489
+ except finally for from global if import in is lambda None nonlocal
490
+ not or pass raise return try while with yield True False
491
+ ].freeze
492
+
493
+ BUILTINS = %w[
494
+ abs all any ascii bin bool breakpoint bytearray bytes callable
495
+ chr classmethod compile complex delattr dict dir divmod enumerate
496
+ eval exec filter float format frozenset getattr globals hasattr
497
+ hash help hex id input int isinstance issubclass iter len list
498
+ locals map max memoryview min next object oct open ord pow print
499
+ property range repr reversed round set setattr slice sorted
500
+ staticmethod str sum super tuple type vars zip
501
+ ].freeze
502
+
503
# Split one line of Python source into styled segments.
#
# At each position the branches below are tried in order; any character that
# matches none of them is emitted as an unstyled one-character segment. No
# state is carried between lines, so a triple-quoted string is only
# recognized on the line where it opens.
#
# @param line [String] One line of source (without the trailing newline)
# @param theme [Hash] Token type => Style mapping
# @return [Array<Segment>]
def tokenize(line, theme)
  segments = []
  pos = 0

  while pos < line.length
    char = line[pos]

    # Whitespace run, emitted unstyled
    if char.match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # Comment: everything from "#" to the end of the line
    if char == "#"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # Docstring/String opened with a triple quote
    triple = line[pos, 3]
    if triple == '"""' || triple == "'''"
      closing = line.index(triple, pos + 3)
      str_end = closing ? closing + 2 : line.length - 1
      segments << Segment.new(line[pos..str_end], style: theme[:string_doc] || theme[:string])
      pos = str_end + 1
      next
    end

    # String
    if char == '"' || char == "'"
      str_end = find_string_end(line, pos, char)
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # Number
    if char.match?(/\d/)
      num_end = pos
      num_style = theme[:number]
      if line[pos, 2].match?(/\A0[xX]\z/)
        # Hex literal: consume every hex digit so 0xFF stays one token.
        num_end += 2
        num_end += 1 while num_end < line.length && line[num_end].match?(/[\h_]/)
        num_style = theme[:number_hex] || theme[:number]
      else
        while num_end < line.length
          current = line[num_end]
          if current.match?(/[\d._xXoObBeE]/)
            num_end += 1
          elsif current.match?(/[+\-]/) && line[num_end - 1].match?(/[eE]/) &&
                num_end + 1 < line.length && line[num_end + 1].match?(/\d/)
            # A sign belongs to the number only as part of an exponent
            # (1e-5); in "1+2" it is an operator.
            num_end += 1
          else
            break
          end
        end
      end
      segments << Segment.new(line[pos...num_end], style: num_style)
      pos = num_end
      next
    end

    # Decorator: "@" directly followed by a name. A bare "@" (matrix
    # multiplication, "@=") falls through to the operator branch.
    if char == "@" && pos + 1 < line.length && line[pos + 1].match?(/[a-zA-Z_]/)
      dec_end = pos + 1
      dec_end += 1 while dec_end < line.length && line[dec_end].match?(/[\w.]/)
      segments << Segment.new(line[pos...dec_end], style: theme[:name_decorator] || theme[:name])
      pos = dec_end
      next
    end

    # Identifier
    if char.match?(/[a-zA-Z_]/)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/\w/)
      word = line[pos...word_end]

      style = if KEYWORDS.include?(word)
                theme[:keyword]
              elsif BUILTINS.include?(word)
                theme[:name_builtin] || theme[:name]
              elsif word[0].match?(/[A-Z]/)
                theme[:name_class] || theme[:name]
              else
                theme[:name]
              end

      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Operator run
    if char.match?(/[+\-*\/%&|^~<>=!@]/)
      op_end = pos + 1
      op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%&|^~<>=!@]/)
      segments << Segment.new(line[pos...op_end], style: theme[:operator])
      pos = op_end
      next
    end

    # Punctuation (one character per segment)
    if char.match?(/[(){}\[\].,;:]/)
      segments << Segment.new(char, style: theme[:punctuation])
      pos += 1
      next
    end

    # Default: unstyled single character
    segments << Segment.new(char)
    pos += 1
  end

  segments
end
602
+
603
+ private
604
+
605
# Find the index of the quote that closes a string opened at +start+.
#
# A backslash escapes the character that follows it, so an escaped quote
# (\") does not close the string, while a quote preceded by an escaped
# backslash (\\") does.
#
# @param line [String] Line being scanned
# @param start [Integer] Index of the opening quote
# @param delimiter [String] Closing quote character
# @return [Integer] Index of the closing quote, or the last index of the
#   line when the string is not closed on this line
def find_string_end(line, start, delimiter)
  pos = start + 1
  while pos < line.length
    if line[pos] == "\\"
      pos += 2 # skip the backslash and the character it escapes
      next
    end
    return pos if line[pos] == delimiter

    pos += 1
  end
  line.length - 1
end
614
+ end
615
+
616
+ # JavaScript lexer
617
+ class JavaScriptLexer < BaseLexer
618
+ KEYWORDS = %w[
619
+ async await break case catch class const continue debugger default
620
+ delete do else export extends finally for function if import in
621
+ instanceof let new return static super switch this throw try typeof
622
+ var void while with yield true false null undefined
623
+ ].freeze
624
+
625
+ BUILTINS = %w[
626
+ Array Boolean Date Error Function JSON Math Number Object Promise
627
+ RegExp String Symbol Map Set WeakMap WeakSet Proxy Reflect
628
+ console window document parseInt parseFloat isNaN isFinite
629
+ decodeURI decodeURIComponent encodeURI encodeURIComponent eval
630
+ setTimeout setInterval clearTimeout clearInterval fetch
631
+ ].freeze
632
+
633
# Split one line of JavaScript/TypeScript source into styled segments.
#
# At each position the branches below are tried in order; any character that
# matches none of them is emitted as an unstyled one-character segment. No
# state is carried between lines, so a block comment or template literal is
# only recognized on the line where it opens.
#
# @param line [String] One line of source (without the trailing newline)
# @param theme [Hash] Token type => Style mapping
# @return [Array<Segment>]
def tokenize(line, theme)
  segments = []
  pos = 0

  while pos < line.length
    char = line[pos]

    # Whitespace run, emitted unstyled
    if char.match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # Single-line comment
    if line[pos, 2] == "//"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # Block comment; must be checked before the regex branch, which would
    # otherwise treat "/* ... */" as a regex literal.
    if line[pos, 2] == "/*"
      closing = line.index("*/", pos + 2)
      comment_end = closing ? closing + 2 : line.length
      segments << Segment.new(line[pos...comment_end], style: theme[:comment])
      pos = comment_end
      next
    end

    # Template literal and ordinary strings
    if char == "`" || char == '"' || char == "'"
      str_end = find_string_end(line, pos, char)
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # Regex literal: a "/" at line start or after whitespace, "=", "(", "{", "," or "["
    if char == "/" && (pos == 0 || line[pos - 1].match?(/[\s=({,\[]/))
      regex_end = find_string_end(line, pos, "/")
      # find_string_end returns the last index of the line when there is no
      # closing delimiter. Only a real closing "/" makes this a regex;
      # otherwise (e.g. "a / 2") fall through so "/" is lexed as an operator.
      if regex_end > pos && line[regex_end] == "/"
        # Include flags
        regex_end += 1 while regex_end + 1 < line.length && line[regex_end + 1].match?(/[gimsuy]/)
        segments << Segment.new(line[pos..regex_end], style: theme[:string_regex] || theme[:string])
        pos = regex_end + 1
        next
      end
    end

    # Number (also ".5")
    if char.match?(/\d/) || (char == "." && pos + 1 < line.length && line[pos + 1].match?(/\d/))
      num_end = pos
      if line[pos, 2].match?(/\A0[xX]\z/)
        # Hex literal: consume every hex digit so 0xFF stays one token.
        num_end += 2
        num_end += 1 while num_end < line.length && line[num_end].match?(/[\h_nN]/)
        num_style = theme[:number_hex] || theme[:number]
      else
        num_end += 1 while num_end < line.length && line[num_end].match?(/[\d._xXoObBeEnN]/)
        num_style = theme[:number]
      end
      segments << Segment.new(line[pos...num_end], style: num_style)
      pos = num_end
      next
    end

    # Identifier
    if char.match?(/[a-zA-Z_$]/)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/[\w$]/)
      word = line[pos...word_end]

      style = if KEYWORDS.include?(word)
                theme[:keyword]
              elsif BUILTINS.include?(word)
                theme[:name_builtin] || theme[:name]
              elsif word[0].match?(/[A-Z]/)
                theme[:name_class] || theme[:name]
              else
                theme[:name]
              end

      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Arrow function
    if line[pos, 2] == "=>"
      segments << Segment.new("=>", style: theme[:operator])
      pos += 2
      next
    end

    # Operator run
    if char.match?(/[+\-*\/%&|^~<>=!?:]/)
      op_end = pos + 1
      op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%&|^~<>=!?:]/)
      segments << Segment.new(line[pos...op_end], style: theme[:operator])
      pos = op_end
      next
    end

    # Punctuation (one character per segment)
    if char.match?(/[(){}\[\].,;]/)
      segments << Segment.new(char, style: theme[:punctuation])
      pos += 1
      next
    end

    # Default: unstyled single character
    segments << Segment.new(char)
    pos += 1
  end

  segments
end
740
+
741
+ private
742
+
743
# Find the index of the delimiter that closes a literal opened at +start+.
#
# A backslash escapes the character that follows it, so an escaped
# delimiter (\") does not close the literal, while a delimiter preceded by
# an escaped backslash (\\") does.
#
# @param line [String] Line being scanned
# @param start [Integer] Index of the opening delimiter
# @param delimiter [String] Closing delimiter character
# @return [Integer] Index of the closing delimiter, or the last index of
#   the line when the literal is not closed on this line
def find_string_end(line, start, delimiter)
  pos = start + 1
  while pos < line.length
    if line[pos] == "\\"
      pos += 2 # skip the backslash and the character it escapes
      next
    end
    return pos if line[pos] == delimiter

    pos += 1
  end
  line.length - 1
end
752
+ end
753
+
754
+ # SQL Lexer
755
+ class SQLLexer < BaseLexer
756
+ KEYWORDS = %w[
757
+ SELECT FROM WHERE AND OR NOT NULL IS IN LIKE BETWEEN EXISTS
758
+ INSERT INTO VALUES UPDATE SET DELETE CREATE TABLE DROP ALTER
759
+ INDEX VIEW TRIGGER PROCEDURE FUNCTION AS ON JOIN LEFT RIGHT
760
+ INNER OUTER FULL CROSS NATURAL USING ORDER BY ASC DESC GROUP
761
+ HAVING LIMIT OFFSET UNION ALL DISTINCT CASE WHEN THEN ELSE END
762
+ IF BEGIN COMMIT ROLLBACK TRANSACTION PRIMARY KEY FOREIGN
763
+ REFERENCES UNIQUE DEFAULT CHECK CONSTRAINT CASCADE RESTRICT
764
+ TRUE FALSE GRANT REVOKE WITH RECURSIVE
765
+ ].freeze
766
+
767
+ BUILTINS = %w[
768
+ COUNT SUM AVG MIN MAX LENGTH UPPER LOWER TRIM CONCAT SUBSTRING
769
+ REPLACE COALESCE NULLIF CAST CONVERT DATE TIME DATETIME
770
+ YEAR MONTH DAY HOUR MINUTE SECOND NOW CURRENT_DATE
771
+ CURRENT_TIME CURRENT_TIMESTAMP ABS ROUND FLOOR CEILING
772
+ POWER SQRT MOD ROW_NUMBER RANK DENSE_RANK OVER PARTITION
773
+ ].freeze
774
+
775
+ TYPES = %w[
776
+ INT INTEGER BIGINT SMALLINT TINYINT FLOAT DOUBLE DECIMAL
777
+ NUMERIC REAL CHAR VARCHAR TEXT NCHAR NVARCHAR NTEXT
778
+ DATE TIME DATETIME TIMESTAMP BOOLEAN BOOL BLOB BINARY
779
+ VARBINARY UUID JSON XML
780
+ ].freeze
781
+
782
# Split one line of SQL into styled segments.
#
# Words are classified case-insensitively against KEYWORDS, then BUILTINS,
# then TYPES. Characters that match no branch are emitted as unstyled
# one-character segments.
#
# @param line [String] One line of source (without the trailing newline)
# @param theme [Hash] Token type => Style mapping
# @return [Array<Segment>]
def tokenize(line, theme)
  tokens = []
  cursor = 0
  limit = line.length

  # Index just past the run of characters matching +pattern+ that starts at +from+.
  run_end = lambda do |from, pattern|
    idx = from
    idx += 1 while idx < limit && line[idx].match?(pattern)
    idx
  end

  while cursor < limit
    current = line[cursor]

    if current.match?(/\s/)
      # Whitespace run, unstyled
      stop = run_end.call(cursor, /\s/)
      tokens << Segment.new(line[cursor...stop])
      cursor = stop
    elsif line[cursor, 2] == "--"
      # Comment runs to the end of the line
      tokens << Segment.new(line[cursor..], style: theme[:comment])
      break
    elsif current == "'"
      # String: up to and including the next quote, or the rest of the line
      closing = line.index("'", cursor + 1) || limit - 1
      tokens << Segment.new(line[cursor..closing], style: theme[:string])
      cursor = closing + 1
    elsif current.match?(/\d/)
      stop = run_end.call(cursor, /[\d.]/)
      tokens << Segment.new(line[cursor...stop], style: theme[:number])
      cursor = stop
    elsif current.match?(/[a-zA-Z_]/)
      stop = run_end.call(cursor, /\w/)
      word = line[cursor...stop]
      canonical = word.upcase

      word_style =
        if KEYWORDS.include?(canonical) then theme[:keyword]
        elsif BUILTINS.include?(canonical) then theme[:name_builtin] || theme[:name]
        elsif TYPES.include?(canonical) then theme[:keyword_type] || theme[:keyword]
        else theme[:name]
        end

      tokens << Segment.new(word, style: word_style)
      cursor = stop
    elsif current.match?(/[+\-*\/%<>=!]/)
      stop = run_end.call(cursor + 1, /[+\-*\/%<>=!]/)
      tokens << Segment.new(line[cursor...stop], style: theme[:operator])
      cursor = stop
    elsif current.match?(/[(),;.]/)
      tokens << Segment.new(current, style: theme[:punctuation])
      cursor += 1
    else
      tokens << Segment.new(current)
      cursor += 1
    end
  end

  tokens
end
864
+ end
865
+
866
+ # JSON Lexer (simple)
867
+ class JSONLexer < BaseLexer
868
# Split one line of JSON into styled segments.
#
# A string directly followed (after optional whitespace) by ":" is styled
# as a key (theme[:name]); other strings use theme[:string]. Characters that
# match no branch are emitted as unstyled one-character segments.
#
# @param line [String] One line of source (without the trailing newline)
# @param theme [Hash] Token type => Style mapping
# @return [Array<Segment>]
def tokenize(line, theme)
  segments = []
  pos = 0

  while pos < line.length
    char = line[pos]

    # Whitespace run, emitted unstyled
    if char.match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # String
    if char == '"'
      str_end = pos + 1
      # A backslash escapes the next character, so step over both. This keeps
      # \" inside the string and lets "p\\" close at its final quote.
      while str_end < line.length && line[str_end] != '"'
        str_end += line[str_end] == "\\" ? 2 : 1
      end
      # Unterminated string: clamp to the last character of the line.
      str_end = [str_end, line.length - 1].min
      content = line[pos..str_end]

      # Check if it's a key (followed by :)
      rest = line[str_end + 1..].lstrip
      is_key = rest.start_with?(":")

      segments << Segment.new(content, style: is_key ? theme[:name] : theme[:string])
      pos = str_end + 1
      next
    end

    # Number
    if char.match?(/[\d\-]/)
      num_end = pos
      num_end += 1 while num_end < line.length && line[num_end].match?(/[\d.eE+\-]/)
      segments << Segment.new(line[pos...num_end], style: theme[:number])
      pos = num_end
      next
    end

    # Boolean/null
    literal = %w[true false null].find { |word| line[pos, word.length] == word }
    if literal
      segments << Segment.new(literal, style: theme[:keyword_constant] || theme[:keyword])
      pos += literal.length
      next
    end

    # Punctuation (one character per segment)
    if char.match?(/[{}\[\]:,]/)
      segments << Segment.new(char, style: theme[:punctuation])
      pos += 1
      next
    end

    # Default: unstyled single character
    segments << Segment.new(char)
    pos += 1
  end

  segments
end
936
+ end
937
+
938
+ # YAML Lexer
939
+ class YAMLLexer < BaseLexer
940
# Split one line of YAML into styled segments.
#
# Recognizes indentation, comments, "-" list markers, a mapping key (plain
# or quoted) with its ":" indicator, quoted strings, and plain scalars,
# which are classified as boolean/null, number, or string as a whole word.
# Every iteration consumes at least one character, so the loop always
# terminates.
#
# @param line [String] One line of source (without the trailing newline)
# @param theme [Hash] Token type => Style mapping
# @return [Array<Segment>]
def tokenize(line, theme)
  segments = []
  pos = 0
  # True until the first key or value token of the line has been seen;
  # indentation and "- " list markers do not end the key position.
  key_position = true

  while pos < line.length
    char = line[pos]

    # Whitespace run, emitted unstyled. Handled first so that an indented
    # comment reaches the comment branch instead of being taken for a key.
    if char.match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # Comment: everything from "#" to the end of the line
    if char == "#"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # List marker "-" or mapping indicator ":" standing on its own
    # (followed by whitespace or the end of the line)
    standalone = pos + 1 >= line.length || line[pos + 1].match?(/\s/)
    if standalone && (char == "-" || char == ":")
      key_position = false if char == ":"
      segments << Segment.new(char, style: theme[:punctuation])
      pos += 1
      next
    end

    quoted = char == '"' || char == "'"

    # Plain key: text up to the first ":" that is followed by whitespace or
    # the end of the line. A candidate containing " #" is rejected because
    # that colon belongs to a trailing comment.
    if key_position && !quoted
      key_position = false
      colon_pos = line.index(/:(?=\s|\z)/, pos)
      if colon_pos && !line[pos...colon_pos].match?(/\s#/)
        segments << Segment.new(line[pos...colon_pos], style: theme[:name])
        segments << Segment.new(":", style: theme[:punctuation])
        pos = colon_pos + 1
        next
      end
    end

    # Quoted string; styled as a key when it sits in key position and is
    # followed by a ":" indicator
    if quoted
      str_end = line.index(char, pos + 1) || line.length - 1
      is_key = key_position && line[(str_end + 1)..].match?(/\A\s*:(?:\s|\z)/)
      key_position = false
      segments << Segment.new(line[pos..str_end], style: is_key ? theme[:name] : theme[:string])
      pos = str_end + 1
      next
    end

    # Plain scalar: runs to the next whitespace or "#". The current character
    # is neither, so at least one character is consumed.
    word_end = pos
    word_end += 1 while word_end < line.length && !line[word_end].match?(/[\s#]/)
    word = line[pos...word_end]

    style = if word.match?(/\A(?:true|false|null|yes|no)\z/i)
              # Whole-word match only: "node" or "nothing" are plain strings.
              theme[:keyword_constant] || theme[:keyword]
            elsif word.match?(/\A[-+]?\d[\d_]*(?:\.\d+)?(?:[eE][-+]?\d+)?\z/)
              theme[:number]
            else
              theme[:string]
            end

    segments << Segment.new(word, style: style)
    pos = word_end
  end

  segments
end
1017
+ end
1018
+
1019
+ # Bash/Shell lexer
1020
+ class BashLexer < BaseLexer
1021
+ KEYWORDS = %w[
1022
+ if then else elif fi case esac for while until do done in
1023
+ function return exit break continue local export readonly
1024
+ declare typeset source alias unalias
1025
+ ].freeze
1026
+
1027
+ BUILTINS = %w[
1028
+ echo printf read cd pwd pushd popd dirs ls cat grep sed awk
1029
+ cut sort uniq wc head tail less more find xargs chmod chown
1030
+ mkdir rmdir rm cp mv ln touch date time kill ps top df du
1031
+ tar gzip gunzip zip unzip curl wget ssh scp rsync git
1032
+ sudo su man which whereis whatis type hash history set unset
1033
+ shift eval exec test true false
1034
+ ].freeze
1035
+
1036
# Split one line of shell script into styled segments.
#
# At each position the branches below are tried in order; any character that
# matches none of them is emitted as an unstyled one-character segment.
#
# @param line [String] One line of source (without the trailing newline)
# @param theme [Hash] Token type => Style mapping
# @return [Array<Segment>]
def tokenize(line, theme)
  segments = []
  pos = 0

  while pos < line.length
    char = line[pos]

    # Whitespace run, emitted unstyled
    if char.match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # Comment: everything from "#" to the end of the line
    if char == "#"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # String
    if char == "'" || char == '"'
      if char == "'"
        # Backslash is not an escape inside single quotes: the string ends
        # at the next quote.
        str_end = line.index("'", pos + 1) || line.length - 1
      else
        str_end = pos + 1
        # Inside double quotes a backslash escapes the next character.
        while str_end < line.length && line[str_end] != '"'
          str_end += line[str_end] == "\\" ? 2 : 1
        end
        # Unterminated string: clamp to the last character of the line.
        str_end = [str_end, line.length - 1].min
      end
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # Variable: ${...}, a special parameter such as $? or $#, or $NAME.
    # var_end is exclusive in every case, so the character after the
    # variable is never swallowed.
    if char == "$"
      var_end = pos + 1
      if var_end < line.length && line[var_end] == "{"
        closing = line.index("}", var_end)
        var_end = closing ? closing + 1 : line.length
      elsif var_end < line.length && line[var_end].match?(/[?!#@*$\-]/)
        var_end += 1
      else
        var_end += 1 while var_end < line.length && line[var_end].match?(/\w/)
      end
      segments << Segment.new(line[pos...var_end], style: theme[:name_variable] || theme[:name])
      pos = var_end
      next
    end

    # Number
    if char.match?(/\d/)
      num_end = pos
      num_end += 1 while num_end < line.length && line[num_end].match?(/\d/)
      segments << Segment.new(line[pos...num_end], style: theme[:number])
      pos = num_end
      next
    end

    # Identifier (may contain "-", as in command names)
    if char.match?(/[a-zA-Z_]/)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/[\w\-]/)
      word = line[pos...word_end]

      style = if KEYWORDS.include?(word)
                theme[:keyword]
              elsif BUILTINS.include?(word)
                theme[:name_builtin] || theme[:name]
              else
                theme[:name]
              end

      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Operators and special chars (one character per segment)
    if char.match?(/[|&;<>(){}]/)
      segments << Segment.new(char, style: theme[:operator])
      pos += 1
      next
    end

    # Default: unstyled single character
    segments << Segment.new(char)
    pos += 1
  end

  segments
end
1120
+ end
1121
+
1122
# Lexer for plain text. It adds nothing to BaseLexer, so the inherited
# #tokenize is used unchanged.
class TextLexer < BaseLexer
end
1126
+
1127
# Lexer registry: language name or alias => lexer instance. Each entry gets
# its own instance of the listed class.
LEXERS = {
  "ruby" => RubyLexer,
  "python" => PythonLexer,
  "javascript" => JavaScriptLexer,
  "js" => JavaScriptLexer,
  "typescript" => JavaScriptLexer,
  "ts" => JavaScriptLexer,
  "sql" => SQLLexer,
  "json" => JSONLexer,
  "yaml" => YAMLLexer,
  "yml" => YAMLLexer,
  "bash" => BashLexer,
  "shell" => BashLexer,
  "sh" => BashLexer,
  "text" => TextLexer,
  "txt" => TextLexer
}.transform_values(&:new).freeze
1145
+ end