rich-ruby 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rich/syntax.rb CHANGED
@@ -1,1145 +1,1220 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative "style"
4
- require_relative "segment"
5
- require_relative "text"
6
- require_relative "panel"
7
-
8
- module Rich
9
- # Syntax highlighting for source code.
10
- # Provides token-based syntax highlighting for multiple programming languages.
11
- class Syntax
12
- # Default theme for syntax highlighting
13
- DEFAULT_THEME = {
14
- # Keywords
15
- keyword: Style.new(color: Color.parse("magenta"), bold: true),
16
- keyword_constant: Style.new(color: Color.parse("cyan"), bold: true),
17
- keyword_declaration: Style.new(color: Color.parse("magenta"), bold: true),
18
- keyword_namespace: Style.new(color: Color.parse("magenta"), bold: true),
19
- keyword_type: Style.new(color: Color.parse("cyan")),
20
-
21
- # Names
22
- name: Style.new(color: Color.parse("white")),
23
- name_builtin: Style.new(color: Color.parse("cyan")),
24
- name_class: Style.new(color: Color.parse("green"), bold: true),
25
- name_constant: Style.new(color: Color.parse("cyan")),
26
- name_decorator: Style.new(color: Color.parse("bright_magenta")),
27
- name_exception: Style.new(color: Color.parse("green"), bold: true),
28
- name_function: Style.new(color: Color.parse("green")),
29
- name_variable: Style.new(color: Color.parse("white")),
30
- name_tag: Style.new(color: Color.parse("bright_magenta")),
31
- name_attribute: Style.new(color: Color.parse("yellow")),
32
-
33
- # Literals
34
- string: Style.new(color: Color.parse("yellow")),
35
- string_doc: Style.new(color: Color.parse("yellow"), italic: true),
36
- string_escape: Style.new(color: Color.parse("bright_magenta")),
37
- string_interpol: Style.new(color: Color.parse("bright_magenta")),
38
- string_regex: Style.new(color: Color.parse("bright_yellow")),
39
- string_symbol: Style.new(color: Color.parse("bright_green")),
40
-
41
- number: Style.new(color: Color.parse("cyan")),
42
- number_float: Style.new(color: Color.parse("cyan")),
43
- number_hex: Style.new(color: Color.parse("cyan")),
44
-
45
- # Operators and Punctuation
46
- operator: Style.new(color: Color.parse("bright_magenta")),
47
- punctuation: Style.new(color: Color.parse("white")),
48
-
49
- # Comments
50
- comment: Style.new(color: Color.parse("bright_black"), italic: true),
51
- comment_doc: Style.new(color: Color.parse("bright_black"), italic: true),
52
- comment_preproc: Style.new(color: Color.parse("bright_magenta")),
53
-
54
- # Generic
55
- generic_deleted: Style.new(color: Color.parse("red")),
56
- generic_inserted: Style.new(color: Color.parse("green")),
57
- generic_heading: Style.new(color: Color.parse("bright_blue"), bold: true),
58
- generic_subheading: Style.new(color: Color.parse("bright_blue")),
59
- generic_error: Style.new(color: Color.parse("bright_red")),
60
-
61
- # Other
62
- text: Style.new,
63
- error: Style.new(color: Color.parse("bright_red"), bold: true)
64
- }.freeze
65
-
66
- # Monokai theme
67
- MONOKAI_THEME = {
68
- keyword: Style.new(color: Color.parse("#f92672"), bold: true),
69
- keyword_constant: Style.new(color: Color.parse("#ae81ff")),
70
- keyword_type: Style.new(color: Color.parse("#66d9ef"), italic: true),
71
- name: Style.new(color: Color.parse("#f8f8f2")),
72
- name_builtin: Style.new(color: Color.parse("#66d9ef")),
73
- name_class: Style.new(color: Color.parse("#a6e22e")),
74
- name_function: Style.new(color: Color.parse("#a6e22e")),
75
- name_decorator: Style.new(color: Color.parse("#a6e22e")),
76
- string: Style.new(color: Color.parse("#e6db74")),
77
- string_doc: Style.new(color: Color.parse("#e6db74")),
78
- number: Style.new(color: Color.parse("#ae81ff")),
79
- operator: Style.new(color: Color.parse("#f92672")),
80
- comment: Style.new(color: Color.parse("#75715e"), italic: true),
81
- punctuation: Style.new(color: Color.parse("#f8f8f2")),
82
- text: Style.new(color: Color.parse("#f8f8f2")),
83
- error: Style.new(color: Color.parse("#f92672"), bold: true)
84
- }.freeze
85
-
86
- # Dracula theme
87
- DRACULA_THEME = {
88
- keyword: Style.new(color: Color.parse("#ff79c6"), bold: true),
89
- keyword_constant: Style.new(color: Color.parse("#bd93f9")),
90
- keyword_type: Style.new(color: Color.parse("#8be9fd"), italic: true),
91
- name: Style.new(color: Color.parse("#f8f8f2")),
92
- name_builtin: Style.new(color: Color.parse("#8be9fd")),
93
- name_class: Style.new(color: Color.parse("#50fa7b")),
94
- name_function: Style.new(color: Color.parse("#50fa7b")),
95
- name_decorator: Style.new(color: Color.parse("#50fa7b")),
96
- string: Style.new(color: Color.parse("#f1fa8c")),
97
- string_doc: Style.new(color: Color.parse("#6272a4")),
98
- number: Style.new(color: Color.parse("#bd93f9")),
99
- operator: Style.new(color: Color.parse("#ff79c6")),
100
- comment: Style.new(color: Color.parse("#6272a4"), italic: true),
101
- punctuation: Style.new(color: Color.parse("#f8f8f2")),
102
- text: Style.new(color: Color.parse("#f8f8f2")),
103
- error: Style.new(color: Color.parse("#ff5555"), bold: true)
104
- }.freeze
105
-
106
- THEMES = {
107
- default: DEFAULT_THEME,
108
- monokai: MONOKAI_THEME,
109
- dracula: DRACULA_THEME
110
- }.freeze
111
-
112
- # @return [String] Source code
113
- attr_reader :code
114
-
115
- # @return [String] Language name
116
- attr_reader :language
117
-
118
- # @return [Hash] Theme styles
119
- attr_reader :theme
120
-
121
- # @return [Boolean] Show line numbers
122
- attr_reader :line_numbers
123
-
124
- # @return [Integer, nil] Starting line number
125
- attr_reader :start_line
126
-
127
- # @return [Array<Integer>, nil] Lines to highlight
128
- attr_reader :highlight_lines
129
-
130
- # @return [Boolean] Word wrap
131
- attr_reader :word_wrap
132
-
133
- # @return [Style, nil] Background style
134
- attr_reader :background_style
135
-
136
- # @return [Integer] Tab size
137
- attr_reader :tab_size
138
-
139
- def initialize(
140
- code,
141
- language: "text",
142
- theme: :default,
143
- line_numbers: false,
144
- start_line: 1,
145
- highlight_lines: nil,
146
- word_wrap: false,
147
- background_style: nil,
148
- tab_size: 4
149
- )
150
- @code = code.to_s
151
- @language = language.to_s.downcase
152
- @theme = theme.is_a?(Hash) ? theme : (THEMES[theme] || DEFAULT_THEME)
153
- @line_numbers = line_numbers
154
- @start_line = start_line
155
- @highlight_lines = highlight_lines
156
- @word_wrap = word_wrap
157
- @background_style = background_style
158
- @tab_size = tab_size
159
- end
160
-
161
- # Highlight the code and return segments
162
- # @return [Array<Segment>]
163
- def to_segments
164
- segments = []
165
- lines = @code.gsub("\t", " " * @tab_size).split("\n", -1)
166
-
167
- # Calculate line number width
168
- line_num_width = (@start_line + lines.length - 1).to_s.length
169
-
170
- lines.each_with_index do |line, index|
171
- line_num = @start_line + index
172
- is_highlighted = @highlight_lines&.include?(line_num)
173
-
174
- # Line number
175
- if @line_numbers
176
- num_style = is_highlighted ? Style.new(color: Color.parse("yellow"), bold: true) : Style.new(color: Color.parse("bright_black"))
177
- segments << Segment.new(line_num.to_s.rjust(line_num_width), style: num_style)
178
- segments << Segment.new(" │ ", style: Style.new(color: Color.parse("bright_black")))
179
- end
180
-
181
- # Highlighted line background
182
- if is_highlighted
183
- bg_style = Style.new(bgcolor: Color.parse("color(237)"))
184
- segments.concat(highlight_line(line).map do |seg|
185
- combined_style = seg.style ? seg.style + bg_style : bg_style
186
- Segment.new(seg.text, style: combined_style)
187
- end)
188
- else
189
- segments.concat(highlight_line(line))
190
- end
191
-
192
- segments << Segment.new("\n") if index < lines.length - 1
193
- end
194
-
195
- segments
196
- end
197
-
198
- # Highlight a single line
199
- # @param line [String] Line to highlight
200
- # @return [Array<Segment>]
201
- def highlight_line(line)
202
- lexer = get_lexer(@language)
203
- lexer.tokenize(line, @theme)
204
- end
205
-
206
- # Render to string with ANSI codes
207
- # @param color_system [Symbol] Color system
208
- # @return [String]
209
- def render(color_system: ColorSystem::TRUECOLOR)
210
- Segment.render(to_segments, color_system: color_system)
211
- end
212
-
213
- # Render inside a panel
214
- # @param title [String, nil] Panel title
215
- # @return [String]
216
- def to_panel(title: nil, max_width: 80)
217
- title ||= @language.capitalize
218
- panel = Panel.new(
219
- render,
220
- title: title,
221
- border_style: "dim",
222
- padding: 0
223
- )
224
- panel.render(max_width: max_width)
225
- end
226
-
227
- class << self
228
- # Create syntax from file
229
- # @param path [String] File path
230
- # @param kwargs [Hash] Options
231
- # @return [Syntax]
232
- def from_file(path, **kwargs)
233
- code = File.read(path)
234
- language = kwargs.delete(:language) || detect_language(path)
235
- new(code, language: language, **kwargs)
236
- end
237
-
238
- # Detect language from file extension
239
- # @param path [String] File path
240
- # @return [String]
241
- def detect_language(path)
242
- ext = File.extname(path).downcase.delete(".")
243
- EXTENSION_MAP[ext] || "text"
244
- end
245
-
246
- # List supported languages
247
- # @return [Array<String>]
248
- def supported_languages
249
- LEXERS.keys.sort
250
- end
251
- end
252
-
253
- private
254
-
255
- def get_lexer(language)
256
- LEXERS[language] || LEXERS["text"]
257
- end
258
-
259
- # File extension to language mapping
260
- EXTENSION_MAP = {
261
- "rb" => "ruby",
262
- "py" => "python",
263
- "js" => "javascript",
264
- "ts" => "typescript",
265
- "jsx" => "javascript",
266
- "tsx" => "typescript",
267
- "json" => "json",
268
- "yml" => "yaml",
269
- "yaml" => "yaml",
270
- "xml" => "xml",
271
- "html" => "html",
272
- "htm" => "html",
273
- "css" => "css",
274
- "scss" => "scss",
275
- "sass" => "sass",
276
- "sql" => "sql",
277
- "sh" => "bash",
278
- "bash" => "bash",
279
- "zsh" => "bash",
280
- "ps1" => "powershell",
281
- "c" => "c",
282
- "h" => "c",
283
- "cpp" => "cpp",
284
- "hpp" => "cpp",
285
- "cc" => "cpp",
286
- "go" => "go",
287
- "rs" => "rust",
288
- "java" => "java",
289
- "kt" => "kotlin",
290
- "swift" => "swift",
291
- "md" => "markdown",
292
- "markdown" => "markdown",
293
- "dockerfile" => "dockerfile",
294
- "toml" => "toml",
295
- "ini" => "ini",
296
- "conf" => "ini",
297
- "txt" => "text"
298
- }.freeze
299
- end
300
-
301
  # Base lexer class for tokenization
  #
  # Applies no language rules: the whole line comes back as one segment.
  # The language lexers in this file inherit from it and override #tokenize.
  class BaseLexer
    # @param line [String] One line of source text
    # @param theme [Hash] Theme styles; only the :text entry is read here
    # @return [Array<Segment>] Single-element array holding the whole line
    def tokenize(line, theme)
      [Segment.new(line, style: theme[:text])]
    end
  end
307
-
308
- # Ruby lexer
309
- class RubyLexer < BaseLexer
310
- KEYWORDS = %w[
311
- def class module end if else elsif unless case when then
312
- begin rescue ensure raise return yield do while until for
313
- break next redo retry in and or not alias defined? super
314
- self nil true false __FILE__ __LINE__ __ENCODING__
315
- require require_relative include extend prepend attr_reader
316
- attr_writer attr_accessor private protected public
317
- lambda proc loop catch throw
318
- ].freeze
319
-
320
- BUILTINS = %w[
321
- puts print p pp gets chomp to_s to_i to_f to_a to_h length
322
- size each map select reject find reduce inject sort sort_by
323
- uniq compact flatten reverse join split push pop shift unshift
324
- first last min max sum count empty? nil? is_a? kind_of?
325
- respond_to? send __send__ method methods instance_variables
326
- class superclass ancestors included_modules freeze frozen?
327
- dup clone tap then yield_self itself inspect
328
- ].freeze
329
-
330
- def tokenize(line, theme)
331
- segments = []
332
- pos = 0
333
-
334
- while pos < line.length
335
- # Skip whitespace
336
- if line[pos].match?(/\s/)
337
- ws_end = pos
338
- ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
339
- segments << Segment.new(line[pos...ws_end])
340
- pos = ws_end
341
- next
342
- end
343
-
344
- # Comment
345
- if line[pos] == "#"
346
- segments << Segment.new(line[pos..], style: theme[:comment])
347
- break
348
- end
349
-
350
- # String (double quote)
351
- if line[pos] == '"'
352
- str_end = find_string_end(line, pos, '"')
353
- segments << Segment.new(line[pos..str_end], style: theme[:string])
354
- pos = str_end + 1
355
- next
356
- end
357
-
358
- # String (single quote)
359
- if line[pos] == "'"
360
- str_end = find_string_end(line, pos, "'")
361
- segments << Segment.new(line[pos..str_end], style: theme[:string])
362
- pos = str_end + 1
363
- next
364
- end
365
-
366
- # Regex
367
- if line[pos] == "/" && (pos == 0 || line[pos - 1].match?(/[\s=({,]/))
368
- regex_end = find_string_end(line, pos, "/")
369
- if regex_end > pos
370
- segments << Segment.new(line[pos..regex_end], style: theme[:string_regex])
371
- pos = regex_end + 1
372
- next
373
- end
374
- end
375
-
376
- # Symbol
377
- if line[pos] == ":"
378
- if pos + 1 < line.length && line[pos + 1].match?(/[a-zA-Z_]/)
379
- sym_end = pos + 1
380
- sym_end += 1 while sym_end < line.length && line[sym_end].match?(/\w/)
381
- segments << Segment.new(line[pos...sym_end], style: theme[:string_symbol] || theme[:string])
382
- pos = sym_end
383
- next
384
- end
385
- end
386
-
387
- # Number
388
- if line[pos].match?(/\d/)
389
- num_end = pos
390
- num_end += 1 while num_end < line.length && line[num_end].match?(/[\d._xXoObB]/)
391
- segments << Segment.new(line[pos...num_end], style: theme[:number])
392
- pos = num_end
393
- next
394
- end
395
-
396
- # Instance variable
397
- if line[pos] == "@"
398
- var_end = pos + 1
399
- var_end += 1 if var_end < line.length && line[var_end] == "@"
400
- var_end += 1 while var_end < line.length && line[var_end].match?(/\w/)
401
- segments << Segment.new(line[pos...var_end], style: theme[:name_variable] || theme[:name])
402
- pos = var_end
403
- next
404
- end
405
-
406
- # Global variable
407
- if line[pos] == "$"
408
- var_end = pos + 1
409
- var_end += 1 while var_end < line.length && line[var_end].match?(/\w/)
410
- segments << Segment.new(line[pos...var_end], style: theme[:name_variable] || theme[:name])
411
- pos = var_end
412
- next
413
- end
414
-
415
- # Constant/Class name
416
- if line[pos].match?(/[A-Z]/)
417
- word_end = pos
418
- word_end += 1 while word_end < line.length && line[word_end].match?(/\w/)
419
- word = line[pos...word_end]
420
- if %w[true false nil].include?(word.downcase)
421
- segments << Segment.new(word, style: theme[:keyword_constant] || theme[:keyword])
422
- else
423
- segments << Segment.new(word, style: theme[:name_class] || theme[:name])
424
- end
425
- pos = word_end
426
- next
427
- end
428
-
429
- # Identifier/Keyword
430
- if line[pos].match?(/[a-z_]/i)
431
- word_end = pos
432
- word_end += 1 while word_end < line.length && line[word_end].match?(/[\w?!]/)
433
- word = line[pos...word_end]
434
-
435
- style = if KEYWORDS.include?(word)
436
- theme[:keyword]
437
- elsif BUILTINS.include?(word)
438
- theme[:name_builtin] || theme[:name]
439
- else
440
- theme[:name]
441
- end
442
-
443
- segments << Segment.new(word, style: style)
444
- pos = word_end
445
- next
446
- end
447
-
448
- # Operators and punctuation
449
- if line[pos].match?(/[+\-*\/%&|^~<>=!?:]/)
450
- op_end = pos + 1
451
- op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%&|^~<>=!?:]/)
452
- segments << Segment.new(line[pos...op_end], style: theme[:operator])
453
- pos = op_end
454
- next
455
- end
456
-
457
- # Punctuation
458
- if line[pos].match?(/[(){}\[\].,;]/)
459
- segments << Segment.new(line[pos], style: theme[:punctuation])
460
- pos += 1
461
- next
462
- end
463
-
464
- # Default
465
- segments << Segment.new(line[pos])
466
- pos += 1
467
- end
468
-
469
- segments
470
- end
471
-
472
- private
473
-
474
- def find_string_end(line, start, delimiter)
475
- pos = start + 1
476
- while pos < line.length
477
- return pos if line[pos] == delimiter && line[pos - 1] != "\\"
478
-
479
- pos += 1
480
- end
481
- line.length - 1
482
- end
483
- end
484
-
485
- # Python lexer
486
- class PythonLexer < BaseLexer
487
- KEYWORDS = %w[
488
- and as assert async await break class continue def del elif else
489
- except finally for from global if import in is lambda None nonlocal
490
- not or pass raise return try while with yield True False
491
- ].freeze
492
-
493
- BUILTINS = %w[
494
- abs all any ascii bin bool breakpoint bytearray bytes callable
495
- chr classmethod compile complex delattr dict dir divmod enumerate
496
- eval exec filter float format frozenset getattr globals hasattr
497
- hash help hex id input int isinstance issubclass iter len list
498
- locals map max memoryview min next object oct open ord pow print
499
- property range repr reversed round set setattr slice sorted
500
- staticmethod str sum super tuple type vars zip
501
- ].freeze
502
-
503
- def tokenize(line, theme)
504
- segments = []
505
- pos = 0
506
-
507
- while pos < line.length
508
- if line[pos].match?(/\s/)
509
- ws_end = pos
510
- ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
511
- segments << Segment.new(line[pos...ws_end])
512
- pos = ws_end
513
- next
514
- end
515
-
516
- # Comment
517
- if line[pos] == "#"
518
- segments << Segment.new(line[pos..], style: theme[:comment])
519
- break
520
- end
521
-
522
- # Docstring/String
523
- if line[pos..pos + 2] == '"""' || line[pos..pos + 2] == "'''"
524
- delim = line[pos..pos + 2]
525
- str_end = line.index(delim, pos + 3)
526
- str_end = str_end ? str_end + 2 : line.length - 1
527
- segments << Segment.new(line[pos..str_end], style: theme[:string_doc] || theme[:string])
528
- pos = str_end + 1
529
- next
530
- end
531
-
532
- # String
533
- if ['"', "'"].include?(line[pos])
534
- delim = line[pos]
535
- str_end = find_string_end(line, pos, delim)
536
- segments << Segment.new(line[pos..str_end], style: theme[:string])
537
- pos = str_end + 1
538
- next
539
- end
540
-
541
- # Number
542
- if line[pos].match?(/\d/)
543
- num_end = pos
544
- num_end += 1 while num_end < line.length && line[num_end].match?(/[\d._xXoObBeE+\-]/)
545
- segments << Segment.new(line[pos...num_end], style: theme[:number])
546
- pos = num_end
547
- next
548
- end
549
-
550
- # Decorator
551
- if line[pos] == "@"
552
- dec_end = pos + 1
553
- dec_end += 1 while dec_end < line.length && line[dec_end].match?(/[\w.]/)
554
- segments << Segment.new(line[pos...dec_end], style: theme[:name_decorator] || theme[:name])
555
- pos = dec_end
556
- next
557
- end
558
-
559
- # Identifier
560
- if line[pos].match?(/[a-zA-Z_]/)
561
- word_end = pos
562
- word_end += 1 while word_end < line.length && line[word_end].match?(/\w/)
563
- word = line[pos...word_end]
564
-
565
- style = if KEYWORDS.include?(word)
566
- theme[:keyword]
567
- elsif BUILTINS.include?(word)
568
- theme[:name_builtin] || theme[:name]
569
- elsif word[0].match?(/[A-Z]/)
570
- theme[:name_class] || theme[:name]
571
- else
572
- theme[:name]
573
- end
574
-
575
- segments << Segment.new(word, style: style)
576
- pos = word_end
577
- next
578
- end
579
-
580
- # Operators
581
- if line[pos].match?(/[+\-*\/%&|^~<>=!@]/)
582
- op_end = pos + 1
583
- op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%&|^~<>=!@]/)
584
- segments << Segment.new(line[pos...op_end], style: theme[:operator])
585
- pos = op_end
586
- next
587
- end
588
-
589
- # Punctuation
590
- if line[pos].match?(/[(){}\[\].,;:]/)
591
- segments << Segment.new(line[pos], style: theme[:punctuation])
592
- pos += 1
593
- next
594
- end
595
-
596
- segments << Segment.new(line[pos])
597
- pos += 1
598
- end
599
-
600
- segments
601
- end
602
-
603
- private
604
-
605
- def find_string_end(line, start, delimiter)
606
- pos = start + 1
607
- while pos < line.length
608
- return pos if line[pos] == delimiter && line[pos - 1] != "\\"
609
-
610
- pos += 1
611
- end
612
- line.length - 1
613
- end
614
- end
615
-
616
- # JavaScript lexer
617
- class JavaScriptLexer < BaseLexer
618
- KEYWORDS = %w[
619
- async await break case catch class const continue debugger default
620
- delete do else export extends finally for function if import in
621
- instanceof let new return static super switch this throw try typeof
622
- var void while with yield true false null undefined
623
- ].freeze
624
-
625
- BUILTINS = %w[
626
- Array Boolean Date Error Function JSON Math Number Object Promise
627
- RegExp String Symbol Map Set WeakMap WeakSet Proxy Reflect
628
- console window document parseInt parseFloat isNaN isFinite
629
- decodeURI decodeURIComponent encodeURI encodeURIComponent eval
630
- setTimeout setInterval clearTimeout clearInterval fetch
631
- ].freeze
632
-
633
- def tokenize(line, theme)
634
- segments = []
635
- pos = 0
636
-
637
- while pos < line.length
638
- if line[pos].match?(/\s/)
639
- ws_end = pos
640
- ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
641
- segments << Segment.new(line[pos...ws_end])
642
- pos = ws_end
643
- next
644
- end
645
-
646
- # Single-line comment
647
- if line[pos..pos + 1] == "//"
648
- segments << Segment.new(line[pos..], style: theme[:comment])
649
- break
650
- end
651
-
652
- # Template literal
653
- if line[pos] == "`"
654
- str_end = find_string_end(line, pos, "`")
655
- segments << Segment.new(line[pos..str_end], style: theme[:string])
656
- pos = str_end + 1
657
- next
658
- end
659
-
660
- # String
661
- if ['"', "'"].include?(line[pos])
662
- delim = line[pos]
663
- str_end = find_string_end(line, pos, delim)
664
- segments << Segment.new(line[pos..str_end], style: theme[:string])
665
- pos = str_end + 1
666
- next
667
- end
668
-
669
- # Regex
670
- if line[pos] == "/" && (pos == 0 || line[pos - 1].match?(/[\s=({,\[]/))
671
- regex_end = find_string_end(line, pos, "/")
672
- if regex_end > pos
673
- # Include flags
674
- regex_end += 1 while regex_end + 1 < line.length && line[regex_end + 1].match?(/[gimsuy]/)
675
- segments << Segment.new(line[pos..regex_end], style: theme[:string_regex] || theme[:string])
676
- pos = regex_end + 1
677
- next
678
- end
679
- end
680
-
681
- # Number
682
- if line[pos].match?(/\d/) || (line[pos] == "." && pos + 1 < line.length && line[pos + 1].match?(/\d/))
683
- num_end = pos
684
- num_end += 1 while num_end < line.length && line[num_end].match?(/[\d._xXoObBeEnN]/)
685
- segments << Segment.new(line[pos...num_end], style: theme[:number])
686
- pos = num_end
687
- next
688
- end
689
-
690
- # Identifier
691
- if line[pos].match?(/[a-zA-Z_$]/)
692
- word_end = pos
693
- word_end += 1 while word_end < line.length && line[word_end].match?(/[\w$]/)
694
- word = line[pos...word_end]
695
-
696
- style = if KEYWORDS.include?(word)
697
- theme[:keyword]
698
- elsif BUILTINS.include?(word)
699
- theme[:name_builtin] || theme[:name]
700
- elsif word[0].match?(/[A-Z]/)
701
- theme[:name_class] || theme[:name]
702
- else
703
- theme[:name]
704
- end
705
-
706
- segments << Segment.new(word, style: style)
707
- pos = word_end
708
- next
709
- end
710
-
711
- # Arrow function
712
- if line[pos..pos + 1] == "=>"
713
- segments << Segment.new("=>", style: theme[:operator])
714
- pos += 2
715
- next
716
- end
717
-
718
- # Operators
719
- if line[pos].match?(/[+\-*\/%&|^~<>=!?:]/)
720
- op_end = pos + 1
721
- op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%&|^~<>=!?:]/)
722
- segments << Segment.new(line[pos...op_end], style: theme[:operator])
723
- pos = op_end
724
- next
725
- end
726
-
727
- # Punctuation
728
- if line[pos].match?(/[(){}\[\].,;]/)
729
- segments << Segment.new(line[pos], style: theme[:punctuation])
730
- pos += 1
731
- next
732
- end
733
-
734
- segments << Segment.new(line[pos])
735
- pos += 1
736
- end
737
-
738
- segments
739
- end
740
-
741
- private
742
-
743
- def find_string_end(line, start, delimiter)
744
- pos = start + 1
745
- while pos < line.length
746
- return pos if line[pos] == delimiter && line[pos - 1] != "\\"
747
-
748
- pos += 1
749
- end
750
- line.length - 1
751
- end
752
- end
753
-
754
- # SQL Lexer
755
- class SQLLexer < BaseLexer
756
- KEYWORDS = %w[
757
- SELECT FROM WHERE AND OR NOT NULL IS IN LIKE BETWEEN EXISTS
758
- INSERT INTO VALUES UPDATE SET DELETE CREATE TABLE DROP ALTER
759
- INDEX VIEW TRIGGER PROCEDURE FUNCTION AS ON JOIN LEFT RIGHT
760
- INNER OUTER FULL CROSS NATURAL USING ORDER BY ASC DESC GROUP
761
- HAVING LIMIT OFFSET UNION ALL DISTINCT CASE WHEN THEN ELSE END
762
- IF BEGIN COMMIT ROLLBACK TRANSACTION PRIMARY KEY FOREIGN
763
- REFERENCES UNIQUE DEFAULT CHECK CONSTRAINT CASCADE RESTRICT
764
- TRUE FALSE GRANT REVOKE WITH RECURSIVE
765
- ].freeze
766
-
767
- BUILTINS = %w[
768
- COUNT SUM AVG MIN MAX LENGTH UPPER LOWER TRIM CONCAT SUBSTRING
769
- REPLACE COALESCE NULLIF CAST CONVERT DATE TIME DATETIME
770
- YEAR MONTH DAY HOUR MINUTE SECOND NOW CURRENT_DATE
771
- CURRENT_TIME CURRENT_TIMESTAMP ABS ROUND FLOOR CEILING
772
- POWER SQRT MOD ROW_NUMBER RANK DENSE_RANK OVER PARTITION
773
- ].freeze
774
-
775
- TYPES = %w[
776
- INT INTEGER BIGINT SMALLINT TINYINT FLOAT DOUBLE DECIMAL
777
- NUMERIC REAL CHAR VARCHAR TEXT NCHAR NVARCHAR NTEXT
778
- DATE TIME DATETIME TIMESTAMP BOOLEAN BOOL BLOB BINARY
779
- VARBINARY UUID JSON XML
780
- ].freeze
781
-
782
- def tokenize(line, theme)
783
- segments = []
784
- pos = 0
785
-
786
- while pos < line.length
787
- if line[pos].match?(/\s/)
788
- ws_end = pos
789
- ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
790
- segments << Segment.new(line[pos...ws_end])
791
- pos = ws_end
792
- next
793
- end
794
-
795
- # Comment
796
- if line[pos..pos + 1] == "--"
797
- segments << Segment.new(line[pos..], style: theme[:comment])
798
- break
799
- end
800
-
801
- # String
802
- if line[pos] == "'"
803
- str_end = pos + 1
804
- str_end += 1 while str_end < line.length && line[str_end] != "'"
805
- str_end = [str_end, line.length - 1].min
806
- segments << Segment.new(line[pos..str_end], style: theme[:string])
807
- pos = str_end + 1
808
- next
809
- end
810
-
811
- # Number
812
- if line[pos].match?(/\d/)
813
- num_end = pos
814
- num_end += 1 while num_end < line.length && line[num_end].match?(/[\d.]/)
815
- segments << Segment.new(line[pos...num_end], style: theme[:number])
816
- pos = num_end
817
- next
818
- end
819
-
820
- # Identifier
821
- if line[pos].match?(/[a-zA-Z_]/)
822
- word_end = pos
823
- word_end += 1 while word_end < line.length && line[word_end].match?(/\w/)
824
- word = line[pos...word_end]
825
- upper_word = word.upcase
826
-
827
- style = if KEYWORDS.include?(upper_word)
828
- theme[:keyword]
829
- elsif BUILTINS.include?(upper_word)
830
- theme[:name_builtin] || theme[:name]
831
- elsif TYPES.include?(upper_word)
832
- theme[:keyword_type] || theme[:keyword]
833
- else
834
- theme[:name]
835
- end
836
-
837
- segments << Segment.new(word, style: style)
838
- pos = word_end
839
- next
840
- end
841
-
842
- # Operators
843
- if line[pos].match?(/[+\-*\/%<>=!]/)
844
- op_end = pos + 1
845
- op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%<>=!]/)
846
- segments << Segment.new(line[pos...op_end], style: theme[:operator])
847
- pos = op_end
848
- next
849
- end
850
-
851
- # Punctuation
852
- if line[pos].match?(/[(),;.]/)
853
- segments << Segment.new(line[pos], style: theme[:punctuation])
854
- pos += 1
855
- next
856
- end
857
-
858
- segments << Segment.new(line[pos])
859
- pos += 1
860
- end
861
-
862
- segments
863
- end
864
- end
865
-
866
- # JSON Lexer (simple)
867
- class JSONLexer < BaseLexer
868
- def tokenize(line, theme)
869
- segments = []
870
- pos = 0
871
-
872
- while pos < line.length
873
- if line[pos].match?(/\s/)
874
- ws_end = pos
875
- ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
876
- segments << Segment.new(line[pos...ws_end])
877
- pos = ws_end
878
- next
879
- end
880
-
881
- # String
882
- if line[pos] == '"'
883
- str_end = pos + 1
884
- str_end += 1 while str_end < line.length && !(line[str_end] == '"' && line[str_end - 1] != "\\")
885
- str_end = [str_end, line.length - 1].min
886
- content = line[pos..str_end]
887
-
888
- # Check if it's a key (followed by :)
889
- rest = line[str_end + 1..].lstrip
890
- is_key = rest.start_with?(":")
891
-
892
- segments << Segment.new(content, style: is_key ? theme[:name] : theme[:string])
893
- pos = str_end + 1
894
- next
895
- end
896
-
897
- # Number
898
- if line[pos].match?(/[\d\-]/)
899
- num_end = pos
900
- num_end += 1 while num_end < line.length && line[num_end].match?(/[\d.eE+\-]/)
901
- segments << Segment.new(line[pos...num_end], style: theme[:number])
902
- pos = num_end
903
- next
904
- end
905
-
906
- # Boolean/null
907
- if line[pos].match?(/[tfn]/)
908
- if line[pos..pos + 3] == "true"
909
- segments << Segment.new("true", style: theme[:keyword_constant] || theme[:keyword])
910
- pos += 4
911
- next
912
- elsif line[pos..pos + 4] == "false"
913
- segments << Segment.new("false", style: theme[:keyword_constant] || theme[:keyword])
914
- pos += 5
915
- next
916
- elsif line[pos..pos + 3] == "null"
917
- segments << Segment.new("null", style: theme[:keyword_constant] || theme[:keyword])
918
- pos += 4
919
- next
920
- end
921
- end
922
-
923
- # Punctuation
924
- if line[pos].match?(/[{}\[\]:,]/)
925
- segments << Segment.new(line[pos], style: theme[:punctuation])
926
- pos += 1
927
- next
928
- end
929
-
930
- segments << Segment.new(line[pos])
931
- pos += 1
932
- end
933
-
934
- segments
935
- end
936
- end
937
-
938
- # YAML Lexer
939
- class YAMLLexer < BaseLexer
940
- def tokenize(line, theme)
941
- segments = []
942
- pos = 0
943
-
944
- while pos < line.length
945
- # Comment
946
- if line[pos] == "#"
947
- segments << Segment.new(line[pos..], style: theme[:comment])
948
- break
949
- end
950
-
951
- # Key (before colon)
952
- if pos == 0 || line[0...pos].match?(/^\s*$/)
953
- colon_pos = line.index(":")
954
- if colon_pos
955
- key = line[0...colon_pos]
956
- segments << Segment.new(key, style: theme[:name])
957
- segments << Segment.new(":", style: theme[:punctuation])
958
- pos = colon_pos + 1
959
- next
960
- end
961
- end
962
-
963
- if line[pos].match?(/\s/)
964
- ws_end = pos
965
- ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
966
- segments << Segment.new(line[pos...ws_end])
967
- pos = ws_end
968
- next
969
- end
970
-
971
- # String
972
- if ['"', "'"].include?(line[pos])
973
- delim = line[pos]
974
- str_end = pos + 1
975
- str_end += 1 while str_end < line.length && line[str_end] != delim
976
- str_end = [str_end, line.length - 1].min
977
- segments << Segment.new(line[pos..str_end], style: theme[:string])
978
- pos = str_end + 1
979
- next
980
- end
981
-
982
- # Boolean/null
983
- rest = line[pos..].downcase
984
- if rest.start_with?("true") || rest.start_with?("false") || rest.start_with?("null") || rest.start_with?("yes") || rest.start_with?("no")
985
- word_end = pos
986
- word_end += 1 while word_end < line.length && line[word_end].match?(/\w/)
987
- segments << Segment.new(line[pos...word_end], style: theme[:keyword_constant] || theme[:keyword])
988
- pos = word_end
989
- next
990
- end
991
-
992
- # Number
993
- if line[pos].match?(/[\d\-]/)
994
- num_end = pos
995
- num_end += 1 while num_end < line.length && line[num_end].match?(/[\d.]/)
996
- segments << Segment.new(line[pos...num_end], style: theme[:number])
997
- pos = num_end
998
- next
999
- end
1000
-
1001
- # List marker
1002
- if line[pos] == "-" && (pos + 1 >= line.length || line[pos + 1].match?(/\s/))
1003
- segments << Segment.new("-", style: theme[:punctuation])
1004
- pos += 1
1005
- next
1006
- end
1007
-
1008
- # Default text
1009
- word_end = pos
1010
- word_end += 1 while word_end < line.length && !line[word_end].match?(/[\s#]/)
1011
- segments << Segment.new(line[pos...word_end], style: theme[:string])
1012
- pos = word_end
1013
- end
1014
-
1015
- segments
1016
- end
1017
- end
1018
-
1019
- # Bash/Shell lexer
1020
- class BashLexer < BaseLexer
1021
- KEYWORDS = %w[
1022
- if then else elif fi case esac for while until do done in
1023
- function return exit break continue local export readonly
1024
- declare typeset source alias unalias
1025
- ].freeze
1026
-
1027
- BUILTINS = %w[
1028
- echo printf read cd pwd pushd popd dirs ls cat grep sed awk
1029
- cut sort uniq wc head tail less more find xargs chmod chown
1030
- mkdir rmdir rm cp mv ln touch date time kill ps top df du
1031
- tar gzip gunzip zip unzip curl wget ssh scp rsync git
1032
- sudo su man which whereis whatis type hash history set unset
1033
- shift eval exec test true false
1034
- ].freeze
1035
-
1036
# Tokenize one line of Bash/shell source into styled segments.
#
# Rules are tried in order at each position: whitespace, comment, quoted
# string, `$` variable, number, identifier (keyword / builtin / plain name)
# and operator. A character matched by no rule is emitted on its own,
# unstyled, so the segment texts always concatenate back to +line+.
#
# @param line [String] one line of shell source
# @param theme [Hash] token type (Symbol) => style
# @return [Array<Segment>]
def tokenize(line, theme)
  segments = []
  pos = 0

  while pos < line.length
    char = line[pos]

    # Whitespace run (unstyled)
    if char.match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # Comment: runs to the end of the line
    if char == "#"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # String. A backslash escapes the next character only inside double
    # quotes; skipping the escaped character (instead of peeking at the
    # previous one) keeps "a\\" from being read as an unterminated string.
    if char == '"' || char == "'"
      str_end = pos + 1
      while str_end < line.length && line[str_end] != char
        str_end += 1 if char == '"' && line[str_end] == "\\"
        str_end += 1
      end
      # Unterminated string: clamp to the last character of the line
      str_end = [str_end, line.length - 1].min
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # Variable: ${...}, $name, or a one-character special/positional
    # parameter ($?, $1, $@ ...). var_end is EXCLUSIVE, so the character
    # following the variable is never absorbed into the variable segment.
    if char == "$"
      following = line[pos + 1]
      var_end =
        if following == "{"
          closing = line.index("}", pos + 1)
          closing ? closing + 1 : line.length
        elsif following&.match?(/[a-zA-Z_]/)
          name_end = pos + 1
          name_end += 1 while name_end < line.length && line[name_end].match?(/\w/)
          name_end
        elsif following&.match?(/[\d?!#@*$\-]/)
          pos + 2
        else
          pos + 1
        end
      segments << Segment.new(line[pos...var_end], style: theme[:name_variable] || theme[:name])
      pos = var_end
      next
    end

    # Number
    if char.match?(/\d/)
      num_end = pos
      num_end += 1 while num_end < line.length && line[num_end].match?(/\d/)
      segments << Segment.new(line[pos...num_end], style: theme[:number])
      pos = num_end
      next
    end

    # Identifier (may contain '-' as in command names such as apt-get)
    if char.match?(/[a-zA-Z_]/)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/[\w\-]/)
      word = line[pos...word_end]

      style = if KEYWORDS.include?(word)
                theme[:keyword]
              elsif BUILTINS.include?(word)
                theme[:name_builtin] || theme[:name]
              else
                theme[:name]
              end

      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Operators and special chars
    if char.match?(/[|&;<>(){}]/)
      segments << Segment.new(char, style: theme[:operator])
      pos += 1
      next
    end

    # Anything else: single unstyled character
    segments << Segment.new(char)
    pos += 1
  end

  segments
end
1120
- end
1121
-
1122
- # Plain text (no highlighting)
1123
- class TextLexer < BaseLexer
1124
- # Just returns the line as-is
1125
- end
1126
-
1127
- # Lexer registry
1128
- LEXERS = {
1129
- "ruby" => RubyLexer.new,
1130
- "python" => PythonLexer.new,
1131
- "javascript" => JavaScriptLexer.new,
1132
- "js" => JavaScriptLexer.new,
1133
- "typescript" => JavaScriptLexer.new,
1134
- "ts" => JavaScriptLexer.new,
1135
- "sql" => SQLLexer.new,
1136
- "json" => JSONLexer.new,
1137
- "yaml" => YAMLLexer.new,
1138
- "yml" => YAMLLexer.new,
1139
- "bash" => BashLexer.new,
1140
- "shell" => BashLexer.new,
1141
- "sh" => BashLexer.new,
1142
- "text" => TextLexer.new,
1143
- "txt" => TextLexer.new
1144
- }.freeze
1145
- end
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "style"
4
+ require_relative "segment"
5
+ require_relative "text"
6
+ require_relative "panel"
7
+
8
+ module Rich
9
+ # Syntax highlighting for source code.
10
+ # Provides token-based syntax highlighting for multiple programming languages.
11
+ class Syntax
12
+ # Default theme for syntax highlighting
13
+ DEFAULT_THEME = {
14
+ # Keywords
15
+ keyword: Style.new(color: Color.parse("magenta"), bold: true),
16
+ keyword_constant: Style.new(color: Color.parse("cyan"), bold: true),
17
+ keyword_declaration: Style.new(color: Color.parse("magenta"), bold: true),
18
+ keyword_namespace: Style.new(color: Color.parse("magenta"), bold: true),
19
+ keyword_type: Style.new(color: Color.parse("cyan")),
20
+
21
+ # Names
22
+ name: Style.new(color: Color.parse("white")),
23
+ name_builtin: Style.new(color: Color.parse("cyan")),
24
+ name_class: Style.new(color: Color.parse("green"), bold: true),
25
+ name_constant: Style.new(color: Color.parse("cyan")),
26
+ name_decorator: Style.new(color: Color.parse("bright_magenta")),
27
+ name_exception: Style.new(color: Color.parse("green"), bold: true),
28
+ name_function: Style.new(color: Color.parse("green")),
29
+ name_variable: Style.new(color: Color.parse("white")),
30
+ name_tag: Style.new(color: Color.parse("bright_magenta")),
31
+ name_attribute: Style.new(color: Color.parse("yellow")),
32
+
33
+ # Literals
34
+ string: Style.new(color: Color.parse("yellow")),
35
+ string_doc: Style.new(color: Color.parse("yellow"), italic: true),
36
+ string_escape: Style.new(color: Color.parse("bright_magenta")),
37
+ string_interpol: Style.new(color: Color.parse("bright_magenta")),
38
+ string_regex: Style.new(color: Color.parse("bright_yellow")),
39
+ string_symbol: Style.new(color: Color.parse("bright_green")),
40
+
41
+ number: Style.new(color: Color.parse("cyan")),
42
+ number_float: Style.new(color: Color.parse("cyan")),
43
+ number_hex: Style.new(color: Color.parse("cyan")),
44
+
45
+ # Operators and Punctuation
46
+ operator: Style.new(color: Color.parse("bright_magenta")),
47
+ punctuation: Style.new(color: Color.parse("white")),
48
+
49
+ # Comments
50
+ comment: Style.new(color: Color.parse("bright_black"), italic: true),
51
+ comment_doc: Style.new(color: Color.parse("bright_black"), italic: true),
52
+ comment_preproc: Style.new(color: Color.parse("bright_magenta")),
53
+
54
+ # Generic
55
+ generic_deleted: Style.new(color: Color.parse("red")),
56
+ generic_inserted: Style.new(color: Color.parse("green")),
57
+ generic_heading: Style.new(color: Color.parse("bright_blue"), bold: true),
58
+ generic_subheading: Style.new(color: Color.parse("bright_blue")),
59
+ generic_error: Style.new(color: Color.parse("bright_red")),
60
+
61
+ # Other
62
+ text: Style.new,
63
+ error: Style.new(color: Color.parse("bright_red"), bold: true)
64
+ }.freeze
65
+
66
+ # Monokai theme
67
+ MONOKAI_THEME = {
68
+ keyword: Style.new(color: Color.parse("#f92672"), bold: true),
69
+ keyword_constant: Style.new(color: Color.parse("#ae81ff")),
70
+ keyword_type: Style.new(color: Color.parse("#66d9ef"), italic: true),
71
+ name: Style.new(color: Color.parse("#f8f8f2")),
72
+ name_builtin: Style.new(color: Color.parse("#66d9ef")),
73
+ name_class: Style.new(color: Color.parse("#a6e22e")),
74
+ name_function: Style.new(color: Color.parse("#a6e22e")),
75
+ name_decorator: Style.new(color: Color.parse("#a6e22e")),
76
+ string: Style.new(color: Color.parse("#e6db74")),
77
+ string_doc: Style.new(color: Color.parse("#e6db74")),
78
+ number: Style.new(color: Color.parse("#ae81ff")),
79
+ operator: Style.new(color: Color.parse("#f92672")),
80
+ comment: Style.new(color: Color.parse("#75715e"), italic: true),
81
+ punctuation: Style.new(color: Color.parse("#f8f8f2")),
82
+ text: Style.new(color: Color.parse("#f8f8f2")),
83
+ error: Style.new(color: Color.parse("#f92672"), bold: true)
84
+ }.freeze
85
+
86
+ # Dracula theme
87
+ DRACULA_THEME = {
88
+ keyword: Style.new(color: Color.parse("#ff79c6"), bold: true),
89
+ keyword_constant: Style.new(color: Color.parse("#bd93f9")),
90
+ keyword_type: Style.new(color: Color.parse("#8be9fd"), italic: true),
91
+ name: Style.new(color: Color.parse("#f8f8f2")),
92
+ name_builtin: Style.new(color: Color.parse("#8be9fd")),
93
+ name_class: Style.new(color: Color.parse("#50fa7b")),
94
+ name_function: Style.new(color: Color.parse("#50fa7b")),
95
+ name_decorator: Style.new(color: Color.parse("#50fa7b")),
96
+ string: Style.new(color: Color.parse("#f1fa8c")),
97
+ string_doc: Style.new(color: Color.parse("#6272a4")),
98
+ number: Style.new(color: Color.parse("#bd93f9")),
99
+ operator: Style.new(color: Color.parse("#ff79c6")),
100
+ comment: Style.new(color: Color.parse("#6272a4"), italic: true),
101
+ punctuation: Style.new(color: Color.parse("#f8f8f2")),
102
+ text: Style.new(color: Color.parse("#f8f8f2")),
103
+ error: Style.new(color: Color.parse("#ff5555"), bold: true)
104
+ }.freeze
105
+
106
+ THEMES = {
107
+ default: DEFAULT_THEME,
108
+ monokai: MONOKAI_THEME,
109
+ dracula: DRACULA_THEME
110
+ }.freeze
111
+
112
+ # @return [String] Source code
113
+ attr_reader :code
114
+
115
+ # @return [String] Language name
116
+ attr_reader :language
117
+
118
+ # @return [Hash] Theme styles
119
+ attr_reader :theme
120
+
121
+ # @return [Boolean] Show line numbers
122
+ attr_reader :line_numbers
123
+
124
+ # @return [Integer, nil] Starting line number
125
+ attr_reader :start_line
126
+
127
+ # @return [Array<Integer>, nil] Lines to highlight
128
+ attr_reader :highlight_lines
129
+
130
+ # @return [Boolean] Word wrap
131
+ attr_reader :word_wrap
132
+
133
+ # @return [Style, nil] Background style
134
+ attr_reader :background_style
135
+
136
+ # @return [Integer] Tab size
137
+ attr_reader :tab_size
138
+
139
+ def initialize(
140
+ code,
141
+ language: "text",
142
+ theme: :default,
143
+ line_numbers: false,
144
+ start_line: 1,
145
+ highlight_lines: nil,
146
+ word_wrap: false,
147
+ background_style: nil,
148
+ tab_size: 4
149
+ )
150
+ @code = code.to_s
151
+ @language = language.to_s.downcase
152
+ @theme = theme.is_a?(Hash) ? theme : (THEMES[theme] || DEFAULT_THEME)
153
+ @line_numbers = line_numbers
154
+ @start_line = start_line
155
+ @highlight_lines = highlight_lines
156
+ @word_wrap = word_wrap
157
+ @background_style = background_style
158
+ @tab_size = tab_size
159
+ end
160
+
161
# Highlight the code and return segments.
#
# Tabs are expanded to @tab_size spaces, the code is split on "\n" (keeping
# trailing empty lines) and each line is passed through #highlight_line.
# When @line_numbers is set, each line is prefixed with a right-aligned
# number and a " │ " separator. Lines listed in @highlight_lines get a
# background colour added to every token and a bold yellow line number.
# A "\n" segment separates consecutive lines (none after the last).
#
# @return [Array<Segment>]
def to_segments
  segments = []
  lines = @code.gsub("\t", " " * @tab_size).split("\n", -1)
  last_index = lines.length - 1

  # Width of the largest line number, so the gutter is right-aligned
  line_num_width = (@start_line + last_index).to_s.length

  # These styles are the same for every line: build them once instead of
  # re-parsing the colours on each iteration.
  if @line_numbers
    gutter_style = Style.new(color: Color.parse("bright_black"))
    marked_num_style = Style.new(color: Color.parse("yellow"), bold: true)
  end
  bg_style = nil # created lazily, only if some line is actually highlighted

  lines.each_with_index do |line, index|
    line_num = @start_line + index
    is_highlighted = @highlight_lines&.include?(line_num)

    # Line number gutter
    if @line_numbers
      num_style = is_highlighted ? marked_num_style : gutter_style
      segments << Segment.new(line_num.to_s.rjust(line_num_width), style: num_style)
      segments << Segment.new(" │ ", style: gutter_style)
    end

    tokens = highlight_line(line)
    if is_highlighted
      # Layer the highlight background over each token's own style
      bg_style ||= Style.new(bgcolor: Color.parse("color(237)"))
      tokens = tokens.map do |seg|
        Segment.new(seg.text, style: seg.style ? seg.style + bg_style : bg_style)
      end
    end
    segments.concat(tokens)

    segments << Segment.new("\n") if index < last_index
  end

  segments
end
197
+
198
+ # Highlight a single line
199
+ # @param line [String] Line to highlight
200
+ # @return [Array<Segment>]
201
+ def highlight_line(line)
202
+ lexer = get_lexer(@language)
203
+ lexer.tokenize(line, @theme)
204
+ end
205
+
206
+ # Render to string with ANSI codes
207
+ # @param color_system [Symbol] Color system
208
+ # @return [String]
209
+ def render(color_system: ColorSystem::TRUECOLOR)
210
+ Segment.render(to_segments, color_system: color_system)
211
+ end
212
+
213
+ # Render inside a panel
214
+ # @param title [String, nil] Panel title
215
+ # @return [String]
216
+ def to_panel(title: nil, max_width: 80)
217
+ title ||= @language.capitalize
218
+ panel = Panel.new(
219
+ render,
220
+ title: title,
221
+ border_style: "dim",
222
+ padding: 0
223
+ )
224
+ panel.render(max_width: max_width)
225
+ end
226
+
227
+ class << self
228
+ # Create syntax from file
229
+ # @param path [String] File path
230
+ # @param kwargs [Hash] Options
231
+ # @return [Syntax]
232
+ def from_file(path, **kwargs)
233
+ code = File.read(path)
234
+ language = kwargs.delete(:language) || detect_language(path)
235
+ new(code, language: language, **kwargs)
236
+ end
237
+
238
+ # Detect language from file extension
239
+ # @param path [String] File path
240
+ # @return [String]
241
+ def detect_language(path)
242
+ ext = File.extname(path).downcase.delete(".")
243
+ EXTENSION_MAP[ext] || "text"
244
+ end
245
+
246
+ # List supported languages
247
+ # @return [Array<String>]
248
+ def supported_languages
249
+ LEXERS.keys.sort
250
+ end
251
+ end
252
+
253
+ private
254
+
255
+ def get_lexer(language)
256
+ LEXERS[language] || LEXERS["text"]
257
+ end
258
+
259
+ # File extension to language mapping
260
+ EXTENSION_MAP = {
261
+ "rb" => "ruby",
262
+ "py" => "python",
263
+ "js" => "javascript",
264
+ "ts" => "typescript",
265
+ "jsx" => "javascript",
266
+ "tsx" => "typescript",
267
+ "json" => "json",
268
+ "yml" => "yaml",
269
+ "yaml" => "yaml",
270
+ "xml" => "xml",
271
+ "html" => "html",
272
+ "htm" => "html",
273
+ "css" => "css",
274
+ "scss" => "scss",
275
+ "sass" => "sass",
276
+ "sql" => "sql",
277
+ "sh" => "bash",
278
+ "bash" => "bash",
279
+ "zsh" => "bash",
280
+ "ps1" => "powershell",
281
+ "c" => "c",
282
+ "h" => "c",
283
+ "cpp" => "cpp",
284
+ "hpp" => "cpp",
285
+ "cc" => "cpp",
286
+ "go" => "go",
287
+ "rs" => "rust",
288
+ "java" => "java",
289
+ "kt" => "kotlin",
290
+ "swift" => "swift",
291
+ "md" => "markdown",
292
+ "markdown" => "markdown",
293
+ "dockerfile" => "dockerfile",
294
+ "toml" => "toml",
295
+ "ini" => "ini",
296
+ "conf" => "ini",
297
+ "txt" => "text"
298
+ }.freeze
299
+ end
300
+
301
+ # Base lexer class for tokenization
302
+ class BaseLexer
303
+ def tokenize(line, theme)
304
+ [Segment.new(line, style: theme[:text])]
305
+ end
306
+ end
307
+
308
+ # Ruby lexer
309
+ class RubyLexer < BaseLexer
310
+ KEYWORDS = %w[
311
+ def class module end if else elsif unless case when then
312
+ begin rescue ensure raise return yield do while until for
313
+ break next redo retry in and or not alias defined? super
314
+ self nil true false __FILE__ __LINE__ __ENCODING__
315
+ require require_relative include extend prepend attr_reader
316
+ attr_writer attr_accessor private protected public
317
+ lambda proc loop catch throw
318
+ ].freeze
319
+
320
+ BUILTINS = %w[
321
+ puts print p pp gets chomp to_s to_i to_f to_a to_h length
322
+ size each map select reject find reduce inject sort sort_by
323
+ uniq compact flatten reverse join split push pop shift unshift
324
+ first last min max sum count empty? nil? is_a? kind_of?
325
+ respond_to? send __send__ method methods instance_variables
326
+ class superclass ancestors included_modules freeze frozen?
327
+ dup clone tap then yield_self itself inspect
328
+ ].freeze
329
+
330
# Split one line of Ruby source into styled segments.
#
# The scanner walks the line left to right. At each position the first
# matching rule (whitespace, comment, string, regex, symbol, number,
# instance/global variable, constant, identifier, operator, punctuation)
# emits a segment and advances; a character matched by no rule is emitted
# unstyled. Segment texts therefore always concatenate back to +line+.
#
# @param line [String] a single line of source
# @param theme [Hash] token type (Symbol) => style
# @return [Array<Segment>]
def tokenize(line, theme)
  segments = []
  pos = 0

  while pos < line.length
    # Whitespace run (unstyled)
    if line[pos].match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # Comment: everything from '#' to the end of the line
    if line[pos] == "#"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # String (double or single quoted)
    if line[pos] == '"' || line[pos] == "'"
      str_end = find_string_end(line, pos, line[pos])
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # Regex (only when a real closing '/' exists and the literal does not
    # start with a space, so ordinary division `a / b` is not swallowed)
    if line[pos] == "/" && (pos == 0 || line[pos - 1].match?(/[\s=({,]/)) &&
       pos + 1 < line.length && line[pos + 1] != " "
      regex_end = find_closing_delimiter(line, pos, "/")
      if regex_end
        segments << Segment.new(line[pos..regex_end], style: theme[:string_regex])
        pos = regex_end + 1
        next
      end
    end

    # Symbol (a ':' immediately followed by a letter or underscore)
    if line[pos] == ":" && pos + 1 < line.length && line[pos + 1].match?(/[a-zA-Z_]/)
      sym_end = pos + 1
      sym_end += 1 while sym_end < line.length && line[sym_end].match?(/\w/)
      segments << Segment.new(line[pos...sym_end], style: theme[:string_symbol] || theme[:string])
      pos = sym_end
      next
    end

    # Number. Match one complete literal anchored at pos (\G): hex, binary,
    # explicit octal, or decimal with optional fraction and exponent. A '.'
    # is only part of the number when a digit follows, so `1..10` and
    # `5.times` are not swallowed, and hex digits a-f are included.
    if line[pos].match?(/\d/)
      literal = line.match(
        /\G(?:0[xX][\h_]+|0[bB][01_]+|0[oO][0-7_]+|\d[\d_]*(?:\.\d[\d_]*)?(?:[eE][+-]?\d[\d_]*)?)/,
        pos
      )[0]
      segments << Segment.new(literal, style: theme[:number])
      pos += literal.length
      next
    end

    # Instance variable (@foo) or class variable (@@foo)
    if line[pos] == "@"
      var_end = pos + 1
      var_end += 1 if var_end < line.length && line[var_end] == "@"
      var_end += 1 while var_end < line.length && line[var_end].match?(/\w/)
      segments << Segment.new(line[pos...var_end], style: theme[:name_variable] || theme[:name])
      pos = var_end
      next
    end

    # Global variable
    if line[pos] == "$"
      var_end = pos + 1
      var_end += 1 while var_end < line.length && line[var_end].match?(/\w/)
      segments << Segment.new(line[pos...var_end], style: theme[:name_variable] || theme[:name])
      pos = var_end
      next
    end

    # Constant/Class name (starts with an uppercase letter)
    if line[pos].match?(/[A-Z]/)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/\w/)
      word = line[pos...word_end]
      style = if %w[true false nil].include?(word.downcase)
                theme[:keyword_constant] || theme[:keyword]
              else
                theme[:name_class] || theme[:name]
              end
      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Identifier/Keyword (may end in ? or !)
    if line[pos].match?(/[a-z_]/i)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/[\w?!]/)
      word = line[pos...word_end]

      style = if KEYWORDS.include?(word)
                theme[:keyword]
              elsif BUILTINS.include?(word)
                theme[:name_builtin] || theme[:name]
              else
                theme[:name]
              end

      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Operators (a run of operator characters becomes one segment)
    if line[pos].match?(/[+\-*\/%&|^~<>=!?:]/)
      op_end = pos + 1
      op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%&|^~<>=!?:]/)
      segments << Segment.new(line[pos...op_end], style: theme[:operator])
      pos = op_end
      next
    end

    # Punctuation
    if line[pos].match?(/[(){}\[\].,;]/)
      segments << Segment.new(line[pos], style: theme[:punctuation])
      pos += 1
      next
    end

    # Default: single unstyled character
    segments << Segment.new(line[pos])
    pos += 1
  end

  segments
end
473
+
474
+ private
475
+
476
# Locate the index of the quote that closes a string literal.
#
# Scanning begins just after +start+ (the opening quote). A backslash and
# the character after it are skipped together, so an escaped delimiter, or
# a literal backslash right before the real delimiter, is handled correctly.
#
# @param line [String] the line being scanned
# @param start [Integer] index of the opening delimiter
# @param delimiter [String] the closing character to look for
# @return [Integer] index of the closing delimiter, or the index of the
#   last character of the line when the string is unterminated
def find_string_end(line, start, delimiter)
  cursor = start + 1
  limit = line.length

  until cursor >= limit
    case line[cursor]
    when "\\"
      cursor += 2
    when delimiter
      return cursor
    else
      cursor += 1
    end
  end

  limit - 1
end
492
+
493
# Strict variant of find_string_end: reports a missing closing delimiter as
# nil instead of clamping to the end of the line. The regex rule in
# #tokenize relies on this to tell a real regex literal from a division `/`.
#
# @param line [String] the line being scanned
# @param start [Integer] index of the opening delimiter
# @param delimiter [String] the closing character to look for
# @return [Integer, nil] index of the closing delimiter, or nil if none
def find_closing_delimiter(line, start, delimiter)
  idx = start + 1

  while idx < line.length
    current = line[idx]
    escaped = current == "\\"
    return idx if !escaped && current == delimiter

    # A backslash consumes itself and the character it escapes
    idx += escaped ? 2 : 1
  end

  nil
end
509
+ end
510
+
511
+ # Python lexer
512
+ class PythonLexer < BaseLexer
513
+ KEYWORDS = %w[
514
+ and as assert async await break class continue def del elif else
515
+ except finally for from global if import in is lambda None nonlocal
516
+ not or pass raise return try while with yield True False
517
+ ].freeze
518
+
519
+ BUILTINS = %w[
520
+ abs all any ascii bin bool breakpoint bytearray bytes callable
521
+ chr classmethod compile complex delattr dict dir divmod enumerate
522
+ eval exec filter float format frozenset getattr globals hasattr
523
+ hash help hex id input int isinstance issubclass iter len list
524
+ locals map max memoryview min next object oct open ord pow print
525
+ property range repr reversed round set setattr slice sorted
526
+ staticmethod str sum super tuple type vars zip
527
+ ].freeze
528
+
529
# Split one line of Python source into styled segments.
#
# Rules are tried in order at each position: whitespace, comment,
# triple-quoted string, string, number, decorator, identifier, operator,
# punctuation. A character matched by no rule is emitted unstyled, so the
# segment texts always concatenate back to +line+.
#
# @param line [String] a single line of source
# @param theme [Hash] token type (Symbol) => style
# @return [Array<Segment>]
def tokenize(line, theme)
  segments = []
  pos = 0

  while pos < line.length
    # Whitespace run (unstyled)
    if line[pos].match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # Comment
    if line[pos] == "#"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # Docstring/String (triple quoted; runs to the end of the line when the
    # closing triple quote is not on this line)
    if line[pos..pos + 2] == '"""' || line[pos..pos + 2] == "'''"
      delim = line[pos..pos + 2]
      str_end = line.index(delim, pos + 3)
      str_end = str_end ? str_end + 2 : line.length - 1
      segments << Segment.new(line[pos..str_end], style: theme[:string_doc] || theme[:string])
      pos = str_end + 1
      next
    end

    # String
    if ['"', "'"].include?(line[pos])
      delim = line[pos]
      str_end = find_string_end(line, pos, delim)
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # Number. Match one complete literal anchored at pos (\G): hex, binary,
    # octal, or decimal with optional fraction, exponent and imaginary
    # suffix. A sign is only consumed as part of an exponent, so `1+2` stays
    # three tokens, and hex digits a-f are included.
    if line[pos].match?(/\d/)
      literal = line.match(
        /\G(?:0[xX][\h_]+|0[bB][01_]+|0[oO][0-7_]+|\d[\d_]*(?:\.[\d_]*)?(?:[eE][+-]?\d[\d_]*)?[jJ]?)/,
        pos
      )[0]
      segments << Segment.new(literal, style: theme[:number])
      pos += literal.length
      next
    end

    # Decorator: an '@' that is the first non-blank character of the line.
    # Any other '@' is the matrix-multiplication operator and is left for
    # the operator rule below.
    if line[pos] == "@" && line[0...pos].match?(/\A\s*\z/)
      dec_end = pos + 1
      dec_end += 1 while dec_end < line.length && line[dec_end].match?(/[\w.]/)
      segments << Segment.new(line[pos...dec_end], style: theme[:name_decorator] || theme[:name])
      pos = dec_end
      next
    end

    # Identifier
    if line[pos].match?(/[a-zA-Z_]/)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/\w/)
      word = line[pos...word_end]

      style = if KEYWORDS.include?(word)
                theme[:keyword]
              elsif BUILTINS.include?(word)
                theme[:name_builtin] || theme[:name]
              elsif word[0].match?(/[A-Z]/)
                theme[:name_class] || theme[:name]
              else
                theme[:name]
              end

      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Operators (a run of operator characters becomes one segment)
    if line[pos].match?(/[+\-*\/%&|^~<>=!@]/)
      op_end = pos + 1
      op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%&|^~<>=!@]/)
      segments << Segment.new(line[pos...op_end], style: theme[:operator])
      pos = op_end
      next
    end

    # Punctuation
    if line[pos].match?(/[(){}\[\].,;:]/)
      segments << Segment.new(line[pos], style: theme[:punctuation])
      pos += 1
      next
    end

    # Default: single unstyled character
    segments << Segment.new(line[pos])
    pos += 1
  end

  segments
end
628
+
629
+ private
630
+
631
# Find where a quoted string that opens at +start+ ends.
#
# Each backslash is skipped together with the character after it, so an
# escaped quote never terminates the string and a literal backslash in front
# of the closing quote is not mistaken for an escape.
#
# @param line [String] the line being scanned
# @param start [Integer] index of the opening quote
# @param delimiter [String] the quote character that closes the string
# @return [Integer] index of the closing quote; index of the last character
#   of the line when no closing quote is found
def find_string_end(line, start, delimiter)
  i = start + 1

  while i < line.length
    current = line[i]
    return i if current == delimiter && current != "\\"

    i += current == "\\" ? 2 : 1
  end

  line.length - 1
end
647
+
648
# Like find_string_end, but returns nil when no closing delimiter exists on
# the line instead of falling back to the last character.
# NOTE(review): PythonLexer#tokenize never calls this helper; it appears to
# be carried over from the lexers that detect regex literals.
#
# @param line [String] the line being scanned
# @param start [Integer] index of the opening delimiter
# @param delimiter [String] the closing character to look for
# @return [Integer, nil] index of the closing delimiter, or nil if none
def find_closing_delimiter(line, start, delimiter)
  cursor = start + 1

  until cursor >= line.length
    case line[cursor]
    when "\\" then cursor += 2 # skip the backslash and the escaped character
    when delimiter then return cursor
    else cursor += 1
    end
  end

  nil
end
664
+ end
665
+
666
+ # JavaScript lexer
667
+ class JavaScriptLexer < BaseLexer
668
+ KEYWORDS = %w[
669
+ async await break case catch class const continue debugger default
670
+ delete do else export extends finally for function if import in
671
+ instanceof let new return static super switch this throw try typeof
672
+ var void while with yield true false null undefined
673
+ ].freeze
674
+
675
+ BUILTINS = %w[
676
+ Array Boolean Date Error Function JSON Math Number Object Promise
677
+ RegExp String Symbol Map Set WeakMap WeakSet Proxy Reflect
678
+ console window document parseInt parseFloat isNaN isFinite
679
+ decodeURI decodeURIComponent encodeURI encodeURIComponent eval
680
+ setTimeout setInterval clearTimeout clearInterval fetch
681
+ ].freeze
682
+
683
# Split one line of JavaScript/TypeScript source into styled segments.
#
# Rules are tried in order at each position: whitespace, `//` comment,
# template literal, string, regex literal, number, identifier, `=>`,
# operator, punctuation. A character matched by no rule is emitted
# unstyled, so the segment texts always concatenate back to +line+.
#
# @param line [String] a single line of source
# @param theme [Hash] token type (Symbol) => style
# @return [Array<Segment>]
def tokenize(line, theme)
  segments = []
  pos = 0

  while pos < line.length
    # Whitespace run (unstyled)
    if line[pos].match?(/\s/)
      ws_end = pos
      ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
      segments << Segment.new(line[pos...ws_end])
      pos = ws_end
      next
    end

    # Single-line comment
    if line[pos..pos + 1] == "//"
      segments << Segment.new(line[pos..], style: theme[:comment])
      break
    end

    # Template literal
    if line[pos] == "`"
      str_end = find_string_end(line, pos, "`")
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # String
    if ['"', "'"].include?(line[pos])
      delim = line[pos]
      str_end = find_string_end(line, pos, delim)
      segments << Segment.new(line[pos..str_end], style: theme[:string])
      pos = str_end + 1
      next
    end

    # Regex (only after whitespace or an opening/assignment character, not
    # starting with a space, and only when a closing '/' exists on the line,
    # so division is left to the operator rule)
    if line[pos] == "/" && (pos == 0 || line[pos - 1].match?(/[\s=({,\[]/)) &&
       pos + 1 < line.length && line[pos + 1] != " "
      regex_end = find_closing_delimiter(line, pos, "/")
      if regex_end
        # Include flags
        regex_end += 1 while regex_end + 1 < line.length && line[regex_end + 1].match?(/[gimsuy]/)
        segments << Segment.new(line[pos..regex_end], style: theme[:string_regex] || theme[:string])
        pos = regex_end + 1
        next
      end
    end

    # Number. Match one complete literal anchored at pos (\G): hex, binary,
    # octal, or decimal (optionally starting with '.') with optional
    # exponent, each with an optional BigInt `n` suffix. Hex digits a-f and
    # a signed exponent are part of the literal.
    if line[pos].match?(/\d/) || (line[pos] == "." && pos + 1 < line.length && line[pos + 1].match?(/\d/))
      literal = line.match(
        /\G(?:0[xX][\h_]+|0[bB][01_]+|0[oO][0-7_]+|(?:\d[\d_]*(?:\.[\d_]*)?|\.\d[\d_]*)(?:[eE][+-]?\d[\d_]*)?)n?/,
        pos
      )[0]
      segments << Segment.new(literal, style: theme[:number])
      pos += literal.length
      next
    end

    # Identifier
    if line[pos].match?(/[a-zA-Z_$]/)
      word_end = pos
      word_end += 1 while word_end < line.length && line[word_end].match?(/[\w$]/)
      word = line[pos...word_end]

      style = if KEYWORDS.include?(word)
                theme[:keyword]
              elsif BUILTINS.include?(word)
                theme[:name_builtin] || theme[:name]
              elsif word[0].match?(/[A-Z]/)
                theme[:name_class] || theme[:name]
              else
                theme[:name]
              end

      segments << Segment.new(word, style: style)
      pos = word_end
      next
    end

    # Arrow function
    if line[pos..pos + 1] == "=>"
      segments << Segment.new("=>", style: theme[:operator])
      pos += 2
      next
    end

    # Operators (a run of operator characters becomes one segment)
    if line[pos].match?(/[+\-*\/%&|^~<>=!?:]/)
      op_end = pos + 1
      op_end += 1 while op_end < line.length && line[op_end].match?(/[+\-*\/%&|^~<>=!?:]/)
      segments << Segment.new(line[pos...op_end], style: theme[:operator])
      pos = op_end
      next
    end

    # Punctuation
    if line[pos].match?(/[(){}\[\].,;]/)
      segments << Segment.new(line[pos], style: theme[:punctuation])
      pos += 1
      next
    end

    # Default: single unstyled character
    segments << Segment.new(line[pos])
    pos += 1
  end

  segments
end
791
+
792
+ private
793
+
794
# Return the index at which the string (or template literal) opened at
# +start+ is closed.
#
# A backslash always consumes the following character as well, which keeps
# escaped delimiters inside the string and prevents a literal backslash
# before the delimiter from hiding the real end.
#
# @param line [String] the line being scanned
# @param start [Integer] index of the opening delimiter
# @param delimiter [String] the closing character to look for
# @return [Integer] index of the closing delimiter, or the last index of
#   the line if the literal is not closed on this line
def find_string_end(line, start, delimiter)
  index = start + 1
  fallback = line.length - 1

  loop do
    return fallback unless index < line.length

    char = line[index]
    if char == "\\"
      index += 2
    elsif char == delimiter
      return index
    else
      index += 1
    end
  end
end
810
+
811
# Same scan as find_string_end, except that a missing closing delimiter is
# reported as nil. The regex rule in #tokenize uses the nil result to treat
# an unmatched `/` as a division operator rather than a regex literal.
#
# @param line [String] the line being scanned
# @param start [Integer] index of the opening delimiter
# @param delimiter [String] the closing character to look for
# @return [Integer, nil] index of the closing delimiter, or nil if none
def find_closing_delimiter(line, start, delimiter)
  index = start + 1

  loop do
    return nil unless index < line.length

    char = line[index]
    if char == "\\"
      index += 2 # skip the backslash and the character it escapes
    elsif char == delimiter
      return index
    else
      index += 1
    end
  end
end
827
+ end
828
+
829
+ # SQL Lexer
830
class SQLLexer < BaseLexer
  # Reserved words; identifiers are upcased before being looked up here.
  KEYWORDS = %w[
    SELECT FROM WHERE AND OR NOT NULL IS IN LIKE BETWEEN EXISTS
    INSERT INTO VALUES UPDATE SET DELETE CREATE TABLE DROP ALTER
    INDEX VIEW TRIGGER PROCEDURE FUNCTION AS ON JOIN LEFT RIGHT
    INNER OUTER FULL CROSS NATURAL USING ORDER BY ASC DESC GROUP
    HAVING LIMIT OFFSET UNION ALL DISTINCT CASE WHEN THEN ELSE END
    IF BEGIN COMMIT ROLLBACK TRANSACTION PRIMARY KEY FOREIGN
    REFERENCES UNIQUE DEFAULT CHECK CONSTRAINT CASCADE RESTRICT
    TRUE FALSE GRANT REVOKE WITH RECURSIVE
  ].freeze

  # Built-in function names (checked after KEYWORDS, before TYPES).
  BUILTINS = %w[
    COUNT SUM AVG MIN MAX LENGTH UPPER LOWER TRIM CONCAT SUBSTRING
    REPLACE COALESCE NULLIF CAST CONVERT DATE TIME DATETIME
    YEAR MONTH DAY HOUR MINUTE SECOND NOW CURRENT_DATE
    CURRENT_TIME CURRENT_TIMESTAMP ABS ROUND FLOOR CEILING
    POWER SQRT MOD ROW_NUMBER RANK DENSE_RANK OVER PARTITION
  ].freeze

  # Column type names. DATE/TIME/DATETIME also appear in BUILTINS, which
  # is consulted first, so those three are styled as builtins.
  TYPES = %w[
    INT INTEGER BIGINT SMALLINT TINYINT FLOAT DOUBLE DECIMAL
    NUMERIC REAL CHAR VARCHAR TEXT NCHAR NVARCHAR NTEXT
    DATE TIME DATETIME TIMESTAMP BOOLEAN BOOL BLOB BINARY
    VARBINARY UUID JSON XML
  ].freeze

  # Splits one line of SQL into styled segments.
  #
  # @param line [String] a single line of source text
  # @param theme [#[]] style lookup indexed by symbols such as :keyword
  # @return [Array<Segment>] segments covering the line from left to right
  def tokenize(line, theme)
    out = []
    cursor = 0
    limit = line.length

    until cursor >= limit
      char = line[cursor]

      if char.match?(/\s/)
        stop = span_end(line, cursor, /\s/)
        out << Segment.new(line[cursor...stop])
      elsif line[cursor, 2] == "--"
        # A line comment swallows everything up to the end of the line.
        out << Segment.new(line[cursor..], style: theme[:comment])
        break
      elsif char == "'"
        # An unterminated literal extends to the last character of the line.
        closing = line.index("'", cursor + 1) || limit - 1
        out << Segment.new(line[cursor..closing], style: theme[:string])
        stop = closing + 1
      elsif char.match?(/\d/)
        stop = span_end(line, cursor, /[\d.]/)
        out << Segment.new(line[cursor...stop], style: theme[:number])
      elsif char.match?(/[a-zA-Z_]/)
        stop = span_end(line, cursor, /\w/)
        word = line[cursor...stop]
        out << Segment.new(word, style: word_style(word, theme))
      elsif char.match?(/[+\-*\/%<>=!]/)
        # Adjacent operator characters are grouped into a single segment.
        stop = span_end(line, cursor + 1, /[+\-*\/%<>=!]/)
        out << Segment.new(line[cursor...stop], style: theme[:operator])
      elsif char.match?(/[(),;.]/)
        out << Segment.new(char, style: theme[:punctuation])
        stop = cursor + 1
      else
        # Anything unrecognised is passed through one character at a time.
        out << Segment.new(char)
        stop = cursor + 1
      end

      cursor = stop
    end

    out
  end

  private

  # Returns the first index at or after +from+ whose character does not
  # match +pattern+ (or the line length when the run reaches the end).
  def span_end(line, from, pattern)
    from += 1 while from < line.length && line[from].match?(pattern)
    from
  end

  # Picks the style for an identifier, comparing it case-insensitively
  # against KEYWORDS, then BUILTINS, then TYPES.
  def word_style(word, theme)
    upper = word.upcase

    if KEYWORDS.include?(upper)
      theme[:keyword]
    elsif BUILTINS.include?(upper)
      theme[:name_builtin] || theme[:name]
    elsif TYPES.include?(upper)
      theme[:keyword_type] || theme[:keyword]
    else
      theme[:name]
    end
  end
end
940
+
941
# JSON Lexer (simple, line-oriented)
class JSONLexer < BaseLexer
  # Splits one line of JSON into styled segments.
  #
  # Strings followed by a colon are styled as names (object keys); other
  # strings use the string style. Backslash escapes inside strings are
  # skipped pairwise, so a value ending in an escaped backslash such as
  # "C:\\" is closed by its real closing quote.
  #
  # @param line [String] a single line of source text
  # @param theme [#[]] style lookup indexed by symbols such as :string
  # @return [Array<Segment>] segments covering the line from left to right
  def tokenize(line, theme)
    segments = []
    pos = 0

    while pos < line.length
      if line[pos].match?(/\s/)
        ws_end = pos
        ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
        segments << Segment.new(line[pos...ws_end])
        pos = ws_end
        next
      end

      # String
      if line[pos] == '"'
        str_end = closing_quote_index(line, pos)
        content = line[pos..str_end]

        # Check if it's a key (followed by :)
        rest = line[str_end + 1..].lstrip
        is_key = rest.start_with?(":")

        segments << Segment.new(content, style: is_key ? theme[:name] : theme[:string])
        pos = str_end + 1
        next
      end

      # Number (the scan is permissive: any run of digits, ".", "e", "E", "+", "-")
      if line[pos].match?(/[\d\-]/)
        num_end = pos
        num_end += 1 while num_end < line.length && line[num_end].match?(/[\d.eE+\-]/)
        segments << Segment.new(line[pos...num_end], style: theme[:number])
        pos = num_end
        next
      end

      # Boolean/null
      if line[pos].match?(/[tfn]/)
        if line[pos..pos + 3] == "true"
          segments << Segment.new("true", style: theme[:keyword_constant] || theme[:keyword])
          pos += 4
          next
        elsif line[pos..pos + 4] == "false"
          segments << Segment.new("false", style: theme[:keyword_constant] || theme[:keyword])
          pos += 5
          next
        elsif line[pos..pos + 3] == "null"
          segments << Segment.new("null", style: theme[:keyword_constant] || theme[:keyword])
          pos += 4
          next
        end
      end

      # Punctuation
      if line[pos].match?(/[{}\[\]:,]/)
        segments << Segment.new(line[pos], style: theme[:punctuation])
        pos += 1
        next
      end

      # Anything else is emitted unstyled, one character at a time.
      segments << Segment.new(line[pos])
      pos += 1
    end

    segments
  end

  private

  # Returns the index of the quote that closes the string opened at
  # +start+. A backslash always consumes the character after it, so an
  # escaped quote (\") does not close the string while an escaped
  # backslash (\\) directly before a quote does not hide that quote.
  # Falls back to the last index of the line for unterminated strings.
  def closing_quote_index(line, start)
    idx = start + 1
    while idx < line.length
      case line[idx]
      when "\\" then idx += 2
      when '"' then return idx
      else idx += 1
      end
    end
    line.length - 1
  end
end
1012
+
1013
# YAML Lexer (simple, line-oriented)
class YAMLLexer < BaseLexer
  # Characters that open (and close) a quoted scalar.
  QUOTES = ['"', "'"].freeze

  # A colon only separates a key from its value when it is followed by
  # whitespace or the end of the line, so "http://host" is not a key.
  KEY_SEPARATOR = /:(?=\s|\z)/

  # Whole-word constants; the lookahead stops "node" or "no-op" from
  # being treated as the constant "no".
  CONSTANT_PATTERN = /\A(?:true|false|null|yes|no)(?![\w\-])/i

  # Optional sign, then at least one digit, then any run of digits and dots.
  NUMBER_PATTERN = /\A-?\d[\d.]*/

  # Splits one line of YAML into styled segments.
  #
  # Every branch of the loop consumes at least one character, so the loop
  # always terminates. In particular a "-" that is neither a list marker
  # nor the sign of a number is handled as plain text rather than being
  # offered to a number scan that cannot consume it.
  #
  # @param line [String] a single line of source text
  # @param theme [#[]] style lookup indexed by symbols such as :name
  # @return [Array<Segment>] segments covering the line from left to right
  def tokenize(line, theme)
    segments = []
    pos = 0
    # True until the first scalar of the line has been examined; leading
    # whitespace and "- " list markers do not count as scalars.
    key_allowed = true

    while pos < line.length
      char = line[pos]

      # Comment: checked first so a comment line containing ":" is never
      # mistaken for a key.
      if char == "#"
        segments << Segment.new(line[pos..], style: theme[:comment])
        break
      end

      if char.match?(/\s/)
        ws_end = pos
        ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
        segments << Segment.new(line[pos...ws_end])
        pos = ws_end
        next
      end

      # List marker: must be tested before numbers, which may also start
      # with "-".
      if char == "-" && (pos + 1 >= line.length || line[pos + 1].match?(/\s/))
        segments << Segment.new("-", style: theme[:punctuation])
        pos += 1
        next
      end

      # Key (before colon): only the first scalar on a line can be a key.
      if key_allowed
        key_allowed = false
        colon_pos = key_separator_index(line, pos)
        if colon_pos
          segments << Segment.new(line[pos...colon_pos], style: theme[:name]) if colon_pos > pos
          segments << Segment.new(":", style: theme[:punctuation])
          pos = colon_pos + 1
          next
        end
      end

      # String: an unterminated string extends to the end of the line.
      if QUOTES.include?(char)
        str_end = line.index(char, pos + 1) || line.length - 1
        segments << Segment.new(line[pos..str_end], style: theme[:string])
        pos = str_end + 1
        next
      end

      rest = line[pos..]

      # Boolean/null
      constant = rest[CONSTANT_PATTERN]
      if constant
        segments << Segment.new(constant, style: theme[:keyword_constant] || theme[:keyword])
        pos += constant.length
        next
      end

      # Number
      number = rest[NUMBER_PATTERN]
      if number
        segments << Segment.new(number, style: theme[:number])
        pos += number.length
        next
      end

      # Default text: runs to the next whitespace. "#" inside a word is
      # kept as text because a YAML comment must be preceded by whitespace.
      word_end = pos
      word_end += 1 while word_end < line.length && !line[word_end].match?(/\s/)
      segments << Segment.new(line[pos...word_end], style: theme[:string])
      pos = word_end
    end

    segments
  end

  private

  # Returns the index of the colon that ends a mapping key starting at
  # +pos+, or nil when the scalar at +pos+ is not a key.
  #
  # For a quoted scalar the colon must be the first non-space character
  # after the closing quote, so colons inside the quotes are ignored.
  # For a plain scalar the colon must come before any trailing comment.
  def key_separator_index(line, pos)
    if QUOTES.include?(line[pos])
      closing = line.index(line[pos], pos + 1)
      return nil unless closing

      following = line.index(/\S/, closing + 1)
      return following && line[following] == ":" ? following : nil
    end

    colon_pos = line.index(KEY_SEPARATOR, pos)
    return nil unless colon_pos

    comment_pos = line.index(/\s#/, pos)
    return nil if comment_pos && comment_pos < colon_pos

    colon_pos
  end
end
1093
+
1094
# Bash/Shell lexer (simple, line-oriented)
class BashLexer < BaseLexer
  # Shell reserved words and declaration commands.
  KEYWORDS = %w[
    if then else elif fi case esac for while until do done in
    function return exit break continue local export readonly
    declare typeset source alias unalias
  ].freeze

  # Commonly used commands, styled as builtins.
  BUILTINS = %w[
    echo printf read cd pwd pushd popd dirs ls cat grep sed awk
    cut sort uniq wc head tail less more find xargs chmod chown
    mkdir rmdir rm cp mv ln touch date time kill ps top df du
    tar gzip gunzip zip unzip curl wget ssh scp rsync git
    sudo su man which whereis whatis type hash history set unset
    shift eval exec test true false
  ].freeze

  # Single-character parameters that may follow "$" ($?, $#, $1, ...).
  SPECIAL_PARAMETERS = "?$!#@*-0123456789"

  # Characters after which a new word starts; "#" opens a comment only
  # at the start of the line or directly after one of these.
  WORD_BREAK = /[\s;|&()<>]/

  # Splits one line of shell source into styled segments.
  #
  # @param line [String] a single line of source text
  # @param theme [#[]] style lookup indexed by symbols such as :keyword
  # @return [Array<Segment>] segments covering the line from left to right
  def tokenize(line, theme)
    segments = []
    pos = 0

    while pos < line.length
      if line[pos].match?(/\s/)
        ws_end = pos
        ws_end += 1 while ws_end < line.length && line[ws_end].match?(/\s/)
        segments << Segment.new(line[pos...ws_end])
        pos = ws_end
        next
      end

      # Comment: "#" in the middle of a word (e.g. a URL fragment) is
      # ordinary text and falls through to the default branch.
      if line[pos] == "#" && (pos.zero? || line[pos - 1].match?(WORD_BREAK))
        segments << Segment.new(line[pos..], style: theme[:comment])
        break
      end

      # String
      if ['"', "'"].include?(line[pos])
        str_end = closing_quote_index(line, pos, line[pos])
        segments << Segment.new(line[pos..str_end], style: theme[:string])
        pos = str_end + 1
        next
      end

      # Variable
      if line[pos] == "$"
        var_end = variable_end(line, pos)
        # var_end is exclusive, so the character after the name is left
        # for the next iteration instead of being styled as the variable.
        segments << Segment.new(line[pos...var_end], style: theme[:name_variable] || theme[:name])
        pos = var_end
        next
      end

      # Number
      if line[pos].match?(/\d/)
        num_end = pos
        num_end += 1 while num_end < line.length && line[num_end].match?(/\d/)
        segments << Segment.new(line[pos...num_end], style: theme[:number])
        pos = num_end
        next
      end

      # Identifier (hyphens are allowed so names like "apt-get" stay whole)
      if line[pos].match?(/[a-zA-Z_]/)
        word_end = pos
        word_end += 1 while word_end < line.length && line[word_end].match?(/[\w\-]/)
        word = line[pos...word_end]

        style = if KEYWORDS.include?(word)
                  theme[:keyword]
                elsif BUILTINS.include?(word)
                  theme[:name_builtin] || theme[:name]
                else
                  theme[:name]
                end

        segments << Segment.new(word, style: style)
        pos = word_end
        next
      end

      # Operators and special chars
      if line[pos].match?(/[|&;<>(){}]/)
        segments << Segment.new(line[pos], style: theme[:operator])
        pos += 1
        next
      end

      # Anything else is emitted unstyled, one character at a time.
      segments << Segment.new(line[pos])
      pos += 1
    end

    segments
  end

  private

  # Returns the index of the quote that closes the string opened at
  # +start+, or the last index of the line when it is unterminated.
  # Backslash escapes are honoured only inside double quotes; within
  # single quotes a backslash is a literal character.
  def closing_quote_index(line, start, delim)
    idx = start + 1
    while idx < line.length
      char = line[idx]
      return idx if char == delim

      idx += char == "\\" && delim == '"' ? 2 : 1
    end
    line.length - 1
  end

  # Returns the exclusive end index of the expansion starting at the "$"
  # found at +start+: "${...}" up to and including the closing brace (or
  # the end of the line), a single special parameter such as "$?" or
  # "$1", or a run of word characters. A lone "$" yields start + 1.
  def variable_end(line, start)
    idx = start + 1
    return idx if idx >= line.length

    following = line[idx]
    if following == "{"
      closing = line.index("}", idx)
      return closing ? closing + 1 : line.length
    end
    return idx + 1 if SPECIAL_PARAMETERS.include?(following)

    idx += 1 while idx < line.length && line[idx].match?(/\w/)
    idx
  end
end
1196
+
1197
# Plain text (no highlighting).
# Adds nothing to BaseLexer, so tokenizing is whatever BaseLexer provides
# (presumably the line returned as-is; BaseLexer is defined elsewhere).
class TextLexer < BaseLexer; end
1201
+
1202
# Lexer registry: maps each language name or alias to a lexer instance.
# Every alias gets its own instance, and the resulting hash is frozen.
LEXERS = [
  [RubyLexer, %w[ruby]],
  [PythonLexer, %w[python]],
  [JavaScriptLexer, %w[javascript js typescript ts]],
  [SQLLexer, %w[sql]],
  [JSONLexer, %w[json]],
  [YAMLLexer, %w[yaml yml]],
  [BashLexer, %w[bash shell sh]],
  [TextLexer, %w[text txt]]
].each_with_object({}) { |(lexer_class, names), registry|
  names.each { |name| registry[name] = lexer_class.new }
}.freeze
1220
+ end