mui 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +163 -0
  3. data/CHANGELOG.md +448 -0
  4. data/README.md +309 -6
  5. data/docs/_config.yml +56 -0
  6. data/docs/configuration.md +301 -0
  7. data/docs/getting-started.md +140 -0
  8. data/docs/index.md +55 -0
  9. data/docs/jobs.md +297 -0
  10. data/docs/keybindings.md +229 -0
  11. data/docs/plugins.md +285 -0
  12. data/docs/syntax-highlighting.md +149 -0
  13. data/exe/mui +1 -2
  14. data/lib/mui/autocmd.rb +66 -0
  15. data/lib/mui/buffer.rb +275 -0
  16. data/lib/mui/buffer_word_cache.rb +131 -0
  17. data/lib/mui/buffer_word_completer.rb +77 -0
  18. data/lib/mui/color_manager.rb +136 -0
  19. data/lib/mui/color_scheme.rb +63 -0
  20. data/lib/mui/command_completer.rb +30 -0
  21. data/lib/mui/command_context.rb +90 -0
  22. data/lib/mui/command_history.rb +89 -0
  23. data/lib/mui/command_line.rb +167 -0
  24. data/lib/mui/command_registry.rb +44 -0
  25. data/lib/mui/completion_renderer.rb +84 -0
  26. data/lib/mui/completion_state.rb +58 -0
  27. data/lib/mui/config.rb +58 -0
  28. data/lib/mui/editor.rb +395 -0
  29. data/lib/mui/error.rb +29 -0
  30. data/lib/mui/file_completer.rb +51 -0
  31. data/lib/mui/floating_window.rb +161 -0
  32. data/lib/mui/handler_result.rb +107 -0
  33. data/lib/mui/highlight.rb +22 -0
  34. data/lib/mui/highlighters/base.rb +23 -0
  35. data/lib/mui/highlighters/search_highlighter.rb +27 -0
  36. data/lib/mui/highlighters/selection_highlighter.rb +48 -0
  37. data/lib/mui/highlighters/syntax_highlighter.rb +107 -0
  38. data/lib/mui/input.rb +17 -0
  39. data/lib/mui/insert_completion_renderer.rb +92 -0
  40. data/lib/mui/insert_completion_state.rb +77 -0
  41. data/lib/mui/job.rb +81 -0
  42. data/lib/mui/job_manager.rb +113 -0
  43. data/lib/mui/key_code.rb +30 -0
  44. data/lib/mui/key_handler/base.rb +187 -0
  45. data/lib/mui/key_handler/command_mode.rb +511 -0
  46. data/lib/mui/key_handler/insert_mode.rb +323 -0
  47. data/lib/mui/key_handler/motions/motion_handler.rb +56 -0
  48. data/lib/mui/key_handler/normal_mode.rb +552 -0
  49. data/lib/mui/key_handler/operators/base_operator.rb +134 -0
  50. data/lib/mui/key_handler/operators/change_operator.rb +179 -0
  51. data/lib/mui/key_handler/operators/delete_operator.rb +176 -0
  52. data/lib/mui/key_handler/operators/paste_operator.rb +119 -0
  53. data/lib/mui/key_handler/operators/yank_operator.rb +127 -0
  54. data/lib/mui/key_handler/search_mode.rb +191 -0
  55. data/lib/mui/key_handler/visual_line_mode.rb +20 -0
  56. data/lib/mui/key_handler/visual_mode.rb +402 -0
  57. data/lib/mui/key_handler/window_command.rb +112 -0
  58. data/lib/mui/key_handler.rb +16 -0
  59. data/lib/mui/key_notation_parser.rb +152 -0
  60. data/lib/mui/key_sequence.rb +67 -0
  61. data/lib/mui/key_sequence_buffer.rb +85 -0
  62. data/lib/mui/key_sequence_handler.rb +163 -0
  63. data/lib/mui/key_sequence_matcher.rb +79 -0
  64. data/lib/mui/layout/calculator.rb +15 -0
  65. data/lib/mui/layout/leaf_node.rb +33 -0
  66. data/lib/mui/layout/node.rb +29 -0
  67. data/lib/mui/layout/split_node.rb +132 -0
  68. data/lib/mui/line_renderer.rb +173 -0
  69. data/lib/mui/mode.rb +13 -0
  70. data/lib/mui/mode_manager.rb +186 -0
  71. data/lib/mui/motion.rb +139 -0
  72. data/lib/mui/plugin.rb +35 -0
  73. data/lib/mui/plugin_manager.rb +106 -0
  74. data/lib/mui/register.rb +110 -0
  75. data/lib/mui/screen.rb +103 -0
  76. data/lib/mui/search_completer.rb +50 -0
  77. data/lib/mui/search_input.rb +40 -0
  78. data/lib/mui/search_state.rb +121 -0
  79. data/lib/mui/selection.rb +55 -0
  80. data/lib/mui/status_line_renderer.rb +40 -0
  81. data/lib/mui/syntax/language_detector.rb +106 -0
  82. data/lib/mui/syntax/lexer_base.rb +106 -0
  83. data/lib/mui/syntax/lexers/c_lexer.rb +127 -0
  84. data/lib/mui/syntax/lexers/css_lexer.rb +121 -0
  85. data/lib/mui/syntax/lexers/go_lexer.rb +205 -0
  86. data/lib/mui/syntax/lexers/html_lexer.rb +118 -0
  87. data/lib/mui/syntax/lexers/javascript_lexer.rb +197 -0
  88. data/lib/mui/syntax/lexers/markdown_lexer.rb +210 -0
  89. data/lib/mui/syntax/lexers/ruby_lexer.rb +114 -0
  90. data/lib/mui/syntax/lexers/rust_lexer.rb +148 -0
  91. data/lib/mui/syntax/lexers/typescript_lexer.rb +203 -0
  92. data/lib/mui/syntax/token.rb +42 -0
  93. data/lib/mui/syntax/token_cache.rb +91 -0
  94. data/lib/mui/tab_bar_renderer.rb +87 -0
  95. data/lib/mui/tab_manager.rb +96 -0
  96. data/lib/mui/tab_page.rb +35 -0
  97. data/lib/mui/terminal_adapter/base.rb +92 -0
  98. data/lib/mui/terminal_adapter/curses.rb +164 -0
  99. data/lib/mui/terminal_adapter.rb +4 -0
  100. data/lib/mui/themes/default.rb +315 -0
  101. data/lib/mui/undo_manager.rb +83 -0
  102. data/lib/mui/undoable_action.rb +175 -0
  103. data/lib/mui/unicode_width.rb +100 -0
  104. data/lib/mui/version.rb +1 -1
  105. data/lib/mui/window.rb +201 -0
  106. data/lib/mui/window_manager.rb +256 -0
  107. data/lib/mui/wrap_cache.rb +40 -0
  108. data/lib/mui/wrap_helper.rb +84 -0
  109. data/lib/mui.rb +171 -2
  110. metadata +123 -5
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module Mui
  module Syntax
    # Resolves a file path to a language symbol and builds the lexer for it.
    #
    # The lowercased file extension is consulted first; when it is unknown the
    # exact (case-sensitive) basename is tried, which covers extension-less
    # files such as "Gemfile".
    class LanguageDetector
      # Lowercase file extension (with leading dot) => language symbol
      EXTENSION_MAP = {
        ".rb" => :ruby,
        ".ru" => :ruby,
        ".rake" => :ruby,
        ".gemspec" => :ruby,
        ".c" => :c,
        ".h" => :c,
        ".y" => :c,
        ".go" => :go,
        ".rs" => :rust,
        ".js" => :javascript,
        ".mjs" => :javascript,
        ".cjs" => :javascript,
        ".jsx" => :javascript,
        ".ts" => :typescript,
        ".tsx" => :typescript,
        ".mts" => :typescript,
        ".cts" => :typescript,
        ".md" => :markdown,
        ".markdown" => :markdown,
        ".html" => :html,
        ".htm" => :html,
        ".xhtml" => :html,
        ".css" => :css,
        ".scss" => :css,
        ".sass" => :css
      }.freeze

      # Exact basename of well-known extension-less files => language symbol
      BASENAME_MAP = {
        "Gemfile" => :ruby,
        "Rakefile" => :ruby,
        "Guardfile" => :ruby,
        "Vagrantfile" => :ruby,
        "Berksfile" => :ruby,
        "Capfile" => :ruby,
        "Thorfile" => :ruby,
        "Podfile" => :ruby,
        "Brewfile" => :ruby
      }.freeze

      class << self
        # Returns the language symbol for +file_path+, or nil when the path is
        # nil, empty, or matches neither map.
        def detect(file_path)
          return nil if file_path.nil? || file_path.empty?

          extension = File.extname(file_path).downcase
          # Extension wins; the basename lookup only runs for unknown extensions.
          EXTENSION_MAP.fetch(extension) { BASENAME_MAP[File.basename(file_path)] }
        end

        # Returns a fresh lexer instance for +language+, or nil when the
        # language symbol is not recognised.
        def lexer_for(language)
          case language
          when :ruby then Lexers::RubyLexer.new
          when :c then Lexers::CLexer.new
          when :go then Lexers::GoLexer.new
          when :rust then Lexers::RustLexer.new
          when :javascript then Lexers::JavaScriptLexer.new
          when :typescript then Lexers::TypeScriptLexer.new
          when :markdown then Lexers::MarkdownLexer.new
          when :html then Lexers::HtmlLexer.new
          when :css then Lexers::CssLexer.new
          end
        end

        # Convenience wrapper: detect the language of +file_path+ and build its
        # lexer (nil when the language cannot be determined).
        def lexer_for_file(file_path)
          lexer_for(detect(file_path))
        end

        # Every language symbol reachable through either map, without duplicates.
        def supported_languages
          EXTENSION_MAP.values | BASENAME_MAP.values
        end

        # Every file extension known to the detector.
        def supported_extensions
          EXTENSION_MAP.keys
        end
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module Mui
  module Syntax
    # Base class for language-specific, line-oriented lexers.
    #
    # Subclasses override +token_patterns+ (or +compiled_patterns+) and, for
    # constructs spanning several lines, +handle_multiline_state+ and
    # +check_multiline_start+.
    class LexerBase
      # Tokenize a single line of text.
      #
      # line  - the line to tokenize
      # state - multiline state carried over from the previous line (nil when
      #         the line starts in normal code)
      #
      # Returns [tokens, state], where state is the multiline state still open
      # at the end of the line (nil if none).
      # TODO: Refactor to reduce complexity (Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity)
      def tokenize(line, state = nil)
        tokens = []
        pos = 0
        current_state = state

        while pos < line.length
          # Handle multiline state first (e.g., inside block comment)
          if current_state
            token, new_state, new_pos = handle_multiline_state(line, pos, current_state)
            if token
              tokens << token
              pos = new_pos
              current_state = new_state
              next
            elsif new_state.nil?
              # State ended without producing a token, continue normal tokenization
              current_state = nil
              pos = new_pos
              next
            end
            # No token but the state persists: fall through to the normal path
          end

          # Check for multiline state start
          new_state, token, new_pos = check_multiline_start(line, pos)
          if new_state
            tokens << token if token
            pos = new_pos
            current_state = new_state
            next
          end

          # Normal token matching
          token = match_token(line, pos)
          if token
            tokens << token
            pos = token.end_col + 1
          else
            # Skip unrecognized character
            pos += 1
          end
        end

        [tokens, current_state]
      end

      # Check if a state continues to the next line
      def continuing_state?(state)
        !state.nil?
      end

      protected

      # Override in subclass to define token patterns.
      # Returns an array of [type, pattern] pairs; pattern may be a Regexp or a
      # regexp source String.
      def token_patterns
        []
      end

      # Override in subclass to handle multiline constructs.
      # Returns [token, new_state, new_pos].
      def handle_multiline_state(_line, pos, _state)
        [nil, nil, pos]
      end

      # Override in subclass to check for multiline construct starts.
      # Returns [new_state, token, new_pos]; new_state is nil when nothing starts.
      def check_multiline_start(_line, pos)
        [nil, nil, pos]
      end

      private

      # Get compiled patterns (cached).
      # Uses \G anchor for matching at a specific position. The pattern is
      # wrapped in a non-capturing group so that the anchor applies to the whole
      # pattern; without it, a String pattern such as "if|else" would anchor
      # only its first alternative and could match later in the line.
      def compiled_patterns
        @compiled_patterns ||= token_patterns.map do |type, pattern|
          [type, /\G(?:#{pattern})/]
        end
      end

      # Returns the Token for the first pattern matching exactly at +pos+, or
      # nil when no pattern matches there.
      def match_token(line, pos)
        return nil if pos >= line.length

        compiled_patterns.each do |type, pattern|
          match = pattern.match(line, pos)
          next unless match

          text = match[0]
          # A zero-length match would give end_col == pos - 1, so tokenize would
          # never advance; treat it as "no match" for this pattern.
          next if text.empty?

          return Token.new(
            type:,
            start_col: pos,
            end_col: pos + text.length - 1,
            text:
          )
        end
        nil
      end
    end
  end
end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
module Mui
  module Syntax
    module Lexers
      # Lexer for C source code
      class CLexer < LexerBase
        # Pre-compiled patterns with \G anchor for position-specific matching.
        # These are compiled once at class load time; order matters because the
        # first matching pattern wins.
        COMPILED_PATTERNS = [
          # Single line comment
          [:comment, %r{\G//.*}],
          # Single-line block comment /* ... */ on one line
          [:comment, %r{\G/\*.*?\*/}],
          # Double quoted string (with escape handling)
          [:string, /\G"(?:[^"\\]|\\.)*"/],
          # Character literal
          [:char, /\G'(?:[^'\\]|\\.)*'/],
          # Preprocessor directives
          [:preprocessor, /\G^\s*#\s*(?:include|define|undef|ifdef|ifndef|if|else|elif|endif|error|pragma|line)\b.*/],
          # Float numbers (must be before integer)
          [:number, /\G\b\d+\.\d+(?:e[+-]?\d+)?[fFlL]?\b/i],
          # Hexadecimal
          [:number, /\G\b0x[0-9a-fA-F]+[uUlL]*\b/],
          # Octal
          [:number, /\G\b0[0-7]+[uUlL]*\b/],
          # Integer
          [:number, /\G\b\d+[uUlL]*\b/],
          # Type keywords (int, char, void, etc.)
          [:type, /\G\b(?:char|double|float|int|long|short|signed|unsigned|void|_Bool|_Complex|_Imaginary)\b/],
          # Other keywords (if, for, return, const, static, etc.)
          [:keyword, /\G\b(?:auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|register|return|sizeof|static|struct|switch|typedef|union|volatile|while|inline|restrict|_Alignas|_Alignof|_Atomic|_Generic|_Noreturn|_Static_assert|_Thread_local)\b/],
          # Identifiers
          [:identifier, /\G\b[a-zA-Z_][a-zA-Z0-9_]*\b/],
          # Operators
          [:operator, %r{\G(?:[+\-*/%&|^~<>=!]+|->|<<|>>|\+\+|--)}]
        ].freeze

        # Multiline comment patterns (pre-compiled)
        BLOCK_COMMENT_END = %r{\*/}
        # Kept for compatibility; the lexer itself only needs the anchored form.
        BLOCK_COMMENT_START = %r{/\*}
        BLOCK_COMMENT_START_ANCHOR = %r{\A/\*}

        protected

        # Use pre-compiled class-level patterns
        def compiled_patterns
          COMPILED_PATTERNS
        end

        # Handle /* ... */ block comments that span multiple lines.
        #
        # Returns [token, new_state, new_pos]: the comment text from +pos+ up to
        # and including the closing "*/" (state cleared), or the rest of the
        # line when the comment is still open (state stays :block_comment).
        def handle_multiline_state(line, pos, state)
          return [nil, nil, pos] unless state == :block_comment

          close_at = line.index(BLOCK_COMMENT_END, pos)
          if close_at
            end_pos = close_at + 1 # column of the "/" that closes the comment
            token = Token.new(
              type: :comment,
              start_col: pos,
              end_col: end_pos,
              text: line[pos..end_pos]
            )
            [token, nil, end_pos + 1]
          else
            # Entire rest of the line is part of the block comment
            text = line[pos..]
            token = unless text.empty?
                      Token.new(
                        type: :comment,
                        start_col: pos,
                        end_col: line.length - 1,
                        text:
                      )
                    end
            [token, :block_comment, line.length]
          end
        end

        # Check for a /* block comment that starts exactly at +pos+ and does not
        # end on the same line.
        #
        # The check is anchored at +pos+ on purpose: searching the remainder of
        # the line would jump over the code in front of the comment (leaving it
        # untokenized) and would also treat a "/*" inside a string literal or a
        # "//" comment as a comment start.
        #
        # Returns [new_state, token, new_pos].
        def check_multiline_start(line, pos)
          return [nil, nil, pos] unless unterminated_block_comment_at?(line, pos)

          token = Token.new(
            type: :comment,
            start_col: pos,
            end_col: line.length - 1,
            text: line[pos..]
          )
          [:block_comment, token, line.length]
        end

        private

        # True when "/*" begins exactly at +pos+ and no "*/" follows it on this line.
        def unterminated_block_comment_at?(line, pos)
          return false unless line[pos..].match?(BLOCK_COMMENT_START_ANCHOR)

          line.index(BLOCK_COMMENT_END, pos + 2).nil?
        end

        # Refuse to match an unterminated "/*" as an ordinary token (it would
        # otherwise be consumed as operators); check_multiline_start owns it.
        def match_token(line, pos)
          return nil if unterminated_block_comment_at?(line, pos)

          super
        end
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
module Mui
  module Syntax
    module Lexers
      # Lexer for CSS source files
      class CssLexer < LexerBase
        # Pre-compiled patterns with \G anchor for position-specific matching.
        # Order matters: the first matching pattern wins.
        COMPILED_PATTERNS = [
          # Single-line block comment /* ... */ on one line
          [:comment, %r{\G/\*.*?\*/}],
          # @rules (at-rules)
          [:preprocessor, /\G@[a-zA-Z-]+/],
          # Hex color (must be before ID selector - matches 3-8 hex digits only)
          [:number, /\G#[0-9a-fA-F]{3,8}(?![a-zA-Z0-9_-])/],
          # ID selector (starts with letter or underscore/hyphen after #)
          [:constant, /\G#[a-zA-Z_-][a-zA-Z0-9_-]*/],
          # Class selector
          [:type, /\G\.[a-zA-Z_-][a-zA-Z0-9_-]*/],
          # Pseudo-elements and pseudo-classes
          [:keyword, /\G::?[a-zA-Z-]+(?:\([^)]*\))?/],
          # Property name (followed by colon)
          [:identifier, /\G[a-zA-Z-]+(?=\s*:)/],
          # Double quoted string
          [:string, /\G"(?:[^"\\]|\\.)*"/],
          # Single quoted string
          [:string, /\G'(?:[^'\\]|\\.)*'/],
          # URL function
          [:string, /\Gurl\([^)]*\)/i],
          # Numbers with units
          [:number, /\G-?\d+\.?\d*(?:px|em|rem|%|vh|vw|vmin|vmax|ch|ex|cm|mm|in|pt|pc|deg|rad|grad|turn|s|ms|Hz|kHz|dpi|dpcm|dppx|fr)?/i],
          # Functions (calc, rgb, rgba, hsl, var, etc.)
          [:keyword, /\G[a-zA-Z-]+(?=\()/],
          # Property values / keywords (important, inherit, etc.)
          [:constant, /\G!important\b/i],
          [:constant, /\G\b(?:inherit|initial|unset|revert|none|auto|normal)\b/],
          # Element selectors and identifiers
          [:identifier, /\G[a-zA-Z_-][a-zA-Z0-9_-]*/],
          # Operators and symbols
          [:operator, /\G[{}():;,>+~*=\[\]]/]
        ].freeze

        # Multiline comment patterns
        BLOCK_COMMENT_END = %r{\*/}
        # Kept for compatibility; the lexer itself only needs the anchored form.
        BLOCK_COMMENT_START = %r{/\*}
        BLOCK_COMMENT_START_ANCHOR = %r{\A/\*}

        protected

        # Use pre-compiled class-level patterns
        def compiled_patterns
          COMPILED_PATTERNS
        end

        # Handle /* ... */ block comments that span multiple lines.
        #
        # Returns [token, new_state, new_pos]: the comment text from +pos+ up to
        # and including the closing "*/" (state cleared), or the rest of the
        # line when the comment is still open (state stays :block_comment).
        def handle_multiline_state(line, pos, state)
          return [nil, nil, pos] unless state == :block_comment

          close_at = line.index(BLOCK_COMMENT_END, pos)
          if close_at
            end_pos = close_at + 1 # column of the "/" that closes the comment
            token = Token.new(
              type: :comment,
              start_col: pos,
              end_col: end_pos,
              text: line[pos..end_pos]
            )
            [token, nil, end_pos + 1]
          else
            text = line[pos..]
            token = unless text.empty?
                      Token.new(
                        type: :comment,
                        start_col: pos,
                        end_col: line.length - 1,
                        text:
                      )
                    end
            [token, :block_comment, line.length]
          end
        end

        # Check for a /* block comment that starts exactly at +pos+ and does not
        # end on the same line.
        #
        # The check is anchored at +pos+ on purpose: searching the remainder of
        # the line would jump over the selectors/declarations in front of the
        # comment (leaving them untokenized) and would also treat a "/*" inside
        # a quoted string as a comment start.
        #
        # Returns [new_state, token, new_pos].
        def check_multiline_start(line, pos)
          return [nil, nil, pos] unless unterminated_block_comment_at?(line, pos)

          token = Token.new(
            type: :comment,
            start_col: pos,
            end_col: line.length - 1,
            text: line[pos..]
          )
          [:block_comment, token, line.length]
        end

        private

        # True when "/*" begins exactly at +pos+ and no "*/" follows it on this line.
        def unterminated_block_comment_at?(line, pos)
          return false unless line[pos..].match?(BLOCK_COMMENT_START_ANCHOR)

          line.index(BLOCK_COMMENT_END, pos + 2).nil?
        end

        # Refuse to match an unterminated "/*" as an ordinary token;
        # check_multiline_start owns it.
        def match_token(line, pos)
          return nil if unterminated_block_comment_at?(line, pos)

          super
        end
      end
    end
  end
end
@@ -0,0 +1,205 @@
1
+ # frozen_string_literal: true
2
+
3
module Mui
  module Syntax
    module Lexers
      # Lexer for Go source code
      class GoLexer < LexerBase
        # Go keywords
        KEYWORDS = %w[
          break case chan const continue default defer else fallthrough
          for func go goto if import interface map package range return
          select struct switch type var
        ].freeze

        # Go built-in types
        TYPES = %w[
          bool byte complex64 complex128 error float32 float64
          int int8 int16 int32 int64 rune string
          uint uint8 uint16 uint32 uint64 uintptr
          any comparable
        ].freeze

        # Go constants
        CONSTANTS = %w[true false nil iota].freeze

        # Pre-compiled patterns with \G anchor for position-specific matching.
        # Order matters: the first matching pattern wins. The word-list patterns
        # are derived from the arrays above so the two cannot drift apart.
        COMPILED_PATTERNS = [
          # Single line comment
          [:comment, %r{\G//.*}],
          # Single-line block comment /* ... */ on one line
          [:comment, %r{\G/\*.*?\*/}],
          # Raw string literal (backtick)
          [:string, /\G`[^`]*`/],
          # Double quoted string (with escape handling)
          [:string, /\G"(?:[^"\\]|\\.)*"/],
          # Character literal (rune)
          [:char, /\G'(?:[^'\\]|\\.)*'/],
          # Float numbers (must be before integer)
          [:number, /\G\b\d+\.\d+(?:e[+-]?\d+)?\b/i],
          # Hexadecimal
          [:number, /\G\b0x[0-9a-fA-F]+\b/i],
          # Octal
          [:number, /\G\b0o[0-7]+\b/i],
          # Binary
          [:number, /\G\b0b[01]+\b/i],
          # Integer
          [:number, /\G\b\d+\b/],
          # Constants (true, false, nil, iota)
          [:constant, /\G\b(?:#{CONSTANTS.join("|")})\b/],
          # Types
          [:type, /\G\b(?:#{TYPES.join("|")})\b/],
          # Keywords
          [:keyword, /\G\b(?:#{KEYWORDS.join("|")})\b/],
          # Exported identifiers (start with uppercase)
          [:constant, /\G\b[A-Z][a-zA-Z0-9_]*\b/],
          # Regular identifiers
          [:identifier, /\G\b[a-z_][a-zA-Z0-9_]*\b/],
          # Operators
          [:operator, %r{\G(?:&&|\|\||<-|<<=?|>>=?|&\^=?|[+\-*/%&|^<>=!]=?|:=|\+\+|--)}]
        ].freeze

        # Multiline comment patterns (pre-compiled)
        BLOCK_COMMENT_END = %r{\*/}
        # Kept for compatibility; the lexer itself only needs the anchored form.
        BLOCK_COMMENT_START = %r{/\*}
        BLOCK_COMMENT_START_ANCHOR = %r{\A/\*}

        # Raw string patterns (pre-compiled)
        RAW_STRING_START = /\A`/
        RAW_STRING_END = /`/

        protected

        # Use pre-compiled class-level patterns
        def compiled_patterns
          COMPILED_PATTERNS
        end

        # Handle /* ... */ block comments and raw strings that span multiple lines.
        # Returns [token, new_state, new_pos].
        def handle_multiline_state(line, pos, state)
          case state
          when :block_comment
            handle_block_comment(line, pos)
          when :raw_string
            handle_raw_string(line, pos)
          else
            [nil, nil, pos]
          end
        end

        # Check for a raw string or /* block comment that starts exactly at
        # +pos+ and is not closed on the same line.
        #
        # Both checks are anchored at +pos+. Searching the remainder of the line
        # for "/*" would jump over the code in front of the comment (leaving it
        # untokenized) and would also treat a "/*" inside a string literal or a
        # "//" comment as a comment start.
        #
        # Returns [new_state, token, new_pos].
        def check_multiline_start(line, pos)
          if unterminated_raw_string_at?(line, pos)
            [:raw_string, rest_of_line_token(:string, line, pos), line.length]
          elsif unterminated_block_comment_at?(line, pos)
            [:block_comment, rest_of_line_token(:comment, line, pos), line.length]
          else
            [nil, nil, pos]
          end
        end

        private

        # True when a backtick sits exactly at +pos+ and no closing backtick
        # follows it on this line.
        def unterminated_raw_string_at?(line, pos)
          return false unless line[pos..].match?(RAW_STRING_START)

          line.index(RAW_STRING_END, pos + 1).nil?
        end

        # True when "/*" begins exactly at +pos+ and no "*/" follows it on this line.
        def unterminated_block_comment_at?(line, pos)
          return false unless line[pos..].match?(BLOCK_COMMENT_START_ANCHOR)

          line.index(BLOCK_COMMENT_END, pos + 2).nil?
        end

        # Token of +type+ covering +line+ from +pos+ to the end of the line.
        def rest_of_line_token(type, line, pos)
          Token.new(
            type:,
            start_col: pos,
            end_col: line.length - 1,
            text: line[pos..]
          )
        end

        # Continue a block comment opened on an earlier line: consume up to and
        # including "*/" if present, otherwise the rest of the line.
        def handle_block_comment(line, pos)
          close_at = line.index(BLOCK_COMMENT_END, pos)
          if close_at
            end_pos = close_at + 1 # column of the "/" that closes the comment
            token = Token.new(
              type: :comment,
              start_col: pos,
              end_col: end_pos,
              text: line[pos..end_pos]
            )
            [token, nil, end_pos + 1]
          else
            token = line[pos..].empty? ? nil : rest_of_line_token(:comment, line, pos)
            [token, :block_comment, line.length]
          end
        end

        # Continue a raw string opened on an earlier line: consume up to and
        # including the closing backtick if present, otherwise the rest of the line.
        def handle_raw_string(line, pos)
          end_pos = line.index(RAW_STRING_END, pos)
          if end_pos
            token = Token.new(
              type: :string,
              start_col: pos,
              end_col: end_pos,
              text: line[pos..end_pos]
            )
            [token, nil, end_pos + 1]
          else
            token = line[pos..].empty? ? nil : rest_of_line_token(:string, line, pos)
            [token, :raw_string, line.length]
          end
        end

        # Refuse to match an unterminated raw string or "/*" as ordinary tokens;
        # check_multiline_start owns them.
        def match_token(line, pos)
          return nil if unterminated_raw_string_at?(line, pos)
          return nil if unterminated_block_comment_at?(line, pos)

          super
        end
      end
    end
  end
end