markbridge 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/lib/markbridge/all.rb +4 -7
  3. data/lib/markbridge/ast/document.rb +1 -1
  4. data/lib/markbridge/ast/element.rb +2 -2
  5. data/lib/markbridge/ast/list.rb +2 -2
  6. data/lib/markbridge/ast/table.rb +61 -0
  7. data/lib/markbridge/ast/text.rb +5 -1
  8. data/lib/markbridge/ast.rb +1 -0
  9. data/lib/markbridge/bbcode.rb +4 -0
  10. data/lib/markbridge/gem_loader.rb +2 -3
  11. data/lib/markbridge/html.rb +4 -0
  12. data/lib/markbridge/mediawiki.rb +4 -0
  13. data/lib/markbridge/parsers/bbcode/closing_strategies/base.rb +0 -10
  14. data/lib/markbridge/parsers/bbcode/closing_strategies/reordering.rb +17 -4
  15. data/lib/markbridge/parsers/bbcode/closing_strategies/tag_reconciler.rb +64 -44
  16. data/lib/markbridge/parsers/bbcode/handler_registry.rb +26 -11
  17. data/lib/markbridge/parsers/bbcode/handlers/attachment_handler.rb +17 -12
  18. data/lib/markbridge/parsers/bbcode/handlers/base_handler.rb +0 -10
  19. data/lib/markbridge/parsers/bbcode/handlers/code_handler.rb +6 -10
  20. data/lib/markbridge/parsers/bbcode/handlers/image_handler.rb +13 -19
  21. data/lib/markbridge/parsers/bbcode/handlers/list_handler.rb +1 -5
  22. data/lib/markbridge/parsers/bbcode/handlers/list_item_handler.rb +1 -2
  23. data/lib/markbridge/parsers/bbcode/handlers/quote_handler.rb +30 -35
  24. data/lib/markbridge/parsers/bbcode/handlers/raw_handler.rb +2 -6
  25. data/lib/markbridge/parsers/bbcode/handlers/self_closing_handler.rb +4 -4
  26. data/lib/markbridge/parsers/bbcode/handlers/table_cell_handler.rb +26 -0
  27. data/lib/markbridge/parsers/bbcode/handlers/table_handler.rb +32 -0
  28. data/lib/markbridge/parsers/bbcode/handlers/table_row_handler.rb +35 -0
  29. data/lib/markbridge/parsers/bbcode/parser.rb +5 -8
  30. data/lib/markbridge/parsers/bbcode/parser_state.rb +12 -18
  31. data/lib/markbridge/parsers/bbcode/peekable_enumerator.rb +9 -59
  32. data/lib/markbridge/parsers/bbcode/raw_content_collector.rb +2 -2
  33. data/lib/markbridge/parsers/bbcode/scanner.rb +49 -63
  34. data/lib/markbridge/parsers/bbcode/tokens/tag_end_token.rb +1 -5
  35. data/lib/markbridge/parsers/bbcode/tokens/tag_start_token.rb +1 -6
  36. data/lib/markbridge/parsers/bbcode/tokens/text_token.rb +1 -7
  37. data/lib/markbridge/parsers/bbcode/tokens/token.rb +1 -1
  38. data/lib/markbridge/parsers/bbcode.rb +4 -0
  39. data/lib/markbridge/parsers/html/handler_registry.rb +32 -44
  40. data/lib/markbridge/parsers/html/handlers/base_handler.rb +0 -3
  41. data/lib/markbridge/parsers/html/handlers/image_handler.rb +1 -4
  42. data/lib/markbridge/parsers/html/handlers/table_cell_handler.rb +24 -0
  43. data/lib/markbridge/parsers/html/handlers/table_handler.rb +24 -0
  44. data/lib/markbridge/parsers/html/handlers/table_row_handler.rb +24 -0
  45. data/lib/markbridge/parsers/html/parser.rb +16 -15
  46. data/lib/markbridge/parsers/html.rb +3 -0
  47. data/lib/markbridge/parsers/media_wiki/inline_parser.rb +115 -151
  48. data/lib/markbridge/parsers/media_wiki/inline_tag_registry.rb +103 -0
  49. data/lib/markbridge/parsers/media_wiki/parser.rb +174 -71
  50. data/lib/markbridge/parsers/media_wiki.rb +1 -0
  51. data/lib/markbridge/parsers/text_formatter/handler_registry.rb +10 -36
  52. data/lib/markbridge/parsers/text_formatter/handlers/table_cell_handler.rb +26 -0
  53. data/lib/markbridge/parsers/text_formatter/parser.rb +3 -8
  54. data/lib/markbridge/parsers/text_formatter.rb +1 -0
  55. data/lib/markbridge/processors/discourse_markdown/code_block_tracker.rb +111 -92
  56. data/lib/markbridge/processors/discourse_markdown/detectors/base.rb +13 -7
  57. data/lib/markbridge/processors/discourse_markdown/detectors/event.rb +11 -20
  58. data/lib/markbridge/processors/discourse_markdown/detectors/poll.rb +10 -48
  59. data/lib/markbridge/processors/discourse_markdown/detectors/upload.rb +38 -63
  60. data/lib/markbridge/processors/discourse_markdown/scanner.rb +36 -41
  61. data/lib/markbridge/renderers/discourse/builders/list_item_builder.rb +6 -6
  62. data/lib/markbridge/renderers/discourse/html_escaper.rb +20 -0
  63. data/lib/markbridge/renderers/discourse/markdown_escaper.rb +262 -205
  64. data/lib/markbridge/renderers/discourse/render_context.rb +23 -11
  65. data/lib/markbridge/renderers/discourse/renderer.rb +54 -11
  66. data/lib/markbridge/renderers/discourse/rendering_interface.rb +12 -4
  67. data/lib/markbridge/renderers/discourse/tag.rb +14 -1
  68. data/lib/markbridge/renderers/discourse/tag_library.rb +30 -25
  69. data/lib/markbridge/renderers/discourse/tags/align_tag.rb +15 -7
  70. data/lib/markbridge/renderers/discourse/tags/attachment_tag.rb +1 -1
  71. data/lib/markbridge/renderers/discourse/tags/bold_tag.rb +2 -0
  72. data/lib/markbridge/renderers/discourse/tags/code_tag.rb +14 -8
  73. data/lib/markbridge/renderers/discourse/tags/email_tag.rb +5 -3
  74. data/lib/markbridge/renderers/discourse/tags/event_tag.rb +3 -3
  75. data/lib/markbridge/renderers/discourse/tags/heading_tag.rb +6 -2
  76. data/lib/markbridge/renderers/discourse/tags/horizontal_rule_tag.rb +2 -2
  77. data/lib/markbridge/renderers/discourse/tags/image_tag.rb +12 -1
  78. data/lib/markbridge/renderers/discourse/tags/italic_tag.rb +2 -0
  79. data/lib/markbridge/renderers/discourse/tags/line_break_tag.rb +2 -2
  80. data/lib/markbridge/renderers/discourse/tags/list_item_tag.rb +24 -47
  81. data/lib/markbridge/renderers/discourse/tags/list_tag.rb +10 -15
  82. data/lib/markbridge/renderers/discourse/tags/mention_tag.rb +6 -2
  83. data/lib/markbridge/renderers/discourse/tags/paragraph_tag.rb +10 -0
  84. data/lib/markbridge/renderers/discourse/tags/poll_tag.rb +9 -4
  85. data/lib/markbridge/renderers/discourse/tags/quote_tag.rb +17 -11
  86. data/lib/markbridge/renderers/discourse/tags/spoiler_tag.rb +9 -0
  87. data/lib/markbridge/renderers/discourse/tags/strikethrough_tag.rb +2 -0
  88. data/lib/markbridge/renderers/discourse/tags/table_cell_tag.rb +18 -0
  89. data/lib/markbridge/renderers/discourse/tags/table_row_tag.rb +18 -0
  90. data/lib/markbridge/renderers/discourse/tags/table_tag.rb +128 -0
  91. data/lib/markbridge/renderers/discourse/tags/underline_tag.rb +10 -3
  92. data/lib/markbridge/renderers/discourse/tags/upload_tag.rb +28 -1
  93. data/lib/markbridge/renderers/discourse/tags/url_tag.rb +5 -3
  94. data/lib/markbridge/renderers/discourse.rb +4 -0
  95. data/lib/markbridge/textformatter.rb +4 -0
  96. data/lib/markbridge/version.rb +1 -1
  97. data/lib/markbridge.rb +27 -62
  98. metadata +19 -2
@@ -11,7 +11,6 @@ module Markbridge
11
11
  end
12
12
 
13
13
  def on_open(token:, context:, registry:, tokens: nil)
14
- # Check if ordered: explicit ol/olist tag, or type=1, or option=1
15
14
  ordered =
16
15
  %w[ol olist].include?(token.tag) || token.attrs[:type] == "1" ||
17
16
  token.attrs[:option] == "1"
@@ -21,10 +20,7 @@ module Markbridge
21
20
  end
22
21
 
23
22
  def on_close(token:, context:, registry:, tokens: nil)
24
- # Auto-close open list item before closing list
25
- context.pop if context.current.is_a?(AST::ListItem)
26
-
27
- # Then use default closing behavior
23
+ context.pop if context.current.instance_of?(AST::ListItem)
28
24
  super
29
25
  end
30
26
 
@@ -11,8 +11,7 @@ module Markbridge
11
11
  end
12
12
 
13
13
  def on_open(token:, context:, registry:, tokens: nil)
14
- # Auto-close previous list item if opening a new one
15
- context.pop if context.current.is_a?(AST::ListItem)
14
+ context.pop if context.current.instance_of?(AST::ListItem)
16
15
 
17
16
  element = AST::ListItem.new
18
17
  context.push(element, token:)
@@ -17,46 +17,41 @@ module Markbridge
17
17
  end
18
18
 
19
19
  def on_open(token:, context:, registry:, tokens: nil)
20
- # Extract quote attributes
21
- author = nil
22
- post = nil
23
- topic = nil
24
- username = nil
20
+ attrs = extract_quote_attrs(token)
21
+ element = AST::Quote.new(**attrs)
22
+ context.push(element, token:)
23
+ end
25
24
 
26
- # Check for author attribute or option
27
- if token.attrs[:author]
28
- author = token.attrs[:author]
29
- elsif token.attrs[:option]
30
- # Parse Discourse-style quote: "username, post:123, topic:456"
31
- option = token.attrs[:option]
32
- if option.match?(/,\s*post:\d+/)
33
- # Discourse format with post/topic
34
- parts = option.split(",").map(&:strip)
35
- username = parts[0]
36
- parts[1..].each do |part|
37
- if part =~ /^post:(\d+)$/
38
- post = ::Regexp.last_match(1)
39
- elsif part =~ /^topic:(\d+)$/
40
- topic = ::Regexp.last_match(1)
41
- end
42
- end
43
- author = username
44
- else
45
- # Simple author attribution
46
- author = option
47
- end
48
- end
25
+ attr_reader :element_class
49
26
 
50
- # Check for explicit username, post, topic attributes (override option if present)
51
- username = token.attrs[:username] if token.attrs[:username]
52
- post = token.attrs[:post] if token.attrs[:post]
53
- topic = token.attrs[:topic] if token.attrs[:topic]
27
+ private
54
28
 
55
- element = AST::Quote.new(author:, post:, topic:, username:)
56
- context.push(element, token:)
29
+ def extract_quote_attrs(token)
30
+ author, post, topic, username = extract_from_option(token)
31
+ author ||= token.attrs[:author]
32
+
33
+ {
34
+ author:,
35
+ post: token.attrs[:post] || post,
36
+ topic: token.attrs[:topic] || topic,
37
+ username: token.attrs[:username] || username,
38
+ }
57
39
  end
58
40
 
59
- attr_reader :element_class
41
+ def extract_from_option(token)
42
+ option = token.attrs[:option]
43
+ return nil, nil, nil, nil unless option
44
+
45
+ post = option[/,\s*post:(\d+)/, 1]
46
+ return option, nil, nil, nil unless post
47
+
48
+ # Discourse format: "username, post:123, topic:456" (topic optional,
49
+ # order irrelevant between post: and topic:).
50
+ username = option.split(",").first.strip
51
+ topic = option[/,\s*topic:(\d+)/, 1]
52
+
53
+ [username, post, topic, username]
54
+ end
60
55
  end
61
56
  end
62
57
  end
@@ -15,19 +15,15 @@ module Markbridge
15
15
 
16
16
  def on_open(token:, context:, registry:, tokens:)
17
17
  result = @collector.collect(token.tag, tokens)
18
-
19
- # Track unclosed raw tags for diagnostics
20
18
  context.mark_unclosed_raw!(token.tag) if result.unclosed?
21
19
 
22
20
  element = create_element(token:, content: result.content)
23
21
  context.add_child(element)
24
22
  end
25
23
 
26
- # RawHandler doesn't push to stack, so on_close should do nothing
24
+ # The collector consumes the closing tag, so this fires only when a
25
+ # `[/raw]` token leaks past the collector — treat it as literal text.
27
26
  def on_close(token:, context:, registry:, tokens: nil)
28
- # Raw content was already consumed by collector
29
- # Closing tag was consumed by collector, so this shouldn't be called
30
- # If it is called, treat as text
31
27
  context.add_child(AST::Text.new(token.source))
32
28
  end
33
29
 
@@ -15,10 +15,10 @@ module Markbridge
15
15
  context.add_child(element)
16
16
  end
17
17
 
18
- def on_close(token:, context:, registry:, tokens: nil)
19
- # Treat unexpected closing tag as text
20
- context.add_child(AST::Text.new(token.source))
21
- end
18
+ # on_close is inherited from BaseHandler. SelfClosing elements are
19
+ # never pushed onto the stack, so the registry's closing strategy
20
+ # always falls through to adding the closing-tag source as text -
21
+ # the same result as a dedicated override.
22
22
 
23
23
  attr_reader :element_class
24
24
  end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module BBCode
6
+ module Handlers
7
+ # Handler for table cell tags (td, th)
8
+ class TableCellHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::TableCell
11
+ end
12
+
13
+ def on_open(token:, context:, registry:, tokens: nil)
14
+ # Auto-close previous cell if still open
15
+ context.pop if context.current.instance_of?(AST::TableCell)
16
+
17
+ element = AST::TableCell.new(header: token.tag == "th")
18
+ context.push(element, token:)
19
+ end
20
+
21
+ attr_reader :element_class
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module BBCode
6
+ module Handlers
7
+ # Handler for table tags
8
+ class TableHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::Table
11
+ end
12
+
13
+ def on_open(token:, context:, registry:, tokens: nil)
14
+ element = AST::Table.new
15
+ context.push(element, token:)
16
+ end
17
+
18
+ def on_close(token:, context:, registry:, tokens: nil)
19
+ # Auto-close open cell before closing row
20
+ context.pop if context.current.instance_of?(AST::TableCell)
21
+ # Auto-close open row before closing table
22
+ context.pop if context.current.instance_of?(AST::TableRow)
23
+
24
+ super
25
+ end
26
+
27
+ attr_reader :element_class
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Markbridge
4
+ module Parsers
5
+ module BBCode
6
+ module Handlers
7
+ # Handler for table row tags (tr)
8
+ class TableRowHandler < BaseHandler
9
+ def initialize
10
+ @element_class = AST::TableRow
11
+ end
12
+
13
+ def on_open(token:, context:, registry:, tokens: nil)
14
+ # Auto-close open cell before starting new row
15
+ context.pop if context.current.instance_of?(AST::TableCell)
16
+ # Auto-close previous row if still open
17
+ context.pop if context.current.instance_of?(AST::TableRow)
18
+
19
+ element = AST::TableRow.new
20
+ context.push(element, token:)
21
+ end
22
+
23
+ def on_close(token:, context:, registry:, tokens: nil)
24
+ # Auto-close open cell before closing row
25
+ context.pop if context.current.instance_of?(AST::TableCell)
26
+
27
+ super
28
+ end
29
+
30
+ attr_reader :element_class
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -66,8 +66,7 @@ module Markbridge
66
66
  def parse_tokens(scanner, context)
67
67
  tokens = PeekableEnumerator.new(scanner)
68
68
 
69
- while tokens.has_next?
70
- token = tokens.next
69
+ while (token = tokens.next)
71
70
  case token
72
71
  when TextToken
73
72
  process_text(token, context)
@@ -94,7 +93,7 @@ module Markbridge
94
93
  if (handler = @handlers[token.tag])
95
94
  handler.on_open(token:, context:, registry: @handlers, tokens:)
96
95
  else
97
- handle_unknown_tag(token, context)
96
+ track_unknown_tag(token)
98
97
  end
99
98
  end
100
99
 
@@ -106,15 +105,13 @@ module Markbridge
106
105
  if (handler = @handlers[token.tag])
107
106
  handler.on_close(token:, context:, registry: @handlers, tokens:)
108
107
  else
109
- handle_unknown_tag(token, context)
108
+ track_unknown_tag(token)
110
109
  end
111
110
  end
112
111
 
113
- # Handle unknown tag by tracking it and ignoring the wrapper
114
- # while still processing its children
112
+ # Track unknown tag by name; the wrapper is ignored, children pass through.
115
113
  # @param token [Token]
116
- # @param context [ParserState]
117
- def handle_unknown_tag(token, context)
114
+ def track_unknown_tag(token)
118
115
  @unknown_tags[token.tag] += 1
119
116
  end
120
117
  end
@@ -28,16 +28,13 @@ module Markbridge
28
28
  # @return [Boolean] true if pushed successfully, false if depth exceeded
29
29
  # @raise [MaxDepthExceededError] when pushing would exceed MAX_DEPTH and no token provided
30
30
  def push(element, token: nil)
31
- if @depth >= MAX_DEPTH
32
- if token
33
- # Graceful degradation: treat as text
34
- @current << AST::Text.new(token.source)
35
- @depth_exceeded_count += 1
36
- return false
37
- else
38
- # Legacy behavior: raise error
39
- raise MaxDepthExceededError, MAX_DEPTH
40
- end
31
+ if @depth == MAX_DEPTH
32
+ raise MaxDepthExceededError, MAX_DEPTH unless token
33
+
34
+ # Graceful degradation: treat as text
35
+ @current << AST::Text.new(token.source)
36
+ @depth_exceeded_count += 1
37
+ return false
41
38
  end
42
39
 
43
40
  @current << element
@@ -50,10 +47,10 @@ module Markbridge
50
47
  # Pop current element and return to parent
51
48
  # @return [AST::Element] the parent node
52
49
  def pop
53
- return @root if @node_stack.size <= 1
50
+ return @root if @node_stack.size == 1
54
51
 
55
52
  @node_stack.pop
56
- @current = @node_stack.last
53
+ @current = @node_stack.fetch(-1)
57
54
  @depth -= 1
58
55
  @current
59
56
  end
@@ -80,12 +77,9 @@ module Markbridge
80
77
  # @param limit [Integer, nil] number of elements to include from the top
81
78
  # @return [Array<AST::Node>]
82
79
  def elements_from_current(limit = nil)
83
- return [] if @node_stack.empty?
84
-
85
- limit = (@node_stack.size - 1) if limit.nil?
86
- limit = [limit, @node_stack.size - 1].min
87
-
88
- (0..limit).map { |offset| @node_stack[@node_stack.size - 1 - offset] }
80
+ max_offset = @node_stack.size - 1
81
+ limit = [limit || max_offset, max_offset].min
82
+ (0..limit).map { |offset| @node_stack.fetch(max_offset - offset) }
89
83
  end
90
84
  end
91
85
  end
@@ -10,7 +10,7 @@ module Markbridge
10
10
  # `next_token`) so callers can:
11
11
  # - inspect the next token with {#peek} without advancing the scanner
12
12
  # - inspect several upcoming tokens with {#peek_ahead}
13
- # - consume tokens with {#next}
13
+ # - consume tokens with {#next} (returns `nil` when exhausted)
14
14
  #
15
15
  # The enumerator is lazy: tokens are only requested from the scanner
16
16
  # when needed. Once the underlying scanner returns `nil`, the enumerator
@@ -32,71 +32,29 @@ module Markbridge
32
32
  def initialize(scanner)
33
33
  @scanner = scanner
34
34
  @peeked = []
35
- @finished = false
36
35
  end
37
36
 
38
37
  # Consume and return the next token.
39
- #
40
- # If there are tokens in the internal buffer (from prior peeks) the
41
- # buffered token is returned. Otherwise, the next token is requested
42
- # from the underlying scanner via `next_token`.
43
- #
44
38
  # @return [Object, nil] next token or `nil` when exhausted
45
39
  def next
46
- return @peeked.shift if @peeked.any?
47
- return nil if @finished
48
-
49
- value = @scanner.next_token
50
- @finished = true if value.nil?
51
- value
52
- end
53
-
54
- # Return whether more tokens are available.
55
- #
56
- # This will attempt to fetch one token from the scanner if necessary
57
- # to determine whether more tokens remain.
58
- #
59
- # @return [Boolean] `true` if at least one token is available
60
- def has_next?
61
- return true if @peeked.any?
62
- return false if @finished
63
-
64
- value = @scanner.next_token
65
- if value.nil?
66
- @finished = true
67
- false
68
- else
69
- @peeked << value
70
- true
71
- end
40
+ ensure_peeked(1)
41
+ @peeked.shift
72
42
  end
73
43
 
74
44
  # Peek at the next single token without consuming it.
75
- #
76
- # If the enumerator has been exhausted this returns `nil`.
77
- #
78
45
  # @return [Object, nil] the next token or `nil` when exhausted
79
46
  def peek
80
- return @peeked.first if @peeked.any?
81
- return nil if @finished
82
-
83
47
  ensure_peeked(1)
84
48
  @peeked.first
85
49
  end
86
50
 
87
51
  # Peek ahead at up to `count` upcoming tokens without consuming them.
88
- #
89
- # The method will return an array with at most `count` elements.
90
- # If fewer tokens remain, a shorter array is returned. When the
91
- # enumerator is exhausted an empty array is returned.
92
- #
93
- # @param count [Integer] number of tokens to peek ahead (non\-negative)
52
+ # @param count [Integer] number of tokens to peek ahead (clamped to 0..)
94
53
  # @return [Array<Object>] array of upcoming tokens (possibly empty)
95
54
  def peek_ahead(count)
96
- return [] if count <= 0
97
-
55
+ count = [count, 0].max
98
56
  ensure_peeked(count)
99
- @peeked.take(count)
57
+ @peeked.first(count)
100
58
  end
101
59
 
102
60
  alias next_token next
@@ -104,19 +62,11 @@ module Markbridge
104
62
  private
105
63
 
106
64
  # Ensure at least `count` items are present in the peek buffer.
107
- #
108
- # This will repeatedly call `next_token` on the scanner until the
109
- # buffer contains `count` items or the scanner returns `nil`.
110
- #
111
- # @param count [Integer] desired buffer size
112
- # @return [void]
113
65
  def ensure_peeked(count)
114
- while !@finished && @peeked.size < count
66
+ while @peeked.size < count
115
67
  value = @scanner.next_token
116
- if value.nil?
117
- @finished = true
118
- break
119
- end
68
+ break if value.nil?
69
+
120
70
  @peeked << value
121
71
  end
122
72
  end
@@ -15,9 +15,9 @@ module Markbridge
15
15
  closed = false
16
16
 
17
17
  while (token = scanner.next_token)
18
- if token.is_a?(TagStartToken) && token.tag == tag_name
18
+ if token.instance_of?(TagStartToken) && token.tag == tag_name
19
19
  depth += 1
20
- elsif token.is_a?(TagEndToken) && token.tag == tag_name
20
+ elsif token.instance_of?(TagEndToken) && token.tag == tag_name
21
21
  if (depth -= 1) == 0
22
22
  closed = true
23
23
  break
@@ -20,16 +20,12 @@ module Markbridge
20
20
  if bracket_index.nil?
21
21
  text = @input[@current_pos..]
22
22
  @current_pos = @length
23
- return TextToken.new(text:, pos: start_pos)
24
- end
25
-
26
- if bracket_index > @current_pos
23
+ TextToken.new(text:, pos: start_pos)
24
+ elsif bracket_index > @current_pos
27
25
  text = @input[@current_pos...bracket_index]
28
26
  @current_pos = bracket_index
29
- return TextToken.new(text:, pos: start_pos)
30
- end
31
-
32
- if (tag_token = parse_tag_at_cursor)
27
+ TextToken.new(text:, pos: start_pos)
28
+ elsif (tag_token = parse_tag_at_cursor)
33
29
  tag_token
34
30
  else
35
31
  @current_pos += 1
@@ -53,35 +49,25 @@ module Markbridge
53
49
  :WHITESPACE_CHAR,
54
50
  :UNQUOTED_VALUE_STOP
55
51
 
52
+ # @return [Token, nil] tag token or nil if not a valid tag (caller rolls back)
53
+ # Precondition: caller has verified current_char == "[".
56
54
  def parse_tag_at_cursor
57
- return nil if current_char != "["
58
-
59
55
  tag_start_pos = @current_pos
60
56
  @current_pos += 1 # skip '['
61
-
62
- # Check for closing tag
63
- closing = current_char == "/"
64
- @current_pos += 1 if closing
65
-
66
- # Parse tag name
57
+ closing = consume("/")
67
58
  tag_name = scan_tag_name
68
- return rollback(tag_start_pos) unless tag_name
69
-
70
- # Parse attributes (only for opening tags)
71
- attrs = closing ? {} : scan_attributes
72
- return rollback(tag_start_pos) if current_char != "]"
59
+ attrs = (closing || tag_name.nil?) ? {} : scan_attributes
60
+ return rollback(tag_start_pos) unless tag_name && consume("]")
73
61
 
74
- @current_pos += 1 # skip ']'
75
-
76
- # Capture original source text
77
62
  source = @input[tag_start_pos...@current_pos]
63
+ build_token(closing:, tag: tag_name.downcase, attrs:, pos: tag_start_pos, source:)
64
+ end
78
65
 
79
- normalized_tag_name = tag_name.downcase
80
-
66
+ def build_token(closing:, tag:, attrs:, pos:, source:)
81
67
  if closing
82
- TagEndToken.new(tag: normalized_tag_name, pos: tag_start_pos, source:)
68
+ TagEndToken.new(tag:, pos:, source:)
83
69
  else
84
- TagStartToken.new(tag: normalized_tag_name, attrs:, pos: tag_start_pos, source:)
70
+ TagStartToken.new(tag:, attrs:, pos:, source:)
85
71
  end
86
72
  end
87
73
 
@@ -90,19 +76,21 @@ module Markbridge
90
76
  nil
91
77
  end
92
78
 
93
- # Scan a tag name: [a-z*.][a-z0-9]*(:uid)?
79
+ # Scan a tag name: [a-z*][a-z0-9]*(:hex*)?
80
+ #
81
+ # Char-by-char rather than a single regex over `@input[pos..]`
82
+ # because the regex form allocates a substring for every tag,
83
+ # which is a dominant cost on tag-heavy input. The char-based
84
+ # loop is ~3x faster under YJIT.
94
85
  # @return [String, nil]
95
86
  def scan_tag_name
96
87
  start = @current_pos
97
88
 
98
- # First character: letter, *, or .
99
89
  return nil unless current_char&.match?(TAG_INITIAL_CHAR)
100
90
  @current_pos += 1
101
91
 
102
- # Remaining characters: letters or digits
103
92
  @current_pos += 1 while current_char&.match?(TAG_NAME_CHAR)
104
93
 
105
- # Optional :uid suffix (e.g., [quote:abc123])
106
94
  if current_char == ":"
107
95
  @current_pos += 1
108
96
  @current_pos += 1 while current_char&.match?(UID_HEX_CHAR)
@@ -119,7 +107,6 @@ module Markbridge
119
107
  attrs = {}
120
108
  skip_whitespace
121
109
 
122
- # First attribute might be option: [tag=value]
123
110
  if current_char == "="
124
111
  @current_pos += 1
125
112
  skip_whitespace
@@ -129,17 +116,11 @@ module Markbridge
129
116
  skip_whitespace
130
117
  end
131
118
 
132
- # Named attributes: [tag key=value key=value ...]
133
- while (char = current_char) && char != "]"
134
- name = scan_while(ATTR_NAME_CHAR)
135
- break if name.nil?
136
-
119
+ while (name = scan_while(ATTR_NAME_CHAR))
137
120
  skip_whitespace
138
- break if current_char != "="
121
+ break unless consume("=")
139
122
 
140
- @current_pos += 1
141
123
  skip_whitespace
142
-
143
124
  value = scan_attribute_value
144
125
  attrs[name.downcase.to_sym] = value if value
145
126
  skip_whitespace
@@ -148,6 +129,13 @@ module Markbridge
148
129
  attrs
149
130
  end
150
131
 
132
+ def consume(char)
133
+ return false if current_char != char
134
+
135
+ @current_pos += 1
136
+ true
137
+ end
138
+
151
139
  def scan_attribute_value
152
140
  char = current_char
153
141
  if char == '"' || char == "'"
@@ -171,22 +159,15 @@ module Markbridge
171
159
  # Workaround: Use single quotes if you need double quotes in the value:
172
160
  # [url='has "quotes" inside'] → option: "has \"quotes\" inside" ✓
173
161
  #
174
- # @return [String] the unescaped attribute value
162
+ # @return [String, nil] the unescaped attribute value, or nil if unterminated
175
163
  def scan_quoted_string
176
164
  quote_char = current_char
177
165
  start = (@current_pos += 1) # skip opening quote
178
-
179
166
  closing_index = @input.index(quote_char, start)
167
+ return nil unless closing_index
180
168
 
181
- if closing_index
182
- value = @input[start...closing_index]
183
- @current_pos = closing_index + 1 # position after closing quote
184
- else
185
- value = @input[start..] || ""
186
- @current_pos = @length
187
- end
188
-
189
- value
169
+ @current_pos = closing_index + 1
170
+ @input[start...closing_index]
190
171
  end
191
172
 
192
173
  def scan_unquoted_value
@@ -195,18 +176,18 @@ module Markbridge
195
176
 
196
177
  # Consumes characters matching +pattern+; returns substring or nil if empty
197
178
  def scan_while(pattern)
198
- start = @current_pos
199
- while (char = current_char) && char.match?(pattern)
200
- @current_pos += 1
201
- end
202
-
203
- return nil if @current_pos == start
204
- @input[start...@current_pos]
179
+ stop_index = @current_pos
180
+ stop_index += 1 while stop_index < @length && @input[stop_index].match?(pattern)
181
+ consume_range(stop_index)
205
182
  end
206
183
 
207
- # Consumes characters until +pattern+ matches; returns substring or nil if empty
184
+ # Consumes characters until +pattern+ matches (or end of input); returns substring or nil if empty
208
185
  def scan_until(pattern)
209
- stop_index = @input.index(pattern, @current_pos) || @length
186
+ consume_range(@input.index(pattern, @current_pos) || @length)
187
+ end
188
+
189
+ # Slice [@current_pos, stop_index), advance the cursor, or return nil for empty.
190
+ def consume_range(stop_index)
210
191
  return nil if stop_index == @current_pos
211
192
 
212
193
  value = @input[@current_pos...stop_index]
@@ -219,11 +200,16 @@ module Markbridge
219
200
  end
220
201
 
221
202
  def skip_whitespace
222
- @current_pos += 1 while current_char&.match?(WHITESPACE_CHAR)
203
+ @current_pos += 1 while @current_pos < @length &&
204
+ @input[@current_pos].match?(WHITESPACE_CHAR)
223
205
  end
224
206
 
225
207
  def end_of_input?
226
- @current_pos >= @length
208
+ # All callers maintain @current_pos <= @length (scan_while
209
+ # bounds on @length; scan_until uses `index || @length`;
210
+ # consume is a no-op at EOF); `==` and `>=` are observably
211
+ # identical here.
212
+ @current_pos == @length
227
213
  end
228
214
  end
229
215
  end