bbortcodes 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/README.md +15 -10
- data/lib/bbortcodes/grammar.rb +24 -7
- data/lib/bbortcodes/parser.rb +7 -17
- data/lib/bbortcodes/transform.rb +5 -0
- data/lib/bbortcodes/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 00accc38cffaf4a382f6544309295b9e911676625312aa0ccf2cf42aefab345e
|
|
4
|
+
data.tar.gz: 832efe6cdda3787a15c7dca1f3e33aa28eafa026f6c9829162dc536947e275e2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 267033c66da2ce686488602f537c1a343cff7364d629c26ac89832c4b2d63f9eebba54dfed83b8b418a0073c354b5ab102b6d702c7edc3b99dc1623402fbcc65
|
|
7
|
+
data.tar.gz: f903ec7d1d617cae3fac14d98fc1e956bf2c3b41b3d4ca6390de24ec5170502e7ce2d8bfea982252914189294a03e38136c37a51f5f8d8275109ead048f3be9a
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.0.0] - 2026-02-06
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
- **BREAKING**: Grammar now only matches registered shortcode names
|
|
14
|
+
- Unregistered bracket patterns (e.g., `[unknown]`, markdown links `[text](url)`) pass through as plain text
|
|
15
|
+
- Previously, unregistered shortcodes would trigger `on_parse_error` handling
|
|
16
|
+
- **BREAKING**: `on_parse_error` configuration now only applies to grammar-level parse failures
|
|
17
|
+
- Unknown shortcode names no longer trigger this setting since they're treated as plain text
|
|
18
|
+
- Removed `handle_unknown_shortcode` method from Parser (now dead code)
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
- Empty attributes (e.g., `caption=""`) now parse correctly instead of returning partial Parslet objects
|
|
22
|
+
|
|
10
23
|
## [0.1.0] - 2025-10-15
|
|
11
24
|
|
|
12
25
|
### Added
|
data/README.md
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/bbortcodes.svg)](https://badge.fury.io/rb/bbortcodes)
|
|
2
|
+
|
|
1
3
|
# BBortcodes
|
|
2
4
|
|
|
3
5
|
A state-of-the-art Ruby gem for parsing WordPress-like shortcodes with grammar-based parsing, type safety, and support for nested shortcodes.
|
|
4
6
|
|
|
5
7
|
## Features
|
|
6
8
|
|
|
7
|
-
- **Grammar-based parsing** using [Parslet](https://github.com/kschiess/parslet) -
|
|
9
|
+
- **Grammar-based parsing** using [Parslet](https://github.com/kschiess/parslet) - only registered shortcodes are parsed, everything else passes through as plain text
|
|
8
10
|
- **Type safety** with [Literal](https://github.com/joeldrapper/literal) for runtime type checking
|
|
9
11
|
- **Nested shortcodes** with configurable child validation
|
|
10
12
|
- **Self-closing and paired shortcodes** support
|
|
@@ -432,8 +434,9 @@ BBortcodes.configure do |config|
|
|
|
432
434
|
# Error handling
|
|
433
435
|
# ---------------
|
|
434
436
|
|
|
435
|
-
# How to handle parse
|
|
436
|
-
# Options: :raise, :skip (
|
|
437
|
+
# How to handle grammar-level parse failures
|
|
438
|
+
# Options: :raise, :skip (return original text), :strip (return empty)
|
|
439
|
+
# Note: Unregistered shortcodes like [unknown] are automatically plain text
|
|
437
440
|
config.on_parse_error = :raise # default
|
|
438
441
|
|
|
439
442
|
# How to handle disallowed nested shortcodes
|
|
@@ -463,20 +466,21 @@ BBortcodes.configure do |config|
|
|
|
463
466
|
end
|
|
464
467
|
```
|
|
465
468
|
|
|
466
|
-
####
|
|
469
|
+
#### Unregistered Shortcodes as Plain Text
|
|
467
470
|
|
|
468
|
-
|
|
469
|
-
BBortcodes.configure do |config|
|
|
470
|
-
config.on_parse_error = :skip
|
|
471
|
-
end
|
|
471
|
+
Bracket patterns that don't match registered shortcode names are automatically treated as plain text:
|
|
472
472
|
|
|
473
|
-
|
|
473
|
+
```ruby
|
|
474
|
+
text = "Text with [unknown]shortcode[/unknown] and [link](url)"
|
|
474
475
|
output, _ = BBortcodes.parse(text)
|
|
475
476
|
|
|
476
477
|
puts output
|
|
477
|
-
# => "Text with [unknown]shortcode[/unknown]"
|
|
478
|
+
# => "Text with [unknown]shortcode[/unknown] and [link](url)"
|
|
479
|
+
# Both patterns pass through unchanged since they're not registered shortcodes
|
|
478
480
|
```
|
|
479
481
|
|
|
482
|
+
This means markdown links, unregistered shortcode names, and other bracket patterns won't cause parse errors—they simply remain as plain text in the output.
|
|
483
|
+
|
|
480
484
|
#### Example: Strip Disallowed Children
|
|
481
485
|
|
|
482
486
|
```ruby
|
|
@@ -582,6 +586,7 @@ BBortcodes uses [Parslet](https://github.com/kschiess/parslet), a PEG (Parsing E
|
|
|
582
586
|
- **Better error messages** when syntax is invalid
|
|
583
587
|
- **Composable grammar rules** for maintainability
|
|
584
588
|
- **Unambiguous parsing** of edge cases
|
|
589
|
+
- **Dynamic tag matching** - the grammar only matches registered shortcode names, treating everything else (markdown links, unregistered patterns) as plain text
|
|
585
590
|
|
|
586
591
|
### Type Safety
|
|
587
592
|
|
data/lib/bbortcodes/grammar.rb
CHANGED
|
@@ -4,6 +4,11 @@ require "parslet"
|
|
|
4
4
|
|
|
5
5
|
module BBortcodes
|
|
6
6
|
class Grammar < Parslet::Parser
|
|
7
|
+
def initialize(tag_names = [])
|
|
8
|
+
super()
|
|
9
|
+
@tag_names = tag_names.sort_by(&:length).reverse # Match longest first
|
|
10
|
+
end
|
|
11
|
+
|
|
7
12
|
# Whitespace
|
|
8
13
|
rule(:space) { match('\s').repeat(1) }
|
|
9
14
|
rule(:space?) { space.maybe }
|
|
@@ -28,13 +33,19 @@ module BBortcodes
|
|
|
28
33
|
(space >> attribute).repeat(1)
|
|
29
34
|
end
|
|
30
35
|
|
|
31
|
-
#
|
|
32
|
-
rule(:
|
|
36
|
+
# Dynamic rule matching only registered tag names
|
|
37
|
+
rule(:valid_shortcode_name) do
|
|
38
|
+
if @tag_names.empty?
|
|
39
|
+
str("").absent? # Never matches when no tags registered
|
|
40
|
+
else
|
|
41
|
+
@tag_names.map { |name| str(name) }.reduce(:|)
|
|
42
|
+
end
|
|
43
|
+
end
|
|
33
44
|
|
|
34
45
|
# Self-closing shortcode: [name] or [name attr="value"]
|
|
35
46
|
rule(:self_closing_shortcode) do
|
|
36
47
|
str("[") >>
|
|
37
|
-
|
|
48
|
+
valid_shortcode_name.as(:name) >>
|
|
38
49
|
attributes.as(:attributes).maybe >>
|
|
39
50
|
space? >>
|
|
40
51
|
str("]")
|
|
@@ -43,7 +54,7 @@ module BBortcodes
|
|
|
43
54
|
# Opening tag: [name] or [name attr="value"]
|
|
44
55
|
rule(:opening_tag) do
|
|
45
56
|
str("[") >>
|
|
46
|
-
|
|
57
|
+
valid_shortcode_name.as(:tag_name) >>
|
|
47
58
|
attributes.as(:attributes).maybe >>
|
|
48
59
|
space? >>
|
|
49
60
|
str("]")
|
|
@@ -52,7 +63,7 @@ module BBortcodes
|
|
|
52
63
|
# Closing tag: [/name]
|
|
53
64
|
rule(:closing_tag) do
|
|
54
65
|
str("[/") >>
|
|
55
|
-
|
|
66
|
+
valid_shortcode_name.as(:tag_name) >>
|
|
56
67
|
space? >>
|
|
57
68
|
str("]")
|
|
58
69
|
end
|
|
@@ -61,8 +72,14 @@ module BBortcodes
|
|
|
61
72
|
rule(:content_char) { str("[").absent? >> any }
|
|
62
73
|
rule(:plain_text) { content_char.repeat(1).as(:text) }
|
|
63
74
|
|
|
75
|
+
# Fallback: a "[" that does not begin a registered closing tag; tried only after the shortcode rules fail
|
|
76
|
+
rule(:lone_bracket) do
|
|
77
|
+
str("[") >> (str("/") >> valid_shortcode_name).absent?
|
|
78
|
+
end
|
|
79
|
+
rule(:lone_bracket_text) { lone_bracket.as(:text) }
|
|
80
|
+
|
|
64
81
|
# Forward declaration for recursive content
|
|
65
|
-
rule(:content_element) { paired_shortcode | self_closing_shortcode | plain_text }
|
|
82
|
+
rule(:content_element) { paired_shortcode | self_closing_shortcode | lone_bracket_text | plain_text }
|
|
66
83
|
rule(:content) { content_element.repeat.as(:content) }
|
|
67
84
|
|
|
68
85
|
# Paired shortcode with content: [name]content[/name]
|
|
@@ -73,7 +90,7 @@ module BBortcodes
|
|
|
73
90
|
end
|
|
74
91
|
|
|
75
92
|
# Document is a sequence of text and shortcodes
|
|
76
|
-
rule(:document_element) { paired_shortcode | self_closing_shortcode | plain_text }
|
|
93
|
+
rule(:document_element) { paired_shortcode | self_closing_shortcode | lone_bracket_text | plain_text }
|
|
77
94
|
rule(:document) { document_element.repeat.as(:document) }
|
|
78
95
|
|
|
79
96
|
root(:document)
|
data/lib/bbortcodes/parser.rb
CHANGED
|
@@ -9,7 +9,6 @@ module BBortcodes
|
|
|
9
9
|
def initialize(registry: nil, config: nil)
|
|
10
10
|
@registry = registry || BBortcodes.registry
|
|
11
11
|
@config = config || BBortcodes.config
|
|
12
|
-
@grammar = Grammar.new
|
|
13
12
|
@transform = Transform.new
|
|
14
13
|
end
|
|
15
14
|
|
|
@@ -28,9 +27,13 @@ module BBortcodes
|
|
|
28
27
|
context = prepare_context(context)
|
|
29
28
|
|
|
30
29
|
# Parse the text into a tree structure with timeout protection
|
|
30
|
+
# Create grammar with current registered tag names
|
|
31
|
+
tag_names = @registry.tag_names
|
|
32
|
+
grammar = Grammar.new(tag_names)
|
|
33
|
+
|
|
31
34
|
begin
|
|
32
35
|
parsed = Timeout.timeout(config.parse_timeout) do
|
|
33
|
-
|
|
36
|
+
grammar.parse(text)
|
|
34
37
|
end
|
|
35
38
|
rescue Timeout::Error
|
|
36
39
|
raise ParseError, "Parsing timeout exceeded (#{config.parse_timeout}s) - input may be too complex"
|
|
@@ -113,11 +116,9 @@ module BBortcodes
|
|
|
113
116
|
return reconstruct_shortcode_text(node)
|
|
114
117
|
end
|
|
115
118
|
|
|
116
|
-
# Find the shortcode class
|
|
119
|
+
# Find the shortcode class (should always succeed since grammar only matches registered tags)
|
|
117
120
|
shortcode_class = registry.find(tag_name)
|
|
118
|
-
unless shortcode_class
|
|
119
|
-
return handle_unknown_shortcode(node)
|
|
120
|
-
end
|
|
121
|
+
raise ParseError, "Unknown shortcode: #{tag_name}" unless shortcode_class
|
|
121
122
|
|
|
122
123
|
# Validate against parent's allowed children
|
|
123
124
|
if parent_shortcode && !parent_shortcode.class.allows_child?(shortcode_class)
|
|
@@ -166,17 +167,6 @@ module BBortcodes
|
|
|
166
167
|
end
|
|
167
168
|
end
|
|
168
169
|
|
|
169
|
-
def handle_unknown_shortcode(node)
|
|
170
|
-
case config.on_parse_error
|
|
171
|
-
when :raise
|
|
172
|
-
raise ParseError, "Unknown shortcode: #{node[:name]}"
|
|
173
|
-
when :skip
|
|
174
|
-
reconstruct_shortcode_text(node)
|
|
175
|
-
when :strip
|
|
176
|
-
""
|
|
177
|
-
end
|
|
178
|
-
end
|
|
179
|
-
|
|
180
170
|
def handle_disallowed_child(node, parent)
|
|
181
171
|
case config.on_disallowed_child
|
|
182
172
|
when :raise
|
data/lib/bbortcodes/transform.rb
CHANGED
|
@@ -12,6 +12,11 @@ module BBortcodes
|
|
|
12
12
|
{name.to_s => value.to_s}
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
+
# Transform attribute with empty or multi-char string value
|
|
16
|
+
rule(name: simple(:name), value: {string: sequence(:chars)}) do
|
|
17
|
+
{name.to_s => chars.map(&:to_s).join}
|
|
18
|
+
end
|
|
19
|
+
|
|
15
20
|
# Transform self-closing shortcode
|
|
16
21
|
rule(name: simple(:name), attributes: subtree(:attrs)) do
|
|
17
22
|
attributes = Array(attrs).reduce({}, :merge)
|
data/lib/bbortcodes/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bbortcodes
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Stefan Exner
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-02-06 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: anyway_config
|