js_regex_to_ruby 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +222 -0
- data/Rakefile +12 -0
- data/lib/js_regex_to_ruby/converter.rb +383 -0
- data/lib/js_regex_to_ruby/result.rb +45 -0
- data/lib/js_regex_to_ruby/version.rb +5 -0
- data/lib/js_regex_to_ruby.rb +38 -0
- data/sig/js_regex_to_ruby.rbs +64 -0
- metadata +56 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: fdbe70d784541cad9e2009b2306416e72349cfc5e9136cadb998faf811d5ad91
|
|
4
|
+
data.tar.gz: fee286185020e60bf68b8c36736993ed75f8056fae28a882d977498a35445b44
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: b9290e4565d2635db41c94ffbf865dbe3a085d943c413e5f5c635c74b67584ff73e09e0fe230674f662146cf170e8977e474d9aa0963b47aa84da4a3012a18ce
|
|
7
|
+
data.tar.gz: d12f32df94de6834ce18c083e0b0ba9eaadacdf1ed4c56a6e39c236e4308e1337710eb33a1ebe5f34474e52ba75db90af5654c6eadea37ecee8c2e8f90f54cd0
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 jasl
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
# JsRegexToRuby
|
|
2
|
+
|
|
3
|
+
A Ruby gem that converts ECMAScript (JavaScript) regular expressions to Ruby `Regexp` objects, preserving behavior as closely as Ruby's regex engine allows.
|
|
4
|
+
|
|
5
|
+
## Why This Gem?
|
|
6
|
+
|
|
7
|
+
JavaScript and Ruby regular expressions have subtle but important differences:
|
|
8
|
+
|
|
9
|
+
| Feature | JavaScript | Ruby |
|
|
10
|
+
|---------|-----------|------|
|
|
11
|
+
| `^` and `$` anchors | Match start/end of **string** by default | Match start/end of **line** by default |
|
|
12
|
+
| `/s` flag (dotAll) | Makes `.` match newlines | N/A (use `/m` in Ruby) |
|
|
13
|
+
| `/m` flag (multiline) | Makes `^`/`$` match line boundaries | N/A (already default behavior) |
|
|
14
|
+
| `[^]` (any character) | Matches any char including `\n` | Invalid syntax (use `[\s\S]`) |
|
|
15
|
+
| `/g`, `/y`, `/d`, `/u`, `/v` flags | Various features | No direct equivalents |
|
|
16
|
+
|
|
17
|
+
This gem handles these conversions automatically, emitting warnings when perfect conversion isn't possible.
|
|
18
|
+
|
|
19
|
+
## Installation
|
|
20
|
+
|
|
21
|
+
Add this line to your application's Gemfile:
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
gem "js_regex_to_ruby"
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
And then execute:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
bundle install
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Or install it yourself as:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
gem install js_regex_to_ruby
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Usage
|
|
40
|
+
|
|
41
|
+
### Basic Conversion
|
|
42
|
+
|
|
43
|
+
```ruby
|
|
44
|
+
require "js_regex_to_ruby"
|
|
45
|
+
|
|
46
|
+
# From a JS regex literal
|
|
47
|
+
result = JsRegexToRuby.convert("/^foo$/i")
|
|
48
|
+
result.regexp #=> /\Afoo\z/i
|
|
49
|
+
result.success? #=> true
|
|
50
|
+
|
|
51
|
+
# From pattern + flags separately
|
|
52
|
+
result = JsRegexToRuby.convert("^hello$", flags: "im")
|
|
53
|
+
result.regexp #=> /^hello$/i
|
|
54
|
+
result.ruby_source #=> "^hello$"
|
|
55
|
+
result.ruby_options #=> 1 (Regexp::IGNORECASE)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Quick Conversion with `try_convert`
|
|
59
|
+
|
|
60
|
+
If you just need the `Regexp` and want `nil` on failure (no exceptions):
|
|
61
|
+
|
|
62
|
+
```ruby
|
|
63
|
+
# Returns Regexp on success
|
|
64
|
+
JsRegexToRuby.try_convert("/^foo$/i") #=> /\Afoo\z/i
|
|
65
|
+
|
|
66
|
+
# Returns nil on invalid input (never raises)
|
|
67
|
+
JsRegexToRuby.try_convert("/unterminated") #=> nil
|
|
68
|
+
JsRegexToRuby.try_convert("/(?invalid/") #=> nil
|
|
69
|
+
JsRegexToRuby.try_convert(nil) #=> nil
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Parsing JS Literal Without Converting
|
|
73
|
+
|
|
74
|
+
```ruby
|
|
75
|
+
pattern, flags = JsRegexToRuby.parse_literal('/foo\\/bar/gi')
|
|
76
|
+
pattern #=> "foo\\/bar"
|
|
77
|
+
flags #=> "gi"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Handling Warnings
|
|
81
|
+
|
|
82
|
+
```ruby
|
|
83
|
+
result = JsRegexToRuby.convert("/test/guy")
|
|
84
|
+
|
|
85
|
+
result.warnings
|
|
86
|
+
#=> ["JS flag(s) not representable as Ruby Regexp options: g, u, y"]
|
|
87
|
+
|
|
88
|
+
result.ignored_js_flags
|
|
89
|
+
#=> ["g", "u", "y"]
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Result Object
|
|
93
|
+
|
|
94
|
+
The `Result` struct provides comprehensive information:
|
|
95
|
+
|
|
96
|
+
| Method | Description |
|
|
97
|
+
|--------|-------------|
|
|
98
|
+
| `regexp` | The compiled `Regexp` object (or `nil` if compilation failed) |
|
|
99
|
+
| `success?` | Returns `true` if `regexp` is not `nil` |
|
|
100
|
+
| `ruby_source` | The converted Ruby regex pattern string |
|
|
101
|
+
| `ruby_options` | Integer flags (`Regexp::IGNORECASE`, `Regexp::MULTILINE`, etc.) |
|
|
102
|
+
| `ruby_flags_string` | Human-readable flags string (e.g., `"im"`) |
|
|
103
|
+
| `ruby_literal` | Best-effort Ruby literal representation (e.g., `/pattern/im`) |
|
|
104
|
+
| `warnings` | Array of warning messages about the conversion |
|
|
105
|
+
| `ignored_js_flags` | Array of JS flags with no Ruby equivalent |
|
|
106
|
+
| `js_source` | Original JS pattern |
|
|
107
|
+
| `js_flags` | Original JS flags string |
|
|
108
|
+
|
|
109
|
+
### Without Compilation
|
|
110
|
+
|
|
111
|
+
If you only need the converted source without compiling:
|
|
112
|
+
|
|
113
|
+
```ruby
|
|
114
|
+
result = JsRegexToRuby.convert("/^test$/", compile: false)
|
|
115
|
+
result.ruby_source #=> "\\Atest\\z"
|
|
116
|
+
result.regexp #=> nil
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Conversion Details
|
|
120
|
+
|
|
121
|
+
### Flag Mapping
|
|
122
|
+
|
|
123
|
+
| JS Flag | Ruby Equivalent | Notes |
|
|
124
|
+
|---------|-----------------|-------|
|
|
125
|
+
| `i` | `Regexp::IGNORECASE` | Case-insensitive matching |
|
|
126
|
+
| `s` | `Regexp::MULTILINE` | JS dotAll → Ruby multiline (`.` matches `\n`) |
|
|
127
|
+
| `m` | *(behavior change)* | Keeps `^`/`$` as-is instead of converting to `\A`/`\z` |
|
|
128
|
+
| `g` | *(ignored)* | Global matching - handle in application code |
|
|
129
|
+
| `y` | *(ignored)* | Sticky matching - no equivalent |
|
|
130
|
+
| `u` | *(ignored)* | Unicode mode - Ruby handles Unicode differently |
|
|
131
|
+
| `v` | *(ignored)* | Unicode sets mode - no equivalent |
|
|
132
|
+
| `d` | *(ignored)* | Indices for matches - no equivalent |
|
|
133
|
+
|
|
134
|
+
### Anchor Conversion
|
|
135
|
+
|
|
136
|
+
By default (without JS `m` flag):
|
|
137
|
+
- `^` → `\A` (start of string)
|
|
138
|
+
- `$` → `\z` (end of string)
|
|
139
|
+
|
|
140
|
+
With JS `m` flag:
|
|
141
|
+
- `^` and `$` are preserved (matching line boundaries, which is Ruby's default behavior)
|
|
142
|
+
|
|
143
|
+
### Inline Modifiers
|
|
144
|
+
|
|
145
|
+
JavaScript's inline modifier groups are converted:
|
|
146
|
+
|
|
147
|
+
```ruby
|
|
148
|
+
# JS: (?s:a.c) - dotAll only inside group
|
|
149
|
+
result = JsRegexToRuby.convert("(?s:a.c)")
|
|
150
|
+
result.ruby_source #=> "(?m:a.c)"
|
|
151
|
+
|
|
152
|
+
# JS: (?m:^foo$) - multiline anchors inside group
|
|
153
|
+
result = JsRegexToRuby.convert("(?m:^foo$)bar$")
|
|
154
|
+
result.ruby_source #=> "(?:^foo$)bar\\z"
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Control Character Escapes
|
|
158
|
+
|
|
159
|
+
JavaScript's `\cX` control escapes are converted to the actual control character:
|
|
160
|
+
|
|
161
|
+
```ruby
|
|
162
|
+
result = JsRegexToRuby.convert('\\cA')
|
|
163
|
+
result.ruby_source.bytes.first #=> 1 (Ctrl+A)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Any Character Class `[^]`
|
|
167
|
+
|
|
168
|
+
JavaScript's `[^]` matches any character including newlines (equivalent to `[\s\S]`). This is invalid syntax in Ruby, so it's automatically converted:
|
|
169
|
+
|
|
170
|
+
```ruby
|
|
171
|
+
result = JsRegexToRuby.convert("/a[^]b/")
|
|
172
|
+
result.ruby_source #=> "a[\\s\\S]b"
|
|
173
|
+
|
|
174
|
+
# Matches any character including newline
|
|
175
|
+
result.regexp.match?("a\nb") #=> true
|
|
176
|
+
result.regexp.match?("axb") #=> true
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Note: Negated character classes like `[^abc]` are NOT affected and work as expected.
|
|
180
|
+
|
|
181
|
+
## Limitations
|
|
182
|
+
|
|
183
|
+
1. **No runtime flags**: JS flags like `g` (global) and `y` (sticky) affect matching behavior at runtime and have no Ruby `Regexp` equivalent. Handle these in your application logic.
|
|
184
|
+
|
|
185
|
+
2. **Unicode properties**: `\p{...}` syntax exists in both JS and Ruby but with different property names and semantics. No automatic conversion is performed.
|
|
186
|
+
|
|
187
|
+
3. **Named capture groups**: Both languages support named groups with identical syntax (`(?<name>...)`), so no conversion is needed.
|
|
188
|
+
|
|
189
|
+
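4. **Backreferences**: Numbered backreferences (`\1`, `\2`) are passed through unchanged and usually behave the same, but some edge cases differ. For example, a backreference to a group that did not participate in the match succeeds (matching the empty string) in JavaScript, whereas it fails to match in Ruby.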
|
|
190
|
+
|
|
191
|
+
## Development
|
|
192
|
+
|
|
193
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests.
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
git clone https://github.com/jasl/js_regex_to_ruby.git
|
|
197
|
+
cd js_regex_to_ruby
|
|
198
|
+
bin/setup
|
|
199
|
+
rake test
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
You can also run `bin/console` for an interactive prompt to experiment with the gem.
|
|
203
|
+
|
|
204
|
+
## Contributing
|
|
205
|
+
|
|
206
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/jasl/js_regex_to_ruby.
|
|
207
|
+
|
|
208
|
+
1. Fork it
|
|
209
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
|
210
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
|
211
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
|
212
|
+
5. Create a new Pull Request
|
|
213
|
+
|
|
214
|
+
## See Also
|
|
215
|
+
|
|
216
|
+
- [MDN: Regular Expressions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions)
|
|
217
|
+
- [Ruby Regexp Documentation](https://ruby-doc.org/core/Regexp.html)
|
|
218
|
+
- [js_regex](https://github.com/jaynetics/js_regex) - A similar gem with a different approach
|
|
219
|
+
|
|
220
|
+
## License
|
|
221
|
+
|
|
222
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JsRegexToRuby
  # Convert an ECMAScript-style regular expression (pattern + flags) into a Ruby Regexp.
  #
  # The goal is to preserve behavior as much as Ruby's regex engine allows.
  #
  # Key conversions implemented:
  # - JS /s (dotAll) => Ruby /m (dot-all in Ruby)
  # - JS /m (multiline anchors) => Ruby has ^/$ multiline by default, so ^/$ are rewritten
  #   to \A/\z when JS multiline is NOT enabled.
  # - JS inline modifiers (?ims-ims:...) are supported, with mapping s->m and special handling for m.
  # - JS [^] (match any character including newline) => Ruby [\s\S]
  # - JS [] (empty class, never matches) => Ruby [^\s\S]
  # - JS \cX control escapes => the actual control character
  # - JS /g, /y, /u, /v, /d have no direct Regexp equivalent; they are reported in
  #   Result#ignored_js_flags.
  class Converter
    JS_KNOWN_FLAGS = %w[d g i m s u v y].freeze
    JS_GROUP_MOD_FLAGS = %w[i m s].freeze

    # Tracks modifier state during source rewriting (immutable).
    #
    # - js_multiline_anchors: whether JS `m` is active (^/$ stay line anchors)
    # - ruby_ignorecase:      whether case-insensitive matching is active
    # - ruby_dotall:          whether JS `s` (Ruby /m) is active
    Context = Data.define(:js_multiline_anchors, :ruby_ignorecase, :ruby_dotall)

    # Parse a JS regex literal like `/foo\\/bar/i`.
    #
    # The closing delimiter is the first `/` that is neither escaped nor inside a
    # character class; everything after it is returned as the flags string.
    # Surrounding whitespace of the literal is stripped first.
    #
    # @param literal [String] JS literal including both slashes and optional flags
    # @return [Array(String, String)] [pattern, flags]
    # @raise [ArgumentError] if literal is not a String, does not start with `/`,
    #   or has no closing `/`
    def self.parse_literal(literal)
      raise ArgumentError, "literal must be a String" unless literal.is_a?(String)

      s = literal.strip
      raise ArgumentError, "JS RegExp literal must start with /" unless s.start_with?("/")

      in_class = false
      escaped = false
      i = 1
      while i < s.length
        ch = s[i]
        if escaped
          # The character after a backslash is never a delimiter.
          escaped = false
        elsif ch == "\\"
          escaped = true
        elsif in_class
          in_class = false if ch == "]"
        elsif ch == "["
          in_class = true
        elsif ch == "/"
          return [s[1...i], s[(i + 1)..] || ""]
        end
        i += 1
      end

      raise ArgumentError, "Unterminated JS RegExp literal (missing closing /)"
    end

    # Convert a JS regex into a Ruby regex.
    #
    # When `flags` is nil and `input` starts with `/` (ignoring leading whitespace),
    # the input is parsed as a literal; otherwise it is used as the pattern source as-is.
    # A Ruby compilation failure does not raise: it is recorded in the result's warnings
    # and `regexp` is left nil.
    #
    # @param input [String] Either a JS literal `/.../flags` or a JS pattern source.
    # @param flags [String, nil] JS flags if input is not a literal.
    # @param compile [Boolean] Whether to compile and return a Regexp in the result.
    # @return [JsRegexToRuby::Result]
    # @raise [ArgumentError] if input is not a String or is a malformed literal
    def self.convert(input, flags: nil, compile: true)
      warnings = []

      js_source, js_flags =
        if flags.nil? && looks_like_literal?(input)
          parse_literal(input)
        else
          raise ArgumentError, "input must be a String" unless input.is_a?(String)

          [input, flags || ""]
        end

      js_flags = js_flags.to_s
      seen_flags, unknown_flags, duplicate_flags = normalize_flags(js_flags)

      warnings << "Unknown JS RegExp flag(s): #{unknown_flags.uniq.join(', ')}" unless unknown_flags.empty?
      warnings << "Duplicate JS RegExp flag(s) ignored: #{duplicate_flags.uniq.join(', ')}" unless duplicate_flags.empty?

      ignored_js_flags = (seen_flags.keys - %w[i m s]).sort
      unless ignored_js_flags.empty?
        warnings << "JS flag(s) not representable as Ruby Regexp options: #{ignored_js_flags.join(', ')}"
      end

      # Strict booleans: group modifiers produce true/false, and the prefix builder
      # compares old and new state with `!=`. A nil base would make "never enabled"
      # differ from "disabled" and emit a redundant `(?-i:` / `(?-m:` prefix.
      base_js_multiline = seen_flags.key?("m")
      base_js_ignorecase = seen_flags.key?("i")
      base_js_dotall = seen_flags.key?("s")

      ruby_options = 0
      ruby_options |= Regexp::IGNORECASE if base_js_ignorecase
      ruby_options |= Regexp::MULTILINE if base_js_dotall # Ruby /m is dot-all

      base_ctx = Context.new(
        js_multiline_anchors: base_js_multiline,
        ruby_ignorecase: base_js_ignorecase,
        ruby_dotall: base_js_dotall
      )

      ruby_source = rewrite_source(js_source, base_ctx, warnings)

      regexp = nil
      if compile
        begin
          regexp = Regexp.new(ruby_source, ruby_options)
        rescue RegexpError => e
          warnings << "Ruby RegexpError: #{e.message}"
        end
      end

      Result.new(
        ruby_source: ruby_source,
        ruby_options: ruby_options,
        regexp: regexp,
        warnings: warnings,
        ignored_js_flags: ignored_js_flags,
        js_source: js_source,
        js_flags: js_flags
      )
    end

    # True when `s` is a String whose first non-whitespace character is `/`.
    def self.looks_like_literal?(s)
      s.is_a?(String) && s.lstrip.start_with?("/")
    end

    # Classify each character of a JS flags string.
    #
    # @param flags [String]
    # @return [Array(Hash, Array<String>, Array<String>)] [seen, unknown, duplicates]:
    #   `seen` maps every distinct flag character (known or not) to true, `unknown`
    #   lists characters outside JS_KNOWN_FLAGS, and `duplicates` lists repeated ones.
    def self.normalize_flags(flags)
      seen = {}
      unknown = []
      duplicates = []

      flags.each_char do |c|
        if seen.key?(c)
          duplicates << c
          next
        end

        unknown << c unless JS_KNOWN_FLAGS.include?(c)
        seen[c] = true
      end

      [seen, unknown, duplicates]
    end

    # Rewrite JS source to Ruby source.
    #
    # - Converts ^/$ depending on whether JS multiline-anchors mode is enabled in the current scope.
    # - Converts inline modifiers (?ims-ims:...) into Ruby equivalents (i and m only; s->m).
    # - Converts JS control escapes (\cA ... \cZ) to the actual control character.
    # - Converts JS [^] (any character) to Ruby [\s\S].
    # - Converts JS [] (matches nothing) to Ruby [^\s\S].
    #
    # @param src [String] JS pattern source
    # @param base_ctx [Context] modifier state derived from the top-level flags
    # @param warnings [Array<String>] receives messages about unbalanced groups/classes
    # @return [String] Ruby pattern source
    def self.rewrite_source(src, base_ctx, warnings)
      out = +""
      in_class = false
      # One Context per open group; the last entry is the scope currently in effect.
      stack = [base_ctx]

      i = 0
      while i < src.length
        ch = src[i]

        # Escapes are treated the same inside and outside character classes.
        if ch == "\\"
          if control_escape_at?(src, i)
            out << control_char(src[i + 2])
            i += 3
          else
            # Copy the backslash together with the escaped character (if any) so the
            # escaped character is never interpreted as syntax below.
            out << src[i, 2]
            i += 2
          end
          next
        end

        if in_class
          out << ch
          in_class = false if ch == "]"
          i += 1
          next
        end

        case ch
        when "["
          if src[i + 1] == "^" && src[i + 2] == "]"
            # JS [^] matches any character (including newline); invalid in Ruby.
            out << "[\\s\\S]"
            i += 3
          elsif src[i + 1] == "]"
            # JS [] never matches; Ruby rejects it as an empty char-class.
            out << "[^\\s\\S]"
            i += 2
          else
            in_class = true
            out << ch
            i += 1
          end

        when "("
          # Try parse JS modifier group: (?ims-ims:...)
          mod = parse_js_modifier_group(src, i)
          if mod
            current = stack.last
            desired = apply_js_group_modifiers(current, mod[:enable], mod[:disable])
            out << build_ruby_modifier_prefix(current, desired)
            stack << desired
            i = mod[:after_colon]
          else
            # Any other group inherits the enclosing scope unchanged.
            out << ch
            stack << stack.last
            i += 1
          end

        when ")"
          out << ch
          if stack.length > 1
            stack.pop
          else
            warnings << "Unbalanced ) in source; continuing"
          end
          i += 1

        when "^"
          out << (stack.last.js_multiline_anchors ? "^" : "\\A")
          i += 1

        when "$"
          out << (stack.last.js_multiline_anchors ? "$" : "\\z")
          i += 1

        else
          out << ch
          i += 1
        end
      end

      warnings << "Unbalanced ( in source: #{stack.length - 1} group(s) not closed" if stack.length > 1
      warnings << "Unterminated character class ([...) in source" if in_class

      out
    end

    # True when src[index, 3] is a control escape: backslash, "c", ASCII letter.
    def self.control_escape_at?(src, index)
      return false unless src[index] == "\\"
      return false unless src[index + 1] == "c"

      letter = src[index + 2]
      !!(letter && letter.match?(/[A-Za-z]/))
    end

    # Control character for the letter of a \cX escape.
    def self.control_char(letter)
      # MDN: \cX where X is A-Z maps to the character code of X modulo 32.
      # (A->1, B->2, ... Z->26)
      (letter.ord & 0x1F).chr
    end

    # If a modifier group begins at src[index] (which should be '('),
    # returns { enable: 'im', disable: 's', after_colon: <index after ':'> }.
    # Otherwise returns nil.
    def self.parse_js_modifier_group(src, index)
      return nil unless src[index] == "(" && src[index + 1] == "?"

      j = index + 2
      enable = +""
      while (c = src[j]) && JS_GROUP_MOD_FLAGS.include?(c)
        enable << c
        j += 1
      end

      disable = +""
      if src[j] == "-"
        j += 1
        while (c = src[j]) && JS_GROUP_MOD_FLAGS.include?(c)
          disable << c
          j += 1
        end
      end

      return nil unless src[j] == ":"
      # "(?:" and "(?-:" carry no modifiers and are handled as ordinary groups.
      return nil if enable.empty? && disable.empty?

      { enable: enable, disable: disable, after_colon: j + 1 }
    end

    # Returns a copy of `current` with the JS flags in `enable` switched on and
    # those in `disable` switched off (disables are applied last).
    def self.apply_js_group_modifiers(current, enable, disable)
      js_m = current.js_multiline_anchors
      ruby_i = current.ruby_ignorecase
      ruby_dotall = current.ruby_dotall

      enable.each_char do |f|
        case f
        when "m" then js_m = true
        when "i" then ruby_i = true
        when "s" then ruby_dotall = true
        end
      end

      disable.each_char do |f|
        case f
        when "m" then js_m = false
        when "i" then ruby_i = false
        when "s" then ruby_dotall = false
        end
      end

      current.with(
        js_multiline_anchors: js_m,
        ruby_ignorecase: ruby_i,
        ruby_dotall: ruby_dotall
      )
    end

    # Builds the Ruby group opener that moves from `current` to `desired` state.
    # Only flags whose value actually changes are emitted; JS `m` has no Ruby
    # modifier (it only affects how ^/$ are rewritten), so a pure `m` change
    # yields a plain non-capturing group.
    def self.build_ruby_modifier_prefix(current, desired)
      enable = +""
      disable = +""

      if desired.ruby_ignorecase != current.ruby_ignorecase
        (desired.ruby_ignorecase ? enable : disable) << "i"
      end

      # JS dotAll (s) maps to Ruby /m (dot-all)
      if desired.ruby_dotall != current.ruby_dotall
        (desired.ruby_dotall ? enable : disable) << "m"
      end

      if enable.empty? && disable.empty?
        "(?:"
      elsif disable.empty?
        "(?#{enable}:"
      elsif enable.empty?
        "(?-#{disable}:"
      else
        "(?#{enable}-#{disable}:"
      end
    end

    private_class_method :looks_like_literal?, :normalize_flags,
                         :rewrite_source, :control_escape_at?, :control_char,
                         :parse_js_modifier_group, :apply_js_group_modifiers,
                         :build_ruby_modifier_prefix
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JsRegexToRuby
  # A conversion outcome (immutable value object).
  #
  # @!attribute [r] ruby_source
  #   @return [String] Ruby regex source (not wrapped with /.../)
  # @!attribute [r] ruby_options
  #   @return [Integer] Regexp option bits (IGNORECASE, MULTILINE, etc.)
  # @!attribute [r] regexp
  #   @return [Regexp, nil] Compiled Regexp, or nil if compile: false or compilation failed
  # @!attribute [r] warnings
  #   @return [Array<String>] Warning messages from conversion
  # @!attribute [r] ignored_js_flags
  #   @return [Array<String>] Flags that have no direct Ruby Regexp equivalent
  # @!attribute [r] js_source
  #   @return [String] Original JS pattern source
  # @!attribute [r] js_flags
  #   @return [String] Original JS flags string
  Result = Data.define(
    :ruby_source,
    :ruby_options,
    :regexp,
    :warnings,
    :ignored_js_flags,
    :js_source,
    :js_flags
  ) do
    # @return [Boolean] true when a compiled Regexp is present
    def success?
      !regexp.nil?
    end

    # Ruby literal flag letters for the option bits this gem can set.
    #
    # @return [String] "", "i", "m" or "im"
    def ruby_flags_string
      s = +""
      s << "i" if (ruby_options & Regexp::IGNORECASE) != 0
      s << "m" if (ruby_options & Regexp::MULTILINE) != 0
      s
    end

    # Best-effort Ruby literal representation (not necessarily re-escapable).
    #
    # Only slashes that are not already escaped get a backslash. The source is
    # scanned left to right in "backslash + any character" pairs so that an
    # existing `\/` is kept as-is instead of being turned into `\\/`, which
    # would end the literal early.
    #
    # @return [String] e.g. "/a\\/b/im"
    def ruby_literal
      escaped = ruby_source.gsub(/\\.|\//m) { |token| token == "/" ? "\\/" : token }
      "/#{escaped}/#{ruby_flags_string}"
    end
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "js_regex_to_ruby/version"
|
|
4
|
+
require_relative "js_regex_to_ruby/result"
|
|
5
|
+
require_relative "js_regex_to_ruby/converter"
|
|
6
|
+
|
|
7
|
+
module JsRegexToRuby
  # Convenience API; delegates to Converter.convert.
  #
  # @example
  #   JsRegexToRuby.convert('/^foo$/i').regexp #=> /\Afoo\z/i
  #
  # @param input [String] Either a JS literal `/.../flags` or a JS pattern source.
  # @param flags [String, nil] JS flags if input is not a literal.
  # @param compile [Boolean] Whether to compile and return a Regexp in the result.
  # @return [JsRegexToRuby::Result]
  # @raise [ArgumentError] if input is not a String or is a malformed literal
  def self.convert(input, flags: nil, compile: true)
    Converter.convert(input, flags: flags, compile: compile)
  end

  # Try to convert a JS regex to Ruby Regexp.
  # Returns the compiled Regexp on success, or nil on failure.
  # Never raises exceptions for invalid input.
  #
  # Note that a string that does not start with `/` is not rejected: it is
  # treated as a pattern source and compiled as such.
  #
  # @example
  #   JsRegexToRuby.try_convert('/^foo$/i')     #=> /\Afoo\z/i
  #   JsRegexToRuby.try_convert('plain text')   #=> /plain text/
  #   JsRegexToRuby.try_convert('/unterminated') #=> nil
  #   JsRegexToRuby.try_convert('/invalid[/')   #=> nil
  #   JsRegexToRuby.try_convert(nil)            #=> nil
  #
  # @param input [String] Either a JS literal `/.../flags` or a JS pattern source.
  # @param flags [String, nil] JS flags if input is not a literal.
  # @return [Regexp, nil] The compiled Ruby Regexp, or nil if conversion/compilation failed.
  def self.try_convert(input, flags: nil)
    Converter.convert(input, flags: flags, compile: true).regexp
  rescue ArgumentError, RegexpError
    # ArgumentError: non-String input or malformed literal.
    nil
  end

  # Split a JS regex literal into pattern and flags without converting it;
  # delegates to Converter.parse_literal.
  #
  # @param literal [String] JS literal such as `/foo\\/bar/gi`
  # @return [Array(String, String)] [pattern, flags]
  # @raise [ArgumentError] if the literal is malformed
  def self.parse_literal(literal)
    Converter.parse_literal(literal)
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
module JsRegexToRuby
  VERSION: String

  # Convenience API to convert a JS regex to Ruby
  def self.convert: (String input, ?flags: String?, ?compile: bool) -> Result

  # Try to convert a JS regex to Ruby Regexp, returns nil on failure.
  # Any input is accepted: non-String values (e.g. nil) yield nil instead of raising.
  def self.try_convert: (untyped input, ?flags: String?) -> Regexp?

  # Parse a JS regex literal into pattern and flags
  def self.parse_literal: (String literal) -> [String, String]

  # Conversion result (immutable value object)
  class Result < Data
    attr_reader ruby_source: String
    attr_reader ruby_options: Integer
    attr_reader regexp: Regexp?
    attr_reader warnings: Array[String]
    attr_reader ignored_js_flags: Array[String]
    attr_reader js_source: String
    attr_reader js_flags: String

    def self.new: (
      ruby_source: String,
      ruby_options: Integer,
      regexp: Regexp?,
      warnings: Array[String],
      ignored_js_flags: Array[String],
      js_source: String,
      js_flags: String
    ) -> Result

    # Returns true if regexp compilation succeeded
    def success?: () -> bool

    # Returns Ruby flags as a string (e.g., "im")
    def ruby_flags_string: () -> String

    # Returns a Ruby literal representation
    def ruby_literal: () -> String
  end

  # Converter class for JS to Ruby regex conversion
  class Converter
    JS_KNOWN_FLAGS: Array[String]
    JS_GROUP_MOD_FLAGS: Array[String]

    # Parse a JS regex literal like `/foo\\/bar/i`
    def self.parse_literal: (String literal) -> [String, String]

    # Convert a JS regex into a Ruby regex
    def self.convert: (String input, ?flags: String?, ?compile: bool) -> Result

    # Context for tracking modifier state during rewriting (immutable)
    class Context < Data
      attr_reader js_multiline_anchors: bool?
      attr_reader ruby_ignorecase: bool?
      attr_reader ruby_dotall: bool?

      def self.new: (js_multiline_anchors: bool?, ruby_ignorecase: bool?, ruby_dotall: bool?) -> Context
      def with: (?js_multiline_anchors: bool?, ?ruby_ignorecase: bool?, ?ruby_dotall: bool?) -> Context
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: js_regex_to_ruby
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- jasl
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies: []
|
|
12
|
+
description: |
|
|
13
|
+
A Ruby gem that converts ECMAScript (JavaScript) regular expressions to Ruby Regexp objects,
|
|
14
|
+
preserving behavior as closely as Ruby's regex engine allows. Handles flag mapping (i, s, m),
|
|
15
|
+
anchor conversion (^/$ to \A/\z), inline modifiers, and control character escapes.
|
|
16
|
+
Emits warnings for JS features without direct Ruby equivalents (g, y, u, v, d flags).
|
|
17
|
+
email:
|
|
18
|
+
- jasl9187@hotmail.com
|
|
19
|
+
executables: []
|
|
20
|
+
extensions: []
|
|
21
|
+
extra_rdoc_files: []
|
|
22
|
+
files:
|
|
23
|
+
- LICENSE.txt
|
|
24
|
+
- README.md
|
|
25
|
+
- Rakefile
|
|
26
|
+
- lib/js_regex_to_ruby.rb
|
|
27
|
+
- lib/js_regex_to_ruby/converter.rb
|
|
28
|
+
- lib/js_regex_to_ruby/result.rb
|
|
29
|
+
- lib/js_regex_to_ruby/version.rb
|
|
30
|
+
- sig/js_regex_to_ruby.rbs
|
|
31
|
+
homepage: https://github.com/jasl/js_regex_to_ruby
|
|
32
|
+
licenses:
|
|
33
|
+
- MIT
|
|
34
|
+
metadata:
|
|
35
|
+
homepage_uri: https://github.com/jasl/js_regex_to_ruby
|
|
36
|
+
bug_tracker_uri: https://github.com/jasl/js_regex_to_ruby/issues
|
|
37
|
+
documentation_uri: https://github.com/jasl/js_regex_to_ruby#readme
|
|
38
|
+
rubygems_mfa_required: 'true'
|
|
39
|
+
rdoc_options: []
|
|
40
|
+
require_paths:
|
|
41
|
+
- lib
|
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: 3.4.0
|
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
48
|
+
requirements:
|
|
49
|
+
- - ">="
|
|
50
|
+
- !ruby/object:Gem::Version
|
|
51
|
+
version: '0'
|
|
52
|
+
requirements: []
|
|
53
|
+
rubygems_version: 4.0.3
|
|
54
|
+
specification_version: 4
|
|
55
|
+
summary: Convert JavaScript regular expressions to Ruby Regexp objects
|
|
56
|
+
test_files: []
|