ruby_grammar_builder 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/lib/textmate_grammar/generated/grammar.rb +32 -0
  4. data/lib/textmate_grammar/generated/rule.rb +144 -0
  5. data/lib/textmate_grammar/grammar.rb +670 -0
  6. data/lib/textmate_grammar/grammar_plugin.rb +189 -0
  7. data/lib/textmate_grammar/import_patterns.rb +14 -0
  8. data/lib/textmate_grammar/linters/flat_includes.rb +32 -0
  9. data/lib/textmate_grammar/linters/includes_then_tag_as.rb +48 -0
  10. data/lib/textmate_grammar/linters/standard_naming.rb +226 -0
  11. data/lib/textmate_grammar/linters/start_match_empty.rb +49 -0
  12. data/lib/textmate_grammar/linters/tests.rb +19 -0
  13. data/lib/textmate_grammar/linters/unused_unresolved.rb +9 -0
  14. data/lib/textmate_grammar/pattern_extensions/look_ahead_for.rb +32 -0
  15. data/lib/textmate_grammar/pattern_extensions/look_ahead_to_avoid.rb +31 -0
  16. data/lib/textmate_grammar/pattern_extensions/look_behind_for.rb +31 -0
  17. data/lib/textmate_grammar/pattern_extensions/look_behind_to_avoid.rb +31 -0
  18. data/lib/textmate_grammar/pattern_extensions/lookaround_pattern.rb +169 -0
  19. data/lib/textmate_grammar/pattern_extensions/match_result_of.rb +67 -0
  20. data/lib/textmate_grammar/pattern_extensions/maybe.rb +50 -0
  21. data/lib/textmate_grammar/pattern_extensions/one_of.rb +107 -0
  22. data/lib/textmate_grammar/pattern_extensions/one_or_more_of.rb +42 -0
  23. data/lib/textmate_grammar/pattern_extensions/or_pattern.rb +55 -0
  24. data/lib/textmate_grammar/pattern_extensions/placeholder.rb +102 -0
  25. data/lib/textmate_grammar/pattern_extensions/recursively_match.rb +76 -0
  26. data/lib/textmate_grammar/pattern_extensions/zero_or_more_of.rb +50 -0
  27. data/lib/textmate_grammar/pattern_variations/base_pattern.rb +870 -0
  28. data/lib/textmate_grammar/pattern_variations/legacy_pattern.rb +61 -0
  29. data/lib/textmate_grammar/pattern_variations/pattern.rb +9 -0
  30. data/lib/textmate_grammar/pattern_variations/pattern_range.rb +233 -0
  31. data/lib/textmate_grammar/pattern_variations/repeatable_pattern.rb +204 -0
  32. data/lib/textmate_grammar/regex_operator.rb +182 -0
  33. data/lib/textmate_grammar/regex_operators/alternation.rb +24 -0
  34. data/lib/textmate_grammar/regex_operators/concat.rb +23 -0
  35. data/lib/textmate_grammar/stdlib/common.rb +20 -0
  36. data/lib/textmate_grammar/tokens.rb +110 -0
  37. data/lib/textmate_grammar/transforms/add_ending.rb +25 -0
  38. data/lib/textmate_grammar/transforms/bailout.rb +92 -0
  39. data/lib/textmate_grammar/transforms/fix_repeated_tag_as.rb +75 -0
  40. data/lib/textmate_grammar/transforms/resolve_placeholders.rb +121 -0
  41. data/lib/textmate_grammar/util.rb +198 -0
  42. data/lib/textmate_grammar.rb +4 -0
  43. metadata +85 -0
@@ -0,0 +1,870 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Provides a base class to simplify the writing of complex regular expression rules.
5
+ # This class completely handles capture numbers and provides convenience methods for
6
+ # many common Regexp operations
7
+ #
8
+ # @note Users should not normally directly instantiate this class
9
+ #
10
+ class PatternBase
11
+ # @return [PatternBase] The next pattern in the linked list of patterns
12
+ attr_accessor :next_pattern
13
+ # @return [String,PatternBase] The pattern to match
14
+ attr_accessor :match
15
+ # @return [Hash] The processed arguments
16
+ attr_accessor :arguments
17
+ # @return [Hash] The original arguments passed into initialize
18
+ attr_accessor :original_arguments
19
+
20
#
# Does @arguments contain any attribute that requires this pattern to be captured?
#
# The capturing attributes are :tag_as, :reference and :includes. Only the
# presence of the key matters, a key with a nil value still counts.
#
# @return [Boolean] if this PatternBase needs to capture
#
def needs_to_capture?
  capturing_attributes = %i[tag_as reference includes]
  capturing_attributes.any? { |attribute| @arguments.key?(attribute) }
end
35
+
36
#
# Can the capture be optimized out?
#
# A capturing pattern with no following pattern does not need its own group,
# its capture can become capture group 0 instead.
#
# @return [Boolean] can this capture become capture group 0
#
def optimize_outer_group?
  needs_to_capture? && @next_pattern.nil?
end
47
+
48
#
# Appends pattern to the end of this linked list of patterns (in place)
#
# @param [PatternBase] pattern the pattern to append
#
# @return [self]
#
# @see insert
#
def insert!(pattern)
  tail = self
  # walk to the last node, i.e. the one without a next_pattern
  while (following = tail.next_pattern)
    tail = following
  end
  tail.next_pattern = pattern
  self
end
63
+
64
#
# Appends pattern to a deep copy of this linked list, leaving self untouched
#
# @param [PatternBase] pattern the pattern to append
#
# @return [PatternBase] a frozen copy of self with pattern appended
#
def insert(pattern)
  copy = __deep_clone__
  extended = copy.insert!(pattern)
  extended.freeze
end
75
+
76
#
# Wraps the pattern in a capture group when #needs_to_capture? says so
#
# @param [String] regex_as_string the pattern as a string
#
# @return [String] the pattern, potentially with a capture group
#
def add_capture_group_if_needed(regex_as_string)
  return regex_as_string unless needs_to_capture?

  "(#{regex_as_string})"
end
87
+
88
#
# Uses a block to transform all Patterns in the list
#
# The block is invoked with self first, then recursively with @match and
# @next_pattern when they are PatternBase instances; both are replaced by
# whatever their own map! returns.
#
# @param [Boolean] map_includes should include patterns be mapped?
# @yield [self] invokes the block with self for modification
#
# @return [self]
#
def map!(map_includes = false, &block)
  yield self
  if @match.is_a? PatternBase
    # diagnostic only: goes to stderr so it cannot pollute regular output
    warn("frozen @match", @match.inspect) if @match.frozen?
    @match = @match.map!(map_includes, &block)
  end
  if @next_pattern.is_a? PatternBase
    warn("frozen @next_pattern", @next_pattern.inspect) if @next_pattern.frozen?
    @next_pattern = @next_pattern.map!(map_includes, &block)
  end
  map_includes!(&block) if map_includes
  self
end
115
+
116
#
# (see #map!)
#
# Works on a deep clone, so self is left unmodified.
#
# @return [PatternBase] a transformed, frozen copy of self
#
def map(map_includes = false, &block)
  copy = __deep_clone__
  transformed = copy.map!(map_includes, &block)
  transformed.freeze
end
124
+
125
#
# Call the block for each pattern in the list
#
# Visits self, then @match and @next_pattern when they are PatternBase
# instances. With each_includes, every PatternBase found in the :includes
# Array is visited as well; non-pattern includes are skipped.
#
# @param [Boolean] each_includes should include patterns be called?
# @yield [self] invokes the block with self
#
# @return [void]
#
def each(each_includes = false, &block)
  yield self
  @match.each(each_includes, &block) if @match.is_a? PatternBase
  @next_pattern.each(each_includes, &block) if @next_pattern.is_a? PatternBase

  return unless each_includes
  return unless @arguments[:includes].is_a? Array

  @arguments[:includes].each do |included|
    # PatternBase (not only Pattern) so that every pattern variation is visited,
    # matching what map_includes! and run_tests do
    included.each(true, &block) if included.is_a? PatternBase
  end
end
147
+
148
#
# Uses a block to transform all Patterns in all includes
# @api private
# @note only for use by map!
#
# Every PatternBase inside the :includes Array is replaced by the result of
# its own map!(true); other entries are kept as they are.
#
# @yield [self] invokes the block with the includes for modification
#
# @return [void]
#
def map_includes!(&block)
  return unless @arguments[:includes].is_a? Array

  @arguments[:includes].map! do |included|
    next included unless included.is_a? PatternBase

    # diagnostic only: goes to stderr so it cannot pollute regular output
    warn("frozen s", included.inspect) if included.frozen?
    included.map!(true, &block)
  end
end
173
+
174
+ #
175
+ # Uses block to recursively transform includes
176
+ #
177
+ # @yield [PatternBase,Symbol,Regexp,String] invokes the block with each include to transform
178
+ #
179
+ # @return [PatternBase] a copy of self with transformed includes
180
+ #
181
+ def transform_includes(&block)
182
+ map(true) do |s|
183
+ s.arguments[:includes].map!(&block) if s.arguments[:includes].is_a? Array
184
+ end
185
+ end
186
+
187
#
# Uses block to recursively transform tag_as
#
# Works on a deep clone. Patterns found inside :includes Arrays are replaced
# by their own transform_tag_as result; other includes are kept.
#
# @yield [String] invokes the block with each tag_as to transform
#
# @return [PatternBase] a frozen copy of self with transformed tag_as
#
def transform_tag_as(&block)
  copy = __deep_clone__
  transformed = copy.map! do |pattern|
    options = pattern.arguments
    options[:tag_as] = block.call(options[:tag_as]) if options[:tag_as]
    next unless options[:includes].is_a?(Array)

    options[:includes].map! do |included|
      included.is_a?(PatternBase) ? included.transform_tag_as(&block) : included
    end
  end
  transformed.freeze
end
206
+
207
+ #
208
+ # Construct a new pattern
209
+ #
210
+ # @overload initialize(pattern)
211
+ # matches an exact pattern
212
+ # @param pattern [PatternBase, Regexp, String] the pattern to match
213
+ # @overload initialize(opts)
214
+ # @param opts [Hash] options
215
+ # @option opts [PatternBase, Regexp, String] :match the pattern to match
216
+ # @option opts [String] :tag_as what to tag this pattern as
217
+ # @option opts [Array<PatternBase, Symbol>] :includes pattern includes
218
+ # @option opts [String] :reference a name for this pattern can be referred to in
219
+ # earlier or later parts of the pattern list, or in tag_as
220
+ # @option opts [Array<String>] :should_fully_match string that this pattern should
221
+ # fully match
222
+ # @option opts [Array<String>] :should_partial_match string that this pattern should
223
+ # partially match
224
+ # @option opts [Array<String>] :should_not_fully_match string that this pattern should
225
+ # not fully match
226
+ # @option opts [Array<String>] :should_not_partial_match string that this pattern should
227
+ # not partially match
228
+ # @option opts [Enumerator, Integer] :at_most match up to N times, nil to match any
229
+ # number of times
230
+ # @option opts [Enumerator, Integer] :at_least match no fewer than N times, nil to
231
+ # match any number of times
232
+ # @option opts [Enumerator, Integer] :how_many_times match exactly N times
233
+ # @option opts [Array<String>] :word_cannot_be_any_of list of wordlike string that
234
+ # the pattern should not match (this is a qualifier not a unit test)
235
+ # @option opts [Boolean] :dont_back_track? can this pattern backtrack
236
+ # @note Plugins may provide additional options
237
+ # @note all options except :match are optional
238
+ # @overload initialize(opts, deep_clone, original)
239
+ # makes a copy of PatternBase
240
+ # @param opts [Hash] the original patterns @arguments with match
241
+ # @param deep_clone [:deep_clone] identifies as a deep_clone construction
242
+ # @param original [Hash] the original patterns @original_arguments
243
+ # @api private
244
+ # @note this should only be called by __deep_clone__, however subclasses must be
245
+ # able to accept this form
246
+ #
247
+ def initialize(*arguments)
248
+ if arguments.length > 1 && arguments[1] == :deep_clone
249
+ @arguments = arguments[0]
250
+ @match = @arguments[:match]
251
+ @arguments.delete(:match)
252
+ @original_arguments = arguments[2]
253
+ @next_pattern = nil
254
+ return
255
+ end
256
+
257
+ if arguments.length > 1
258
+ # PatternBase was likely constructed like `PatternBase.new(/foo/, option: bar)`
259
+ puts "PatternBase#new() expects a single Regexp, String, or Hash"
260
+ puts "PatternBase#new() was provided with multiple arguments"
261
+ puts "arguments:"
262
+ puts arguments
263
+ raise "See error above"
264
+ end
265
+ @next_pattern = nil
266
+ arg1 = arguments[0]
267
+ arg1 = {match: arg1} unless arg1.is_a? Hash
268
+ @original_arguments = arg1.clone
269
+ if arg1[:match].is_a? String
270
+ arg1[:match] = Regexp.escape(arg1[:match]).gsub("/", "\\/")
271
+ @match = arg1[:match]
272
+ elsif arg1[:match].is_a? Regexp
273
+ raise_if_regex_has_capture_group arg1[:match]
274
+ @match = arg1[:match].inspect[1..-2] # convert to string and remove the slashes
275
+ elsif arg1[:match].is_a? PatternBase
276
+ @match = arg1[:match]
277
+ else
278
+ puts <<-HEREDOC.remove_indent
279
+ Pattern.new() must be constructed with a String, Regexp, or Pattern
280
+ Provided arguments: #{@original_arguments}
281
+ HEREDOC
282
+ raise "See error above"
283
+ end
284
+ # ensure that includes is either nil or a flat array
285
+ if arg1[:includes]
286
+ arg1[:includes] = [arg1[:includes]] unless arg1[:includes].is_a? Array
287
+ arg1[:includes] = arg1[:includes].flatten
288
+ end
289
+ arg1.delete(:match)
290
+ @arguments = arg1
291
+ end
292
+
293
# Attempts to provide a memorable name for a pattern: the :reference if set,
# otherwise the :tag_as, otherwise the #to_s rendering
# @return [String]
def name
  %i[reference tag_as].each do |key|
    label = @arguments[key]
    return label unless label.nil?
  end

  to_s
end
301
+
302
#
# Converts a PatternBase to a Hash representing a textmate rule
#
# When #optimize_outer_group? holds, the outermost parentheses are stripped
# from :match and the name of capture "0" becomes the rule's :name.
# Nil or empty values are dropped from the result.
#
# @return [Hash] The pattern as a textmate grammar rule
#
def to_tag
  rule = {match: evaluate}
  rule[:captures] = convert_group_attributes_to_captures(collect_group_attributes)

  if optimize_outer_group?
    # optimize captures by removing outermost
    whole_match = rule[:captures]["0"]
    rule[:match] = rule[:match][1..-2]
    rule[:name] = whole_match.delete(:name)
    rule[:captures].reject! { |_, capture| !capture || capture.empty? }
  end

  rule.reject! { |_, value| !value || value.empty? }
  rule
end
323
+
324
#
# Evaluates the pattern into a string suitable for inserting into a
# grammar or constructing a Regexp.
#
# Walks the linked list starting at self, collecting each pattern's operator
# and evaluated text, and hands the sequence to RegexOperator.evaluate.
#
# @param [Hash] groups if groups is nil consider this PatternBase to be the top_level
#   when a pattern is top_level, group numbers and back references are relative
#   to that pattern
# @param [Boolean] fixup_refereces resolve references even when not top_level
#   (the keyword is spelled this way in the public interface)
#
# @return [String] the complete pattern
#
def evaluate(groups = nil, fixup_refereces: false)
  top_level = groups.nil?
  groups = collect_group_attributes if top_level

  pieces = [""]
  current = self
  while current.is_a? PatternBase
    pieces.push(current.evaluate_operator, current.do_evaluate_self(groups))
    current = current.next_pattern
  end

  result = RegexOperator.evaluate(pieces)
  return result unless top_level || fixup_refereces

  fixup_regex_references(groups, result)
end
350
+
351
#
# Converts a pattern to a Regexp
#
# @param [Hash] groups if groups is nil consider this PatternBase to be the top_level
#   when a pattern is top_level, group numbers and back references are relative
#   to that pattern
#
# @return [Regexp] the pattern as a Regexp
#
def to_r(groups = nil)
  with_no_warnings do
    source = evaluate(groups)
    Regexp.new(source)
  end
end
363
+
364
#
# Displays the PatternBase as you would write it in code
#
# If anything goes wrong while building the representation, the original
# arguments are rendered instead.
#
# @param [Integer] depth the current nesting depth
# @param [Boolean] top_level is this a top level pattern or is it being chained
#
# @return [String] The pattern as a string
#
def to_s(depth = 0, top_level = true)
  # TODO: make this method easier to understand

  # rubocop:disable Metrics/LineLength
  begin
    # `reject` rather than `reject!`: the Array handed out by Grammar.plugins
    # must not be modified from here
    plugins = Grammar.plugins.reject { |p| (@original_arguments.keys & p.class.options).empty? }

    regex_as_string =
      case @original_arguments[:match]
      when PatternBase then @original_arguments[:match].to_s(depth + 2, true)
      when Regexp then @original_arguments[:match].inspect
      when String then "/" + Regexp.escape(@original_arguments[:match]) + "/"
      end
    indent = " " * depth
    output = indent + do_get_to_s_name(top_level)
    # basic pattern information
    output += "\n#{indent} match: " + regex_as_string.lstrip
    output += ",\n#{indent} tag_as: \"" + @arguments[:tag_as] + '"' if @arguments[:tag_as]
    output += ",\n#{indent} reference: \"" + @arguments[:reference] + '"' if @arguments[:reference]
    # unit tests
    %i[should_fully_match should_not_fully_match should_partially_match should_not_partially_match].each do |key|
      output += ",\n#{indent} #{key}: " + @arguments[key].to_s if @arguments[key]
    end

    output += ",\n#{indent} includes: " + @arguments[:includes].to_s if @arguments[:includes]
    # add any linter/transform configurations
    plugins.each { |p| output += p.display_options(indent + " ", @original_arguments) }
    # subclass, ending and recursive
    output += do_add_attributes(indent)
    output += ",\n#{indent})"
    output += @next_pattern.to_s(depth, false).lstrip if @next_pattern
    output
  rescue StandardError
    return @original_arguments.to_s
  end
  # rubocop:enable Metrics/LineLength
end
411
+
412
#
# Runs the unit tests, recursively
#
# Covers self, @match, @next_pattern and every pattern found in :includes.
#
# @return [Boolean] If all test passed return true, otherwise false
#
def run_tests
  results = [run_self_tests]

  # run related unit tests
  results << @match.run_tests if @match.is_a? PatternBase
  results << @next_pattern.run_tests if @next_pattern.is_a? PatternBase

  includes = @arguments[:includes]
  case includes
  when Array
    includes.each { |inc| results << inc.run_tests if inc.is_a? PatternBase }
  when PatternBase
    results << includes.run_tests
  end

  results.all?
end
432
+
433
#
# Runs the unit tests for self
#
# The should_fully_match family is checked against an anchored copy of the
# pattern, the should_partially_match family against the plain pattern.
# Each failing family is reported with puts.
#
# @return [Boolean] If all test passed return true, otherwise false
#
def run_self_tests
  test_keys = %i[
    should_fully_match
    should_not_fully_match
    should_partially_match
    should_not_partially_match
  ]
  # some patterns are not able to be evaluated
  # do not attempt to unless required
  return true unless test_keys.any? { |key| @arguments.include? key }

  copy = __deep_clone_self__
  test_regex = copy.to_r
  test_fully_regex = wrap_with_anchors(copy).to_r

  report_failure = lambda do |symbol|
    puts [
      "",
      "When testing the pattern #{test_regex.inspect}. The unit test for #{symbol} failed.",
      "The unit test has the following patterns:",
      "#{@arguments[symbol].to_yaml}",
      "The Failing pattern is below:",
      "#{self}",
    ].join("\n")
  end

  # [argument key, regex to test with, expectation over all samples]
  checks = [
    [:should_fully_match, test_fully_regex, :all?],
    [:should_not_fully_match, test_fully_regex, :none?],
    [:should_partially_match, test_regex, :all?],
    [:should_not_partially_match, test_regex, :none?],
  ]

  passed = true
  checks.each do |key, regex, expectation|
    samples = @arguments[key]
    next unless samples.is_a? Array
    next if samples.public_send(expectation) { |sample| sample =~ regex }

    report_failure.call key
    passed = false
  end
  passed
end
491
+
492
#
# The pattern used for the initial match.
# To aid Linters every Pattern supports start_pattern; for a single match
# pattern that is the pattern itself.
#
# @return [self] This pattern
#
def start_pattern
  self
end
501
+
502
#
# Gets the patterns Hashcode, which is the hash of @match
#
# @return [Integer] the Hashcode
#
def hash
  # TODO: find a better hash code
  # PatternBase.new("abc") == PatternBase.new(PatternBase.new("abc"))
  # but PatternBase.new("abc").hash != PatternBase.new(PatternBase.new("abc")).hash
  @match.hash
end
513
+
514
#
# Checks for equality
# Two patterns are considered equal when their #to_tag results are equal
#
# @param [PatternBase] other the pattern to compare
#
# @return [Boolean] true if other is a PatternBase and to_tag is equivalent, false otherwise
#
def eql?(other)
  other.is_a?(PatternBase) && to_tag == other.to_tag
end
527
+
528
# (see #eql?)
def ==(other)
  eql?(other)
end
532
+
533
#
# Construct a new pattern and append to the end
#
# The argument is appended as is when it is a PatternBase without a
# next_pattern; anything else is first wrapped with Pattern.new.
#
# @param [PatternBase] pattern options (see #initialize for options)
# @see #initialize
#
# @return [PatternBase] a copy of self with a pattern inserted
#
def then(pattern)
  appendable = pattern.is_a?(PatternBase) && pattern.next_pattern.nil?
  pattern = Pattern.new(pattern) unless appendable
  insert(pattern)
end
547
+ # other methods added by subclasses
548
+
549
#
# Evaluates @match
# @note optionally override when inheriting
# @note by default this optionally adds a capture group
#
# @param [Hash] groups group attributes
#
# @return [String] the result of evaluating @match
#
def do_evaluate_self(groups)
  evaluated = @match.is_a?(PatternBase) ? @match.evaluate(groups) : @match
  add_capture_group_if_needed(evaluated)
end
563
+
564
#
# Returns the operator used to join this pattern onto what precedes it
# when evaluating; a fresh ConcatOperator here
#
# @return [RegexOperator] the operator to use
#
def evaluate_operator
  ConcatOperator.new
end
572
+
573
#
# Hook for additional attributes to add to the #to_s output.
# indent is a string with the amount of space the parent block is indented,
# attributes are indented 2 more spaces
# called by #to_s; this implementation adds nothing
#
# @param [String] indent the spaces to indent with
#
# @return [String] the attributes to add
#
def do_add_attributes(indent) # rubocop:disable Lint/UnusedMethodArgument
  ""
end
586
+
587
#
# The opening of the call the user would write to create this pattern:
# the freestanding constructor when top_level, the chaining method otherwise
# called by #to_s
#
# @param [Boolean] top_level is this top_level or chained
#
# @return [String] the name of the method
#
def do_get_to_s_name(top_level)
  return "Pattern.new(" if top_level

  ".then("
end
599
+
600
# (see string_single_entity)
def single_entity?
  string_single_entity?(evaluate)
end
604
+
605
# Does this pattern contain no capturing groups,
# i.e. does #collect_group_attributes find nothing?
def groupless?
  collect_group_attributes.empty?
end
609
+
610
# Create a copy of this pattern that contains no groups, by removing
# :tag_as, :reference and :includes from every pattern in the copy
# @return [PatternBase] a frozen, groupless copy
def groupless
  stripped = __deep_clone__.map! do |pattern|
    %i[tag_as reference includes].each { |key| pattern.arguments.delete(key) }
    raise "unable to remove capture" if pattern.needs_to_capture?
  end
  stripped.freeze
end
620
+
621
#
# Retags all tags_as
#
# For every pattern in a copy of self: a key of args equal to the pattern's
# tag_as or reference replaces the tag_as with the mapped value; then :append
# (if a String) is added after a "."; finally the tag is removed when it was
# to be discarded and no mapping applied.
#
# @param [Hash] args retag options
# @option [Boolean] :all (true) should all tags be kept
# @option [Boolean] :keep (true) should all tags be kept
# @option [String] :append a string to append to all tags (implies :keep)
# @option [String] tag_as maps from an old tag_as to a new tag_as
# @option [String] reference maps from reference to a new tag_as
#
# @return [PatternBase] a copy of self retagged
#
def reTag(args)
  retagged = __deep_clone__.map! do |pattern|
    options = pattern.arguments
    # tags are kept unless `all: false` or `keep: false`, and append is not a string
    discard_tag = !args[:append].is_a?(String) && (args[:all] == false || args[:keep] == false)

    args.each do |key, tag|
      next unless [options[:tag_as], options[:reference]].include? key

      options[:tag_as] = tag
      discard_tag = false
    end

    if args[:append].is_a?(String) && options[:tag_as]
      options[:tag_as] = options[:tag_as] + "." + args[:append]
    end

    options.delete(:tag_as) if discard_tag
  end
  retagged.freeze
end
653
+
654
#
# Collects information about the capture groups
#
# Groups are gathered from self first, then from @match, then from
# @next_pattern, each continuing the numbering where the previous stopped.
#
# @api private
#
# @param [Integer] next_group the next group number to use
#
# @return [Array<Hash>] group attributes
#
def collect_group_attributes(next_group = optimize_outer_group? ? 0 : 1)
  collected = do_collect_self_groups(next_group)
  offset = next_group + collected.length

  [@match, @next_pattern].each do |child|
    next unless child.is_a? PatternBase

    child_groups = child.collect_group_attributes(offset)
    collected.concat(child_groups)
    offset += child_groups.length
  end

  collected
end
677
+
678
#
# Collect group information about self: one entry holding the group number
# merged with @arguments when this pattern captures, nothing otherwise
#
# @param [Integer] next_group The next group number to use
#
# @return [Array<Hash>] group attributes
#
def do_collect_self_groups(next_group)
  return [] unless needs_to_capture?

  [{group: next_group}.merge(@arguments)]
end
690
+
691
#
# Displays the Pattern for inspection: the first word of the default
# inspect output followed by the inspected @match
#
# @return [String] A representation of the pattern
#
def inspect
  prefix = super.split(" ")[0]
  prefix + " match:#{@match.inspect}>"
end
699
+
700
#
# Convert group references into backreferences
#
# Replaces every `(?#[:backreference:NAME:])` placeholder with `\N` and every
# `(?#[:subroutine:NAME:])` placeholder with `\g<N>`, where N is the number of
# the group whose :reference is NAME.
#
# @api private
#
# @param [Hash] groups group information for the pattern
# @param [String] self_regex the pattern as string
#
# @return [String] the fixed up regex_string
#
def fixup_regex_references(groups, self_regex)
  # rubocop:disable Metrics/LineLength
  # map every reference name to its group number
  group_number_for = {}
  groups.each do |group|
    group_number_for[group[:reference]] = group[:group] unless group[:reference].nil?
  end

  # convert back references
  with_backreferences = self_regex.gsub(/\(\?\#\[:backreference:([^\\]+?):\]\)/) do
    name = Regexp.last_match(1)
    number = group_number_for[name]
    if number.nil?
      raise "\nWhen processing the matchResultOf:#{name}, I couldn't find the group it was referencing"
    end

    # the reference exists, replace it with its number
    "\\#{number}"
  end

  # convert subroutine calls to the Nth group into `\g<N>`
  with_subroutines = with_backreferences.gsub(/\(\?\#\[:subroutine:([^\\]+?):\]\)/) do
    name = Regexp.last_match(1)
    number = group_number_for[name]
    if number.nil?
      raise "\nWhen processing the recursivelyMatch:#{name}, I couldn't find the group it was referencing"
    end

    # the reference exists, replace it with its number
    "\\g<#{number}>"
  end
  # rubocop:enable Metrics/LineLength
  with_subroutines
end
742
+
743
#
# Scrambles references of self
# This method provides a way to rename all references
# both actual references and references to references will be scrambled in
# some one to one mapping, all references that were unique before remain unique
#
# This must be idempotent, calling this repeatedly must have references be as if it
# was called only once, even if the pattern is cloned between calls
# this is because it may be called a different number of times depending on the nest
# level of the patterns
#
# Idempotence comes from the marker prefix: a name that already starts with
# it is left unchanged.
#
# @return [void] nothing
#
def self_scramble_references
  marker = "__scrambled__"
  scrambled = ->(name) { name.start_with?(marker) ? name : marker + name }

  tag_as, reference = @arguments.values_at(:tag_as, :reference)
  @arguments[:tag_as] = scrambled.call(tag_as) if tag_as.is_a? String
  @arguments[:reference] = scrambled.call(reference) if reference.is_a? String
end
768
+
769
#
# Converts group attributes into a captures hash
#
# Each group becomes an entry keyed by its group number (as a String) with
# an optional :name (from :tag_as) and :patterns (from :includes).
# Inside names, `$match` is replaced by the group's own number and
# `$reference(NAME)` by the number of the group whose :reference is NAME.
#
# @api private
#
# @param [Hash] groups group attributes
#
# @return [Hash] capture hash
#
# @raise [RuntimeError] when a `$reference(NAME)` names an unknown reference
#
def convert_group_attributes_to_captures(groups)
  captures = {}

  groups.each do |group|
    capture = {}
    capture[:name] = group[:tag_as] unless group[:tag_as].nil?
    includes = group[:includes]
    unless includes.nil?
      includes = [includes] unless includes.is_a? Array
      capture[:patterns] = convert_includes_to_patterns(includes)
    end
    captures[group[:group].to_s] = capture
  end

  # replace $match and $reference() with the appropriate capture number
  captures.each do |key, capture|
    next if capture[:name].nil?

    original_name = capture[:name]
    # non-greedy, so that several $reference(...) in one name are handled one by one
    capture[:name] = original_name.gsub(/\$(?:match|reference\((.+?)\))/) do |placeholder|
      next "$" + key if placeholder == "$match"

      reference = Regexp.last_match(1)
      referenced = groups.find { |group| group[:reference] == reference }
      if referenced.nil?
        raise "\nWhen processing the tag_as \"#{original_name}\", I couldn't find the group for $reference(#{reference})"
      end

      "$" + referenced[:group].to_s
    end
  end

  captures
end
805
+
806
#
# Converts an includes array into a patterns array
#
# - Strings starting with "source." or "text." are included as is
# - :$self and :$base are included by name
# - any other Symbol is included as "#symbol"
# - everything else is turned into a PatternBase (if it is not one already)
#   and converted with #to_tag
#
# @api private
#
# @param [Array<PatternBase, Symbol>] includes an includes array
#
# @return [Array<Hash>] a patterns array
#
def convert_includes_to_patterns(includes)
  rules = includes.is_a?(Array) ? includes : [includes]
  rules.flatten.map do |rule|
    if rule.is_a?(String) && rule.start_with?("source.", "text.")
      {include: rule}
    elsif [:$self, :$base].include? rule
      {include: rule.to_s}
    elsif rule.is_a? Symbol
      {include: "##{rule}"}
    else
      pattern = rule.is_a?(PatternBase) ? rule : PatternBase.new(rule)
      pattern.to_tag
    end
  end
end
827
+
828
#
# Deeply clone self, together with everything that follows it in the list
#
# @return [PatternBase] a copy of self
#
def __deep_clone__
  head = __deep_clone_self__
  head.insert!(@next_pattern.__deep_clone__)
end
836
+
837
#
# Deeply clones self, without its next_pattern
#
# Rebuilds the pattern through the class's deep_clone constructor form from
# deep copies of @arguments and @match; @original_arguments is handed over as is.
#
# @return [PatternBase] a copy of self
#
def __deep_clone_self__
  cloned_arguments = @arguments.__deep_clone__
  cloned_arguments.store(:match, @match.__deep_clone__)
  self.class.new(cloned_arguments, :deep_clone, @original_arguments)
end
847
+
848
#
# Raise an error if regex contains a capturing group
#
# Works by compiling the regex followed by a backreference to group `check`:
# compiling fails with a RegexpError when that group does not exist, which is
# the good case and is silently ignored.
#
# @param [Regexp] regex the regexp to test
# @param [Integer] check the group to check for
#
# @return [void]
#
def raise_if_regex_has_capture_group(regex, check = 1)
  backreference = "\\" + check.to_s
  # this will throw a RegexpError if there are no capturing groups
  _ignore = with_no_warnings { /#{regex}#{backreference}/ }
  # at this point @match contains a capture group, complain
  raise <<-HEREDOC.remove_indent

    There is a pattern that is being constructed from a regular expression
    with a capturing group. This is not allowed, as the group cannot be tracked
    The bad pattern is
    #{self}
  HEREDOC
rescue RegexpError # rubocop: disable Lint/HandleExceptions
  # no capture groups present, purposely do nothing
end
870
+ end