srl_ruby 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE.txt +6 -1
- data/README.md +4 -1
- data/lib/srl_ruby.rb +5 -4
- data/lib/srl_ruby/ast_builder.rb +2 -1
- data/lib/srl_ruby/grammar.rb +1 -3
- data/lib/srl_ruby/tokenizer.rb +1 -2
- data/lib/srl_ruby/version.rb +1 -1
- data/spec/acceptance/srl_test_suite_spec.rb +57 -0
- data/spec/acceptance/support/rule_file_ast_builder.rb +99 -0
- data/spec/acceptance/support/rule_file_grammar.rb +41 -0
- data/spec/acceptance/support/rule_file_nodes.rb +49 -0
- data/spec/acceptance/support/rule_file_parser.rb +46 -0
- data/spec/acceptance/support/rule_file_token.rb +22 -0
- data/spec/acceptance/support/rule_file_tokenizer.rb +154 -0
- data/spec/integration_spec.rb +1 -1
- data/srl_ruby.gemspec +3 -2
- data/srl_test/README.md +12 -0
- data/srl_test/Test-Rules/README.md +56 -0
- data/srl_test/Test-Rules/backslash.rule +5 -0
- data/srl_test/Test-Rules/basename_capture_group.rule +7 -0
- data/srl_test/Test-Rules/issue_17_uppercase_letter.rule +6 -0
- data/srl_test/Test-Rules/literally_spaces.rule +4 -0
- data/srl_test/Test-Rules/no_word.rule +4 -0
- data/srl_test/Test-Rules/nondigit.rule +8 -0
- data/srl_test/Test-Rules/none_of.rule +6 -0
- data/srl_test/Test-Rules/sample_capture.rule +10 -0
- data/srl_test/Test-Rules/tab.rule +3 -0
- data/srl_test/Test-Rules/website_example_email.rule +9 -0
- data/srl_test/Test-Rules/website_example_email_capture.rule +11 -0
- data/srl_test/Test-Rules/website_example_lookahead.rule +6 -0
- data/srl_test/Test-Rules/website_example_password.rule +11 -0
- data/srl_test/Test-Rules/website_example_url.rule +38 -0
- data/srl_test/Test-Rules/word.rule +3 -0
- metadata +29 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6aad01259ac0746c8e49a856822c5e8c53aaed52
|
4
|
+
data.tar.gz: b814be25539c6304eab9471843c2eaa92abbbfc7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3f93e5d7277bafd3c4ca6449855d6a423eae6b6912d4e5543f7f2042026d6f1b56a970c3241a5152ae184b3e78a72e68ab437cdeb1e37cfc694f96c68941d4ad
|
7
|
+
data.tar.gz: f83cc31f5c5ccaeae1c14a0997d75da1eb4eed6ec3f306f822477de72c074383636d060530eb08f8176276a37870a4767a0635edf22035ed281987c88a9ff085
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,18 @@
|
|
6
6
|
### Fixed
|
7
7
|
### Security
|
8
8
|
|
9
|
+
## [0.2.0] - 2018-03-14
|
10
|
+
### Added
|
11
|
+
- Added `spec/acceptance/support` directory. It contains a test harness for using the .rule files from the standard SRL test suite.
|
12
|
+
- Added `acceptance/srl_test_suite_spec.rb` file. Spec file designed to run the standard SRL test suite. At this date, SrlRuby passes 3 tests out of 15 tests in total.
|
13
|
+
|
14
|
+
### Changed
|
15
|
+
- API Change. Method SrlRuby#parse returns a Regexp instance (previously it was a String)
|
16
|
+
- API Change. Method SrlRuby#load_file returns a Regexp instance (previously it was a String)
|
17
|
+
|
18
|
+
### Fixed
|
19
|
+
- SRL 'backslash' now produces 4 consecutive backslashes (required by the conversion into Regexp)
|
20
|
+
|
9
21
|
## [0.1.1] - 2018-03-10
|
10
22
|
### Changed
|
11
23
|
- Parse error location is now given in line number, column number position.
|
data/LICENSE.txt
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
This license applies to all of srl_ruby except for the portions found under
|
2
|
+
the 'srl_test' directory, which is subject to its own license.
|
3
|
+
|
4
|
+
-----
|
5
|
+
|
1
6
|
The MIT License (MIT)
|
2
7
|
|
3
|
-
Copyright (c) 2018
|
8
|
+
Copyright (c) 2018 Dimitri Geshef
|
4
9
|
|
5
10
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
11
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -69,7 +69,10 @@ And there is the equivalent regex found by `srl_ruby`:
|
|
69
69
|
|
70
70
|
|
71
71
|
## Usage
|
72
|
-
|
72
|
+
|
73
|
+
The method `SrlRuby#parse` accepts a Simple Regex Language string as input, and returns the corresponding regular expression.
|
74
|
+
|
75
|
+
For instance, the following snippet...
|
73
76
|
|
74
77
|
```ruby
|
75
78
|
require 'srl_ruby' # Load srl_ruby library
|
data/lib/srl_ruby.rb
CHANGED
@@ -5,9 +5,9 @@ require_relative './srl_ruby/ast_builder'
|
|
5
5
|
|
6
6
|
module SrlRuby # This module is used as a namespace
|
7
7
|
# Load the SRL expression contained in filename.
|
8
|
-
# Returns
|
9
|
-
# as a Ruby String.
|
8
|
+
# Returns an equivalent Regexp object.
|
10
9
|
# @param filename [String] file name to parse.
|
10
|
+
# @return [Regexp]
|
11
11
|
def self.load_file(filename)
|
12
12
|
source = nil
|
13
13
|
File.open(filename, 'r') { |f| source = f.read }
|
@@ -16,8 +16,9 @@ module SrlRuby # This module is used as a namespace
|
|
16
16
|
return parse(source)
|
17
17
|
end
|
18
18
|
|
19
|
-
# Parse the SRL expression into its
|
19
|
+
# Parse the SRL expression into its Regexp equivalent.
|
20
20
|
# @param source [String] the SRL source to parse and convert.
|
21
|
+
# @return [Regexp]
|
21
22
|
def self.parse(source)
|
22
23
|
# Create a Rley facade object
|
23
24
|
engine = Rley::Engine.new
|
@@ -41,6 +42,6 @@ module SrlRuby # This module is used as a namespace
|
|
41
42
|
|
42
43
|
# Now output the regexp literal
|
43
44
|
root = ast_ptree.root
|
44
|
-
return root.to_str
|
45
|
+
return Regexp.new(root.to_str)
|
45
46
|
end
|
46
47
|
end # module
|
data/lib/srl_ruby/ast_builder.rb
CHANGED
@@ -262,7 +262,8 @@ module SrlRuby
|
|
262
262
|
|
263
263
|
# rule('special_char' => 'BACKSLASH').as 'backslash'
|
264
264
|
def reduce_backslash(_production, _range, _tokens, _children)
|
265
|
-
|
265
|
+
# Double the backslash (because of escaping)
|
266
|
+
string_literal("\\", true)
|
266
267
|
end
|
267
268
|
|
268
269
|
# rule('special_char' => %w[NEW LINE]).as 'new_line'
|
data/lib/srl_ruby/grammar.rb
CHANGED
@@ -2,9 +2,7 @@
|
|
2
2
|
require 'rley' # Load the gem
|
3
3
|
module SrlRuby
|
4
4
|
########################################
|
5
|
-
#
|
6
|
-
# This is a very partial grammar of SRL.
|
7
|
-
# It will be expanded with the coming versions of Rley
|
5
|
+
# SRL grammar
|
8
6
|
builder = Rley::Syntax::GrammarBuilder.new do
|
9
7
|
add_terminals('LPAREN', 'RPAREN', 'COMMA')
|
10
8
|
add_terminals('DIGIT_LIT', 'INTEGER', 'LETTER_LIT')
|
data/lib/srl_ruby/tokenizer.rb
CHANGED
@@ -16,7 +16,7 @@ module SrlRuby
|
|
16
16
|
attr_reader(:scanner)
|
17
17
|
attr_reader(:lineno)
|
18
18
|
attr_reader(:line_start)
|
19
|
-
attr_reader(:column)
|
19
|
+
# attr_reader(:column)
|
20
20
|
|
21
21
|
@@lexeme2name = {
|
22
22
|
'(' => 'LPAREN',
|
@@ -174,6 +174,5 @@ module SrlRuby
|
|
174
174
|
def tab_size()
|
175
175
|
2
|
176
176
|
end
|
177
|
-
|
178
177
|
end # class
|
179
178
|
end # module
|
data/lib/srl_ruby/version.rb
CHANGED
@@ -0,0 +1,57 @@
|
|
1
|
+
require_relative '../spec_helper'
|
2
|
+
require_relative './support/rule_file_parser'
|
3
|
+
require_relative '../../lib/srl_ruby'
|
4
|
+
|
5
|
+
##############################
|
6
|
+
# Understand how parser fails when first rule begins with %[...] instead of %w[...]
|
7
|
+
##############################
|
8
|
+
|
9
|
+
RSpec.describe Acceptance do
|
10
|
+
def rule_path
|
11
|
+
__FILE__.sub(/spec\/.+$/, 'srl_test/Test-Rules/')
|
12
|
+
end
|
13
|
+
|
14
|
+
def load_file(aFilename)
|
15
|
+
return Acceptance::RuleFileParser.load_file(rule_path + aFilename)
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_rule_file(aRuleFileRepr)
|
19
|
+
regex = SrlRuby::parse(aRuleFileRepr.srl.value)
|
20
|
+
expect(regex).not_to be_nil
|
21
|
+
|
22
|
+
aRuleFileRepr.match_tests.each do |test|
|
23
|
+
expect(regex.match(test.test_string.value)).not_to be_nil
|
24
|
+
end
|
25
|
+
aRuleFileRepr.no_match_tests.each do |test|
|
26
|
+
expect(regex.match(test.test_string.value)).to be_nil
|
27
|
+
end
|
28
|
+
aRuleFileRepr.capture_tests.each do |test|
|
29
|
+
matching = regex.match(test.test_string.value)
|
30
|
+
expect(matching).not_to be_nil
|
31
|
+
test.expectations do |exp|
|
32
|
+
var = exp.var_name.value.to_s
|
33
|
+
captured = exp.captured_text.value
|
34
|
+
name_index = matching.names.index(var)
|
35
|
+
expect(name_index).not_to be_nil
|
36
|
+
expect(matching.captures[name_index]).to eq(captured)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'should match a backslash' do
|
42
|
+
puts __FILE__
|
43
|
+
rule_file_repr = load_file('backslash.rule')
|
44
|
+
test_rule_file(rule_file_repr)
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'should not trim literal strings' do
|
48
|
+
rule_file_repr = load_file('literally_spaces.rule')
|
49
|
+
test_rule_file(rule_file_repr)
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'should support lookahead' do
|
53
|
+
rule_file_repr = load_file('website_example_lookahead.rule')
|
54
|
+
test_rule_file(rule_file_repr)
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
require_relative 'rule_file_nodes'
|
2
|
+
|
3
|
+
module Acceptance
|
4
|
+
# The purpose of an ASTBuilder is to build piece by piece an AST
|
5
|
+
# (Abstract Syntax Tree) from a sequence of input tokens and
|
6
|
+
# visit events produced by walking over a GFGParsing object.
|
7
|
+
# Uses the Builder GoF pattern.
|
8
|
+
# The Builder pattern creates a complex object
|
9
|
+
# (say, a parse tree) from simpler objects (terminal and non-terminal
|
10
|
+
# nodes) and using a step by step approach.
|
11
|
+
class RuleFileASTBuilder < Rley::ParseRep::ASTBaseBuilder
|
12
|
+
Terminal2NodeClass = {
|
13
|
+
# Lexical ambiguity: integer literal represents two very different concepts:
|
14
|
+
# An index or a capture variable name
|
15
|
+
'INTEGER' => IntegerNode,
|
16
|
+
'STRING_LIT' => StringLitNode,
|
17
|
+
'IDENTIFIER' => VarnameNode,
|
18
|
+
'SRL_SOURCE' => SRLSourceNode
|
19
|
+
}.freeze
|
20
|
+
|
21
|
+
attr_reader :options
|
22
|
+
|
23
|
+
protected
|
24
|
+
|
25
|
+
def terminal2node()
|
26
|
+
Terminal2NodeClass
|
27
|
+
end
|
28
|
+
|
29
|
+
# rule('rule_file' => %w[srl_heading srl_tests]).as 'start_rule'
|
30
|
+
def reduce_start_rule(_production, _range, _tokens, theChildren)
|
31
|
+
rule_file = RuleFileTests.new(theChildren[0])
|
32
|
+
tests = theChildren.last.flatten
|
33
|
+
tests.each do |t|
|
34
|
+
case t
|
35
|
+
when MatchTest then rule_file.match_tests << t
|
36
|
+
when NoMatchTest then rule_file.no_match_tests << t
|
37
|
+
when CaptureTest then rule_file.capture_tests << t
|
38
|
+
else
|
39
|
+
raise StandardError, 'Internal error'
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
return rule_file
|
44
|
+
end
|
45
|
+
|
46
|
+
# rule('srl_heading' => %w[SRL: SRL_SOURCE]).as 'srl_source'
|
47
|
+
def reduce_srl_source(_production, _range, _tokens, theChildren)
|
48
|
+
return theChildren.last
|
49
|
+
end
|
50
|
+
|
51
|
+
# rule('srl_tests' => %w[srl_tests single_test]).as 'test_list'
|
52
|
+
def reduce_test_list(_production, _range, _tokens, theChildren)
|
53
|
+
return theChildren[0] << theChildren[1]
|
54
|
+
end
|
55
|
+
|
56
|
+
# rule('srl_tests' => 'single_test').as 'one_test'
|
57
|
+
def reduce_one_test(_production, _range, _tokens, theChildren)
|
58
|
+
return [theChildren.last]
|
59
|
+
end
|
60
|
+
|
61
|
+
# rule('match_test' => %w[MATCH: STRING_LIT]).as 'match_string'
|
62
|
+
def reduce_match_string(_production, _range, _tokens, theChildren)
|
63
|
+
MatchTest.new(theChildren.last)
|
64
|
+
end
|
65
|
+
|
66
|
+
# rule('no_match_test' => %w[NO MATCH: STRING_LIT]).as 'no_match_string'
|
67
|
+
def reduce_no_match_string(_production, _range, _tokens, theChildren)
|
68
|
+
NoMatchTest.new(theChildren.last)
|
69
|
+
end
|
70
|
+
|
71
|
+
# rule('capture_test' => %w[capture_heading capture_expectations])
|
72
|
+
# .as 'capture_test'
|
73
|
+
def reduce_capture_test(_production, _range, _tokens, theChildren)
|
74
|
+
CaptureTest.new(theChildren[0], theChildren.last)
|
75
|
+
end
|
76
|
+
|
77
|
+
# rule('capture_heading' => %w[CAPTURE FOR STRING_LIT COLON]).as 'capture_string'
|
78
|
+
def reduce_capture_string(_production, _range, _tokens, theChildren)
|
79
|
+
return theChildren[2]
|
80
|
+
end
|
81
|
+
|
82
|
+
# rule('capture_expectations' => %w[capture_expectations
|
83
|
+
# single_expectation]).as 'assertion_list'
|
84
|
+
def reduce_assertion_list(_production, _range, _tokens, theChildren)
|
85
|
+
return theChildren[0] << theChildren[1]
|
86
|
+
end
|
87
|
+
|
88
|
+
# rule('capture_expectations' => 'single_expectation').as 'one_expectation'
|
89
|
+
def reduce_one_expectation(_production, _range, _tokens, theChildren)
|
90
|
+
return [theChildren.last]
|
91
|
+
end
|
92
|
+
|
93
|
+
# rule('single_expectation' => %w[DASH INTEGER COLON capture_variable
|
94
|
+
# COLON STRING_LIT]).as 'capture_expectation'
|
95
|
+
def reduce_capture_expectation(_production, _range, _tokens, theChildren)
|
96
|
+
CaptureExpectation.new(theChildren[1], theChildren[3], theChildren[5])
|
97
|
+
end
|
98
|
+
end # class
|
99
|
+
end # module
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# File: rule_file_grammar.rb
|
2
|
+
require 'rley' # Load the Rley gem
|
3
|
+
|
4
|
+
# Grammar for Test-Rule files
|
5
|
+
# [File format](https://github.com/SimpleRegex/Test-Rules/blob/master/README.md)
|
6
|
+
########################################
|
7
|
+
# Define a grammar for SRL Test-Rule files
|
8
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
9
|
+
# Punctuation
|
10
|
+
add_terminals('COLON', 'DASH')
|
11
|
+
|
12
|
+
# Keywords
|
13
|
+
add_terminals('CAPTURE', 'FOR')
|
14
|
+
add_terminals('MATCH:', 'NO', 'SRL:')
|
15
|
+
|
16
|
+
# Literals
|
17
|
+
add_terminals('INTEGER', 'STRING_LIT')
|
18
|
+
add_terminals('IDENTIFIER', 'SRL_SOURCE')
|
19
|
+
|
20
|
+
rule('rule_file' => %w[srl_heading srl_tests]).as 'start_rule'
|
21
|
+
rule('srl_heading' => %w[SRL: SRL_SOURCE]).as 'srl_source'
|
22
|
+
rule('srl_tests' => %w[srl_tests single_test]).as 'test_list'
|
23
|
+
rule('srl_tests' => 'single_test').as 'one_test'
|
24
|
+
rule('single_test' => 'atomic_test').as 'single_atomic_test'
|
25
|
+
rule('single_test' => 'compound_test').as 'single_compound_test'
|
26
|
+
rule('atomic_test' => 'match_test').as 'atomic_match'
|
27
|
+
rule('atomic_test' => 'no_match_test').as 'atomic_no_match'
|
28
|
+
rule('compound_test' => 'capture_test').as 'compound_capture'
|
29
|
+
rule('match_test' => %w[MATCH: STRING_LIT]).as 'match_string'
|
30
|
+
rule('no_match_test' => %w[NO MATCH: STRING_LIT]).as 'no_match_string'
|
31
|
+
rule('capture_test' => %w[capture_heading capture_expectations]).as 'capture_test'
|
32
|
+
rule('capture_heading' => %w[CAPTURE FOR STRING_LIT COLON]).as 'capture_string'
|
33
|
+
rule('capture_expectations' => %w[capture_expectations single_expectation]).as 'assertion_list'
|
34
|
+
rule('capture_expectations' => 'single_expectation').as 'one_expectation'
|
35
|
+
rule('single_expectation' => %w[DASH INTEGER COLON capture_variable COLON STRING_LIT]).as 'capture_expectation'
|
36
|
+
rule('capture_variable' => 'INTEGER').as 'var_integer'
|
37
|
+
rule('capture_variable' => 'IDENTIFIER').as 'var_identifier'
|
38
|
+
end
|
39
|
+
|
40
|
+
# And now build the grammar...
|
41
|
+
RuleFileGrammar = builder.grammar
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Classes that implement nodes of Abstract Syntax Trees (AST) representing
|
2
|
+
# rule file contents.
|
3
|
+
|
4
|
+
module Acceptance
|
5
|
+
RuleFileTerminalNode = Struct.new(:value) do
|
6
|
+
def initialize(aToken, _position)
|
7
|
+
init_value(aToken.lexeme)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
class IntegerNode < RuleFileTerminalNode
|
12
|
+
def init_value(aLiteral)
|
13
|
+
self.value = aLiteral.to_i
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
class StringLitNode < RuleFileTerminalNode
|
18
|
+
def init_value(aLiteral)
|
19
|
+
self.value = aLiteral.dup
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
class SRLSourceNode < RuleFileTerminalNode
|
24
|
+
def init_value(aLiteral)
|
25
|
+
self.value = aLiteral.dup
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class VarnameNode < RuleFileTerminalNode
|
30
|
+
def init_value(aLiteral)
|
31
|
+
self.value = aLiteral.dup
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
RuleFileTests = Struct.new(:srl, :match_tests, :no_match_tests, :capture_tests) do
|
36
|
+
def initialize(aSRLExpression)
|
37
|
+
self.srl = aSRLExpression.dup
|
38
|
+
self.match_tests = []
|
39
|
+
self.no_match_tests = []
|
40
|
+
self.capture_tests = []
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
MatchTest = Struct.new(:test_string)
|
45
|
+
NoMatchTest = Struct.new(:test_string)
|
46
|
+
|
47
|
+
CaptureExpectation = Struct.new(:result_index, :var_name, :captured_text)
|
48
|
+
CaptureTest = Struct.new(:test_string, :expectations)
|
49
|
+
end # module
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require_relative 'rule_file_tokenizer'
|
2
|
+
require_relative 'rule_file_grammar'
|
3
|
+
require_relative 'rule_file_ast_builder'
|
4
|
+
|
5
|
+
module Acceptance # This module is used as a namespace
|
6
|
+
module RuleFileParser
|
7
|
+
# Load the rule file
|
8
|
+
# Returns the test rule representation
|
9
|
+
# @param filename [String] file name to parse.
|
10
|
+
def self.load_file(filename)
|
11
|
+
source = nil
|
12
|
+
File.open(filename, 'r') { |f| source = f.read }
|
13
|
+
return source if source.nil? || source.empty?
|
14
|
+
|
15
|
+
return parse(source)
|
16
|
+
end
|
17
|
+
|
18
|
+
# Parse the rule file
|
19
|
+
# @param source [String] the rule file source text to parse.
|
20
|
+
def self.parse(source)
|
21
|
+
# Create a Rley facade object
|
22
|
+
engine = Rley::Engine.new
|
23
|
+
|
24
|
+
# Step 1. Load the rule file grammar
|
25
|
+
engine.use_grammar(RuleFileGrammar)
|
26
|
+
|
27
|
+
lexer = RuleFileTokenizer.new(source)
|
28
|
+
result = engine.parse(lexer.tokens)
|
29
|
+
|
30
|
+
unless result.success?
|
31
|
+
# Stop if the parse failed...
|
32
|
+
line1 = "Parsing failed\n"
|
33
|
+
line2 = "Reason: #{result.failure_reason.message}"
|
34
|
+
raise StandardError, line1 + line2
|
35
|
+
end
|
36
|
+
|
37
|
+
# Generate an abstract syntax tree (AST) from the parse result
|
38
|
+
engine.configuration.repr_builder = RuleFileASTBuilder
|
39
|
+
ast_ptree = engine.convert(result)
|
40
|
+
|
41
|
+
# Now return the root node of the resulting tree
|
42
|
+
root = ast_ptree.root
|
43
|
+
return root
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end # module
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rley' # Load the Rley gem
|
2
|
+
|
3
|
+
module Acceptance
|
4
|
+
Position = Struct.new(:line, :column) do
|
5
|
+
def to_s()
|
6
|
+
"line #{line}, column #{column}"
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
# Specialization of Token class.
|
11
|
+
# It stores the position in (line, column) of the token
|
12
|
+
class RuleFileToken < Rley::Lexical::Token
|
13
|
+
attr_reader(:position)
|
14
|
+
|
15
|
+
def initialize(theLexeme, aTerminal, aPosition)
|
16
|
+
super(theLexeme, aTerminal)
|
17
|
+
@position = aPosition
|
18
|
+
end
|
19
|
+
end # class
|
20
|
+
end # module
|
21
|
+
|
22
|
+
# End of file
|
@@ -0,0 +1,154 @@
|
|
1
|
+
# File: rule_file_tokenizer.rb
|
2
|
+
# Tokenizer for SimpleRegex Test-Rule files
|
3
|
+
# [File format](https://github.com/SimpleRegex/Test-Rules/blob/master/README.md)
|
4
|
+
require 'strscan'
|
5
|
+
require 'pp'
|
6
|
+
require_relative 'rule_file_token'
|
7
|
+
|
8
|
+
module Acceptance
|
9
|
+
# The tokenizer should recognize:
|
10
|
+
# Keywords: capture, for, match:, no, srl:
|
11
|
+
# Integer literals including single digit
|
12
|
+
# String literals (quote delimited)
|
13
|
+
# Single character literal
|
14
|
+
# Punctuation: colon ':' and dash '-'
|
15
|
+
# Separators: comma (optional)
|
16
|
+
class RuleFileTokenizer
|
17
|
+
attr_reader(:scanner)
|
18
|
+
attr_reader(:lineno)
|
19
|
+
attr_reader(:line_start)
|
20
|
+
|
21
|
+
# Can be :default, :expecting_srl
|
22
|
+
attr_reader(:state)
|
23
|
+
|
24
|
+
@@lexeme2name = {
|
25
|
+
':' => 'COLON',
|
26
|
+
'-' => 'DASH'
|
27
|
+
}.freeze
|
28
|
+
|
29
|
+
# Here are all the Rule file keywords
|
30
|
+
@@keywords = %w[
|
31
|
+
capture
|
32
|
+
for
|
33
|
+
match:
|
34
|
+
no
|
35
|
+
srl:
|
36
|
+
].map { |x| [x, x.upcase] }.to_h
|
37
|
+
|
38
|
+
class ScanError < StandardError; end
|
39
|
+
|
40
|
+
def initialize(source)
|
41
|
+
@scanner = StringScanner.new(source)
|
42
|
+
@lineno = 1
|
43
|
+
@line_start = 0
|
44
|
+
@state = :default
|
45
|
+
end
|
46
|
+
|
47
|
+
def tokens()
|
48
|
+
tok_sequence = []
|
49
|
+
until @scanner.eos?
|
50
|
+
token = _next_token
|
51
|
+
tok_sequence << token unless token.nil?
|
52
|
+
end
|
53
|
+
|
54
|
+
return tok_sequence
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def _next_token()
|
60
|
+
skip_noise
|
61
|
+
curr_ch = scanner.peek(1)
|
62
|
+
return nil if curr_ch.nil? || curr_ch.empty?
|
63
|
+
|
64
|
+
token = if state == :default
|
65
|
+
default_mode
|
66
|
+
else
|
67
|
+
expecting_srl
|
68
|
+
end
|
69
|
+
|
70
|
+
return token
|
71
|
+
end
|
72
|
+
|
73
|
+
def default_mode()
|
74
|
+
curr_ch = scanner.peek(1)
|
75
|
+
token = nil
|
76
|
+
|
77
|
+
|
78
|
+
if '-:'.include? curr_ch
|
79
|
+
# Delimiters, separators => single character token
|
80
|
+
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
81
|
+
elsif (lexeme = scanner.scan(/[0-9]+/))
|
82
|
+
token = build_token('INTEGER', lexeme)
|
83
|
+
elsif (lexeme = scanner.scan(/srl:|match:/))
|
84
|
+
token = build_token(@@keywords[lexeme], lexeme)
|
85
|
+
@state = :expecting_srl if lexeme == 'srl:'
|
86
|
+
elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z0-9_]*/))
|
87
|
+
keyw = @@keywords[lexeme]
|
88
|
+
token_type = keyw ? keyw : 'IDENTIFIER'
|
89
|
+
token = build_token(token_type, lexeme)
|
90
|
+
elsif (lexeme = scanner.scan(/"([^"]|\\")*"/)) # Double quotes literal?
|
91
|
+
unquoted = lexeme.gsub(/(^")|("$)/, '')
|
92
|
+
token = build_token('STRING_LIT', unquoted)
|
93
|
+
else # Unknown token
|
94
|
+
erroneous = curr_ch.nil? ? '' : curr_ch
|
95
|
+
sequel = scanner.scan(/.{1,20}/)
|
96
|
+
erroneous += sequel unless sequel.nil?
|
97
|
+
raise ScanError.new("Unknown token #{erroneous}")
|
98
|
+
end
|
99
|
+
|
100
|
+
return token
|
101
|
+
end
|
102
|
+
|
103
|
+
def expecting_srl()
|
104
|
+
scanner.skip(/^:/)
|
105
|
+
lexeme = scanner.scan(/[^\r\n]*/)
|
106
|
+
@state = :default
|
107
|
+
build_token('SRL_SOURCE', lexeme)
|
108
|
+
end
|
109
|
+
|
110
|
+
def build_token(aSymbolName, aLexeme)
|
111
|
+
begin
|
112
|
+
col = scanner.pos - aLexeme.size - @line_start + 1
|
113
|
+
pos = Position.new(@lineno, col)
|
114
|
+
token = RuleFileToken.new(aLexeme, aSymbolName, pos)
|
115
|
+
rescue StandardError => exc
|
116
|
+
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
117
|
+
raise exc
|
118
|
+
end
|
119
|
+
|
120
|
+
return token
|
121
|
+
end
|
122
|
+
|
123
|
+
def skip_noise()
|
124
|
+
begin
|
125
|
+
noise_found = false
|
126
|
+
noise_found = true if skip_whitespaces
|
127
|
+
noise_found = true if skip_comment
|
128
|
+
end while noise_found
|
129
|
+
end
|
130
|
+
|
131
|
+
def skip_whitespaces()
|
132
|
+
pre_pos = scanner.pos
|
133
|
+
|
134
|
+
begin
|
135
|
+
ws_found = false
|
136
|
+
found = scanner.skip(/[ \t\f]+/)
|
137
|
+
ws_found = true if found
|
138
|
+
found = scanner.skip(/(?:\r\n)|\r|\n/)
|
139
|
+
if found
|
140
|
+
ws_found = true
|
141
|
+
@lineno += 1
|
142
|
+
@line_start = scanner.pos
|
143
|
+
end
|
144
|
+
end while ws_found
|
145
|
+
|
146
|
+
curr_pos = scanner.pos
|
147
|
+
return !(curr_pos == pre_pos)
|
148
|
+
end
|
149
|
+
|
150
|
+
def skip_comment()
|
151
|
+
scanner.skip(/#[^\n\r]+/)
|
152
|
+
end
|
153
|
+
end # class
|
154
|
+
end # module
|
data/spec/integration_spec.rb
CHANGED
data/srl_ruby.gemspec
CHANGED
@@ -18,7 +18,8 @@ module PkgExtending
|
|
18
18
|
'srl_ruby.gemspec',
|
19
19
|
'lib/*.*',
|
20
20
|
'lib/**/*.rb',
|
21
|
-
'spec/**/*.rb'
|
21
|
+
'spec/**/*.rb',
|
22
|
+
'srl_test/**/*.*'
|
22
23
|
]
|
23
24
|
aPackage.files = file_list
|
24
25
|
aPackage.test_files = Dir['spec/**/*_spec.rb']
|
@@ -54,7 +55,7 @@ END_DESCR
|
|
54
55
|
spec.required_ruby_version = '>= 2.1.0'
|
55
56
|
|
56
57
|
# Runtime dependencies
|
57
|
-
spec.add_dependency 'rley', '~> 0.6.
|
58
|
+
spec.add_dependency 'rley', '~> 0.6.04'
|
58
59
|
|
59
60
|
# Development dependencies
|
60
61
|
spec.add_development_dependency 'bundler', '~> 1.16'
|
data/srl_test/README.md
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
The files found under this directory are extracted directly from the
|
2
|
+
[Test-Rules](https://github.com/SimpleRegex/Test-Rules) test harness
|
3
|
+
used for verifying the SRL implementation, the most recent version
|
4
|
+
of which is available at https://github.com/SimpleRegex/Test-Rules.
|
5
|
+
|
6
|
+
With the exception of this README.md, all of the files are Copyright (c)
|
7
|
+
2016-2018 Karim Geigier and released under the MIT license. Please see
|
8
|
+
Test-Rules/License text file for details.
|
9
|
+
|
10
|
+
Directory contents:
|
11
|
+
README.md -- this file
|
12
|
+
Test-Rules/ -- files extracted directly from the SimpleRegex project.
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# Test Rules
|
2
|
+
|
3
|
+
Test rules are made to verify that your implementation of SRL is valid.
|
4
|
+
These files contain simple tests to validate the SRL and the
|
5
|
+
corresponding results. The structure is easy to understand and implement.
|
6
|
+
|
7
|
+
## Structure of a .rule File
|
8
|
+
|
9
|
+
These rules are required to build valid test rules:
|
10
|
+
|
11
|
+
* All files used for testing must end with the extension `.rule` and at
|
12
|
+
least contain one valid assertion along with the SRL query.
|
13
|
+
* The query is defined through `srl: ` on the beginning of a line.
|
14
|
+
* All strings that should match are defined through `match: ` on the
|
15
|
+
beginning of a line.
|
16
|
+
* There can be unlimited `match: ` lines per rule.
|
17
|
+
* Each match must be surrounded by `"`.
|
18
|
+
* All strings that should **not** match are defined through `no match: `
|
19
|
+
on the beginning of a line.
|
20
|
+
* There can be unlimited `no match: ` lines per rule.
|
21
|
+
* Each match must be surrounded by `"`.
|
22
|
+
* If a capture group is defined, its result can be defined as follows:
|
23
|
+
* The line must begin with `capture for `.
|
24
|
+
* Surrounded by `"`, the test string to match must be provided, followed by a `: `.
|
25
|
+
* If a named group is desired, use the following syntax: `name: "result"`
|
26
|
+
* If an anonymous group is desired, just supply `"result"`.
|
27
|
+
* Separate multiple captures using `, `.
|
28
|
+
* If one expression returns multiple matches, supply the same test string in the second line.
|
29
|
+
* The query as well as the expectations must not exceed one line.
|
30
|
+
If required, new lines can be forced using `\n`. Tabs using `\t`.
|
31
|
+
* Comments must be on a separate line and start with a `#`.
|
32
|
+
|
33
|
+
## Example .rule Files
|
34
|
+
|
35
|
+
```
|
36
|
+
# This is a sample rule with a named capture group
|
37
|
+
srl: capture (letter twice) as "foo"
|
38
|
+
capture for "aa1":
|
39
|
+
- 0: foo: "aa"
|
40
|
+
match: "example"
|
41
|
+
match: "aa2"
|
42
|
+
no match: "a"
|
43
|
+
```
|
44
|
+
|
45
|
+
```
|
46
|
+
# This is a sample rule with an anonymous capture group and multiple results
|
47
|
+
srl: capture (digit)
|
48
|
+
capture for "123":
|
49
|
+
- 0: 0: "1"
|
50
|
+
- 1: 0: "2"
|
51
|
+
- 2: 0: "3"
|
52
|
+
|
53
|
+
capture for "01":
|
54
|
+
- 0: 0: "0"
|
55
|
+
- 1: 0: "1"
|
56
|
+
```
|
@@ -0,0 +1,9 @@
|
|
1
|
+
srl: begin with any of (digit, letter, one of "._%+-") once or more, literally "@", any of (digit, letter, one of ".-") once or more, literally ".", letter at least 2 times, must end, case insensitive
|
2
|
+
match: "you@example.com"
|
3
|
+
match: "you@example.email"
|
4
|
+
match: "me@foo.bar.email"
|
5
|
+
no match: "you@example.c"
|
6
|
+
no match: "you@example"
|
7
|
+
no match: "you@.com"
|
8
|
+
no match: "@example.com"
|
9
|
+
no match: "example.com"
|
@@ -0,0 +1,11 @@
|
|
1
|
+
srl: capture (any of (digit, letter, one of "._%+-") once or more) as "local", literally "@", capture (any of (digit, letter, one of ".-") once or more, literally ".", letter at least 2 times ) as "domain", case insensitive
|
2
|
+
match: "you@example.email, me@you.com"
|
3
|
+
no match: "you@example.c"
|
4
|
+
no match: "just some text"
|
5
|
+
no match: "example.com"
|
6
|
+
|
7
|
+
capture for "Message me at you@example.com. Business email: business@awesome.email":
|
8
|
+
- 0: local: "you"
|
9
|
+
- 0: domain: "example.com"
|
10
|
+
- 1: local: "business"
|
11
|
+
- 1: domain: "awesome.email"
|
@@ -0,0 +1,6 @@
|
|
1
|
+
srl: capture (digit) if not followed by (anything once or more, digit)
|
2
|
+
match: "This example contains 3 numbers. 2 should not match. Only 1 should."
|
3
|
+
no match: "some string without numbers"
|
4
|
+
|
5
|
+
capture for "This example contains 3 numbers. 2 should not match. Only 1 should.":
|
6
|
+
- 0: 0: "1"
|
@@ -0,0 +1,11 @@
|
|
1
|
+
srl: if followed by (anything never or more, letter), if followed by (anything never or more, uppercase letter), if followed by (anything never or more, digit), if followed by (anything never or more, one of "!@#$%^&*[]\"';:_-<>., =+/\\"), anything at least 8 time
|
2
|
+
match: "P@sSword1"
|
3
|
+
match: "Pass-w0rd"
|
4
|
+
match: "Th1s is Secure"
|
5
|
+
no match: "Password"
|
6
|
+
no match: "P@sS1"
|
7
|
+
no match: "justalongpassword"
|
8
|
+
no match: "m1ss1ng upper"
|
9
|
+
no match: "missing Number"
|
10
|
+
no match: "M1SS1NG LOWER"
|
11
|
+
no match: "m1ss1ngSpec1al"
|
@@ -0,0 +1,38 @@
|
|
1
|
+
srl: begin with capture (letter once or more) as "protocol", literally "://", capture ( letter once or more, any of (letter, literally ".") once or more, letter at least 2 times ) as "domain", literally ":" optional, capture (digit once or more) as "port" optional, capture (anything never or more) as "path" until (any of (literally "?", must end)), literally "?" optional, capture (anything never or more) as "parameters" optional, must end, case insensitive
|
2
|
+
match: "https://example.domain.com:1234/a/path?query=param"
|
3
|
+
match: "http://domain.com?query=param"
|
4
|
+
match: "http://domain.com/"
|
5
|
+
match: "http://domain.com"
|
6
|
+
match: "http://domain/foo/?bar=baz"
|
7
|
+
no match: "you@example.com"
|
8
|
+
no match: "domain.com"
|
9
|
+
no match: "://domain.com"
|
10
|
+
no match: "http://"
|
11
|
+
|
12
|
+
capture for "https://example.domain.com:1234/a/path?query=param":
|
13
|
+
- 0: protocol: "https"
|
14
|
+
- 0: domain: "example.domain.com"
|
15
|
+
- 0: port: "1234"
|
16
|
+
- 0: path: "/a/path"
|
17
|
+
- 0: parameters: "query=param"
|
18
|
+
|
19
|
+
capture for "https://example.domain.com:1234/a/path":
|
20
|
+
- 0: protocol: "https"
|
21
|
+
- 0: domain: "example.domain.com"
|
22
|
+
- 0: port: "1234"
|
23
|
+
- 0: path: "/a/path"
|
24
|
+
- 0: parameters: ""
|
25
|
+
|
26
|
+
capture for "protocol://domain/a/path":
|
27
|
+
- 0: protocol: "protocol"
|
28
|
+
- 0: domain: "domain"
|
29
|
+
- 0: port: ""
|
30
|
+
- 0: path: "/a/path"
|
31
|
+
- 0: parameters: ""
|
32
|
+
|
33
|
+
capture for "http://domain.com":
|
34
|
+
- 0: protocol: "http"
|
35
|
+
- 0: domain: "domain.com"
|
36
|
+
- 0: port: ""
|
37
|
+
- 0: path: ""
|
38
|
+
- 0: parameters: ""
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srl_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-03-
|
11
|
+
date: 2018-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.6.
|
19
|
+
version: 0.6.04
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.6.
|
26
|
+
version: 0.6.04
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -113,6 +113,13 @@ files:
|
|
113
113
|
- lib/srl_ruby/srl_token.rb
|
114
114
|
- lib/srl_ruby/tokenizer.rb
|
115
115
|
- lib/srl_ruby/version.rb
|
116
|
+
- spec/acceptance/srl_test_suite_spec.rb
|
117
|
+
- spec/acceptance/support/rule_file_ast_builder.rb
|
118
|
+
- spec/acceptance/support/rule_file_grammar.rb
|
119
|
+
- spec/acceptance/support/rule_file_nodes.rb
|
120
|
+
- spec/acceptance/support/rule_file_parser.rb
|
121
|
+
- spec/acceptance/support/rule_file_token.rb
|
122
|
+
- spec/acceptance/support/rule_file_tokenizer.rb
|
116
123
|
- spec/integration_spec.rb
|
117
124
|
- spec/regex/character_spec.rb
|
118
125
|
- spec/regex/multiplicity_spec.rb
|
@@ -120,6 +127,23 @@ files:
|
|
120
127
|
- spec/srl_ruby/srl_ruby_spec.rb
|
121
128
|
- spec/srl_ruby/tokenizer_spec.rb
|
122
129
|
- srl_ruby.gemspec
|
130
|
+
- srl_test/README.md
|
131
|
+
- srl_test/Test-Rules/README.md
|
132
|
+
- srl_test/Test-Rules/backslash.rule
|
133
|
+
- srl_test/Test-Rules/basename_capture_group.rule
|
134
|
+
- srl_test/Test-Rules/issue_17_uppercase_letter.rule
|
135
|
+
- srl_test/Test-Rules/literally_spaces.rule
|
136
|
+
- srl_test/Test-Rules/no_word.rule
|
137
|
+
- srl_test/Test-Rules/nondigit.rule
|
138
|
+
- srl_test/Test-Rules/none_of.rule
|
139
|
+
- srl_test/Test-Rules/sample_capture.rule
|
140
|
+
- srl_test/Test-Rules/tab.rule
|
141
|
+
- srl_test/Test-Rules/website_example_email.rule
|
142
|
+
- srl_test/Test-Rules/website_example_email_capture.rule
|
143
|
+
- srl_test/Test-Rules/website_example_lookahead.rule
|
144
|
+
- srl_test/Test-Rules/website_example_password.rule
|
145
|
+
- srl_test/Test-Rules/website_example_url.rule
|
146
|
+
- srl_test/Test-Rules/word.rule
|
123
147
|
homepage: https://github.com/famished-tiger/SRL-Ruby
|
124
148
|
licenses:
|
125
149
|
- MIT
|
@@ -148,6 +172,7 @@ summary: srl_ruby is a gem implementing a parser for Simple Regex Language (SRL)
|
|
148
172
|
It translates patterns expressed in SRL into plain Ruby Regexp objects or regex
|
149
173
|
literals.
|
150
174
|
test_files:
|
175
|
+
- spec/acceptance/srl_test_suite_spec.rb
|
151
176
|
- spec/integration_spec.rb
|
152
177
|
- spec/regex/character_spec.rb
|
153
178
|
- spec/regex/multiplicity_spec.rb
|