jgrep 1.4.0 → 1.5.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.markdown +18 -0
- data/README.markdown +13 -12
- data/Rakefile +6 -1
- data/bin/jgrep +115 -126
- data/lib/jgrep.rb +286 -377
- data/lib/parser/parser.rb +109 -125
- data/lib/parser/scanner.rb +148 -149
- data/spec/Rakefile +3 -3
- data/spec/spec_helper.rb +1 -2
- data/spec/unit/jgrep_spec.rb +239 -233
- data/spec/unit/parser_spec.rb +132 -127
- data/spec/unit/scanner_spec.rb +88 -86
- metadata +6 -19
data/lib/parser/parser.rb
CHANGED
@@ -1,132 +1,116 @@
|
|
1
1
|
module JGrep
|
2
|
-
|
3
|
-
|
2
|
+
class Parser
|
3
|
+
attr_reader :scanner, :execution_stack
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
def initialize(args)
|
6
|
+
@scanner = Scanner.new(args)
|
7
|
+
@execution_stack = []
|
8
|
+
|
9
|
+
parse
|
10
|
+
end
|
11
|
+
|
12
|
+
# Parse the input string one token at a time and construct the call stack
|
13
|
+
def parse(substatement = nil, token_index = 0)
|
14
|
+
p_token = nil
|
15
|
+
|
16
|
+
if substatement
|
17
|
+
c_token, c_token_value = substatement[token_index]
|
18
|
+
else
|
19
|
+
c_token, c_token_value = @scanner.get_token
|
20
|
+
end
|
21
|
+
|
22
|
+
parenth = 0
|
23
|
+
|
24
|
+
until c_token.nil?
|
25
|
+
if substatement
|
26
|
+
token_index += 1
|
27
|
+
n_token, n_token_value = substatement[token_index]
|
28
|
+
else
|
29
|
+
@scanner.token_index += 1
|
30
|
+
n_token, n_token_value = @scanner.get_token
|
31
|
+
end
|
32
|
+
|
33
|
+
next if n_token == " "
|
34
|
+
case c_token
|
35
|
+
|
36
|
+
when "and"
|
37
|
+
unless (n_token =~ /not|statement|\(|\+|-/) || (scanner.token_index == scanner.arguments.size)
|
38
|
+
raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement' or '('. Found '#{n_token_value}'"
|
39
|
+
end
|
40
|
+
|
41
|
+
raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'and'" if p_token.nil?
|
42
|
+
raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'and'" if %w[and or].include?(p_token)
|
43
|
+
|
44
|
+
when "or"
|
45
|
+
unless (n_token =~ /not|statement|\(|\+|-/) || (scanner.token_index == scanner.arguments.size)
|
46
|
+
raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement', '('. Found '#{n_token_value}'"
|
47
|
+
end
|
48
|
+
|
49
|
+
raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'or'" if p_token.nil?
|
50
|
+
raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'or'" if %w[and or].include?(p_token)
|
51
|
+
|
52
|
+
when "not"
|
53
|
+
unless n_token =~ /statement|\(|not|\+|-/
|
54
|
+
raise "Error at column #{scanner.token_index}. \nExpected 'statement' or '('. Found '#{n_token_value}'"
|
55
|
+
end
|
56
|
+
|
57
|
+
when "statement"
|
58
|
+
if c_token_value.is_a? Array
|
59
|
+
raise "Error at column #{scanner.token_index}\nError, cannot define '[' in a '[...]' block." if substatement
|
60
|
+
|
61
|
+
parse(c_token_value, 0)
|
62
|
+
end
|
63
|
+
|
64
|
+
if c_token_value =~ /!=/
|
65
|
+
c_token_value = c_token_value.gsub("!=", "=")
|
66
|
+
@execution_stack << {"not" => "not"}
|
67
|
+
end
|
68
|
+
|
69
|
+
if !n_token.nil? && !n_token.match(/and|or|\)/)
|
70
|
+
raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
|
71
|
+
end
|
72
|
+
|
73
|
+
when "+"
|
74
|
+
if !n_token.nil? && !n_token.match(/and|or|\)/)
|
75
|
+
raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
|
76
|
+
end
|
77
|
+
|
78
|
+
when "-"
|
79
|
+
if !n_token.nil? && !n_token.match(/and|or|\)/)
|
80
|
+
raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
|
81
|
+
end
|
82
|
+
|
83
|
+
when ")"
|
84
|
+
if !n_token.nil? && !n_token =~ /|and|or|not|\(/
|
85
|
+
raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', 'not' or '('. Found '#{n_token_value}'"
|
86
|
+
end
|
87
|
+
|
88
|
+
parenth += 1
|
89
|
+
|
90
|
+
when "("
|
91
|
+
unless n_token =~ /statement|not|\(|\+|-/
|
92
|
+
raise "Error at column #{scanner.token_index}. \nExpected 'statement', '(', not. Found '#{n_token_value}'"
|
93
|
+
end
|
94
|
+
|
95
|
+
parenth -= 1
|
96
|
+
|
97
|
+
else
|
98
|
+
raise "Unexpected token found at column #{scanner.token_index}. '#{c_token_value}'"
|
9
99
|
end
|
10
100
|
|
11
|
-
|
12
|
-
|
13
|
-
p_token,p_token_value = nil
|
14
|
-
|
15
|
-
unless substatement
|
16
|
-
c_token,c_token_value = @scanner.get_token
|
17
|
-
else
|
18
|
-
c_token,c_token_value = substatement[token_index]
|
19
|
-
end
|
20
|
-
|
21
|
-
parenth = 0
|
22
|
-
|
23
|
-
while (c_token != nil)
|
24
|
-
unless substatement
|
25
|
-
@scanner.token_index += 1
|
26
|
-
n_token, n_token_value = @scanner.get_token
|
27
|
-
else
|
28
|
-
token_index += 1
|
29
|
-
n_token, n_token_value = substatement[token_index]
|
30
|
-
end
|
31
|
-
|
32
|
-
unless n_token == " "
|
33
|
-
case c_token
|
34
|
-
|
35
|
-
when "and"
|
36
|
-
unless (n_token =~ /not|statement|\(|\+|-/) || (scanner.token_index == scanner.arguments.size)
|
37
|
-
raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement' or '('. Found '#{n_token_value}'"
|
38
|
-
end
|
39
|
-
|
40
|
-
if p_token == nil
|
41
|
-
raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'and'"
|
42
|
-
elsif (p_token == "and" || p_token == "or")
|
43
|
-
raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'and'"
|
44
|
-
end
|
45
|
-
|
46
|
-
when "or"
|
47
|
-
unless (n_token =~ /not|statement|\(|\+|-/) || (scanner.token_index == scanner.arguments.size)
|
48
|
-
raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement', '('. Found '#{n_token_value}'"
|
49
|
-
end
|
50
|
-
|
51
|
-
if p_token == nil
|
52
|
-
raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'or'"
|
53
|
-
elsif (p_token == "and" || p_token == "or")
|
54
|
-
raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'or'"
|
55
|
-
end
|
56
|
-
|
57
|
-
when "not"
|
58
|
-
unless n_token =~ /statement|\(|not|\+|-/
|
59
|
-
raise "Error at column #{scanner.token_index}. \nExpected 'statement' or '('. Found '#{n_token_value}'"
|
60
|
-
end
|
61
|
-
|
62
|
-
when "statement"
|
63
|
-
if c_token_value.is_a? Array
|
64
|
-
if substatement
|
65
|
-
raise "Error at column #{scanner.token_index}\nError, cannot define '[' in a '[...]' block."
|
66
|
-
end
|
67
|
-
parse(c_token_value, 0)
|
68
|
-
end
|
69
|
-
|
70
|
-
if c_token_value =~ /!=/
|
71
|
-
c_token_value = c_token_value.gsub("!=", "=")
|
72
|
-
@execution_stack << {"not" => "not"}
|
73
|
-
end
|
74
|
-
|
75
|
-
unless n_token =~ /and|or|\)/
|
76
|
-
unless n_token.nil?
|
77
|
-
raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
when "+"
|
82
|
-
unless n_token =~ /and|or|\)/
|
83
|
-
unless n_token.nil?
|
84
|
-
raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
when "-"
|
89
|
-
unless n_token =~ /and|or|\)/
|
90
|
-
unless n_token.nil?
|
91
|
-
raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
|
96
|
-
when ")"
|
97
|
-
unless (n_token =~ /|and|or|not|\(/)
|
98
|
-
unless n_token.nil?
|
99
|
-
raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', 'not' or '('. Found '#{n_token_value}'"
|
100
|
-
end
|
101
|
-
end
|
102
|
-
parenth += 1
|
103
|
-
|
104
|
-
when "("
|
105
|
-
unless n_token =~ /statement|not|\(|\+|-/
|
106
|
-
raise "Error at column #{scanner.token_index}. \nExpected 'statement', '(', not. Found '#{n_token_value}'"
|
107
|
-
end
|
108
|
-
parenth -= 1
|
109
|
-
|
110
|
-
else
|
111
|
-
raise "Unexpected token found at column #{scanner.token_index}. '#{c_token_value}'"
|
112
|
-
end
|
113
|
-
|
114
|
-
unless n_token == " " || substatement
|
115
|
-
@execution_stack << {c_token => c_token_value}
|
116
|
-
end
|
117
|
-
|
118
|
-
p_token, p_token_value = c_token, c_token_value
|
119
|
-
c_token, c_token_value = n_token, n_token_value
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
return if substatement
|
124
|
-
|
125
|
-
if parenth < 0
|
126
|
-
raise "Error. Missing parentheses ')'."
|
127
|
-
elsif parenth > 0
|
128
|
-
raise "Error. Missing parentheses '('."
|
129
|
-
end
|
101
|
+
unless n_token == " " || substatement
|
102
|
+
@execution_stack << {c_token => c_token_value}
|
130
103
|
end
|
104
|
+
|
105
|
+
p_token = c_token
|
106
|
+
c_token = n_token
|
107
|
+
c_token_value = n_token_value
|
108
|
+
end
|
109
|
+
|
110
|
+
return if substatement
|
111
|
+
|
112
|
+
raise "Error. Missing parentheses ')'." if parenth < 0
|
113
|
+
raise "Error. Missing parentheses '('." if parenth > 0
|
131
114
|
end
|
115
|
+
end
|
132
116
|
end
|
data/lib/parser/scanner.rb
CHANGED
@@ -1,165 +1,164 @@
|
|
1
1
|
module JGrep
|
2
|
-
|
3
|
-
|
2
|
+
class Scanner
|
3
|
+
attr_accessor :arguments, :token_index
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
def initialize(arguments)
|
6
|
+
@token_index = 0
|
7
|
+
@arguments = arguments
|
8
|
+
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
# Scans the input string and identifies single language tokens
|
11
|
+
def get_token
|
12
|
+
return nil if @token_index >= @arguments.size
|
13
|
+
|
14
|
+
begin
|
15
|
+
case chr(@arguments[@token_index])
|
16
|
+
when "["
|
17
|
+
return "statement", gen_substatement
|
18
|
+
|
19
|
+
when "]"
|
20
|
+
return "]"
|
21
|
+
|
22
|
+
when "("
|
23
|
+
return "(", "("
|
24
|
+
|
25
|
+
when ")"
|
26
|
+
return ")", ")"
|
27
|
+
|
28
|
+
when "n"
|
29
|
+
if (chr(@arguments[@token_index + 1]) == "o") && (chr(@arguments[@token_index + 2]) == "t") && ((chr(@arguments[@token_index + 3]) == " ") || (chr(@arguments[@token_index + 3]) == "("))
|
30
|
+
@token_index += 2
|
31
|
+
return "not", "not"
|
32
|
+
else
|
33
|
+
gen_statement
|
34
|
+
end
|
35
|
+
|
36
|
+
when "!"
|
37
|
+
return "not", "not"
|
38
|
+
|
39
|
+
when "a"
|
40
|
+
if (chr(@arguments[@token_index + 1]) == "n") && (chr(@arguments[@token_index + 2]) == "d") && ((chr(@arguments[@token_index + 3]) == " ") || (chr(@arguments[@token_index + 3]) == "("))
|
41
|
+
@token_index += 2
|
42
|
+
return "and", "and"
|
43
|
+
else
|
44
|
+
gen_statement
|
45
|
+
end
|
46
|
+
|
47
|
+
when "&"
|
48
|
+
if chr(@arguments[@token_index + 1]) == "&"
|
49
|
+
@token_index += 1
|
50
|
+
return "and", "and"
|
51
|
+
else
|
52
|
+
gen_statement
|
53
|
+
end
|
15
54
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
when "("
|
25
|
-
return "(", "("
|
26
|
-
|
27
|
-
when ")"
|
28
|
-
return ")", ")"
|
29
|
-
|
30
|
-
when "n"
|
31
|
-
if (chr(@arguments[@token_index + 1]) == "o") && (chr(@arguments[@token_index + 2]) == "t") && ((chr(@arguments[@token_index + 3]) == " ") || (chr(@arguments[@token_index + 3]) == "("))
|
32
|
-
@token_index += 2
|
33
|
-
return "not", "not"
|
34
|
-
else
|
35
|
-
gen_statement
|
36
|
-
end
|
37
|
-
|
38
|
-
when "!"
|
39
|
-
return "not", "not"
|
40
|
-
|
41
|
-
when "a"
|
42
|
-
if (chr(@arguments[@token_index + 1]) == "n") && (chr(@arguments[@token_index + 2]) == "d") && ((chr(@arguments[@token_index + 3]) == " ") || (chr(@arguments[@token_index + 3]) == "("))
|
43
|
-
@token_index += 2
|
44
|
-
return "and", "and"
|
45
|
-
else
|
46
|
-
gen_statement
|
47
|
-
end
|
48
|
-
|
49
|
-
when "&"
|
50
|
-
if(chr(@arguments[@token_index +1]) == "&")
|
51
|
-
@token_index +=1
|
52
|
-
return "and", "and"
|
53
|
-
else
|
54
|
-
gen_statement
|
55
|
-
end
|
56
|
-
|
57
|
-
when "o"
|
58
|
-
if (chr(@arguments[@token_index + 1]) == "r") && ((chr(@arguments[@token_index + 2]) == " ") || (chr(@arguments[@token_index + 2]) == "("))
|
59
|
-
@token_index += 1
|
60
|
-
return "or", "or"
|
61
|
-
else
|
62
|
-
gen_statement
|
63
|
-
end
|
64
|
-
|
65
|
-
when "|"
|
66
|
-
if(chr(@arguments[@token_index +1]) == "|")
|
67
|
-
@token_index +=1
|
68
|
-
return "or", "or"
|
69
|
-
else
|
70
|
-
gen_statement
|
71
|
-
end
|
72
|
-
|
73
|
-
when "+"
|
74
|
-
value = ""
|
75
|
-
i = @token_index + 1
|
76
|
-
|
77
|
-
begin
|
78
|
-
value += chr(@arguments[i])
|
79
|
-
i += 1
|
80
|
-
end until (i >= @arguments.size) || (chr(@arguments[i]) =~ /\s|\)/)
|
81
|
-
|
82
|
-
@token_index = i - 1
|
83
|
-
return "+", value
|
84
|
-
|
85
|
-
when "-"
|
86
|
-
value = ""
|
87
|
-
i = @token_index + 1
|
88
|
-
|
89
|
-
begin
|
90
|
-
value += chr(@arguments[i])
|
91
|
-
i += 1
|
92
|
-
end until (i >= @arguments.size) || (chr(@arguments[i]) =~ /\s|\)/)
|
93
|
-
|
94
|
-
@token_index = i - 1
|
95
|
-
return "-", value
|
96
|
-
|
97
|
-
|
98
|
-
when " "
|
99
|
-
return " ", " "
|
100
|
-
|
101
|
-
else
|
102
|
-
gen_statement
|
103
|
-
end
|
104
|
-
end
|
105
|
-
rescue NoMethodError => e
|
106
|
-
raise "Error. Expression cannot be parsed."
|
107
|
-
end
|
55
|
+
when "o"
|
56
|
+
if (chr(@arguments[@token_index + 1]) == "r") && ((chr(@arguments[@token_index + 2]) == " ") || (chr(@arguments[@token_index + 2]) == "("))
|
57
|
+
@token_index += 1
|
58
|
+
return "or", "or"
|
59
|
+
else
|
60
|
+
gen_statement
|
61
|
+
end
|
108
62
|
|
109
|
-
|
110
|
-
|
63
|
+
when "|"
|
64
|
+
if chr(@arguments[@token_index + 1]) == "|"
|
111
65
|
@token_index += 1
|
112
|
-
|
66
|
+
return "or", "or"
|
67
|
+
else
|
68
|
+
gen_statement
|
69
|
+
end
|
113
70
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
end
|
71
|
+
when "+"
|
72
|
+
value = ""
|
73
|
+
i = @token_index + 1
|
118
74
|
|
119
|
-
|
120
|
-
|
75
|
+
begin
|
76
|
+
value += chr(@arguments[i])
|
77
|
+
i += 1
|
78
|
+
end until (i >= @arguments.size) || (chr(@arguments[i]) =~ /\s|\)/)
|
121
79
|
|
122
|
-
|
123
|
-
|
124
|
-
j = @token_index
|
125
|
-
|
126
|
-
begin
|
127
|
-
if (chr(@arguments[j]) == "/")
|
128
|
-
begin
|
129
|
-
current_token_value << chr(@arguments[j])
|
130
|
-
j += 1
|
131
|
-
if chr(@arguments[j]) == "/"
|
132
|
-
current_token_value << "/"
|
133
|
-
break
|
134
|
-
end
|
135
|
-
end until (j >= @arguments.size) || (chr(@arguments[j]) =~ /\//)
|
136
|
-
else
|
137
|
-
begin
|
138
|
-
current_token_value << chr(@arguments[j])
|
139
|
-
j += 1
|
140
|
-
if chr(@arguments[j]) =~ /'|"/
|
141
|
-
begin
|
142
|
-
current_token_value << chr(@arguments[j])
|
143
|
-
j +=1
|
144
|
-
end until (j >= @arguments.size) || (chr(@arguments[j]) =~ /'|"/)
|
145
|
-
end
|
146
|
-
end until (j >= @arguments.size) || (chr(@arguments[j]) =~ /\s|\)|\]/)
|
147
|
-
end
|
148
|
-
rescue Exception => e
|
149
|
-
raise "Invalid token found - '#{current_token_value}'"
|
150
|
-
end
|
80
|
+
@token_index = i - 1
|
81
|
+
return "+", value
|
151
82
|
|
152
|
-
|
153
|
-
|
154
|
-
|
83
|
+
when "-"
|
84
|
+
value = ""
|
85
|
+
i = @token_index + 1
|
86
|
+
|
87
|
+
begin
|
88
|
+
value += chr(@arguments[i])
|
89
|
+
i += 1
|
90
|
+
end until (i >= @arguments.size) || (chr(@arguments[i]) =~ /\s|\)/)
|
91
|
+
|
92
|
+
@token_index = i - 1
|
93
|
+
return "-", value
|
94
|
+
|
95
|
+
when " "
|
96
|
+
return " ", " "
|
155
97
|
|
156
|
-
|
157
|
-
|
98
|
+
else
|
99
|
+
gen_statement
|
158
100
|
end
|
101
|
+
end
|
102
|
+
rescue NoMethodError
|
103
|
+
raise "Error. Expression cannot be parsed."
|
104
|
+
end
|
105
|
+
|
106
|
+
private
|
107
|
+
|
108
|
+
def gen_substatement
|
109
|
+
@token_index += 1
|
110
|
+
returnval = []
|
111
|
+
|
112
|
+
while (val = get_token) != "]"
|
113
|
+
@token_index += 1
|
114
|
+
returnval << val unless val[0] == " "
|
115
|
+
end
|
159
116
|
|
160
|
-
|
161
|
-
|
162
|
-
|
117
|
+
returnval
|
118
|
+
end
|
119
|
+
|
120
|
+
def gen_statement
|
121
|
+
current_token_value = ""
|
122
|
+
j = @token_index
|
123
|
+
|
124
|
+
begin
|
125
|
+
if chr(@arguments[j]) == "/"
|
126
|
+
begin
|
127
|
+
current_token_value << chr(@arguments[j])
|
128
|
+
j += 1
|
129
|
+
if chr(@arguments[j]) == "/"
|
130
|
+
current_token_value << "/"
|
131
|
+
break
|
132
|
+
end
|
133
|
+
end until (j >= @arguments.size) || (chr(@arguments[j]) =~ /\//)
|
134
|
+
else
|
135
|
+
begin
|
136
|
+
current_token_value << chr(@arguments[j])
|
137
|
+
j += 1
|
138
|
+
if chr(@arguments[j]) =~ /'|"/
|
139
|
+
begin
|
140
|
+
current_token_value << chr(@arguments[j])
|
141
|
+
j += 1
|
142
|
+
end until (j >= @arguments.size) || (chr(@arguments[j]) =~ /'|"/)
|
143
|
+
end
|
144
|
+
end until (j >= @arguments.size) || (chr(@arguments[j]) =~ /\s|\)|\]/ && chr(@arguments[j - 1]) != '\\')
|
163
145
|
end
|
146
|
+
rescue
|
147
|
+
raise "Invalid token found - '#{current_token_value}'"
|
148
|
+
end
|
149
|
+
|
150
|
+
if current_token_value =~ /^(and|or|not|!)$/
|
151
|
+
raise "Class name cannot be 'and', 'or', 'not'. Found '#{current_token_value}'"
|
152
|
+
end
|
153
|
+
|
154
|
+
@token_index += current_token_value.size - 1
|
155
|
+
|
156
|
+
["statement", current_token_value]
|
157
|
+
end
|
158
|
+
|
159
|
+
# Compatibility with 1.8.7, which returns a Fixnum from String#[]
|
160
|
+
def chr(character)
|
161
|
+
character.chr unless character.nil?
|
164
162
|
end
|
163
|
+
end
|
165
164
|
end
|