jgrep 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.markdown +5 -0
- data/README.markdown +13 -12
- data/Rakefile +6 -1
- data/bin/jgrep +115 -129
- data/lib/jgrep.rb +286 -377
- data/lib/parser/parser.rb +109 -125
- data/lib/parser/scanner.rb +148 -149
- data/spec/Rakefile +3 -3
- data/spec/spec_helper.rb +1 -2
- data/spec/unit/jgrep_spec.rb +233 -233
- data/spec/unit/parser_spec.rb +132 -127
- data/spec/unit/scanner_spec.rb +82 -86
- metadata +6 -18
data/lib/parser/parser.rb
CHANGED
@@ -1,132 +1,116 @@
|
|
1
1
|
module JGrep
  # Validates the token stream of a jgrep filter expression and flattens it
  # into an execution stack.
  #
  # Tokens are pulled from a Scanner one at a time. Each token is checked
  # against the token that follows it, and every accepted top-level token is
  # appended to #execution_stack as a single-entry hash of the form
  # {token_type => token_value}.
  class Parser
    attr_reader :scanner, :execution_stack

    # Builds a Scanner over +args+ and parses the expression immediately.
    #
    # Raises RuntimeError when the expression is not well formed.
    def initialize(args)
      @scanner = Scanner.new(args)
      @execution_stack = []

      parse
    end

    # Parse the input one token at a time and construct the execution stack.
    #
    # substatement - optional Array of [type, value] pairs (the contents of a
    #                '[...]' block). When given, tokens are read from it
    #                instead of the scanner and are only validated; they are
    #                not pushed onto the stack individually.
    # token_index  - index into +substatement+ at which to start.
    #
    # Returns nil. Raises RuntimeError on a syntax error or on unbalanced
    # parentheses.
    def parse(substatement = nil, token_index = 0)
      p_token = nil

      if substatement
        c_token, c_token_value = substatement[token_index]
      else
        c_token, c_token_value = @scanner.get_token
      end

      # Decremented on "(" and incremented on ")"; must end at zero.
      parenth = 0

      until c_token.nil?
        if substatement
          token_index += 1
          n_token, n_token_value = substatement[token_index]
        else
          @scanner.token_index += 1
          n_token, n_token_value = @scanner.get_token
        end

        # Skip whitespace: keep the current token and look further ahead.
        next if n_token == " "

        case c_token
        when "and"
          unless (n_token =~ /not|statement|\(|\+|-/) || (scanner.token_index == scanner.arguments.size)
            raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement' or '('. Found '#{n_token_value}'"
          end

          raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'and'" if p_token.nil?
          raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'and'" if %w[and or].include?(p_token)

        when "or"
          unless (n_token =~ /not|statement|\(|\+|-/) || (scanner.token_index == scanner.arguments.size)
            raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement', '('. Found '#{n_token_value}'"
          end

          raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'or'" if p_token.nil?
          raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'or'" if %w[and or].include?(p_token)

        when "not"
          unless n_token =~ /statement|\(|not|\+|-/
            raise "Error at column #{scanner.token_index}. \nExpected 'statement' or '('. Found '#{n_token_value}'"
          end

        when "statement"
          if c_token_value.is_a?(Array)
            raise "Error at column #{scanner.token_index}\nError, cannot define '[' in a '[...]' block." if substatement

            # Validate the contents of the '[...]' block recursively.
            parse(c_token_value, 0)
          elsif c_token_value.is_a?(String) && c_token_value.include?("!=")
            # Only String values are inspected: matching a regexp against an
            # Array relied on Object#=~, which Ruby 3.2 removed.
            # "a!=b" is rewritten as "not a=b".
            c_token_value = c_token_value.gsub("!=", "=")
            @execution_stack << {"not" => "not"}
          end

          if !n_token.nil? && !n_token.match(/and|or|\)/)
            raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
          end

        when "+", "-"
          if !n_token.nil? && !n_token.match(/and|or|\)/)
            raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
          end

        when ")"
          # The previous check was `!n_token =~ /|and|or|not|\(/`: the `!`
          # bound to n_token, so it evaluated `false =~ regexp` (a
          # NoMethodError on Ruby >= 3.2, always nil before that), and the
          # empty alternative matched every token anyway. A further ")" is
          # accepted so that nested groups such as "((a=1))" keep parsing.
          if n_token && n_token !~ /\A(?:and|or|not|\(|\))\z/
            raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', 'not', '(' or ')'. Found '#{n_token_value}'"
          end

          parenth += 1

        when "("
          unless n_token =~ /statement|not|\(|\+|-/
            raise "Error at column #{scanner.token_index}. \nExpected 'statement', '(', not. Found '#{n_token_value}'"
          end

          parenth -= 1

        else
          raise "Unexpected token found at column #{scanner.token_index}. '#{c_token_value}'"
        end

        # Tokens inside a '[...]' block are only validated; the block itself
        # is pushed as a single statement by the enclosing parse call.
        @execution_stack << {c_token => c_token_value} unless substatement

        p_token = c_token
        c_token = n_token
        c_token_value = n_token_value
      end

      return if substatement

      raise "Error. Missing parentheses ')'." if parenth < 0
      raise "Error. Missing parentheses '('." if parenth > 0
    end
  end
end
|
data/lib/parser/scanner.rb
CHANGED
@@ -1,165 +1,164 @@
|
|
1
1
|
module JGrep
  # Tokenizer for jgrep filter expressions.
  #
  # #get_token inspects the character at #token_index and returns the token
  # that starts there. On return #token_index points at the LAST character of
  # that token; the caller advances it by one before asking for the next one.
  class Scanner
    attr_accessor :arguments, :token_index

    # Message raised whenever the expression cannot be tokenized.
    PARSE_ERROR = "Error. Expression cannot be parsed.".freeze

    # arguments - the expression String to scan.
    def initialize(arguments)
      @token_index = 0
      @arguments = arguments
    end

    # Scans the input string and identifies a single language token.
    #
    # Returns nil at end of input, the bare String "]" for a closing bracket,
    # or a [type, value] pair where type is one of "statement", "and", "or",
    # "not", "(", ")", "+", "-" or " ". For a '[...]' block the value of the
    # "statement" pair is an Array of the pairs found inside it.
    #
    # Raises RuntimeError when the expression cannot be tokenized.
    def get_token
      return nil if @token_index >= @arguments.size

      case chr(@arguments[@token_index])
      when "[" then ["statement", gen_substatement]
      when "]" then "]"
      when "(" then ["(", "("]
      when ")" then [")", ")"]
      when " " then [" ", " "]
      when "!" then ["not", "not"]
      when "n" then keyword_token("not") || gen_statement
      when "a" then keyword_token("and") || gen_statement
      when "o" then keyword_token("or") || gen_statement
      when "&" then doubled_operator_token("&", "and") || gen_statement
      when "|" then doubled_operator_token("|", "or") || gen_statement
      when "+" then gen_signed_value("+")
      when "-" then gen_signed_value("-")
      else gen_statement
      end
    rescue NoMethodError
      # Kept as a safety net for any remaining nil dereference while scanning.
      raise PARSE_ERROR
    end

    private

    # Returns [word, word] when +word+ starts at the current index and is
    # followed by a space or "(", leaving the index on the keyword's last
    # character. Returns nil (index untouched) otherwise.
    def keyword_token(word)
      return nil unless @arguments[@token_index, word.size] == word

      following = chr(@arguments[@token_index + word.size])
      return nil unless following == " " || following == "("

      @token_index += word.size - 1
      [word, word]
    end

    # Recognizes the two-character operators "&&" and "||". Returns
    # [name, name] with the index on the second character, or nil when the
    # next character is not +symbol+.
    def doubled_operator_token(symbol, name)
      return nil unless chr(@arguments[@token_index + 1]) == symbol

      @token_index += 1
      [name, name]
    end

    # Reads the value that follows a leading "+" or "-". The first character
    # after the sign is always consumed; reading then continues up to (not
    # including) the next whitespace or ")".
    #
    # Raises RuntimeError when nothing follows the sign (this used to escape
    # as a TypeError from String#+).
    def gen_signed_value(sign)
      i = @token_index + 1
      raise PARSE_ERROR if i >= @arguments.size

      value = ""
      loop do
        value << chr(@arguments[i])
        i += 1
        break if (i >= @arguments.size) || (chr(@arguments[i]) =~ /\s|\)/)
      end

      @token_index = i - 1
      [sign, value]
    end

    # Collects every non-whitespace token up to the matching "]" and leaves
    # the index on that "]".
    #
    # Raises RuntimeError when the input ends before "]" is found.
    def gen_substatement
      @token_index += 1
      returnval = []

      while (val = get_token) != "]"
        # End of input inside '[...]': report it explicitly rather than
        # relying on nil[0] raising NoMethodError.
        raise PARSE_ERROR if val.nil?

        @token_index += 1
        returnval << val unless val[0] == " "
      end

      returnval
    end

    # Reads a statement starting at the current index.
    #
    # A statement that begins with "/" runs up to and including the next "/".
    # Any other statement runs up to the next whitespace, ")" or "]", except
    # that text between a pair of quote characters is taken verbatim.
    #
    # Returns ["statement", text] and leaves the index on the last character
    # of the statement. Raises RuntimeError for a statement that is exactly
    # one of the reserved words.
    def gen_statement
      current_token_value = ""
      j = @token_index

      begin
        if chr(@arguments[j]) == "/"
          begin
            current_token_value << chr(@arguments[j])
            j += 1
            if chr(@arguments[j]) == "/"
              current_token_value << "/"
              break
            end
          end until (j >= @arguments.size) || (chr(@arguments[j]) =~ /\//)
        else
          begin
            current_token_value << chr(@arguments[j])
            j += 1
            if chr(@arguments[j]) =~ /'|"/
              # Copy the quoted section as-is; the closing quote is appended
              # by the next pass of the outer loop.
              begin
                current_token_value << chr(@arguments[j])
                j += 1
              end until (j >= @arguments.size) || (chr(@arguments[j]) =~ /'|"/)
            end
          end until (j >= @arguments.size) || (chr(@arguments[j]) =~ /\s|\)|\]/)
        end
      rescue
        raise "Invalid token found - '#{current_token_value}'"
      end

      # \A/\z anchor the whole token; ^/$ would also match a single line of a
      # quoted multi-line value.
      if current_token_value =~ /\A(and|or|not|!)\z/
        raise "Class name cannot be 'and', 'or', 'not'. Found '#{current_token_value}'"
      end

      @token_index += current_token_value.size - 1

      ["statement", current_token_value]
    end

    # Compatibility with 1.8.7, which returns a Fixnum from String#[]
    def chr(character)
      character.chr unless character.nil?
    end
  end
end
|