jgrep 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/jgrep +16 -0
- data/jgrep.gemspec +16 -0
- data/lib/jgrep.rb +164 -0
- data/lib/parser/parser.rb +112 -0
- data/lib/parser/scanner.rb +146 -0
- metadata +67 -0
data/bin/jgrep
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line front end for JGrep.
#
# Reads a JSON document list from STDIN, filters it with the expression given
# as the first command line argument and prints the matching documents as JSON.
# Nothing is printed when no document matches.

require 'jgrep'

# An expression is mandatory; report the problem and stop instead of raising
# and immediately rescuing an exception.
if ARGV.empty?
  STDERR.puts "Please pass a valid argument to jgrep"
  exit 1
end

json = STDIN.read

begin
  result = JGrep::jgrep(json, ARGV[0])
rescue RuntimeError => e
  # The expression parser reports syntax problems with plain `raise "..."`
  # (RuntimeError); show the message rather than a stack trace.
  STDERR.puts e.message
  exit 1
end

puts result unless result == "[]"
|
data/jgrep.gemspec
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Gem specification for jgrep 1.0.0.
#
# The deprecated `has_rdoc` and `default_executable` attributes are no longer
# set; `executables` already names the single binary.
Gem::Specification.new do |s|
  s.name = "jgrep"
  s.version = "1.0.0"

  s.authors = ["P Loubser"]
  s.date = %q{2011-07-19}
  s.description = "Compare a list of json documents to a simple logical language and returns matches as output"
  s.email = ["ploubser@gmail.com"]
  s.executables = ["jgrep"]
  s.files = ["jgrep.gemspec", "bin/jgrep", Dir.glob("lib/*"), Dir.glob("lib/parser/*")].flatten
  s.homepage = "https://github.com/psy1337/JSON-Grep"
  s.require_paths = ["lib"]
  s.summary = s.description
end
|
data/lib/jgrep.rb
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
#! /usr/lib/env ruby
|
2
|
+
|
3
|
+
require 'parser/parser.rb'
|
4
|
+
require 'parser/scanner.rb'
|
5
|
+
require 'rubygems'
|
6
|
+
require 'json'
|
7
|
+
|
8
|
+
# Filters a list of JSON documents with a small logical expression language.
#
# Statements have the form `key<op>value`, where `key` is a dot separated path
# into the document and `<op>` is one of `=`, `<`, `>`, `<=`, `>=`.
module JGrep
  # Splits a statement on its FIRST comparison operator, so operator
  # characters inside the value (e.g. `q=x=y`) stay part of the value.
  STATEMENT_PATTERN = /\A(.*?)(<=|>=|=|<|>)(.*)\z/m

  # Whole-string patterns for values that may be compared numerically.
  INTEGER_PATTERN = /\A-?\d+\z/
  NUMBER_PATTERN = /\A-?\d+(\.\d+)?\z/

  # Parser token names mapped to the Ruby operator used when the collected
  # results are evaluated.
  LOGICAL_OPERATORS = {
    "and" => "&&",
    "or"  => "||",
    "not" => "!",
    "("   => "(",
    ")"   => ")"
  }.freeze

  # Parses +json+ and returns, as a JSON string, the documents that match
  # +expression+. A single top-level JSON value is treated as a list of one
  # document.
  #
  # On invalid JSON, a malformed statement or a NameError during matching an
  # error is written to STDERR and the process exits with status 1.
  def self.jgrep(json, expression)
    call_stack = Parser.new(expression).execution_stack
    documents = JSON.parse(json)
    documents = [documents] unless documents.is_a?(Array)

    documents.select { |document| eval_statement(document, call_stack) }.to_json
  rescue NameError => e
    # NameError#name is the missing identifier itself, independent of how the
    # running Ruby version quotes it inside the message text.
    STDERR.puts "Error. #{e.name} was not found in documents"
    exit 1
  rescue ArgumentError => e
    STDERR.puts "Error. #{e.message}"
    exit 1
  rescue JSON::ParserError
    STDERR.puts "Error. Invalid JSON given"
    exit 1
  end

  # Coerces a document value and a query value to a common type so they can
  # be compared.
  #
  # Returns a two element array: both as Integer when both look like
  # integers, both as Float when both look numeric, otherwise the inputs
  # unchanged. Coercion only happens when BOTH sides qualify, so a
  # non-numeric operand never raises.
  def self.format(kvalue, value)
    if integer_like?(kvalue) && integer_like?(value)
      [kvalue.to_i, value.to_i]
    elsif number_like?(kvalue) && number_like?(value)
      [kvalue.to_f, value.to_f]
    else
      [kvalue, value]
    end
  end

  # Returns true when +statement+ (`key<op>value`) holds for +document+.
  #
  # The key is followed segment by segment. When an Array is reached the rest
  # of the key is checked against each element. A missing key, or a
  # non-Hash value in the middle of the path, means "no match".
  #
  # Raises ArgumentError when the statement contains no comparison operator.
  def self.has_object?(document, statement)
    key, op, value = split_statement(statement)
    segments = key.split(".")
    current = document

    segments.each_with_index do |segment, index|
      return false unless current.is_a?(Hash)

      current = current[segment]
      if current.is_a?(Array)
        # Hand the whole remaining path to the elements. An empty remainder
        # makes each element itself the compared value.
        remainder = segments[(index + 1)..-1].join(".")
        return is_object_in_array?(current, "#{remainder}#{op}#{value}")
      end
    end

    left, right = format(current, value.gsub(/"|'/, ""))
    compare(left, op, right)
  end

  # Returns true when +statement+ holds for at least one element of
  # +document+ (an Array).
  def self.is_object_in_array?(document, statement)
    document.any? { |item| has_object?(item, statement) }
  end

  # Evaluates a compound statement (`[key=value and key=value ...]`, given as
  # a list of [type, value] tokens) against the Array found along the key
  # path of its first statement. The compound matches when a single element
  # of that Array satisfies the whole expression.
  #
  # Returns false when the compound has no statement or the path holds no
  # Array. Parenthesis tokens inside the compound are ignored.
  def self.has_complex?(document, compound)
    first_statement = compound.find { |token| token[0] == "statement" }
    return false if first_statement.nil?

    segments = split_statement(first_statement[1]).first.split(".")
    current = document

    segments.each_with_index do |segment, index|
      return false unless current.is_a?(Hash)

      current = current[segment]
      next unless current.is_a?(Array)

      return current.any? { |element| compound_matches?(element, compound, index + 1) }
    end

    false
  end

  # Evaluates the parser's execution stack against +document+ and returns
  # true when the document matches the logical expression.
  def self.eval_statement(document, callstack)
    expression = []

    callstack.each do |entry|
      type = entry.keys.first
      value = entry.values.first

      if type == "statement"
        expression << (value.is_a?(Array) ? has_complex?(document, value) : has_object?(document, value))
      elsif LOGICAL_OPERATORS.key?(type)
        expression << LOGICAL_OPERATORS[type]
      end
    end

    evaluate(expression)
  end

  # Splits a statement into [key, operator, value].
  def self.split_statement(statement)
    match = STATEMENT_PATTERN.match(statement.to_s)
    if match.nil?
      raise ArgumentError, "Invalid statement '#{statement}'. Expected <key><operator><value>"
    end

    [match[1], match[2], match[3]]
  end

  # True for Integers and for Strings made up only of an optional sign and
  # digits.
  def self.integer_like?(candidate)
    return true if candidate.is_a?(Integer)

    candidate.is_a?(String) && !(candidate =~ INTEGER_PATTERN).nil?
  end

  # True for Numerics and for Strings holding an integer or decimal number.
  def self.number_like?(candidate)
    return true if candidate.is_a?(Numeric)

    candidate.is_a?(String) && !(candidate =~ NUMBER_PATTERN).nil?
  end

  # Applies comparison operator +op+. Operands that cannot be compared with
  # each other (nil, mixed types) count as "no match" instead of raising.
  def self.compare(left, op, right)
    case op
    when "="  then left == right
    when "<=" then left <= right
    when ">=" then left >= right
    when ">"  then left > right
    when "<"  then left < right
    else false
    end
  rescue ArgumentError, NoMethodError
    false
  end

  # Evaluates every token of +compound+ against one array +element+. The
  # first +depth+ key segments (the path that led to the array) are removed
  # from each statement before it is checked.
  def self.compound_matches?(element, compound, depth)
    expression = []

    compound.each do |type, value|
      case type
      when "and" then expression << "&&"
      when "or"  then expression << "||"
      when "not" then expression << "!"
      when "statement"
        key, op, operand = split_statement(value)
        relative_key = (key.split(".")[depth..-1] || []).join(".")
        expression << has_object?(element, "#{relative_key}#{op}#{operand}")
      end
    end

    evaluate(expression)
  end

  # Evaluates a list of booleans and Ruby operator strings. The list is built
  # only from true/false and the fixed operator strings above, never from
  # document or query text, which is why eval is acceptable here.
  def self.evaluate(expression)
    eval(expression.join(" ")) ? true : false
  end

  private_class_method :split_statement, :integer_like?, :number_like?,
                       :compare, :compound_matches?, :evaluate
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
module JGrep
  # Checks that the tokens produced by Scanner form a valid expression and
  # records them, in order, as the execution stack.
  class Parser
    attr_reader :scanner, :execution_stack

    # Builds a Scanner for +args+ and parses the whole expression at once.
    # Raises RuntimeError when the expression is malformed.
    def initialize(args)
      @scanner = Scanner.new(args)
      @execution_stack = []
      parse
    end

    # Parses the input one token at a time and constructs the execution stack.
    #
    # substatement - token list of a `[...]` block to validate, or nil to
    #                read tokens from the scanner.
    # token_index  - start position inside +substatement+.
    #
    # Each token is checked against the token before and after it. Only
    # top-level tokens are pushed onto the execution stack; a `[...]` block
    # is validated recursively and pushed as one "statement" entry whose
    # value is its token list.
    #
    # Raises RuntimeError describing the first syntax error found.
    def parse(substatement = nil, token_index = 0)
      p_token = nil
      c_token, c_token_value = substatement ? substatement[token_index] : @scanner.get_token

      # Goes down for every "(" and up for every ")"; must end at zero.
      parenth = 0

      until c_token.nil?
        if substatement
          token_index += 1
          n_token, n_token_value = substatement[token_index]
        else
          @scanner.token_index += 1
          n_token, n_token_value = @scanner.get_token
        end

        # Whitespace is never a "next" token: keep the current token and
        # look one position further.
        next if n_token == " "

        case c_token
        when "and"
          # A connective needs a right hand side, so end of input (nil) is
          # rejected here as well.
          unless ["not", "statement", "("].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement' or '('. Found '#{n_token_value}'"
          end

          if p_token.nil?
            raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'and'"
          elsif p_token == "and" || p_token == "or"
            raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'and'"
          end

        when "or"
          unless ["not", "statement", "("].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement', '('. Found '#{n_token_value}'"
          end

          if p_token.nil?
            raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'or'"
          elsif p_token == "and" || p_token == "or"
            raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'or'"
          end

        when "not"
          unless ["statement", "(", "not"].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'statement' or '('. Found '#{n_token_value}'"
          end

        when "statement"
          if c_token_value.is_a?(Array)
            if substatement
              raise "Error at column #{scanner.token_index}\nError, cannot define '[' in a '[...]' block."
            end
            parse(c_token_value, 0)
          end

          unless n_token.nil? || ["and", "or", ")"].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
          end

        when ")"
          # After a closing parenthesis only a connective, another ")" or
          # the end of the expression can follow.
          unless n_token.nil? || ["and", "or", ")"].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
          end
          parenth += 1

        when "("
          unless ["statement", "not", "("].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'statement', '(', not. Found '#{n_token_value}'"
          end
          parenth -= 1

        else
          raise "Unexpected token found at column #{scanner.token_index}. '#{c_token_value}'"
        end

        @execution_stack << { c_token => c_token_value } unless substatement

        p_token = c_token
        c_token, c_token_value = n_token, n_token_value
      end

      # Parenthesis balance is only checked for the top-level expression.
      return if substatement

      if parenth < 0
        raise "Error. Missing parentheses ')'."
      elsif parenth > 0
        raise "Error. Missing parentheses '('."
      end
    end
  end
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
module JGrep
  # Turns the expression string into tokens, one get_token call at a time.
  #
  # A token is a two element array [type, value], except the closing
  # bracket which is returned as the plain string "]". Types are
  # "statement", "and", "or", "not", "(", ")" and " ".
  class Scanner
    attr_accessor :arguments, :token_index

    # Words that are operators when followed by a space or "(".
    KEYWORDS = ["not", "and", "or"].freeze

    # arguments - the expression string to scan.
    def initialize(arguments)
      @token_index = 0
      @arguments = arguments
    end

    # Scans the input string and identifies single language tokens.
    #
    # Returns the token starting at token_index, or nil at the end of the
    # input. On return token_index points at the LAST character of the
    # token; the caller advances by one to reach the next token.
    #
    # Raises RuntimeError for an unterminated `[` block or a statement that
    # is a bare keyword.
    def get_token
      return nil if @token_index >= @arguments.size

      case char_at(@token_index)
      when "[" then ["statement", gen_substatement]
      when "]" then "]"
      when "(" then ["(", "("]
      when ")" then [")", ")"]
      when "!" then ["not", "not"]
      when " " then [" ", " "]
      else keyword_token || gen_statement
      end
    end

    private

    # Character at +index+, or nil when +index+ is outside the input.
    def char_at(index)
      char = @arguments[index, 1]
      (char.nil? || char.empty?) ? nil : char
    end

    # Returns a keyword token when one of KEYWORDS starts at token_index and
    # is followed by a space or "(", moving token_index to the keyword's
    # last character. Returns nil otherwise.
    def keyword_token
      KEYWORDS.each do |word|
        next unless @arguments[@token_index, word.size] == word
        next unless [" ", "("].include?(char_at(@token_index + word.size))

        @token_index += word.size - 1
        return [word, word]
      end

      nil
    end

    # Collects the tokens of a `[...]` block, without whitespace tokens.
    # Leaves token_index on the closing "]".
    def gen_substatement
      @token_index += 1
      tokens = []

      loop do
        token = get_token
        raise "Error. Missing ']' to close '[' block." if token.nil?
        break if token == "]"

        @token_index += 1
        tokens << token unless token[0] == " "
      end

      tokens
    end

    # Reads one statement token starting at token_index.
    #
    # A token that starts with "/" runs up to and including the next "/".
    # Any other token runs until whitespace, ")" or "]"; text between quote
    # characters is copied verbatim, so quoted values may contain those
    # terminators.
    def gen_statement
      current_token_value = ""
      j = @token_index

      begin
        if char_at(j) == "/"
          loop do
            current_token_value << char_at(j)
            j += 1
            if char_at(j) == "/"
              current_token_value << "/"
              break
            end
            break if j >= @arguments.size
          end
        else
          loop do
            current_token_value << char_at(j)
            j += 1
            if char_at(j) =~ /'|"/
              # Copy the opening quote and everything up to (not including)
              # the closing quote; the outer loop then adds the closing one.
              loop do
                current_token_value << char_at(j)
                j += 1
                break if j >= @arguments.size || char_at(j) =~ /'|"/
              end
            end
            break if j >= @arguments.size || char_at(j) =~ /\s|\)|\]/
          end
        end
      rescue StandardError
        raise "Invalid token found - '#{current_token_value}'"
      end

      if current_token_value =~ /\A(and|or|not|!)\z/
        raise "Class name cannot be 'and', 'or', 'not'. Found '#{current_token_value}'"
      end

      @token_index += current_token_value.size - 1
      ["statement", current_token_value]
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jgrep
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 1.0.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- P Loubser
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-07-19 00:00:00 +01:00
|
18
|
+
default_executable: jgrep
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: Compare a list of json documents to a simple logical language and returns matches as output
|
22
|
+
email:
|
23
|
+
- ploubser@gmail.com
|
24
|
+
executables:
|
25
|
+
- jgrep
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files: []
|
29
|
+
|
30
|
+
files:
|
31
|
+
- jgrep.gemspec
|
32
|
+
- bin/jgrep
|
33
|
+
- lib/jgrep.rb
|
34
|
+
- lib/parser/parser.rb
|
35
|
+
- lib/parser/scanner.rb
|
36
|
+
has_rdoc: true
|
37
|
+
homepage: https://github.com/psy1337/JSON-Grep
|
38
|
+
licenses: []
|
39
|
+
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
segments:
|
50
|
+
- 0
|
51
|
+
version: "0"
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
segments:
|
57
|
+
- 0
|
58
|
+
version: "0"
|
59
|
+
requirements: []
|
60
|
+
|
61
|
+
rubyforge_project:
|
62
|
+
rubygems_version: 1.3.6
|
63
|
+
signing_key:
|
64
|
+
specification_version: 3
|
65
|
+
summary: Compare a list of json documents to a simple logical language and returns matches as output
|
66
|
+
test_files: []
|
67
|
+
|