jgrep 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/jgrep +16 -0
- data/jgrep.gemspec +16 -0
- data/lib/jgrep.rb +164 -0
- data/lib/parser/parser.rb +112 -0
- data/lib/parser/scanner.rb +146 -0
- metadata +67 -0
data/bin/jgrep
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line front end for JGrep.
#
# Reads JSON from STDIN, filters it with the expression given as the first
# command line argument and prints the matching documents as JSON.

require 'jgrep'

# An expression is mandatory: report the problem on STDERR and exit with a
# non-zero status instead of raising an exception only to rescue it again.
if ARGV.empty?
  STDERR.puts "Please pass a valid argument to jgrep"
  exit 1
end

json = STDIN.read

result = JGrep::jgrep(json, ARGV[0])

# Stay silent when nothing matched.
puts result unless result == "[]"
|
data/jgrep.gemspec
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Gem specification for jgrep.
Gem::Specification.new do |s|
  s.name = "jgrep"
  s.version = "1.0.0"

  s.authors = ["P Loubser"]
  s.date = %q{2011-07-19}
  s.description = "Compare a list of json documents to a simple logical language and returns matches as output"
  s.email = ["ploubser@gmail.com"]
  s.executables = ["jgrep"]
  s.files = ["jgrep.gemspec", "bin/jgrep", Dir.glob("lib/*"), Dir.glob("lib/parser/*")].flatten
  s.homepage = "https://github.com/psy1337/JSON-Grep"
  s.require_paths = ["lib"]
  s.summary = s.description

  # default_executable= and has_rdoc= are deprecated setters that newer
  # RubyGems releases no longer provide. Only call them where they still
  # exist so the gemspec loads on old and new RubyGems alike.
  s.default_executable = "jgrep" if s.respond_to?(:default_executable=)
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)
end
|
data/lib/jgrep.rb
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
#! /usr/lib/env ruby
|
2
|
+
|
3
|
+
require 'parser/parser.rb'
|
4
|
+
require 'parser/scanner.rb'
|
5
|
+
require 'rubygems'
|
6
|
+
require 'json'
|
7
|
+
|
8
|
+
# Matches JSON documents against a small logical expression language
# (statements such as "key=value" combined with and/or/not and parentheses).
module JGrep

  # Parses json and returns the documents that match the logical expression.
  #
  # json       - String holding a JSON array of documents. A single JSON
  #              object is treated as a list containing that one document.
  # expression - String in the jgrep language, e.g. "name=bob and not age<18".
  #
  # Returns the matching documents serialised as a JSON array String.
  # Writes a message to STDERR and exits with status 1 when the JSON cannot
  # be parsed or when a NameError (including NoMethodError, e.g. a missing
  # intermediate key in a dotted path) is raised while matching.
  def self.jgrep(json, expression)
    begin
      call_stack = Parser.new(expression).execution_stack
      documents = JSON.parse(json)
      # Hash#each would yield [key, value] pairs rather than documents.
      documents = [documents] if documents.is_a?(Hash)

      matches = documents.select { |document| eval_statement(document, call_stack) }
      return matches.to_json

    rescue NameError => e
      # NameError#name is stable across Ruby versions, whereas the quoting
      # used inside the exception message is not.
      STDERR.puts "Error. `#{e.name}' was not found in documents"
      exit 1

    rescue JSON::ParserError => e
      STDERR.puts "Error. Invalid JSON given"
      exit 1
    end
  end

  # Correctly formats a pair of values so the right type of comparison is done.
  #
  # kvalue - value taken from the document (any JSON type).
  # value  - String value taken from the expression.
  #
  # Returns [kvalue, value]: both as Integers when both look like integers,
  # both as Floats when both look numeric, otherwise both unchanged. The
  # pair is never converted unless BOTH sides are numeric, so a non-numeric
  # side can no longer raise and a Float is no longer truncated to Integer.
  def self.format(kvalue, value)
    integer_like = lambda { |v| v.is_a?(Integer) || (v.is_a?(String) && v =~ /\A-?\d+\z/) }
    number_like = lambda { |v| v.is_a?(Numeric) || (v.is_a?(String) && v =~ /\A-?\d+(\.\d+)?\z/) }

    if integer_like.call(kvalue) && integer_like.call(value)
      # to_i always reads base 10; Integer("08") would be rejected as octal.
      return kvalue.to_i, value.to_i
    elsif number_like.call(kvalue) && number_like.call(value)
      return Float(kvalue), Float(value)
    else
      return kvalue, value
    end
  end

  # Checks if key=value (or key<value, key>=value, ...) holds for document.
  #
  # document  - parsed JSON value, normally a Hash.
  # statement - String "dotted.key<op>value" where op is one of
  #             =, <, >, <=, >=. Quotes in the value are stripped.
  #
  # When an Array is met while walking the dotted key, the remainder of the
  # key is checked against every element of that Array.
  #
  # Returns true or false. Ordering comparisons between values that are not
  # both numbers or both strings return false. A statement without an
  # operator, or a missing intermediate key, raises NoMethodError, which
  # JGrep.jgrep reports to the user.
  def self.has_object?(document, statement)
    key, op, value = split_statement(statement)

    keys = key.split(".")
    tmp = document

    keys.each_with_index do |item, i|
      tmp = tmp[item]
      if tmp.is_a? Array
        # Forward the whole remaining path, not just the next segment.
        rest = keys[(i + 1)..-1].join(".")
        return is_object_in_array?(tmp, "#{rest}#{op}#{value}")
      end
    end

    # value is nil when the statement has no operator; the NoMethodError
    # raised here is what the caller reports for such statements.
    tmp, value = format(tmp, value.gsub(/"|'/, ""))

    return tmp == value if op == "="

    comparable = (tmp.is_a?(Numeric) && value.is_a?(Numeric)) ||
                 (tmp.is_a?(String) && value.is_a?(String))
    return false unless comparable

    case op
    when "<="
      tmp <= value
    when ">="
      tmp >= value
    when ">"
      tmp > value
    when "<"
      tmp < value
    else
      false
    end
  end

  # Checks if key=value is present in a sub array.
  #
  # document  - Array of values to inspect.
  # statement - statement String evaluated against each element.
  #
  # Returns true as soon as one element matches, false otherwise.
  def self.is_object_in_array?(document, statement)
    document.any? { |item| has_object?(item, statement) }
  end

  # Checks if a complex statement (defined as [key=value...]) holds for a
  # single element of an array of key value pairs.
  #
  # document - parsed JSON document.
  # compound - Array of [token, value] pairs as produced by the scanner for
  #            a "[...]" block.
  #
  # The dotted key of the first statement is walked until an Array is
  # found; every statement of the compound is then evaluated, relative to
  # that Array, against each element in turn.
  #
  # Returns true if at least one element satisfies the whole compound,
  # false otherwise (including when no Array lies on the key path).
  def self.has_complex?(document, compound)
    first_statement = compound.find { |token| token[0] == "statement" }
    return false unless first_statement

    field = split_statement(first_statement[1]).first
    tmp = document

    field.split(".").each_with_index do |item, i|
      tmp = tmp[item]
      next unless tmp.is_a? Array

      matched = tmp.any? do |doc|
        parts = []
        compound.each do |token|
          case token[0]
          when "and"
            parts << "&&"
          when "or"
            parts << "||"
          when "not", "!"
            parts << "!"
          when "(", ")"
            # Keep grouping so operator precedence inside [...] is honoured.
            parts << token[0]
          when "statement"
            parts << has_object?(doc, relative_statement(token[1], i + 1))
          end
        end
        # NOTE(review): eval only ever sees true/false and the operator
        # tokens generated above, never raw user input.
        eval(parts.join(" "))
      end

      return matched
    end

    false
  end

  # Evaluates the call stack and returns true if the selected document
  # matches the logical expression.
  #
  # document  - parsed JSON document.
  # callstack - Array of single-pair Hashes ({token => value}) as built by
  #             Parser#execution_stack.
  #
  # Returns the result of the boolean expression (nil for an empty stack).
  def self.eval_statement(document, callstack)
    result = []

    callstack.each do |expression|
      token, value = expression.first

      case token
      when "statement"
        if value.is_a? Array
          result << has_complex?(document, value)
        else
          result << has_object?(document, value)
        end
      when "and"
        result << "&&"
      when "or"
        result << "||"
      when "("
        result << "("
      when ")"
        result << ")"
      when "not"
        result << "!"
      end
    end

    # NOTE(review): eval only ever sees true/false, &&, ||, ! and
    # parentheses generated above, never raw user input.
    return eval(result.join(" "))
  end

  # Splits a statement at its FIRST comparison operator.
  #
  # Returns [key, operator, value]. Without an operator the statement
  # itself is returned as key and operator and the value is nil.
  def self.split_statement(statement)
    match = /\A(.*?)(<=|>=|=|<|>)(.*)\z/m.match(statement)
    return statement, statement, nil unless match

    return match[1], match[2], match[3]
  end

  # Drops the first depth segments of a statement's dotted key so it can be
  # evaluated against an element of the array found at that depth.
  def self.relative_statement(statement, depth)
    key, op, value = split_statement(statement)
    return statement if value.nil?

    remaining = key.split(".")[depth..-1] || []
    "#{remaining.join(".")}#{op}#{value}"
  end

  private_class_method :split_statement, :relative_statement
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
module JGrep
  # Validates the token stream produced by Scanner and builds the execution
  # stack that JGrep.eval_statement evaluates.
  class Parser
    attr_reader :scanner, :execution_stack

    # args - expression String handed to Scanner.new.
    #
    # Parsing happens immediately; the result is available through
    # #execution_stack as an Array of single-pair Hashes ({token => value}).
    def initialize(args)
      @scanner = Scanner.new(args)
      @execution_stack = []
      parse
    end

    # Parse the input string, one token at a time and construct the call stack.
    #
    # substatement - Array of [token, value] pairs from a "[...]" block, or
    #                nil to read tokens from the scanner.
    # token_index  - start position inside substatement.
    #
    # Each token is checked against the token that follows it (single
    # space tokens are skipped). Top level tokens are appended to
    # @execution_stack; sub-statements are validated only.
    #
    # Raises RuntimeError describing the first grammar violation found, or
    # unbalanced parentheses.
    def parse(substatement = nil, token_index = 0)
      p_token = nil

      if substatement
        c_token, c_token_value = substatement[token_index]
      else
        c_token, c_token_value = @scanner.get_token
      end

      # Decremented for "(" and incremented for ")": negative means a ")"
      # is missing, positive means a "(" is missing.
      parenth = 0

      until c_token.nil?
        if substatement
          token_index += 1
          n_token, n_token_value = substatement[token_index]
        else
          @scanner.token_index += 1
          n_token, n_token_value = @scanner.get_token
        end

        # Blanks only separate tokens: keep the current token and look further.
        next if n_token == " "

        case c_token

        when "and"
          unless (n_token =~ /not|statement|\(/) || (scanner.token_index == scanner.arguments.size)
            raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement' or '('. Found '#{n_token_value}'"
          end

          if p_token == nil
            raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'and'"
          elsif (p_token == "and" || p_token == "or")
            raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'and'"
          end

        when "or"
          unless (n_token =~ /not|statement|\(/) || (scanner.token_index == scanner.arguments.size)
            raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement', '('. Found '#{n_token_value}'"
          end

          if p_token == nil
            raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'or'"
          elsif (p_token == "and" || p_token == "or")
            raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'or'"
          end

        when "not"
          unless n_token =~ /statement|\(|not/
            raise "Error at column #{scanner.token_index}. \nExpected 'statement' or '('. Found '#{n_token_value}'"
          end

        when "statement"
          if c_token_value.is_a? Array
            if substatement
              raise "Error at column #{scanner.token_index}\nError, cannot define '[' in a '[...]' block."
            end
            # Validate the tokens inside the [...] block.
            parse(c_token_value, 0)
          end

          unless n_token =~ /and|or|\)/
            unless n_token.nil?
              raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
            end
          end

        when ")"
          # The pattern must not start with an empty alternative, otherwise
          # it matches every token and this check never fires. A closing
          # parenthesis may also be followed by another one: "((a=1))".
          unless (n_token =~ /and|or|not|\(|\)/)
            unless n_token.nil?
              raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', 'not', '(' or ')'. Found '#{n_token_value}'"
            end
          end
          parenth += 1

        when "("
          unless n_token =~ /statement|not|\(/
            raise "Error at column #{scanner.token_index}. \nExpected 'statement', '(', not. Found '#{n_token_value}'"
          end
          parenth -= 1

        else
          raise "Unexpected token found at column #{scanner.token_index}. '#{c_token_value}'"
        end

        @execution_stack << {c_token => c_token_value} unless substatement

        p_token = c_token
        c_token, c_token_value = n_token, n_token_value
      end

      return if substatement

      if parenth < 0
        raise "Error. Missing parentheses ')'."
      elsif parenth > 0
        raise "Error. Missing parentheses '('."
      end
    end
  end
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
module JGrep
  # Character level tokenizer for jgrep expressions.
  #
  # The caller owns the cursor: #get_token reads the token starting at
  # token_index and leaves token_index on the LAST character of that token,
  # so the caller advances by one before asking for the next token.
  class Scanner
    attr_accessor :arguments, :token_index

    # arguments - expression String to tokenize.
    def initialize(arguments)
      @token_index = 0
      @arguments = arguments
    end

    # Scans the input string and identifies single language tokens.
    #
    # Returns nil at the end of input, the bare String "]" for a closing
    # bracket, and otherwise a [token, value] pair where token is one of
    # "statement", "and", "or", "not", "(", ")" or " ". For a "[...]"
    # block the value of the "statement" token is an Array of such pairs.
    #
    # Raises RuntimeError for invalid tokens or an unterminated "[".
    def get_token
      return nil if @token_index >= @arguments.size

      # Split once per call instead of once per character access.
      chars = @arguments.split("")

      case chars[@token_index]
      when "["
        return "statement", gen_substatement

      when "]"
        return "]"

      when "("
        return "(", "("

      when ")"
        return ")", ")"

      when "n"
        keyword_or_statement(chars, "not")

      when "!"
        return "not", "not"

      when "a"
        keyword_or_statement(chars, "and")

      when "o"
        keyword_or_statement(chars, "or")

      when " "
        return " ", " "

      else
        gen_statement
      end
    rescue NoMethodError
      raise "Cannot end statement with 'and', 'or', 'not'"
    end

    private

    # Returns [word, word] when the keyword word starts at the cursor and
    # is followed by a blank or "(", leaving the cursor on its last
    # character. Anything else (e.g. "note=1") is scanned as a statement.
    def keyword_or_statement(chars, word)
      following = chars[@token_index + word.size]

      if chars[@token_index, word.size].join == word && (following == " " || following == "(")
        @token_index += word.size - 1
        return word, word
      end

      gen_statement
    end

    # Collects the tokens between "[" and the matching "]", dropping
    # blanks. The cursor is left on the "]".
    #
    # Raises RuntimeError when the input ends before "]" is found.
    def gen_substatement
      @token_index += 1
      returnval = []

      loop do
        val = get_token
        raise "Error. Missing closing ']' in expression" if val.nil?
        break if val == "]"

        @token_index += 1
        returnval << val unless val[0] == " "
      end

      return returnval
    end

    # Reads a statement token starting at the cursor.
    #
    # A token starting with "/" runs up to and including the next "/".
    # Any other token runs up to the next whitespace, ")" or "]"; a quoted
    # section inside it may contain those characters.
    #
    # Returns ["statement", text] and leaves the cursor on the last
    # character of the token.
    def gen_statement
      chars = @arguments.split("")
      current_token_value = ""
      j = @token_index

      begin
        if chars[j] == "/"
          loop do
            current_token_value << chars[j]
            j += 1
            if chars[j] == "/"
              current_token_value << "/"
              break
            end
            break if j >= @arguments.size
          end
        else
          loop do
            current_token_value << chars[j]
            j += 1
            if chars[j] =~ /'|"/
              # Copy everything up to (not including) the next quote; the
              # outer loop then appends that quote and carries on.
              loop do
                current_token_value << chars[j]
                j += 1
                break if (j >= @arguments.size) || (chars[j] =~ /'|"/)
              end
            end
            break if (j >= @arguments.size) || (chars[j] =~ /\s|\)|\]/)
          end
        end
      rescue StandardError
        raise "Invalid token found - '#{current_token_value}'"
      end

      if current_token_value =~ /\A(and|or|not|!)\z/
        raise "Class name cannot be 'and', 'or', 'not'. Found '#{current_token_value}'"
      end

      @token_index += current_token_value.size - 1
      return "statement", current_token_value
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jgrep
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 1.0.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- P Loubser
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-07-19 00:00:00 +01:00
|
18
|
+
default_executable: jgrep
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: Compare a list of json documents to a simple logical language and returns matches as output
|
22
|
+
email:
|
23
|
+
- ploubser@gmail.com
|
24
|
+
executables:
|
25
|
+
- jgrep
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files: []
|
29
|
+
|
30
|
+
files:
|
31
|
+
- jgrep.gemspec
|
32
|
+
- bin/jgrep
|
33
|
+
- lib/jgrep.rb
|
34
|
+
- lib/parser/parser.rb
|
35
|
+
- lib/parser/scanner.rb
|
36
|
+
has_rdoc: true
|
37
|
+
homepage: https://github.com/psy1337/JSON-Grep
|
38
|
+
licenses: []
|
39
|
+
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
segments:
|
50
|
+
- 0
|
51
|
+
version: "0"
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
segments:
|
57
|
+
- 0
|
58
|
+
version: "0"
|
59
|
+
requirements: []
|
60
|
+
|
61
|
+
rubyforge_project:
|
62
|
+
rubygems_version: 1.3.6
|
63
|
+
signing_key:
|
64
|
+
specification_version: 3
|
65
|
+
summary: Compare a list of json documents to a simple logical language and returns matches as output
|
66
|
+
test_files: []
|
67
|
+
|