jgrep 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/jgrep ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby

# Command line entry point for jgrep.
#
# Reads a JSON document list from STDIN, filters it with the expression given
# as the first command line argument and prints the matching documents.

require 'jgrep'

# An expression is mandatory: report the problem and fail instead of raising
# and immediately rescuing an exception just to print a message.
if ARGV.empty?
  STDERR.puts "Please pass a valid argument to jgrep"
  exit 1
end

json = STDIN.read

result = JGrep::jgrep(json, ARGV[0])

# JGrep.jgrep returns a JSON string; "[]" means no document matched, in which
# case nothing is printed.
puts result unless result == "[]"
data/jgrep.gemspec ADDED
@@ -0,0 +1,16 @@
1
# Gem specification for jgrep 1.0.0.
Gem::Specification.new do |s|
  s.name = "jgrep"
  s.version = "1.0.0"

  s.authors = ["P Loubser"]
  s.date = %q{2011-07-19}
  s.description = "Compare a list of json documents to a simple logical language and returns matches as output"
  s.email = ["ploubser@gmail.com"]
  s.executables = ["jgrep"]
  # Paths are globbed relative to the directory the gem is built from.
  s.files = ["jgrep.gemspec", "bin/jgrep", Dir.glob("lib/*"), Dir.glob("lib/parser/*")].flatten
  s.homepage = "https://github.com/psy1337/JSON-Grep"
  s.require_paths = ["lib"]
  s.summary = s.description

  # default_executable= and has_rdoc= are deprecated RubyGems attributes.
  # They are only set when the running RubyGems still provides the setters,
  # so the specification loads either way.
  s.default_executable = "jgrep" if s.respond_to?(:default_executable=)
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)
end
data/lib/jgrep.rb ADDED
@@ -0,0 +1,164 @@
1
+ #! /usr/lib/env ruby
2
+
3
+ require 'parser/parser.rb'
4
+ require 'parser/scanner.rb'
5
+ require 'rubygems'
6
+ require 'json'
7
+
8
+ module JGrep
9
+
10
# Parses json and returns, as a JSON string, the documents that match the
# logical expression.
#
# json       - String holding either a JSON array of documents or a single
#              JSON object (treated as a one-document list).
# expression - String in the jgrep expression language, handed to Parser.
#
# Returns the matching documents serialised as a JSON array.
#
# On a NameError (which includes NoMethodError) raised while evaluating, or
# on invalid JSON, an error is written to STDERR and the process exits with
# status 1.
def self.jgrep(json, expression)
  call_stack = Parser.new(expression).execution_stack

  documents = JSON.parse(json)
  # A lone object would otherwise be iterated as key/value pairs.
  documents = [documents] if documents.is_a?(Hash)

  documents.select { |document| eval_statement(document, call_stack) }.to_json
rescue NameError => e
  # Prefer the name carried by the exception; scraping it out of the message
  # depends on how a given Ruby version quotes identifiers.
  missing = e.name ? "`#{e.name}'" : e.to_s.match(/`(.*)'/)
  STDERR.puts "Error. #{missing} was not found in documents"
  exit 1
rescue JSON::ParserError
  STDERR.puts "Error. Invalid JSON given"
  exit 1
end
33
+
34
# Correctly format values so we can do the correct type of comparison.
#
# kvalue - value found in the document (any JSON type).
# value  - String taken from the statement being evaluated.
#
# Returns the pair [kvalue, value]. When BOTH sides are numeric they are
# converted: to Integers if both are whole numbers, otherwise to Floats.
# If either side is not numeric the pair is returned untouched, so a numeric
# looking statement value never forces a conversion of a non-numeric document
# value (which used to raise).
def self.format(kvalue, value)
  left = coerce_number(kvalue)
  right = coerce_number(value)

  return kvalue, value if left.nil? || right.nil?
  return left, right if left.is_a?(Integer) && right.is_a?(Integer)
  return left.to_f, right.to_f
end

# Internal helper for format: turns a Numeric or a decimal numeric String
# into a number and returns nil for anything else.
def self.coerce_number(raw)
  return raw if raw.is_a?(Numeric)
  return nil unless raw.is_a?(String)

  case raw
  when /\A-?\d+\z/
    # to_i is base 10, so "010" is ten rather than an octal literal.
    raw.to_i
  when /\A-?\d+\.\d+\z/
    raw.to_f
  end
end
44
+
45
# Check if key<op>value is present in document.
#
# document  - Hash (or, when called for array elements with an empty key,
#             any value) to inspect.
# statement - String such as "a.b=1", "age>=30" or "name='bob'". The key is
#             a dot separated path; the operator is one of = < > <= >=.
#
# Returns true when the value found at the path satisfies the comparison,
# false otherwise. A missing key, a non-Hash value in the middle of the
# path, or two values that cannot be ordered all yield false. When an
# Array is met on the path the rest of the path is checked against each
# element via is_object_in_array?.
#
# Raises NameError when the statement contains no comparison operator.
def self.has_object?(document, statement)
  # Split on the FIRST operator only, so the value itself may contain one.
  parts = statement.match(/\A(.*?)(<=|>=|=|<|>)(.*)\z/m)
  if parts.nil?
    raise NameError.new("no comparison operator found in `#{statement}'", statement)
  end
  key, op, value = parts.captures

  current = document
  segments = key.split(".")

  segments.each_with_index do |segment, index|
    # Only hashes are indexed by key; String#[] would do a substring search.
    return false unless current.is_a?(Hash)

    current = current[segment]
    return false if current.nil?

    if current.is_a?(Array)
      # Forward the whole remaining path, not just the next segment.
      remainder = (segments[(index + 1)..-1] || []).join(".")
      return is_object_in_array?(current, "#{remainder}#{op}#{value}")
    end
  end

  current, expected = format(current, value.gsub(/"|'/, ""))

  return current == expected if op == "="

  # <=> answers nil for values that cannot be ordered (e.g. Integer against
  # String) where the relational operators themselves would raise.
  ordering = current <=> expected
  return false if ordering.nil?

  case op
  when "<=" then ordering <= 0
  when ">=" then ordering >= 0
  when ">"  then ordering > 0
  when "<"  then ordering < 0
  end
end
81
+
82
# Reports whether at least one element of a sub array satisfies the
# key<op>value statement, as judged by has_object?.
#
# document  - Array of elements to test.
# statement - String statement passed unchanged to has_object?.
#
# Returns true or false.
def self.is_object_in_array?(document, statement)
  document.any? { |element| has_object?(element, statement) }
end
93
+
94
+
95
# Check if a complex statement (defined as [key=value...]) is present over
# an array of key value pairs.
#
# document - Hash to inspect.
# compound - Array of [type, value] tokens, where type is "statement",
#            "and", "or" or "not". Other token types are ignored.
#
# The path of the first statement is walked until an Array is found; the
# compound expression is then evaluated against each element of that Array
# and the result is true as soon as one element satisfies it.
#
# Returns true or false. false is returned when there is no statement in
# the compound, when the path does not lead to an Array, or when the Array
# is empty.
def self.has_complex?(document, compound)
  anchor = compound.find { |token| token[0] == "statement" }
  return false if anchor.nil?

  path = anchor[1].split(/<=|>=|=|<|>/).first.to_s.split(".")
  current = document

  path.each_with_index do |segment, depth|
    return false unless current.is_a?(Hash)

    current = current[segment]
    next unless current.is_a?(Array)

    return current.any? { |element| complex_match?(element, compound, depth + 1) }
  end

  false
end

# Internal helper for has_complex?: evaluates the whole compound expression
# against one array element. `consumed` is the number of leading path
# segments already used to reach the array.
def self.complex_match?(element, compound, consumed)
  pieces = []

  compound.each do |token|
    case token[0]
    when "and"
      pieces << "&&"
    when "or"
      pieces << "||"
    when /not|\!/
      pieces << "!"
    when "statement"
      matched = has_object?(element, relative_statement(token[1], consumed))
      # Force a real boolean so the expression below is always well formed.
      pieces << (matched ? true : false)
    end
  end

  # The evaluated string holds only boolean literals and the operators
  # pushed above - never text taken from the input.
  eval(pieces.join(" ")) ? true : false
end

# Internal helper for has_complex?: drops the first `consumed` segments of a
# statement's key, leaving the operator and value untouched (the value may
# itself contain dots).
def self.relative_statement(statement, consumed)
  operator_at = statement.index(/<=|>=|=|<|>/)
  return statement if operator_at.nil?

  segments = statement[0...operator_at].split(".")
  remaining = segments[consumed..-1] || []
  remaining.join(".") + statement[operator_at..-1]
end
136
+
137
# Evaluates the call stack and returns the outcome of the logical expression
# for the selected document.
#
# document  - the document being tested.
# callstack - Array of single-entry Hashes, token type => token value.
#
# Statement entries are replaced by the result of has_complex? (Array value)
# or has_object? (any other value); operator and parenthesis entries are
# replaced by their Ruby spelling. Entries of any other type are skipped.
# The pieces are joined with spaces and evaluated.
def self.eval_statement(document, callstack)
  spellings = {"and" => "&&", "or" => "||", "(" => "(", ")" => ")", "not" => "!"}
  pieces = []

  callstack.each do |entry|
    kind = entry.keys.first

    if kind == "statement"
      payload = entry.values.first
      pieces << (payload.is_a?(Array) ? has_complex?(document, payload) : has_object?(document, payload))
    elsif spellings.key?(kind)
      pieces << spellings[kind]
    end
  end

  eval(pieces.join(" "))
end
164
+ end
@@ -0,0 +1,112 @@
1
module JGrep
  # Validates the token stream produced by Scanner and records it as an
  # execution stack: an Array of single-entry Hashes, token type => token
  # value, in the order the tokens were read.
  class Parser
    attr_reader :scanner, :execution_stack

    # args - the expression handed to Scanner.new.
    #
    # Parsing happens immediately; a RuntimeError is raised for an invalid
    # expression.
    def initialize(args)
      @scanner = Scanner.new(args)
      @execution_stack = []
      parse
    end

    # Parse the input, one token at a time, and construct the call stack.
    #
    # substatement - Array of [type, value] tokens from a '[...]' block, or
    #                nil to read tokens from the scanner.
    # token_index  - index of the first token to read from substatement.
    #
    # Each token is checked against the token that follows it. Tokens are
    # appended to execution_stack only when reading from the scanner; a
    # substatement is validated but not recorded separately (it is already
    # the value of its enclosing statement token).
    #
    # Raises RuntimeError describing the first problem found.
    def parse(substatement=nil, token_index = 0)
      p_token = nil

      if substatement
        c_token, c_token_value = substatement[token_index]
      else
        c_token, c_token_value = @scanner.get_token
      end

      # Decremented for '(' and incremented for ')'; zero means balanced.
      parenth = 0

      until c_token.nil?
        if substatement
          token_index += 1
          n_token, n_token_value = substatement[token_index]
        else
          @scanner.token_index += 1
          n_token, n_token_value = @scanner.get_token
        end

        # Whitespace is skipped by fetching the following token while the
        # current token stays in place.
        next if n_token == " "

        case c_token

        when "and"
          # An operator needs a right hand side, also at the end of input.
          unless ["not", "statement", "("].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement' or '('. Found '#{n_token_value}'"
          end

          if p_token.nil?
            raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'and'"
          elsif p_token == "and" || p_token == "or"
            raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'and'"
          end

        when "or"
          unless ["not", "statement", "("].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement', '('. Found '#{n_token_value}'"
          end

          if p_token.nil?
            raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'or'"
          elsif p_token == "and" || p_token == "or"
            raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'or'"
          end

        when "not"
          unless ["statement", "(", "not"].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'statement' or '('. Found '#{n_token_value}'"
          end

        when "statement"
          if c_token_value.is_a? Array
            if substatement
              raise "Error at column #{scanner.token_index}\nError, cannot define '[' in a '[...]' block."
            end
            parse(c_token_value, 0)
          end

          unless n_token.nil? || ["and", "or", ")"].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
          end

        when ")"
          # A closing parenthesis may end the expression or be followed by
          # an operator or another closing parenthesis.
          unless n_token.nil? || ["and", "or", ")"].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or' or ')'. Found '#{n_token_value}'"
          end
          parenth += 1

          # A ')' without a preceding '(' is reported right away; a later
          # '(' must not be able to balance the count.
          if parenth > 0 && !substatement
            raise "Error. Missing parentheses '('."
          end

        when "("
          unless ["statement", "not", "("].include?(n_token)
            raise "Error at column #{scanner.token_index}. \nExpected 'statement', '(', not. Found '#{n_token_value}'"
          end
          parenth -= 1

        else
          raise "Unexpected token found at column #{scanner.token_index}. '#{c_token_value}'"
        end

        @execution_stack << {c_token => c_token_value} unless substatement

        p_token = c_token
        c_token, c_token_value = n_token, n_token_value
      end

      return if substatement

      if parenth < 0
        raise "Error. Missing parentheses ')'."
      elsif parenth > 0
        raise "Error. Missing parentheses '('."
      end
    end
  end
end
@@ -0,0 +1,146 @@
1
module JGrep
  # Splits an expression string into language tokens, one per call to
  # get_token. The caller advances token_index between calls.
  class Scanner
    attr_accessor :token_index
    attr_reader :arguments

    # arguments - the expression String to scan.
    def initialize(arguments)
      @token_index = 0
      self.arguments = arguments
    end

    # Replaces the expression being scanned. The string is split into
    # characters once here instead of on every character access.
    def arguments=(arguments)
      @arguments = arguments
      @chars = arguments.split("")
    end

    # Scans the input string and identifies single language tokens.
    #
    # Returns nil at the end of the input, the String "]" for a closing
    # bracket, and otherwise a [type, value] pair where type is one of
    # "statement", "and", "or", "not", "(", ")" or " ". For a '[...]' block
    # the value of the "statement" token is an Array of such pairs.
    #
    # For multi character tokens token_index is left on the token's last
    # character.
    #
    # Raises RuntimeError for an unterminated '[' block or a statement that
    # consists solely of a keyword.
    def get_token
      return nil if @token_index >= @arguments.size

      case char_at(@token_index)
      when "["
        return "statement", gen_substatement
      when "]"
        return "]"
      when "("
        return "(", "("
      when ")"
        return ")", ")"
      when "!"
        return "not", "not"
      when " "
        return " ", " "
      when "n"
        keyword_or_statement("not")
      when "a"
        keyword_or_statement("and")
      when "o"
        keyword_or_statement("or")
      else
        gen_statement
      end
    end

    private

    # Character at the given position, or nil past the end of the input.
    def char_at(index)
      @chars[index]
    end

    # Returns the keyword token when `word` starts at token_index and is
    # followed by a space or '('; otherwise scans a statement from here.
    def keyword_or_statement(word)
      spelled = (1...word.size).all? { |offset| char_at(@token_index + offset) == word[offset, 1] }
      follower = char_at(@token_index + word.size)

      if spelled && (follower == " " || follower == "(")
        @token_index += word.size - 1
        return word, word
      end

      gen_statement
    end

    # Collects the tokens of a '[...]' block, leaving token_index on the
    # closing ']'. Whitespace tokens are dropped.
    def gen_substatement
      @token_index += 1
      tokens = []

      loop do
        token = get_token
        raise "Error. Missing ']' at the end of a '[...]' block." if token.nil?
        break if token == "]"

        @token_index += 1
        tokens << token unless token[0] == " "
      end

      tokens
    end

    # Reads one statement starting at token_index: either a /.../ delimited
    # run, or everything up to the next whitespace, ')' or ']'. Quoted
    # sections inside a statement may contain those characters.
    def gen_statement
      current_token_value = ""
      j = @token_index

      begin
        if char_at(j) == "/"
          # Copy up to and including the next '/', or to the end of input.
          loop do
            current_token_value << char_at(j)
            j += 1
            if char_at(j) == "/"
              current_token_value << "/"
              break
            end
            break if j >= @arguments.size
          end
        else
          loop do
            current_token_value << char_at(j)
            j += 1

            opening = char_at(j)
            if opening =~ /'|"/
              # Copy the quoted text; only the same quote character closes
              # it, so "it's" is kept whole.
              loop do
                current_token_value << char_at(j)
                j += 1
                break if j >= @arguments.size || char_at(j) == opening
              end
            end

            break if j >= @arguments.size || char_at(j) =~ /\s|\)|\]/
          end
        end
      rescue StandardError
        raise "Invalid token found - '#{current_token_value}'"
      end

      if current_token_value =~ /\A(and|or|not|!)\z/
        raise "Class name cannot be 'and', 'or', 'not'. Found '#{current_token_value}'"
      end

      @token_index += current_token_value.size - 1
      return "statement", current_token_value
    end
  end
end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jgrep
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 0
8
+ - 0
9
+ version: 1.0.0
10
+ platform: ruby
11
+ authors:
12
+ - P Loubser
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-07-19 00:00:00 +01:00
18
+ default_executable: jgrep
19
+ dependencies: []
20
+
21
+ description: Compare a list of json documents to a simple logical language and returns matches as output
22
+ email:
23
+ - ploubser@gmail.com
24
+ executables:
25
+ - jgrep
26
+ extensions: []
27
+
28
+ extra_rdoc_files: []
29
+
30
+ files:
31
+ - jgrep.gemspec
32
+ - bin/jgrep
33
+ - lib/jgrep.rb
34
+ - lib/parser/parser.rb
35
+ - lib/parser/scanner.rb
36
+ has_rdoc: true
37
+ homepage: https://github.com/psy1337/JSON-Grep
38
+ licenses: []
39
+
40
+ post_install_message:
41
+ rdoc_options: []
42
+
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ segments:
50
+ - 0
51
+ version: "0"
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ segments:
57
+ - 0
58
+ version: "0"
59
+ requirements: []
60
+
61
+ rubyforge_project:
62
+ rubygems_version: 1.3.6
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: Compare a list of json documents to a simple logical language and returns matches as output
66
+ test_files: []
67
+