RubyGems - jgrep - Versions diffs - 1.0.0 - Mend

jgrep 1.0.0

Files changed (6) hide show

data/bin/jgrep ADDED Viewed

@@ -0,0 +1,16 @@
#!/usr/bin/env ruby
# Command-line entry point for jgrep.
#
# Usage: jgrep EXPRESSION < documents.json
#
# Reads a JSON document list from STDIN, filters it with the logical
# expression given as the first argument and prints the matching documents
# as JSON. Nothing is printed when no document matches.
require 'jgrep'

# The expression is mandatory. Report the problem and stop instead of raising
# an exception only to rescue it again a few lines later.
if ARGV.empty?
    STDERR.puts "Please pass a valid argument to jgrep"
    exit 1
end

json = STDIN.read
result = JGrep::jgrep(json, ARGV[0])

# JGrep.jgrep returns a JSON string; "[]" is the encoding of "no matches".
puts result unless result == "[]"

data/jgrep.gemspec ADDED Viewed

@@ -0,0 +1,16 @@
# Gem specification for jgrep 1.0.0.
#
# Notes on what is intentionally absent:
# * has_rdoc and default_executable are deprecated RubyGems attributes whose
#   values are ignored, so they are no longer set.
Gem::Specification.new do |s|
    s.name = "jgrep"
    s.version = "1.0.0"
    s.authors = ["P Loubser"]
    s.date = %q{2011-07-19}
    s.description = "Compare a list of json documents to a simple logical language and returns matches as output"
    s.email = ["ploubser@gmail.com"]
    s.executables = ["jgrep"]
    s.files = ["jgrep.gemspec", "bin/jgrep", Dir.glob("lib/*"), Dir.glob("lib/parser/*")].flatten
    s.homepage = "https://github.com/psy1337/JSON-Grep"
    s.require_paths = ["lib"]
    s.summary = s.description
    # lib/jgrep.rb requires 'json' at load time, so declare it as a runtime
    # dependency instead of relying on it happening to be installed.
    s.add_dependency "json"
end

data/lib/jgrep.rb ADDED Viewed

@@ -0,0 +1,164 @@
+#! /usr/lib/env ruby
+require 'parser/parser.rb'
+require 'parser/scanner.rb'
+require 'rubygems'
+require 'json'
# Filters JSON documents with a small logical language made of
# "key<op>value" statements combined with and / or / not and parentheses.
module JGrep
    # A string holding a (possibly negative) whole number.
    INTEGER_PATTERN = /\A-?\d+\z/
    # A string holding a (possibly negative) decimal number. The dot is
    # escaped so that values such as "1x5" are not mistaken for floats.
    FLOAT_PATTERN = /\A-?\d+\.\d+\z/
    # Splits "key<op>value" at the FIRST comparison operator; everything after
    # that operator belongs to the value, even if it contains "=" itself.
    STATEMENT_PATTERN = /\A(.*?)(<=|>=|=|<|>)(.*)\z/m

    # Parses json and returns the documents that match the logical expression.
    #
    # json       - String holding a JSON array of documents. A single JSON
    #              object is accepted too and treated as a one-document list.
    # expression - String in the jgrep expression language.
    #
    # Returns a JSON String with the array of matching documents.
    # Prints an error to STDERR and exits with status 1 when the JSON is
    # invalid or a NameError/NoMethodError escapes the evaluation. Errors
    # raised by Parser for malformed expressions are not rescued here.
    def self.jgrep(json, expression)
        call_stack = Parser.new(expression).execution_stack
        documents = JSON.parse(json)
        # Hash#each would yield [key, value] pairs rather than documents.
        documents = [documents] if documents.is_a?(Hash)
        matches = documents.select { |document| eval_statement(document, call_stack) }
        matches.to_json
    rescue NameError => e
        var = e.to_s
        STDERR.puts "Error. #{var.match(/`(.*)'/)} was not found in documents"
        exit 1
    rescue JSON::ParserError
        STDERR.puts "Error. Invalid JSON given"
        exit 1
    end

    # Coerces both operands so that they can be compared with each other.
    #
    # kvalue - value found in the document (any JSON type).
    # value  - String taken from the expression.
    #
    # Returns [kvalue, value]. When BOTH sides are numeric (numbers or numeric
    # strings) they are returned as Integers, or as Floats if either side has
    # a fractional part. Otherwise both are returned untouched, so a
    # non-numeric partner never raises.
    def self.format(kvalue, value)
        left = numeric_value(kvalue)
        right = numeric_value(value)
        return kvalue, value if left.nil? || right.nil?
        return left, right if left.is_a?(Integer) && right.is_a?(Integer)
        return left.to_f, right.to_f
    end

    # Checks whether the document satisfies a single "key<op>value" statement.
    #
    # document  - Hash (or array element) to inspect.
    # statement - String such as "a.b>=5"; dots in the key walk nested hashes.
    #
    # When the key path runs into an Array, the remaining path is tested
    # against every element and one matching element is enough.
    # A missing key, or values that cannot be ordered against each other,
    # count as "no match".
    #
    # Returns true or false.
    # Raises NameError when the statement contains no comparison operator.
    def self.has_object?(document, statement)
        key, op, value = split_statement(statement)
        segments = key.split(".")
        current = document
        segments.each_with_index do |segment, index|
            current = descend(current, segment)
            if current.is_a?(Array)
                # Hand the WHOLE remaining path to the array elements.
                remainder = segments[(index + 1)..-1].join(".")
                return is_object_in_array?(current, "#{remainder}#{op}#{value}")
            end
        end
        actual, expected = format(current, value.delete("\"'"))
        compare(actual, op, expected)
    end

    # Checks whether any element of the array satisfies the statement.
    #
    # document  - Array of candidate elements.
    # statement - String "key<op>value", relative to each element. An empty
    #             key compares the element itself.
    #
    # Returns true or false.
    def self.is_object_in_array?(document, statement)
        document.any? { |item| has_object?(item, statement) }
    end

    # Checks a complex statement (written as [key=value ...]) where all the
    # conditions must hold for the SAME element of an array.
    #
    # document - Hash to inspect.
    # compound - Array of [token, value] pairs as produced by the scanner,
    #            e.g. [["statement", "a.b=1"], ["and", "and"], ...].
    #
    # The key of the first statement locates the array. Each statement is then
    # evaluated relative to the array elements. If no array lies on that path
    # the statements are evaluated against the document itself.
    #
    # Returns true or false (false for a compound without statements).
    def self.has_complex?(document, compound)
        anchor = compound.find { |token| token[0] == "statement" }
        return false unless anchor

        key, = split_statement(anchor[1])
        current = document
        key.split(".").each_with_index do |segment, depth|
            current = descend(current, segment)
            next unless current.is_a?(Array)
            return current.any? { |element| complex_match?(element, compound, depth + 1) }
        end
        complex_match?(document, compound, 0)
    end

    # Evaluates the call stack and returns true if the document matches the
    # logical expression.
    #
    # document  - Hash to inspect.
    # callstack - Array of single-entry Hashes {token => value} as built by
    #             Parser#execution_stack.
    #
    # Returns true or false (false for an empty call stack).
    # Raises ArgumentError when the call stack is not a well-formed expression.
    def self.eval_statement(document, callstack)
        tokens = []
        callstack.each do |expression|
            kind = expression.keys.first
            payload = expression.values.first
            case kind
            when "statement"
                tokens << (payload.is_a?(Array) ? has_complex?(document, payload) : has_object?(document, payload))
            when "and"
                tokens << "&&"
            when "or"
                tokens << "||"
            when "(", ")"
                tokens << kind
            when "not"
                tokens << "!"
            end
        end
        evaluate_logic(tokens)
    end

    # Returns the numeric reading of candidate, or nil if it has none.
    def self.numeric_value(candidate)
        case candidate
        when Numeric
            candidate
        when String
            if candidate =~ INTEGER_PATTERN
                candidate.to_i
            elsif candidate =~ FLOAT_PATTERN
                candidate.to_f
            end
        end
    end

    # Splits a statement into [key, operator, value].
    # NameError keeps the failure inside the family jgrep already rescues.
    def self.split_statement(statement)
        parts = STATEMENT_PATTERN.match(statement)
        unless parts
            raise NameError, "`#{statement}' is not a comparison (expected key, operator and value)"
        end
        return parts[1], parts[2], parts[3]
    end

    # Follows one key segment; anything that is not a Hash has no children.
    def self.descend(node, segment)
        node.is_a?(Hash) ? node[segment] : nil
    end

    # Applies the operator; operands that cannot be ordered never match.
    def self.compare(actual, op, expected)
        return actual == expected if op == "="
        ordering = actual <=> expected
        return false if ordering.nil?
        case op
        when "<=" then ordering <= 0
        when ">=" then ordering >= 0
        when "<" then ordering < 0
        when ">" then ordering > 0
        end
    end

    # Evaluates every token of a compound against one array element, with the
    # statement keys made relative by dropping the first prefix_length segments.
    def self.complex_match?(element, compound, prefix_length)
        tokens = []
        compound.each do |token|
            case token[0]
            when "and"
                tokens << "&&"
            when "or"
                tokens << "||"
            when "not", "!"
                tokens << "!"
            when "(", ")"
                tokens << token[0]
            when "statement"
                key, op, value = split_statement(token[1])
                segments = key.split(".")
                relative = segments.drop(prefix_length)
                # A key shorter than the array prefix falls back to its last segment.
                relative = segments.last(1) if relative.empty?
                tokens << has_object?(element, "#{relative.join(".")}#{op}#{value}")
            end
        end
        evaluate_logic(tokens)
    end

    # Evaluates a token list of booleans, "!", "&&", "||", "(" and ")" with
    # the usual precedence (! binds tighter than &&, && tighter than ||).
    # This replaces eval on a string-joined expression.
    def self.evaluate_logic(tokens)
        return false if tokens.empty?
        result, position = parse_or(tokens, 0)
        raise ArgumentError, "Malformed logical expression" unless position == tokens.size
        result
    end

    # or-expression := and-expression ("||" and-expression)*
    def self.parse_or(tokens, position)
        left, position = parse_and(tokens, position)
        while tokens[position] == "||"
            right, position = parse_and(tokens, position + 1)
            left = left || right
        end
        return left, position
    end

    # and-expression := unary ("&&" unary)*
    def self.parse_and(tokens, position)
        left, position = parse_unary(tokens, position)
        while tokens[position] == "&&"
            right, position = parse_unary(tokens, position + 1)
            left = left && right
        end
        return left, position
    end

    # unary := "!" unary | "(" or-expression ")" | boolean
    def self.parse_unary(tokens, position)
        case tokens[position]
        when "!"
            value, position = parse_unary(tokens, position + 1)
            return !value, position
        when "("
            value, position = parse_or(tokens, position + 1)
            raise ArgumentError, "Malformed logical expression: missing ')'" unless tokens[position] == ")"
            return value, position + 1
        when true, false
            return tokens[position], position + 1
        else
            raise ArgumentError, "Malformed logical expression near '#{tokens[position]}'"
        end
    end

    private_class_method :numeric_value, :split_statement, :descend, :compare,
                         :complex_match?, :evaluate_logic, :parse_or, :parse_and, :parse_unary
end

data/lib/parser/parser.rb ADDED Viewed

@@ -0,0 +1,112 @@
module JGrep
    # Validates the token stream produced by Scanner and records it as an
    # execution stack: an Array of single-entry Hashes {token => value}.
    class Parser
        attr_reader :scanner, :execution_stack

        # args - the expression handed to Scanner.new.
        #
        # Parsing happens immediately; a malformed expression raises a
        # RuntimeError describing the problem.
        def initialize(args)
            @scanner = Scanner.new(args)
            @execution_stack = []
            parse
        end

        # Parse the input one token at a time and construct the call stack.
        #
        # substatement - optional Array of [token, value] pairs (the content
        #                of a [...] block). When given, the tokens are only
        #                validated and nothing is pushed on the stack.
        # token_index  - start index inside substatement.
        #
        # Raises RuntimeError when a token is followed by one that cannot
        # legally come after it, or when parentheses do not pair up.
        def parse(substatement=nil, token_index = 0)
            p_token = nil
            c_token, c_token_value = substatement ? substatement[token_index] : @scanner.get_token
            # Number of "(" still waiting for their ")".
            depth = 0

            while c_token
                if substatement
                    token_index += 1
                    n_token, n_token_value = substatement[token_index]
                else
                    @scanner.token_index += 1
                    n_token, n_token_value = @scanner.get_token
                end
                # Whitespace carries no meaning: keep the current token and
                # fetch the token after the blank.
                next if n_token == " "

                case c_token
                when " "
                    # Leading whitespace before the first real token.
                    c_token, c_token_value = n_token, n_token_value
                    next
                when "and"
                    validate_operator("and", "'not', 'statement' or '('", p_token, n_token, n_token_value)
                when "or"
                    validate_operator("or", "'not', 'statement', '('", p_token, n_token, n_token_value)
                when "not"
                    unless ["statement", "(", "not"].include?(n_token)
                        raise "Error at column #{scanner.token_index}. \nExpected 'statement' or '('. Found '#{n_token_value}'"
                    end
                when "statement"
                    if c_token_value.is_a? Array
                        if substatement
                            raise "Error at column #{scanner.token_index}\nError, cannot define '[' in a '[...]' block."
                        end
                        # Validate the tokens inside the [...] block.
                        parse(c_token_value, 0)
                    end
                    unless n_token.nil? || ["and", "or", ")"].include?(n_token)
                        raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
                    end
                when ")"
                    # Only an operator, another ")" or the end of input can
                    # follow a closed group.
                    unless n_token.nil? || ["and", "or", ")"].include?(n_token)
                        raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or' or ')'. Found '#{n_token_value}'"
                    end
                    # A ")" with nothing open can never be balanced later.
                    raise "Error. Missing parentheses '('." if depth == 0
                    depth -= 1
                when "("
                    unless ["statement", "not", "("].include?(n_token)
                        raise "Error at column #{scanner.token_index}. \nExpected 'statement', '(',  not. Found '#{n_token_value}'"
                    end
                    depth += 1
                else
                    raise "Unexpected token found at column #{scanner.token_index}. '#{c_token_value}'"
                end

                @execution_stack << {c_token => c_token_value} unless substatement
                p_token = c_token
                c_token, c_token_value = n_token, n_token_value
            end

            raise "Error. Missing parentheses ')'." if depth > 0
        end

        private

        # Checks the neighbours of a binary operator ("and" / "or").
        #
        # operator - the operator token being validated.
        # expected - wording of the allowed followers for the error message.
        # p_token  - token before the operator (nil at the start).
        # n_token  - token after the operator (nil at the end of input).
        def validate_operator(operator, expected, p_token, n_token, n_token_value)
            if n_token.nil?
                raise "Error at column #{scanner.token_index}. \n Expression cannot end with '#{operator}'"
            end
            unless ["not", "statement", "("].include?(n_token)
                raise "Error at column #{scanner.token_index}. \nExpected #{expected}. Found '#{n_token_value}'"
            end
            if p_token.nil?
                raise "Error at column #{scanner.token_index}. \n Expression cannot start with '#{operator}'"
            elsif p_token == "and" || p_token == "or"
                raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by '#{operator}'"
            end
        end
    end
end

data/lib/parser/scanner.rb ADDED Viewed

@@ -0,0 +1,146 @@
module JGrep
    # Splits an expression string into language tokens, one call at a time.
    #
    # The caller drives the scan: get_token reads the token that starts at
    # token_index and leaves token_index on the LAST character of that token,
    # so the caller adds 1 before asking for the next one.
    class Scanner
        attr_accessor :arguments, :token_index

        # arguments - the expression String to scan.
        def initialize(arguments)
            @token_index = 0
            @arguments = arguments
        end

        # Scans the input string and identifies single language tokens.
        #
        # Returns nil at the end of input, the String "]" for a closing
        # bracket, and otherwise a [token, value] pair where token is one of
        # "statement", "and", "or", "not", "(", ")" or " ". For a [...] block
        # the value of the "statement" token is an Array of such pairs.
        #
        # Raises RuntimeError for invalid tokens and unterminated brackets.
        def get_token
            return nil if @token_index >= @arguments.size

            case char_at(@token_index)
            when "["
                ["statement", gen_substatement]
            when "]"
                "]"
            when "("
                ["(", "("]
            when ")"
                [")", ")"]
            when "n"
                keyword_or_statement("not")
            when "!"
                ["not", "not"]
            when "a"
                keyword_or_statement("and")
            when "o"
                keyword_or_statement("or")
            when " "
                [" ", " "]
            else
                gen_statement
            end
        rescue NoMethodError
            # Kept so callers still see a RuntimeError for unexpected scanner
            # failures, as before.
            raise "Cannot end statement with 'and', 'or', 'not'"
        end

        private

        # Returns the single character at index, or nil past the end.
        # Indexing the string directly avoids re-splitting the whole
        # expression for every character that is inspected.
        def char_at(index)
            char = @arguments[index, 1]
            (char.nil? || char.empty?) ? nil : char
        end

        # True when char is a single or double quote.
        def quote?(char)
            char == "'" || char == "\""
        end

        # Recognises keyword at token_index when it is followed by a blank or
        # "("; anything else starting with the same letter is a statement.
        def keyword_or_statement(keyword)
            following = char_at(@token_index + keyword.size)
            if @arguments[@token_index, keyword.size] == keyword && (following == " " || following == "(")
                @token_index += keyword.size - 1
                return [keyword, keyword]
            end
            gen_statement
        end

        # Collects the tokens between "[" and the matching "]".
        # Blanks are dropped. On return token_index rests on the "]".
        def gen_substatement
            @token_index += 1
            returnval = []
            while (val = get_token) != "]"
                # Running out of input means the bracket was never closed.
                raise "Error. Missing closing bracket ']'." if val.nil?
                @token_index += 1
                returnval << val unless val[0] == " "
            end
            returnval
        end

        # Reads one statement token starting at token_index.
        #
        # A token that starts with "/" runs up to and including the next "/".
        # Any other token runs until whitespace, ")" or "]"; a quoted section
        # inside it may contain those characters.
        #
        # Returns ["statement", text] and moves token_index to the last
        # character of the token.
        def gen_statement
            current_token_value = ""
            j = @token_index
            begin
                if char_at(j) == "/"
                    loop do
                        current_token_value << char_at(j)
                        j += 1
                        if char_at(j) == "/"
                            current_token_value << "/"
                            break
                        end
                        break if j >= @arguments.size
                    end
                else
                    loop do
                        current_token_value << char_at(j)
                        j += 1
                        if quote?(char_at(j))
                            # Copy the opening quote and everything up to,
                            # but not including, the closing quote.
                            loop do
                                current_token_value << char_at(j)
                                j += 1
                                break if j >= @arguments.size || quote?(char_at(j))
                            end
                        end
                        break if j >= @arguments.size || char_at(j) =~ /\s|\)|\]/
                    end
                end
            rescue StandardError
                raise "Invalid token found - '#{current_token_value}'"
            end
            if current_token_value =~ /^(and|or|not|!)$/
                raise "Class name cannot be 'and', 'or', 'not'. Found '#{current_token_value}'"
            end
            @token_index += current_token_value.size - 1
            ["statement", current_token_value]
        end
    end
end

metadata ADDED Viewed

@@ -0,0 +1,67 @@
+--- !ruby/object:Gem::Specification
+name: jgrep
+version: !ruby/object:Gem::Version
+  prerelease: false
+  segments:
+  - 1
+  - 0
+  - 0
+  version: 1.0.0
+platform: ruby
+authors:
+- P Loubser
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-07-19 00:00:00 +01:00
+default_executable: jgrep
+dependencies: []
+description: Compare a list of json documents to a simple logical language and returns matches as output
+email:
+- ploubser@gmail.com
+executables:
+- jgrep
+extensions: []
+extra_rdoc_files: []
+files:
+- jgrep.gemspec
+- bin/jgrep
+- lib/jgrep.rb
+- lib/parser/parser.rb
+- lib/parser/scanner.rb
+has_rdoc: true
+homepage: https://github.com/psy1337/JSON-Grep
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.3.6
+signing_key:
+specification_version: 3
+summary: Compare a list of json documents to a simple logical language and returns matches as output
+test_files: []