RubyGems - jgrep - Versions diffs - 1.0.0 - Mend

jgrep 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

data/bin/jgrep ADDED Viewed

@@ -0,0 +1,16 @@
+#!/usr/bin/env ruby
+require 'jgrep'
+begin
+    raise "Please pass a valid arugment to jgrep" if ARGV == []
+rescue Exception => e
+    STDERR.puts e
+    exit 1
+end
+json = STDIN.read
+result = JGrep::jgrep((json), ARGV[0])
+puts result unless result == "[]"

data/jgrep.gemspec ADDED Viewed

@@ -0,0 +1,16 @@
+Gem::Specification.new do |s|
+    s.name = "jgrep"
+    s.version = "1.0.0"
+    s.authors = ["P Loubser"]
+    s.date = %q{2011-07-19}
+    s.default_executable = "jgrep"
+    s.description = "Compare a list of json documents to a simple logical language and returns matches as output"
+    s.email = ["ploubser@gmail.com"]
+    s.executables = ["jgrep"]
+    s.files = ["jgrep.gemspec", "bin/jgrep", Dir.glob("lib/*"), Dir.glob("lib/parser/*")].flatten
+    s.has_rdoc = true
+    s.homepage = "https://github.com/psy1337/JSON-Grep"
+    s.require_paths = ["lib"]
+    s.summary = s.description
+end

data/lib/jgrep.rb ADDED Viewed

@@ -0,0 +1,164 @@
+#! /usr/lib/env ruby
+require 'parser/parser.rb'
+require 'parser/scanner.rb'
+require 'rubygems'
+require 'json'
+module JGrep
+    #Method parses json and returns documents that match the logical expression
+    def self.jgrep(json, expression)
+        begin
+            call_stack = Parser.new(expression).execution_stack
+            result = []
+            json = JSON.parse(json)
+            json.each do |document|
+                if eval_statement(document, call_stack)
+                    result << document
+                end
+            end
+            return result.to_json
+        rescue NameError => e
+            var = e.to_s
+            STDERR.puts "Error. #{var.match(/`(.*)'/)} was not found in documents"
+            exit 1
+        rescue JSON::ParserError => e
+            STDERR.puts "Error. Invalid JSON given"
+            exit 1
+        end
+    end
+    #Correctly format values so we can do the correct type of comparison
+    def self.format(kvalue, value)
+        if kvalue =~ /^\d+$/ || value =~ /^\d+$/
+            return Integer(kvalue), Integer(value)
+        elsif kvalue =~ /^\d+.\d+$/ || value =~ /^\d+.\d+$/
+            return Float(kvalue), Float(value)
+        else
+            return kvalue, value
+        end
+    end
+    #Check if key=value is present in document
+    def self.has_object?(document, statement)
+        key,value = statement.split(/<=|>=|=|<|>/)
+        if statement =~ /(<=|>=|<|>|=)/
+            op = $1
+        else
+            op = statement
+        end
+        tmp = document
+        key.split(".").each_with_index do |item,i|
+            tmp = tmp[item]
+            result = false
+            if tmp.is_a? Array
+                return (is_object_in_array?(tmp, "#{key.split(".")[i+1]}#{op}#{value}"))
+            end
+        end
+        tmp, value = format(tmp, value.gsub(/"|'/, ""))
+        case op
+        when "="
+            (tmp == value) ? (return true) : (return false)
+        when "<="
+            (tmp <= value) ? (return true) : (return false)
+        when ">="
+            (tmp >= value) ? (return true) : (return false)
+        when ">"
+            (tmp > value) ? (return true) : (return false)
+        when "<"
+            (tmp < value) ? (return true) : (return false)
+        end
+    end
+    #Check if key=value is present in a sub array
+    def self.is_object_in_array?(document, statement)
+        document.each do |item|
+            if has_object?(item,statement)
+                return true
+           end
+        end
+        return false
+    end
+    #Check if complex statement (defined as [key=value...]) is
+    #present over an array of key value pairs
+    def self.has_complex?(document, compound)
+        field = ""
+        tmp = document
+        result = []
+        fresult = []
+        compound.each do |token|
+            if token[0] == "statement"
+                field = token
+                break
+            end
+        end
+        field = field[1].first.split(/=|<|>/).first
+        field.split(".").each_with_index do |item, i|
+            tmp = tmp[item]
+            if tmp.is_a? Array
+                tmp.each do |doc|
+                    result = []
+                    compound.each do |token|
+                        case token[0]
+                            when "and"
+                                result << "&&"
+                            when "or"
+                                result << "||"
+                            when  /not|\!/
+                                result << "!"
+                            when "statement"
+                                new_statement = token[1].split(".").last
+                                result << has_object?(doc, new_statement)
+                        end
+                    end
+                    fresult << eval(result.join(" "))
+                    (fresult << "||") unless doc == tmp.last
+                end
+                return eval(fresult.join(" "))
+            end
+        end
+    end
+    #Evaluates the call stack en returns true of selected document
+    #matches logical expression
+    def self.eval_statement(document, callstack)
+        result = []
+        callstack.each do |expression|
+            case expression.keys.first
+            when "statement"
+                if  expression.values.first.is_a? Array
+                    result << has_complex?(document, expression.values.first)
+                else
+                    result << has_object?(document, expression.values.first)
+                end
+            when "and"
+                result << "&&"
+            when "or"
+                result << "||"
+            when "("
+                result << "("
+            when ")"
+                result << ")"
+            when "not"
+                result << "!"
+            end
+        end
+        return eval(result.join(" "))
+    end
+end

data/lib/parser/parser.rb ADDED Viewed

@@ -0,0 +1,112 @@
+module JGrep
+    # Checks the order of the tokens produced by Scanner and records them,
+    # as token => value pairs, in execution_stack.
+    class Parser
+        attr_reader :scanner, :execution_stack
+        # args is the expression string; it is parsed immediately.
+        def initialize(args)
+            @scanner = Scanner.new(args)
+            @execution_stack = []
+            parse
+        end
+        # Parse the input string, one token at a time, and construct the call stack.
+        # When substatement (the token list of a [...] block) is given, the
+        # tokens are read from it starting at token_index and are only
+        # validated, not pushed onto the execution stack.
+        # Raises a RuntimeError describing the first grammar violation found.
+        def parse(substatement=nil, token_index = 0)
+            p_token,p_token_value = nil
+            if substatement
+                c_token,c_token_value = substatement[token_index]
+            else
+                c_token,c_token_value = @scanner.get_token
+            end
+            parenth = 0
+            while (c_token != nil)
+                if substatement
+                    token_index += 1
+                    n_token, n_token_value = substatement[token_index]
+                else
+                    @scanner.token_index += 1
+                    n_token, n_token_value = @scanner.get_token
+                end
+                # A blank next token is skipped: the current token is kept and
+                # the following token is fetched on the next pass.
+                unless n_token == " "
+                    case c_token
+                        when "and"
+                            unless (n_token =~ /not|statement|\(/) || (scanner.token_index == scanner.arguments.size)
+                                raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement' or '('. Found '#{n_token_value}'"
+                            end
+                            if p_token == nil
+                                raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'and'"
+                            elsif (p_token == "and" || p_token == "or")
+                                raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'and'"
+                            end
+                        when "or"
+                            unless (n_token =~ /not|statement|\(/) || (scanner.token_index == scanner.arguments.size)
+                                raise "Error at column #{scanner.token_index}. \nExpected 'not', 'statement', '('. Found '#{n_token_value}'"
+                            end
+                            if p_token == nil
+                                raise "Error at column #{scanner.token_index}. \n Expression cannot start with 'or'"
+                            elsif (p_token == "and" || p_token == "or")
+                                raise "Error at column #{scanner.token_index}. \n #{p_token} cannot be followed by 'or'"
+                            end
+                        when "not"
+                            unless n_token =~ /statement|\(|not/
+                                raise "Error at column #{scanner.token_index}. \nExpected 'statement' or '('. Found '#{n_token_value}'"
+                            end
+                        when "statement"
+                            if c_token_value.is_a? Array
+                                if substatement
+                                    raise "Error at column #{scanner.token_index}\nError, cannot define '[' in a '[...]' block."
+                                end
+                                parse(c_token_value, 0)
+                            end
+                            unless n_token =~ /and|or|\)/
+                                unless n_token.nil?
+                                    raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
+                                end
+                            end
+                        when ")"
+                            # A closed group is followed by the same tokens as
+                            # a statement. The previous pattern began with an
+                            # empty alternative and therefore matched anything.
+                            unless n_token =~ /and|or|\)/
+                                unless n_token.nil?
+                                    raise "Error at column #{scanner.token_index}. \nExpected 'and', 'or', ')'. Found '#{n_token_value}'"
+                                end
+                            end
+                            parenth += 1
+                        when "("
+                            unless n_token =~ /statement|not|\(/
+                                raise "Error at column #{scanner.token_index}. \nExpected 'statement', '(',  not. Found '#{n_token_value}'"
+                            end
+                            parenth -= 1
+                        else
+                            raise "Unexpected token found at column #{scanner.token_index}. '#{c_token_value}'"
+                    end
+                    unless n_token == " " || substatement
+                        @execution_stack << {c_token => c_token_value}
+                    end
+                    p_token, p_token_value = c_token, c_token_value
+                    c_token, c_token_value = n_token, n_token_value
+                end
+            end
+            return if substatement
+            # parenth is decremented for "(" and incremented for ")"
+            if parenth < 0
+                raise "Error. Missing parentheses ')'."
+            elsif parenth > 0
+                raise "Error. Missing parentheses '('."
+            end
+        end
+    end
+end

data/lib/parser/scanner.rb ADDED Viewed

@@ -0,0 +1,146 @@
+module JGrep
+    class Scanner
+        attr_accessor :arguments, :token_index
+        def initialize(arguments)
+            @token_index = 0
+            @arguments = arguments
+        end
+        # Scans the input string and identifies single language tokens
+        def get_token
+            if @token_index >= @arguments.size
+                return nil
+            end
+            begin
+                case @arguments.split("")[@token_index]
+                    when "["
+                        return "statement", gen_substatement
+                    when "]"
+                        return "]"
+                    when "("
+                        return "(", "("
+                    when ")"
+                        return ")", ")"
+                    when "n"
+                        if (@arguments.split("")[@token_index + 1] == "o") && (@arguments.split("")[@token_index + 2] == "t") && ((@arguments.split("")[@token_index + 3] == " ") || (@arguments.split("")[@token_index + 3] == "("))
+                            @token_index += 2
+                            return "not", "not"
+                        else
+                            gen_statement
+                        end
+                    when "!"
+                        return "not", "not"
+                    when "a"
+                        if (@arguments.split("")[@token_index + 1] == "n") && (@arguments.split("")[@token_index + 2] == "d") && ((@arguments.split("")[@token_index + 3] == " ") || (@arguments.split("")[@token_index + 3] == "("))
+                            @token_index += 2
+                            return "and", "and"
+                        else
+                            gen_statement
+                        end
+                    when "o"
+                        if (@arguments.split("")[@token_index + 1] == "r") && ((@arguments.split("")[@token_index + 2] == " ") || (@arguments.split("")[@token_index + 2] == "("))
+                            @token_index += 1
+                            return "or", "or"
+                        else
+                            gen_statement
+                        end
+                    when " "
+                        return " ", " "
+                    else
+                        gen_statement
+                end
+            end
+        rescue NoMethodError => e
+            pp e
+            raise "Cannot end statement with 'and', 'or', 'not'"
+        end
+        private
+        def gen_substatement
+            @token_index += 1
+            returnval = []
+            while (val = get_token) != "]"
+                @token_index += 1
+                returnval << val unless val[0] == " "
+            end
+            return returnval
+        end
+        def gen_statement
+            current_token_value = ""
+            j = @token_index
+            begin
+                if (@arguments.split("")[j] == "/")
+                    begin
+                        current_token_value << @arguments.split("")[j]
+                        j += 1
+                        if @arguments.split("")[j] == "/"
+                            current_token_value << "/"
+                            break
+                        end
+                    end until (j >= @arguments.size) || (@arguments.split("")[j] =~ /\//)
+                #Recondsider this bit. Don't see there being a case for it. (Pieter)
+#                elsif (@arguments.split("")[j] =~ /=|<|>/)
+#                    while !(@arguments.split("")[j] =~ /=|<|>/)
+#                        current_token_value << @arguments.split("")[j]
+#                        j += 1
+#                    end
+#
+#                    current_token_value << @arguments.split("")[j]
+#                    j += 1
+#
+#                    if @arguments.split("")[j] == "/"
+#                        begin
+#                            current_token_value << @arguments.split("")[j]
+#                            j += 1
+#                            if @arguments.split("")[j] == "/"
+#                                current_token_value << "/"
+#                                 break
+#                            end
+#                        end until (j >= @arguments.size) || (@arguments.split("")[j] =~ /\//)
+#                    else
+#                        while (j < @arguments.size) && ((@arguments.split("")[j] != " ") && (@arguments.split("")[j] != ")") && @arguments.split("")[j] != "]" )
+#                            current_token_value << @arguments.split("")[j]
+#                            j += 1
+#                        end
+#                    end
+                else
+                    begin
+                        current_token_value << @arguments.split("")[j]
+                        j += 1
+                        if @arguments.split("")[j] =~ /'|"/
+                            begin
+                                current_token_value << @arguments.split("")[j]
+                                j +=1
+                            end until (j >= @arguments.size) || (@arguments.split("")[j] =~ /'|"/)
+                        end
+                    end until (j >= @arguments.size) || (@arguments.split("")[j] =~ /\s|\)|\]/)
+                end
+            rescue Exception => e
+                raise "Invalid token found - '#{current_token_value}'"
+            end
+            if current_token_value =~ /^(and|or|not|!)$/
+                raise "Class name cannot be 'and', 'or', 'not'. Found '#{current_token_value}'"
+            end
+            @token_index += current_token_value.size - 1
+            return "statement", current_token_value
+        end
+    end
+end

metadata ADDED Viewed

@@ -0,0 +1,67 @@
+--- !ruby/object:Gem::Specification
+name: jgrep
+version: !ruby/object:Gem::Version
+  prerelease: false
+  segments:
+  - 1
+  - 0
+  - 0
+  version: 1.0.0
+platform: ruby
+authors:
+- P Loubser
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-07-19 00:00:00 +01:00
+default_executable: jgrep
+dependencies: []
+description: Compare a list of json documents to a simple logical language and returns matches as output
+email:
+- ploubser@gmail.com
+executables:
+- jgrep
+extensions: []
+extra_rdoc_files: []
+files:
+- jgrep.gemspec
+- bin/jgrep
+- lib/jgrep.rb
+- lib/parser/parser.rb
+- lib/parser/scanner.rb
+has_rdoc: true
+homepage: https://github.com/psy1337/JSON-Grep
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.3.6
+signing_key:
+specification_version: 3
+summary: Compare a list of json documents to a simple logical language and returns matches as output
+test_files: []