RubyGems - ruby-codex - Versions diffs - 0.0.1 - Mend

ruby-codex 0.0.1

Files changed (4) hide show

data/lib/codex.rb ADDED

@@ -0,0 +1,222 @@
+load 'data_node.rb'
+# Codex builds the five DataNode collectors (block, func, func_chain, cond and
+# ident) that gather statistics about Ruby AST nodes and, for most of them,
+# answer "is this node unusual?" queries.
+#
+# Every collector is a DataNode configured with:
+# * a matcher proc selecting the node types it handles,
+# * key procs, whose results identify one aggregate record,
+# * data procs, evaluated once per occurrence,
+# * combine procs, collapsing all occurrences that share the same keys, and
+# * optionally a query proc returning { :keys, :message } when the queried
+#   node looks unlikely, or nil otherwise.
+class Codex
+  attr_accessor :block, :func, :func_chain, :cond, :ident
+
+  # db     - handed to every DataNode as its first argument.
+  # agg_db - handed to every DataNode as its second argument; DataNode#query
+  #          passes it to the query procs below as their +db+ parameter.
+  #
+  # All procs are plain Procs (not lambdas) on purpose: DataNode calls key and
+  # data procs with (node, file, project), while most of them only declare the
+  # node parameter.
+  def initialize(db, agg_db)
+    # --- helper procs shared by several collectors ---
+
+    # Total of every count the normalizer reports for the node
+    # (nil when the normalizer reports nothing).
+    info = Proc.new do |node|
+      normal_node(node) do |_n, norm|
+        norm.pretty_complexity.values.reduce(:+)
+      end
+    end
+    # Number of :send nodes the normalizer saw inside the node.
+    func_info = Proc.new do |node|
+      normal_node(node) do |_n, norm|
+        norm.pretty_complexity[:send] || 0
+      end
+    end
+    # AST node type as a string, or the Ruby class name for non-node children.
+    type = Proc.new { |node| node.is_a?(AST::Node) ? node.type.to_s : node.class.to_s }
+    # Method name of a :send node, prefixed with "Const." when the receiver is
+    # a constant.
+    func_name = Proc.new do |node|
+      if type.call(node.children.first) == "const"
+        node.children.first.children[1].to_s + "." + node.children[1].to_s
+      else
+        node.children[1].to_s
+      end
+    end
+
+    key = {
+      :type => type
+    }
+    data_core = {
+      :file => Proc.new { |node, f, p| f },
+      :project => Proc.new { |node, f, p| p },
+      :line => Proc.new { |node, f, p| node.loc ? node.loc.line : nil },
+      :info => info,
+      :func_info => func_info,
+      # Best effort: nodes that cannot be unparsed get nil instead of failing.
+      :orig_code => Proc.new { |node, f, p| Unparser.unparse(node) rescue nil },
+    }
+    combine = {
+      :files => Proc.new { |v| v.map { |x| x[:file] }.uniq },
+      :file_count => Proc.new { |v| v.map { |x| x[:file] }.uniq.count },
+      :projects => Proc.new { |v| v.map { |x| x[:project] }.uniq },
+      :project_count => Proc.new { |v| v.map { |x| x[:project] }.uniq.count },
+      # Keep at most ten randomly chosen examples per aggregate record.
+      :orig_code => Proc.new { |v| v.sample(10).map do |x|
+          {:code => x[:orig_code], :file => x[:file], :line => x[:line]}
+        end.uniq },
+      :count => Proc.new { |v| v.map { |x| x[:orig_code] }.count },
+      :info => Proc.new { |v| v.first[:info] }, # some process may overwrite
+      :func_info => Proc.new { |v| v.first[:func_info] }
+    }
+    # Literal types considered when judging what an identifier usually holds.
+    literal_types = ["str", "int", "float", "array", "hash"]
+
+    # --- blocks: keyed by called function, normalized body and return value ---
+    @block = DataNode.new(
+      db, agg_db,
+      Proc.new { |x| x.type == :block},
+      key.merge({
+        :func => Proc.new { |x| func_name.call(x.children.first) },
+        :body => Proc.new { |x| normalize_nodes(x.children.last) },
+        :arg_size => Proc.new { |x| x.children[1].children.size },
+        # Type of the last expression of the body (method name for calls).
+        :ret_val => Proc.new do |x|
+            body = x.children.last
+            ret = type.call(body) == "begin" ? body.children.last : body
+            typ = type.call(ret)
+            typ == "send" ? func_name.call(ret) : typ
+          end,
+        # Normalized source with the receiver of the block's call removed.
+        :norm_code => Proc.new { |x|
+          normal_node(x) do |n|
+            begin
+              stripped = n.children.each_with_index.map do |child, i|
+                i == 0 ? without_caller(child) : child
+              end
+              Unparser.unparse(n.updated(nil, stripped))
+            rescue StandardError
+              nil # best effort, same as the other unparse calls
+            end
+          end }
+      }),
+      data_core.merge({
+        :args => Proc.new { |x| x.children[1].children.map{ |y| y.children[0].to_s }}
+      }),
+      combine.merge({
+        :args_list => Proc.new { |v| v.map { |x| x[:args] } }
+      }),
+      # Unlikely when this function/return-value pair was never seen although
+      # both the function's blocks and the return value were seen separately.
+      Proc.new { |db,keys,vals|
+        query = db.where(:type => keys[:type], :func => keys[:func], :ret_val => keys[:ret_val]).first
+        query_count = query.nil? ? 0 : query.count
+        blocks = db.where(:type => keys[:type], :func => keys[:func]).sum(:count)
+        rets = db.where(:type => keys[:type], :ret_val => keys[:ret_val]).sum(:count)
+        if query_count <= 0 && blocks > 0 && rets > 0
+          { :keys => keys,
+            :message =>
+              "We've seen #{keys[:func]} blocks returning #{keys[:ret_val]} only #{query_count.to_s} " +
+              "times, though we've seen #{keys[:func]} blocks #{blocks.to_s} times and #{keys[:ret_val]} " +
+              "returned #{rets.to_s} times."
+          }
+        end
+      }
+    )
+
+    # --- function calls: keyed by name, normalized code and argument types ---
+    @func = DataNode.new(
+      db, agg_db,
+      Proc.new { |x| x.type == :send},
+      key.merge({
+        :func => Proc.new { |x| func_name.call(x) },
+        :norm_code => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(without_caller(n)) rescue nil } },
+        :sig => Proc.new { |x| x.children.drop(2).map { |y| type.call(y) } }
+      }),
+      data_core,
+      combine,
+      # Unlikely when the most common form of the same function has been seen
+      # more than ten times as often as this exact form.
+      Proc.new { |db,keys,values|
+        query = db.where(keys).first
+        query_count = query.nil? ? 0 : query.count
+        func = db.where(:type => keys[:type], :func => keys[:func]).sort(:count => -1).limit(1).first
+        alt_count = func.nil? ? 0 : func.count
+        # func is never nil inside the branch: a nil func gives alt_count 0.
+        if alt_count > 10 * (query_count + 1)
+          { :keys => keys,
+            :message =>
+              "Function call #{keys[:norm_code]} has appeared #{query_count.to_s} times, but " +
+              "#{func.norm_code} has appeared #{alt_count.to_s} times."
+          }
+        end
+      }
+    )
+
+    # --- call chains (a.f2.f1): keyed by the two method names ---
+    @func_chain = DataNode.new(
+      db, agg_db,
+      Proc.new { |x| x.type == :send && type.call(x.children.first) == "send" },
+      key.merge({
+        :type => Proc.new { "func_chain" },
+        :f1 => Proc.new { |x| func_name.call(x) },
+        :f2 => Proc.new { |x| func_name.call(x.children.first) }
+      }),
+      data_core.merge({
+        :info => Proc.new { 0 },
+        :func_info => Proc.new { 0 }
+      }),
+      combine,
+      # Unlikely when both functions are known but never appeared chained.
+      Proc.new do |db,keys,data|
+        query = db.where(keys).first
+        if query.nil? || query.count <= 0
+          fs = [:f1,:f2].map { |f| db.where(:type => "send", :func => keys[f]).size }
+          unless fs[0] <= 0 || fs[1] <= 0
+            { :keys => keys,
+              :message =>
+                "Function #{keys[:f1]} has appeared #{fs[0].to_s} times " +
+                "and #{keys[:f2]} has appeared #{fs[1].to_s} times, but " +
+                "they haven't appeared together."
+            }
+          end
+        end
+      end
+    )
+
+    # --- conditionals: keyed by normalized condition and branches; no query ---
+    @cond = DataNode.new(
+      db, agg_db,
+      Proc.new { |x| x.type == :if },
+      key.merge({
+        :norm_code => Proc.new { |x| normalize_nodes(x) },
+        :cond => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children.first) }},
+        :iftrue => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children[1]) }},
+        :iffalse => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children[2]) }},
+      }),
+      data_core,
+      combine
+    )
+
+    # --- identifiers: keyed by assigned name, tracking assigned value types ---
+    @ident = DataNode.new(
+      db, agg_db,
+      Proc.new { |x| [:lvasgn, :ivasgn, :cvasgn, :gvasgn].include?(x.type) },
+      key.merge({
+        :type => Proc.new { "ident" },
+        :ident => Proc.new { |x| x.children.first.to_s },
+      }),
+      data_core.merge({
+        :ident_type => Proc.new { |x| type.call(x.children[1]) rescue nil },
+        :info => Proc.new { 0 },
+        :func_info => Proc.new { 0 }
+      }),
+      combine.merge({
+        :ident_types => Proc.new { |v| v.group_by { |y| y[:ident_type] }.map_hash { |x| x.size } }
+      }),
+      # Unlikely when one literal type dominates this identifier (more than
+      # ten times the runner-up) and the queried assignment uses another type.
+      Proc.new { |db, keys, data|
+        query = db.where(keys).first
+        if query
+          types = query.ident_types.select { |k,v| literal_types.include? k }
+          types.default = 0
+          first, second = types.sort_by{ |k,v| v*-1 }.take(2)
+          # With no recorded literal type there is nothing to compare against;
+          # with a single one the runner-up simply counts as zero.
+          if first
+            runner_up = second ? second[1] : 0
+            if first[1] > 10 * (runner_up + 1) && first[0].to_s != data[:ident_type]
+              { :keys => keys,
+                :message =>
+                  "The identifier #{keys[:ident]} has appeared #{first[1].to_s} " +
+                  "times as #{first[0].to_s}, but only #{types[data[:ident_type]].to_s} " +
+                  "times as #{data[:ident_type].to_s}"
+              }
+            end
+          end
+        end
+      }
+    )
+  end
+
+  # Returns a copy of +node+ whose first child (the receiver, for a :send
+  # node) is replaced by nil; all other children are kept.
+  def without_caller(node)
+    node.updated(nil, node.children.map.with_index do |x,i|
+      i == 0 ? nil : x
+    end)
+  end
+
+  # Rewrites +node+ with a fresh ASTNormalizer and yields the rewritten node
+  # together with the normalizer; returns whatever the block returns.
+  def normal_node(node)
+    norm = ASTNormalizer.new
+    yield norm.rewrite_ast(node), norm
+  end
+
+  # Normalized source text for +nodes+, or nil when it cannot be unparsed.
+  def normalize_nodes(nodes)
+    normal_node(nodes) do |n|
+      Unparser.unparse(n) rescue nil
+    end
+  end
+end
+# Core extension used by the ident collector's :ident_types combine proc.
+class Hash
+  # Returns a new Hash with the same keys, where each value is replaced by
+  # the result of yielding it to the block. The receiver is not modified.
+  def map_hash
+    each_with_object({}) { |(key, val), mapped| mapped[key] = yield val }
+  end
+end

data/lib/data_node.rb ADDED

@@ -0,0 +1,91 @@
+require 'parser/current'
+require 'unparser'
+require 'ast'
+require 'mongoid'
+load 'normalize_ast.rb'
+# DataNode collects per-node data points for one kind of AST node, groups
+# them by their computed keys, and can collapse, persist and query them.
+class DataNode
+  # db, agg_db - storage handles; only agg_db is used by this class (records
+  #              are built with agg_db.new and it is passed to the query proc).
+  # type       - proc returning true for the AST nodes this collector handles.
+  # key_procs  - name => proc, called with (ast, file, project); the results
+  #              form the grouping key.
+  # data_procs - name => proc, called with (ast, file, project); the results
+  #              form one data point.
+  # combine    - name => proc, called with the list of data points of a group.
+  # query      - optional proc called with (agg_db, keys, data_point).
+  def initialize(db, agg_db, type, key_procs = {}, data_procs = {}, combine = {}, query = nil)
+    @db = db
+    @agg_db = agg_db
+    @type = type
+    @key_procs = key_procs
+    @data_procs = data_procs
+    @combine = combine
+    @query = query
+    # keys hash => list of data points; lists are created on first access
+    @data = Hash.new { |store, group| store[group] = [] }
+    @processed = {}
+  end
+
+  # Evaluates the key and data procs for +ast+ when the matcher accepts it and
+  # yields (keys, data_point). Returns the block's value, or nil when the node
+  # does not match or no block is given.
+  def process_node(ast, file, project, &block)
+    return nil unless @type.call(ast)
+
+    keys = @key_procs.each_with_object({}) do |(name, fn), acc|
+      acc[name] = fn.call(ast, file, project)
+    end
+    data_point = @data_procs.each_with_object({}) do |(name, fn), acc|
+      acc[name] = fn.call(ast, file, project)
+    end
+    yield keys, data_point if block
+  end
+
+  # Records the data point for +ast+ under its keys and yields
+  # (keys, all data points recorded so far for those keys) when a block is
+  # given. Always returns the full keys => data points hash.
+  def add_ast(ast, file, project, &block)
+    process_node(ast, file, project) do |keys, data_point|
+      bucket = @data[keys]
+      bucket.push(data_point)
+      yield keys, bucket if block
+    end
+    @data
+  end
+
+  # Rebuilds and returns the aggregate records: for each group the combined
+  # values, with the group's keys merged on top.
+  def process_all_ast
+    @processed = {}
+    @data.each_pair do |keys, points|
+      @processed[keys] = collapse_data(points).merge(keys)
+    end
+    @processed
+  end
+
+  # Runs the query proc for +ast+ and returns its result when truthy,
+  # otherwise nil (also nil without a query proc or for non-matching nodes).
+  def query(ast, file = "N/A", project = "N/A")
+    return nil unless @query
+
+    process_node(ast, file, project) do |keys, data|
+      @query.call(@agg_db, keys, data) || nil
+    end
+  end
+
+  # Aggregates everything collected so far and saves one agg_db record per
+  # group, printing a running count. Nothing is saved when there are no key
+  # procs. Existing records are not cleared and the raw data points are not
+  # written to db. Always returns true.
+  def save!
+    process_all_ast
+    unless @key_procs.empty?
+      @processed.each_value.with_index(1) do |record, saved|
+        @agg_db.new(record).save
+        puts saved
+      end
+    end
+    true
+  end
+
+  # Applies every combine proc to +data+ and returns name => combined value.
+  def collapse_data(data)
+    @combine.each_with_object({}) do |(field, reducer), combined|
+      combined[field] = reducer.call(data)
+    end
+  end
+end

data/lib/normalize_ast.rb ADDED

@@ -0,0 +1,107 @@
+require 'parser/current'
+require 'unparser'
+require 'ast'
+require 'pp'
+# NameTracker hands out stable placeholder values for the names and literals
+# found in an AST: the first distinct id of a kind gets index 0, the next 1,
+# and asking for the same id again returns the same placeholder.
+class NameTracker
+  def initialize
+    @var_hash = numbered { |n| "var#{n}" }
+    @spt_hash = numbered { |n| "*vr#{n}" }
+    @bar_hash = numbered { |n| "&vr#{n}" }
+    @sym_hash = numbered { |n| "sym#{n}".to_sym }
+    @str_hash = numbered { |n| "str#{n}" }
+    @flt_hash = numbered { |n| n.to_f }
+    @int_hash = numbered { |n| n }
+    # :arg and :var deliberately share one table, so an argument and a
+    # variable with the same name receive the same placeholder.
+    @mapping = {
+      :str => @str_hash,
+      :sym => @sym_hash,
+      :arg => @var_hash,
+      :float => @flt_hash,
+      :int => @int_hash,
+      :var => @var_hash,
+      :restarg => @spt_hash,
+      :blockarg => @bar_hash
+    }
+  end
+
+  # Returns the placeholder for +id+ within the table registered for +type+,
+  # creating it on first use.
+  def rename(type, id)
+    table = @mapping[type]
+    table[id]
+  end
+
+  private
+
+  # Builds a Hash that, on a missing key, stores and returns the value the
+  # given block produces for the table's current size.
+  def numbered(&namer)
+    Hash.new { |table, id| table[id] = namer.call(table.size) }
+  end
+end
+# ASTNormalizer rewrites an AST so that concrete names and literal values are
+# replaced by placeholders from a NameTracker, and records what it replaced
+# in +complexity+ (kind => list of original values).
+class ASTNormalizer
+  attr_accessor :complexity
+
+  def initialize
+    @track = NameTracker.new
+    @complexity = Hash.new { |h,k| h[k] = [] }
+  end
+
+  # Records one occurrence of +val+ under the complexity kind +type+.
+  def update_complexity(type, val)
+    @complexity[type].push(val)
+  end
+
+  # Returns kind => number of occurrences, restricted to the kinds
+  # :int, :str, :send, :var, :float and :sym that were actually seen.
+  def pretty_complexity
+    measures = [:int, :str, :send, :var, :float, :sym]
+    @complexity.each_with_object({}) do |(kind, seen), out|
+      out[kind] = seen.size if measures.include?(kind)
+    end
+  end
+
+  # Returns a normalized copy of +ast+; anything that is not an AST::Node is
+  # returned unchanged.
+  def rewrite_ast(ast)
+    return ast unless ast.is_a? AST::Node
+
+    type = ast.type
+    case type
+    # Variables (class variables included, matching :cvasgn below)
+    when :lvar, :ivar, :gvar, :cvar
+      update_complexity(:var, ast.children.first)
+      ast.updated(nil, ast.children.map { |child| @track.rename(:var, child) })
+    # Assignment: rename the target, normalize the assigned value
+    when :lvasgn, :gvasgn, :ivasgn, :cvasgn
+      update_complexity(:assignment, ast.children.first)
+      rewrite_children(ast, 0) { |name| @track.rename(:var, name) }
+    # Primitives
+    when :int, :float, :str, :sym, :arg, :restarg, :blockarg
+      update_complexity(type, ast.children.first)
+      ast.updated(nil, ast.children.map { |child| @track.rename(type, child) })
+    when :optarg
+      update_complexity(:arg, ast.children.first)
+      rewrite_children(ast, 0) { |name| @track.rename(:var, name) }
+    # Method definitions: the method name becomes the fixed symbol :method
+    when :def
+      update_complexity(:def, ast.children.first)
+      rewrite_children(ast, 0) { :method }
+    when :defs
+      # NOTE(review): this records children.first, while the name replaced
+      # below is child 1 -- confirm which one was meant to be recorded.
+      update_complexity(:def, ast.children.first)
+      rewrite_children(ast, 1) { :method }
+    when :send
+      update_complexity(:send, ast.children[1])
+      rewrite_children(ast)
+    else
+      rewrite_children(ast)
+    end
+  end
+
+  private
+
+  # Returns a copy of +ast+ whose child at +fixed_index+ (if any) is replaced
+  # by the block's result and whose other children are normalized recursively.
+  def rewrite_children(ast, fixed_index = nil)
+    ast.updated(nil, ast.children.each_with_index.map { |child, i|
+      i == fixed_index ? yield(child) : rewrite_ast(child)
+    })
+  end
+end
+# ASTProcess walks an AST depth-first, parents before children.
+class ASTProcess
+  # Yields (node, node.type) for +ast+ and then for every descendant that is
+  # a Parser::AST::Node; non-node children are skipped. Returns the children
+  # of +ast+, or nil when +ast+ itself is not a node.
+  def store_nodes(ast, &block)
+    return nil unless ast.is_a? Parser::AST::Node
+
+    yield ast, ast.type
+    ast.children.each { |child| store_nodes(child, &block) }
+  end
+end

metadata ADDED

@@ -0,0 +1,48 @@
+--- !ruby/object:Gem::Specification
+name: ruby-codex
+version: !ruby/object:Gem::Version
+  version: 0.0.1
+  prerelease:
+platform: ruby
+authors:
+- Ethan Fast
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-07-31 00:00:00.000000000 Z
+dependencies: []
+description: A DSL for collecting and aggregating data on Ruby ASTs
+email: ejhfast@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/normalize_ast.rb
+- lib/data_node.rb
+- lib/codex.rb
+homepage: http://rubygems.org/gems/normalize_ast
+licenses:
+- MIT
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.25
+signing_key:
+specification_version: 3
+summary: Analyze ruby ASTs with Codex
+test_files: []