nose 0.1.0pre

Files changed (55)
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 0cbc98973cefc286e5457924f9f314d79f8e00e2
+   data.tar.gz: f3182e0f7583fb068ddd3a679f1ad4ed34af4548
+ SHA512:
+   metadata.gz: c0e4526807ab3d17ca001361b6bbee74afeb94df64d54886f15eaac9045a460d2cd7109e56d6d567d2636267e992f9e4195f4d240bff4975545239d3843cf8fb
+   data.tar.gz: 9445a603f50c33bfcf2d055a48cac3c0ded0763a6631100a33f16c8e3c92b01beba512725a5f73b0a8671a63bd4a45dea44eb60fb42a82cc454915fecbd046a9
data/lib/nose/backend/cassandra.rb ADDED
@@ -0,0 +1,390 @@
+ # frozen_string_literal: true
+
+ require 'cassandra'
+ require 'zlib'
+
+ module NoSE
+   module Backend
+     # A backend which communicates with Cassandra via CQL
+     class CassandraBackend < BackendBase
+       def initialize(model, indexes, plans, update_plans, config)
+         super
+
+         @hosts = config[:hosts]
+         @port = config[:port]
+         @keyspace = config[:keyspace]
+         @generator = Cassandra::Uuid::Generator.new
+       end
+
+       # Generate a random UUID
+       def generate_id
+         @generator.uuid
+       end
+
+       # Produce the DDL necessary for column families for the given indexes
+       # and optionally execute them against the server
+       def indexes_ddl(execute = false, skip_existing = false,
+                       drop_existing = false)
+         Enumerator.new do |enum|
+           @indexes.map do |index|
+             ddl = index_cql index
+             enum.yield ddl
+
+             begin
+               drop_index(index) if drop_existing && index_exists?(index)
+               client.execute(ddl) if execute
+             rescue Cassandra::Errors::AlreadyExistsError => exc
+               next if skip_existing
+
+               new_exc = IndexAlreadyExists.new exc.message
+               new_exc.set_backtrace exc.backtrace
+               raise new_exc
+             end
+           end
+         end
+       end
+
+       # Insert a chunk of rows into an index
+       # @return [Array<Array<Cassandra::Uuid>>]
+       def index_insert_chunk(index, chunk)
+         fields = index.all_fields.to_a
+         prepared = "INSERT INTO \"#{index.key}\" (" \
+                    "#{field_names fields}" \
+                    ") VALUES (#{(['?'] * fields.length).join ', '})"
+         prepared = client.prepare prepared
+
+         ids = []
+         client.execute(client.batch do |batch|
+           chunk.each do |row|
+             index_row = index_row(row, fields)
+             ids << (index.hash_fields.to_a + index.order_fields).map do |field|
+               index_row[fields.index field]
+             end
+             batch.add prepared, arguments: index_row
+           end
+         end)
+
+         ids
+       end
+
+       # Check if the given index is empty
+       def index_empty?(index)
+         query = "SELECT COUNT(*) FROM \"#{index.key}\" LIMIT 1"
+         client.execute(query).first.values.first.zero?
+       end
+
+       # Check if a given index exists in the target database
+       def index_exists?(index)
+         client
+         @cluster.keyspace(@keyspace).has_table? index.key
+       end
+
+       # Check if a given index exists in the target database
+       def drop_index(index)
+         client.execute "DROP TABLE \"#{index.key}\""
+       end
+
+       # Sample a number of values from the given index
+       def index_sample(index, count)
+         field_list = index.all_fields.map { |f| "\"#{f.id}\"" }
+         query = "SELECT #{field_list.join ', '} " \
+                 "FROM \"#{index.key}\" LIMIT #{count}"
+         rows = client.execute(query).rows
+
+         # XXX Ignore null values for now
+         # fail if rows.any? { |row| row.values.any?(&:nil?) }
+
+         rows
+       end
+
+       private
+
+       # Produce an array of fields in the correct order for a CQL insert
+       # @return [Array]
+       def index_row(row, fields)
+         fields.map do |field|
+           value = row[field.id]
+           if field.is_a?(Fields::IDField)
+             value = case value
+                     when Numeric
+                       Cassandra::Uuid.new value.to_i
+                     when String
+                       Cassandra::Uuid.new value
+                     when nil
+                       Cassandra::Uuid::Generator.new.uuid
+                     else
+                       value
+                     end
+           end
+
+           value
+         end
+       end
+
+       # Produce the CQL to create the definition for a given index
+       # @return [String]
+       def index_cql(index)
+         ddl = "CREATE COLUMNFAMILY \"#{index.key}\" (" \
+               "#{field_names index.all_fields, true}, " \
+               "PRIMARY KEY((#{field_names index.hash_fields})"
+
+         cluster_key = index.order_fields
+         ddl += ", #{field_names cluster_key}" unless cluster_key.empty?
+         ddl += '));'
+
+         ddl
+       end
+
+       # Get a comma-separated list of field names with optional types
+       # @return [String]
+       def field_names(fields, types = false)
+         fields.map do |field|
+           name = "\"#{field.id}\""
+           name += ' ' + cassandra_type(field.class).to_s if types
+           name
+         end.join ', '
+       end
+
+       # Get a Cassandra client, connecting if not done already
+       def client
+         return @client unless @client.nil?
+         @cluster = Cassandra.cluster hosts: @hosts, port: @port,
+                                      timeout: nil
+         @client = @cluster.connect @keyspace
+       end
+
+       # Return the datatype to use in Cassandra for a given field
+       # @return [Symbol]
+       def cassandra_type(field_class)
+         case [field_class]
+         when [Fields::IntegerField]
+           :int
+         when [Fields::FloatField]
+           :float
+         when [Fields::StringField]
+           :text
+         when [Fields::DateField]
+           :timestamp
+         when [Fields::IDField],
+              [Fields::ForeignKeyField]
+           :uuid
+         end
+       end
+
+       # Insert data into an index on the backend
+       class InsertStatementStep < BackendBase::InsertStatementStep
+         def initialize(client, index, fields)
+           super
+
+           @fields = fields.map(&:id) & index.all_fields.map(&:id)
+           @prepared = client.prepare insert_cql
+           @generator = Cassandra::Uuid::Generator.new
+         end
+
+         # Insert each row into the index
+         def process(results)
+           results.each do |result|
+             fields = @index.all_fields.select { |field| result.key? field.id }
+             values = fields.map do |field|
+               value = result[field.id]
+
+               # If this is an ID, generate or construct a UUID object
+               if field.is_a?(Fields::IDField)
+                 value = if value.nil?
+                           @generator.uuid
+                         else
+                           Cassandra::Uuid.new(value.to_i)
+                         end
+               end
+
+               # XXX Useful to test that we never insert null values
+               # fail if value.nil?
+
+               value
+             end
+
+             begin
+               @client.execute(@prepared, arguments: values)
+             rescue Cassandra::Errors::InvalidError
+               # We hit a value which does not actually need to be
+               # inserted based on the data since some foreign
+               # key in the graph corresponding to this column
+               # family does not exist
+               nil
+             end
+           end
+         end
+
+         private
+
+         # The CQL used to insert the fields into the index
+         def insert_cql
+           insert = "INSERT INTO #{@index.key} ("
+           insert += @fields.map { |f| "\"#{f}\"" }.join(', ')
+           insert << ') VALUES (' << (['?'] * @fields.length).join(', ') + ')'
+
+           insert
+         end
+       end
+
+       # Delete data from an index on the backend
+       class DeleteStatementStep < BackendBase::DeleteStatementStep
+         def initialize(client, index)
+           super
+
+           @index_keys = @index.hash_fields + @index.order_fields.to_set
+
+           # Prepare the statement required to perform the deletion
+           delete = "DELETE FROM #{index.key} WHERE "
+           delete += @index_keys.map { |key| "\"#{key.id}\" = ?" }.join(' AND ')
+           @prepared = client.prepare delete
+         end
+
+         # Execute the delete for a given set of keys
+         def process(results)
+           # Delete each row from the index
+           results.each do |result|
+             values = delete_values result
+             @client.execute(@prepared, arguments: values)
+           end
+         end
+
+         private
+
+         # Get the values used in the WHERE clause for a CQL DELETE
+         def delete_values(result)
+           @index_keys.map do |key|
+             cur_field = @index.all_fields.find { |field| field.id == key.id }
+
+             if cur_field.is_a?(Fields::IDField)
+               Cassandra::Uuid.new(result[key.id].to_i)
+             else
+               result[key.id]
+             end
+           end
+         end
+       end
+
+       # A query step to look up data from a particular column family
+       class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
+         # rubocop:disable Metrics/ParameterLists
+         def initialize(client, select, conditions, step, next_step, prev_step)
+           super
+
+           @logger = Logging.logger['nose::backend::cassandra::indexlookupstep']
+
+           # TODO: Check if we can apply the next filter via ALLOW FILTERING
+           @prepared = client.prepare select_cql(select, conditions)
+         end
+         # rubocop:enable Metrics/ParameterLists
+
+         # Perform a column family lookup in Cassandra
+         def process(conditions, results)
+           results = initial_results(conditions) if results.nil?
+           condition_list = result_conditions conditions, results
+           new_result = fetch_all_queries condition_list, results
+
+           # Limit the size of the results in case we fetched multiple keys
+           new_result[0..(@step.limit.nil? ? -1 : @step.limit)]
+         end
+
+         private
+
+         # Produce the select CQL statement for a provided set of fields
+         # @return [String]
+         def select_cql(select, conditions)
+           select = expand_selected_fields select
+           cql = "SELECT #{select.map { |f| "\"#{f.id}\"" }.join ', '} FROM " \
+                 "\"#{@step.index.key}\" WHERE #{cql_where_clause conditions}"
+           cql += cql_order_by
+
+           # Add an optional limit
+           cql << " LIMIT #{@step.limit}" unless @step.limit.nil?
+
+           cql
+         end
+
+         # Produce a CQL where clause using the given conditions
+         # @return [String]
+         def cql_where_clause(conditions)
+           where = @eq_fields.map do |field|
+             "\"#{field.id}\" = ?"
+           end.join ' AND '
+           unless @range_field.nil?
+             condition = conditions.each_value.find(&:range?)
+             where << " AND \"#{condition.field.id}\" #{condition.operator} ?"
+           end
+
+           where
+         end
+
+         # Produce the CQL ORDER BY clause for this step
+         # @return [String]
+         def cql_order_by
+           # TODO: CQL3 requires all clustered columns before the one actually
+           # ordered on also be specified
+           #
+           # Example:
+           #
+           # SELECT * FROM cf WHERE id=? AND col1=? ORDER by col1, col2
+           return '' if @step.order_by.empty?
+           ' ORDER BY ' + @step.order_by.map { |f| "\"#{f.id}\"" }.join(', ')
+         end
+
+         # Lookup values from an index selecting the given
+         # fields and filtering on the given conditions
+         def fetch_all_queries(condition_list, results)
+           new_result = []
+           @logger.debug { " #{@prepared.cql} * #{condition_list.size}" }
+
+           # TODO: Chain enumerables of results instead
+           # Limit the total number of queries as well as the query limit
+           condition_list.zip(results).each do |condition_set, result|
+             # Loop over all pages to fetch results
+             values = lookup_values condition_set
+             fetch_query_pages values, new_result, result
+
+             # Don't continue with further queries
+             break if !@step.limit.nil? && new_result.length >= @step.limit
+           end
+           @logger.debug "Total result size = #{new_result.size}"
+
+           new_result
+         end
+
+         # Get the necessary pages of results for a given list of values
+         def fetch_query_pages(values, new_result, result)
+           new_results = @client.execute(@prepared, arguments: values)
+           loop do
+             # Add the previous results to each row
+             rows = new_results.map { |row| result.merge row }
+
+             # XXX Ignore null values in results for now
+             # fail if rows.any? { |row| row.values.any?(&:nil?) }
+
+             new_result.concat rows
+             break if new_results.last_page? ||
+                      (!@step.limit.nil? && result.length >= @step.limit)
+             new_results = new_results.next_page
+             @logger.debug "Fetched #{result.length} results"
+           end
+         end
+
+         # Produce the values used for lookup on a given set of conditions
+         def lookup_values(condition_set)
+           condition_set.map do |condition|
+             value = condition.value ||
+                     conditions[condition.field.id].value
+             fail if value.nil?
+
+             if condition.field.is_a?(Fields::IDField)
+               Cassandra::Uuid.new(value.to_i)
+             else
+               value
+             end
+           end
+         end
+       end
+     end
+   end
+ end
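
The backend above reads :hosts, :port and :keyspace from its configuration hash, connects lazily in #client, and indexes_ddl yields one CQL CREATE COLUMNFAMILY statement per index. A minimal usage sketch follows; the connection values are placeholders, and the model, indexes, plans and update_plans objects are assumed to come from the rest of NoSE (workload definition and schema search, in other files of this gem).

    require 'nose'

    # Hypothetical connection settings matching the keys read by
    # CassandraBackend#initialize (:hosts, :port, :keyspace).
    config = { hosts: ['127.0.0.1'], port: 9042, keyspace: 'nose' }

    # model, indexes, plans and update_plans are placeholders assumed to
    # be produced elsewhere by NoSE; they are not defined in this sketch.
    backend = NoSE::Backend::CassandraBackend.new(model, indexes, plans,
                                                  update_plans, config)

    # Print the CQL for each column family without executing anything...
    backend.indexes_ddl.each { |ddl| puts ddl }

    # ...or create the column families, skipping any that already exist.
    backend.indexes_ddl(true, true).to_a
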
data/lib/nose/backend/file.rb ADDED
@@ -0,0 +1,185 @@
+ # frozen_string_literal: true
+
+ module NoSE
+   module Backend
+     # Simple backend which persists data to a file
+     class FileBackend < BackendBase
+       def initialize(model, indexes, plans, update_plans, config)
+         super
+
+         # Try to load data from file or start fresh
+         @index_data = if !config[:file].nil? && File.file?(config[:file])
+                         Marshal.load File.open(config[:file])
+                       else
+                         {}
+                       end
+
+         # Ensure the data is saved when we exit
+         ObjectSpace.define_finalizer self, self.class.finalize(@index_data,
+                                                                config[:file])
+       end
+
+       # Save data when the object is destroyed
+       def self.finalize(index_data, file)
+         proc do
+           Marshal.dump(index_data, File.open(file, 'w'))
+         end
+       end
+
+       # Check for an empty array for the data
+       def index_empty?(index)
+         !index_exists?(index) || @index_data[index.key].empty?
+       end
+
+       # Check if we have prepared space for this index
+       def index_exists?(index)
+         @index_data.key? index.key
+       end
+
+       # @abstract Subclasses implement to allow inserting
+       def index_insert_chunk(index, chunk)
+         @index_data[index.key].concat chunk
+       end
+
+       # Generate a simple UUID
+       def generate_id
+         SecureRandom.uuid
+       end
+
+       # Allocate space for data on the new indexes
+       def indexes_ddl(execute = false, skip_existing = false,
+                       drop_existing = false)
+         @indexes.each do |index|
+           # Do the appropriate behaviour based on the flags passed in
+           if index_exists?(index)
+             next if skip_existing
+             fail unless drop_existing
+           end
+
+           @index_data[index.key] = []
+         end if execute
+
+         # We just use the original index definition as DDL
+         @indexes.map(&:inspect)
+       end
+
+       # Sample a number of values from the given index
+       def index_sample(index, count)
+         data = @index_data[index.key]
+         data.nil? ? [] : data.sample(count)
+       end
+
+       # We just produce the data here which can be manipulated as needed
+       # @return [Hash]
+       def client
+         @index_data
+       end
+
+       # Provide some helper functions which allow the matching of rows
+       # based on a set of list of conditions
+       module RowMatcher
+         # Check if a row matches the given condition
+         # @return [Boolean]
+         def row_matches?(row, conditions)
+           row_matches_eq?(row, conditions) &&
+             row_matches_range?(row, conditions)
+         end
+
+         # Check if a row matches the given condition on equality predicates
+         # @return [Boolean]
+         def row_matches_eq?(row, conditions)
+           @eq_fields.all? do |field|
+             row[field.id] == conditions.find { |c| c.field == field }.value
+           end
+         end
+
+         # Check if a row matches the given condition on the range predicate
+         # @return [Boolean]
+         def row_matches_range?(row, conditions)
+           return true if @range_field.nil?
+
+           range_cond = conditions.find { |c| c.field == @range_field }
+           row[@range_field.id].send range_cond.operator, range_cond.value
+         end
+       end
+
+       # Look up data on an index in the backend
+       class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
+         include RowMatcher
+
+         # Filter all the rows in the specified index to those requested
+         def process(conditions, results)
+           # Get the set of conditions we need to process
+           results = initial_results(conditions) if results.nil?
+           condition_list = result_conditions conditions, results
+
+           # Loop through all rows to find the matching ones
+           rows = @client[@index.key] || []
+           selected = condition_list.flat_map do |condition|
+             rows.select { |row| row_matches? row, condition }
+           end.compact
+
+           # Apply the limit and only return selected fields
+           field_ids = Set.new @step.fields.map(&:id).to_set
+           selected[0..(@step.limit.nil? ? -1 : @step.limit)].map do |row|
+             row.select { |k, _| field_ids.include? k }
+           end
+         end
+       end
+
+       # Insert data into an index on the backend
+       class InsertStatementStep < BackendBase::InsertStatementStep
+         # Add new rows to the index
+         def process(results)
+           key_ids = (@index.hash_fields + @index.order_fields).map(&:id).to_set
+
+           results.each do |row|
+             # Pick out primary key fields we can use to match
+             conditions = row.select do |field_id|
+               key_ids.include? field_id
+             end
+
+             # If we have all the primary keys, check for a match
+             if conditions.length == key_ids.length
+               # Try to find a row with this ID and update it
+               matching_row = @client[index.key].find do |index_row|
+                 index_row.merge(conditions) == index_row
+               end
+
+               unless matching_row.nil?
+                 matching_row.merge! row
+                 next
+               end
+             end
+
+             # Populate IDs as needed
+             key_ids.each do |key_id|
+               row[key_id] = SecureRandom.uuid if row[key_id].nil?
+             end
+
+             @client[index.key] << row
+           end
+         end
+       end
+
+       # Delete data from an index on the backend
+       class DeleteStatementStep < BackendBase::DeleteStatementStep
+         include RowMatcher
+
+         # Remove rows matching the results from the dataset
+         def process(results)
+           # Loop over all rows
+           @client[index.key].reject! do |row|
+             # Check against all results
+             results.any? do |result|
+               # If all fields match, drop the row
+               result.all? do |field, value|
+                 row[field] == value
+               end
+             end
+           end
+         end
+       end
+     end
+   end
+ end
+ end
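
For comparison, the file backend above keeps every index in an in-memory Hash that is Marshal-dumped to :file when the object is finalized. A short sketch of driving it, again with placeholder NoSE objects and data:

    require 'nose'

    # :file is the only key read by FileBackend#initialize; the path is a
    # placeholder.
    config = { file: '/tmp/nose_index_data.bin' }

    # model, indexes, plans and update_plans are placeholders assumed to
    # come from the rest of NoSE; rows stands in for an Array of Hashes
    # keyed by field ID.
    backend = NoSE::Backend::FileBackend.new(model, indexes, plans,
                                             update_plans, config)

    backend.indexes_ddl(true)                       # allocate an Array per index
    backend.index_insert_chunk(indexes.first, rows) # append a chunk of rows
    backend.index_sample(indexes.first, 10)         # draw random rows back out
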