nose 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 0cbc98973cefc286e5457924f9f314d79f8e00e2
4
+ data.tar.gz: f3182e0f7583fb068ddd3a679f1ad4ed34af4548
5
+ SHA512:
6
+ metadata.gz: c0e4526807ab3d17ca001361b6bbee74afeb94df64d54886f15eaac9045a460d2cd7109e56d6d567d2636267e992f9e4195f4d240bff4975545239d3843cf8fb
7
+ data.tar.gz: 9445a603f50c33bfcf2d055a48cac3c0ded0763a6631100a33f16c8e3c92b01beba512725a5f73b0a8671a63bd4a45dea44eb60fb42a82cc454915fecbd046a9
@@ -0,0 +1,390 @@
1
# frozen_string_literal: true

require 'cassandra'
require 'zlib'

module NoSE
  module Backend
    # A backend which communicates with Cassandra via CQL
    class CassandraBackend < BackendBase
      def initialize(model, indexes, plans, update_plans, config)
        super

        @hosts = config[:hosts]
        @port = config[:port]
        @keyspace = config[:keyspace]
        @generator = Cassandra::Uuid::Generator.new
      end

      # Generate a random UUID
      # @return [Cassandra::Uuid]
      def generate_id
        @generator.uuid
      end

      # Produce the DDL necessary for column families for the given indexes
      # and optionally execute them against the server
      # @return [Enumerator<String>] lazily yields one CQL statement per index
      def indexes_ddl(execute = false, skip_existing = false,
                      drop_existing = false)
        Enumerator.new do |enum|
          # each, not map -- we only care about the yielded DDL and the
          # side effects; the mapped array was never used
          @indexes.each do |index|
            ddl = index_cql index
            enum.yield ddl

            begin
              drop_index(index) if drop_existing && index_exists?(index)
              client.execute(ddl) if execute
            rescue Cassandra::Errors::AlreadyExistsError => exc
              next if skip_existing

              # Re-raise as our own exception type, preserving the backtrace
              new_exc = IndexAlreadyExists.new exc.message
              new_exc.set_backtrace exc.backtrace
              raise new_exc
            end
          end
        end
      end

      # Insert a chunk of rows into an index using a single batch
      # @return [Array<Array<Cassandra::Uuid>>] the primary key values
      #   of each inserted row
      def index_insert_chunk(index, chunk)
        fields = index.all_fields.to_a
        prepared = "INSERT INTO \"#{index.key}\" (" \
                   "#{field_names fields}" \
                   ") VALUES (#{(['?'] * fields.length).join ', '})"
        prepared = client.prepare prepared

        ids = []
        client.execute(client.batch do |batch|
          chunk.each do |row|
            index_row = index_row(row, fields)
            ids << (index.hash_fields.to_a + index.order_fields).map do |field|
              index_row[fields.index field]
            end
            batch.add prepared, arguments: index_row
          end
        end)

        ids
      end

      # Check if the given index has no rows
      # @return [Boolean]
      def index_empty?(index)
        query = "SELECT COUNT(*) FROM \"#{index.key}\" LIMIT 1"
        client.execute(query).first.values.first.zero?
      end

      # Check if a given index exists in the target database
      # @return [Boolean]
      def index_exists?(index)
        # Calling client ensures we are connected so @cluster is populated
        client
        @cluster.keyspace(@keyspace).has_table? index.key
      end

      # Drop the column family backing the given index
      def drop_index(index)
        client.execute "DROP TABLE \"#{index.key}\""
      end

      # Sample a number of values from the given index
      # @return [Array<Hash>]
      def index_sample(index, count)
        field_list = index.all_fields.map { |f| "\"#{f.id}\"" }
        query = "SELECT #{field_list.join ', '} " \
                "FROM \"#{index.key}\" LIMIT #{count}"
        rows = client.execute(query).rows

        # XXX Ignore null values for now
        # fail if rows.any? { |row| row.values.any?(&:nil?) }

        rows
      end

      private

      # Produce an array of fields in the correct order for a CQL insert,
      # converting ID values to UUIDs (generating one when missing)
      # @return [Array]
      def index_row(row, fields)
        fields.map do |field|
          value = row[field.id]
          if field.is_a?(Fields::IDField)
            value = case value
                    when Numeric
                      Cassandra::Uuid.new value.to_i
                    when String
                      Cassandra::Uuid.new value
                    when nil
                      Cassandra::Uuid::Generator.new.uuid
                    else
                      value
                    end
          end

          value
        end
      end

      # Produce the CQL to create the definition for a given index
      # @return [String]
      def index_cql(index)
        ddl = "CREATE COLUMNFAMILY \"#{index.key}\" (" \
              "#{field_names index.all_fields, true}, " \
              "PRIMARY KEY((#{field_names index.hash_fields})"

        cluster_key = index.order_fields
        ddl += ", #{field_names cluster_key}" unless cluster_key.empty?
        ddl += '));'

        ddl
      end

      # Get a comma-separated list of field names with optional types
      # @return [String]
      def field_names(fields, types = false)
        fields.map do |field|
          name = "\"#{field.id}\""
          name += ' ' + cassandra_type(field.class).to_s if types
          name
        end.join ', '
      end

      # Get a Cassandra client, connecting if not done already
      def client
        return @client unless @client.nil?
        @cluster = Cassandra.cluster hosts: @hosts, port: @port,
                                     timeout: nil
        @client = @cluster.connect @keyspace
      end

      # Return the datatype to use in Cassandra for a given field
      # @return [Symbol] or nil for an unknown field class
      def cassandra_type(field_class)
        # Compare classes by equality; a bare case/when would invoke
        # Class#=== (an is_a? test) which is never true when the subject
        # is itself a Class
        if field_class == Fields::IntegerField
          :int
        elsif field_class == Fields::FloatField
          :float
        elsif field_class == Fields::StringField
          :text
        elsif field_class == Fields::DateField
          :timestamp
        elsif field_class == Fields::IDField ||
              field_class == Fields::ForeignKeyField
          :uuid
        end
      end

      # Insert data into an index on the backend
      class InsertStatementStep < BackendBase::InsertStatementStep
        def initialize(client, index, fields)
          super

          @fields = fields.map(&:id) & index.all_fields.map(&:id)
          @prepared = client.prepare insert_cql
          @generator = Cassandra::Uuid::Generator.new
        end

        # Insert each row into the index
        def process(results)
          results.each do |result|
            fields = @index.all_fields.select { |field| result.key? field.id }
            values = fields.map do |field|
              value = result[field.id]

              # If this is an ID, generate or construct a UUID object
              if field.is_a?(Fields::IDField)
                value = if value.nil?
                          @generator.uuid
                        else
                          Cassandra::Uuid.new(value.to_i)
                        end
              end

              # XXX Useful to test that we never insert null values
              # fail if value.nil?

              value
            end

            begin
              @client.execute(@prepared, arguments: values)
            rescue Cassandra::Errors::InvalidError
              # We hit a value which does not actually need to be
              # inserted based on the data since some foreign
              # key in the graph corresponding to this column
              # family does not exist
              nil
            end
          end
        end

        private

        # The CQL used to insert the fields into the index
        # @return [String]
        def insert_cql
          insert = "INSERT INTO #{@index.key} ("
          insert += @fields.map { |f| "\"#{f}\"" }.join(', ')
          insert += ') VALUES (' + (['?'] * @fields.length).join(', ') + ')'

          insert
        end
      end

      # Delete data from an index on the backend
      class DeleteStatementStep < BackendBase::DeleteStatementStep
        def initialize(client, index)
          super

          @index_keys = @index.hash_fields + @index.order_fields.to_set

          # Prepare the statement required to perform the deletion
          delete = "DELETE FROM #{index.key} WHERE "
          delete += @index_keys.map { |key| "\"#{key.id}\" = ?" }.join(' AND ')
          @prepared = client.prepare delete
        end

        # Execute the delete for a given set of keys
        def process(results)
          # Delete each row from the index
          results.each do |result|
            values = delete_values result
            @client.execute(@prepared, arguments: values)
          end
        end

        private

        # Get the values used in the WHERE clause for a CQL DELETE
        # @return [Array]
        def delete_values(result)
          @index_keys.map do |key|
            cur_field = @index.all_fields.find { |field| field.id == key.id }

            if cur_field.is_a?(Fields::IDField)
              Cassandra::Uuid.new(result[key.id].to_i)
            else
              result[key.id]
            end
          end
        end
      end

      # A query step to look up data from a particular column family
      class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
        # rubocop:disable Metrics/ParameterLists
        def initialize(client, select, conditions, step, next_step, prev_step)
          super

          @logger = Logging.logger['nose::backend::cassandra::indexlookupstep']

          # TODO: Check if we can apply the next filter via ALLOW FILTERING
          @prepared = client.prepare select_cql(select, conditions)
        end
        # rubocop:enable Metrics/ParameterLists

        # Perform a column family lookup in Cassandra
        def process(conditions, results)
          results = initial_results(conditions) if results.nil?
          condition_list = result_conditions conditions, results
          new_result = fetch_all_queries condition_list, results

          # Limit the size of the results in case we fetched multiple keys
          # (take exactly limit rows; the previous 0..limit slice was
          # off by one and returned limit + 1 rows)
          @step.limit.nil? ? new_result : new_result.first(@step.limit)
        end

        private

        # Produce the select CQL statement for a provided set of fields
        # @return [String]
        def select_cql(select, conditions)
          select = expand_selected_fields select
          cql = "SELECT #{select.map { |f| "\"#{f.id}\"" }.join ', '} FROM " \
                "\"#{@step.index.key}\" WHERE #{cql_where_clause conditions}"
          cql += cql_order_by

          # Add an optional limit
          cql << " LIMIT #{@step.limit}" unless @step.limit.nil?

          cql
        end

        # Produce a CQL where clause using the given conditions
        # @return [String]
        def cql_where_clause(conditions)
          where = @eq_fields.map do |field|
            "\"#{field.id}\" = ?"
          end.join ' AND '
          unless @range_field.nil?
            condition = conditions.each_value.find(&:range?)
            where << " AND \"#{condition.field.id}\" #{condition.operator} ?"
          end

          where
        end

        # Produce the CQL ORDER BY clause for this step
        # @return [String]
        def cql_order_by
          # TODO: CQL3 requires all clustered columns before the one actually
          # ordered on also be specified
          #
          # Example:
          #
          # SELECT * FROM cf WHERE id=? AND col1=? ORDER by col1, col2
          return '' if @step.order_by.empty?
          ' ORDER BY ' + @step.order_by.map { |f| "\"#{f.id}\"" }.join(', ')
        end

        # Lookup values from an index selecting the given
        # fields and filtering on the given conditions
        def fetch_all_queries(condition_list, results)
          new_result = []
          @logger.debug { "  #{@prepared.cql} * #{condition_list.size}" }

          # TODO: Chain enumerables of results instead
          # Limit the total number of queries as well as the query limit
          condition_list.zip(results).each do |condition_set, result|
            # Loop over all pages to fetch results
            values = lookup_values condition_set
            fetch_query_pages values, new_result, result

            # Don't continue with further queries
            break if !@step.limit.nil? && new_result.length >= @step.limit
          end
          @logger.debug "Total result size = #{new_result.size}"

          new_result
        end

        # Get the necessary pages of results for a given list of values
        def fetch_query_pages(values, new_result, result)
          new_results = @client.execute(@prepared, arguments: values)
          loop do
            # Add the previous results to each row
            rows = new_results.map { |row| result.merge row }

            # XXX Ignore null values in results for now
            # fail if rows.any? { |row| row.values.any?(&:nil?) }

            new_result.concat rows
            # Track the accumulated row count (new_result), not the field
            # count of the previous result hash, when applying the limit
            break if new_results.last_page? ||
                     (!@step.limit.nil? && new_result.length >= @step.limit)
            new_results = new_results.next_page
            @logger.debug "Fetched #{new_result.length} results"
          end
        end

        # Produce the values used for lookup on a given set of conditions
        def lookup_values(condition_set)
          condition_set.map do |condition|
            value = condition.value ||
                    conditions[condition.field.id].value
            fail if value.nil?

            if condition.field.is_a?(Fields::IDField)
              Cassandra::Uuid.new(value.to_i)
            else
              value
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,185 @@
1
# frozen_string_literal: true

require 'securerandom'
require 'set'

module NoSE
  module Backend
    # Simple backend which persists data to a file
    class FileBackend < BackendBase
      def initialize(model, indexes, plans, update_plans, config)
        super

        # Try to load data from file or start fresh
        # NOTE: Marshal.load is only safe here because the file is one we
        #       wrote ourselves in the finalizer below
        @index_data = if !config[:file].nil? && File.file?(config[:file])
                        File.open(config[:file], 'rb') do |file|
                          Marshal.load file
                        end
                      else
                        {}
                      end

        # Ensure the data is saved when we exit
        ObjectSpace.define_finalizer self, self.class.finalize(@index_data,
                                                               config[:file])
      end

      # Save data when the object is destroyed
      # @return [Proc]
      def self.finalize(index_data, file)
        proc do
          # Block form ensures the handle is flushed and closed
          File.open(file, 'wb') { |f| Marshal.dump(index_data, f) }
        end
      end

      # Check for an empty array for the data
      # @return [Boolean]
      def index_empty?(index)
        !index_exists?(index) || @index_data[index.key].empty?
      end

      # Check if we have prepared space for this index
      # @return [Boolean]
      def index_exists?(index)
        @index_data.key? index.key
      end

      # Add a chunk of rows to the in-memory store for this index
      def index_insert_chunk(index, chunk)
        @index_data[index.key].concat chunk
      end

      # Generate a simple UUID
      # @return [String]
      def generate_id
        SecureRandom.uuid
      end

      # Allocate space for data on the new indexes
      def indexes_ddl(execute = false, skip_existing = false,
                      drop_existing = false)
        if execute
          @indexes.each do |index|
            # Do the appropriate behaviour based on the flags passed in
            if index_exists?(index)
              next if skip_existing
              fail unless drop_existing
            end

            @index_data[index.key] = []
          end
        end

        # We just use the original index definition as DDL
        @indexes.map(&:inspect)
      end

      # Sample a number of values from the given index
      # @return [Array<Hash>]
      def index_sample(index, count)
        data = @index_data[index.key]
        data.nil? ? [] : data.sample(count)
      end

      # We just produce the data here which can be manipulated as needed
      # @return [Hash]
      def client
        @index_data
      end

      # Provide some helper functions which allow the matching of rows
      # based on a set of list of conditions
      module RowMatcher
        # Check if a row matches the given condition
        # @return [Boolean]
        def row_matches?(row, conditions)
          row_matches_eq?(row, conditions) &&
            row_matches_range?(row, conditions)
        end

        # Check if a row matches the given condition on equality predicates
        # @return [Boolean]
        def row_matches_eq?(row, conditions)
          @eq_fields.all? do |field|
            row[field.id] == conditions.find { |c| c.field == field }.value
          end
        end

        # Check if a row matches the given condition on the range predicate
        # @return [Boolean]
        def row_matches_range?(row, conditions)
          return true if @range_field.nil?

          range_cond = conditions.find { |c| c.field == @range_field }
          row[@range_field.id].send range_cond.operator, range_cond.value
        end
      end

      # Look up data on an index in the backend
      class IndexLookupStatementStep < BackendBase::IndexLookupStatementStep
        include RowMatcher

        # Filter all the rows in the specified index to those requested
        def process(conditions, results)
          # Get the set of conditions we need to process
          results = initial_results(conditions) if results.nil?
          condition_list = result_conditions conditions, results

          # Loop through all rows to find the matching ones
          rows = @client[@index.key] || []
          selected = condition_list.flat_map do |condition|
            rows.select { |row| row_matches? row, condition }
          end

          # Apply the limit (take exactly limit rows; a 0..limit slice
          # would be off by one) and only return selected fields
          field_ids = @step.fields.map(&:id).to_set
          limited = @step.limit.nil? ? selected : selected.first(@step.limit)
          limited.map do |row|
            row.select { |k, _| field_ids.include? k }
          end
        end
      end

      # Insert data into an index on the backend
      class InsertStatementStep < BackendBase::InsertStatementStep
        # Add new rows to the index
        def process(results)
          key_ids = (@index.hash_fields + @index.order_fields).map(&:id).to_set

          results.each do |row|
            # Pick out primary key fields we can use to match
            conditions = row.select do |field_id|
              key_ids.include? field_id
            end

            # If we have all the primary keys, check for a match
            if conditions.length == key_ids.length
              # Try to find a row with this ID and update it
              matching_row = @client[@index.key].find do |index_row|
                index_row.merge(conditions) == index_row
              end

              unless matching_row.nil?
                matching_row.merge! row
                next
              end
            end

            # Populate IDs as needed
            key_ids.each do |key_id|
              row[key_id] = SecureRandom.uuid if row[key_id].nil?
            end

            @client[@index.key] << row
          end
        end
      end

      # Delete data from an index on the backend
      class DeleteStatementStep < BackendBase::DeleteStatementStep
        include RowMatcher

        # Remove rows matching the results from the dataset
        def process(results)
          # Loop over all rows
          @client[@index.key].reject! do |row|
            # Check against all results
            results.any? do |result|
              # If all fields match, drop the row
              result.all? do |field, value|
                row[field] == value
              end
            end
          end
        end
      end
    end
  end
end