nose 0.1.0pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This is optional so other things can run under JRuby,
4
+ # however this loader won't work so we need to use MRI
5
+ begin
6
+ require 'mysql2'
7
+ rescue LoadError
8
+ require 'mysql'
9
+ end
10
+
11
+ module NoSE
12
+ module Loader
13
# Load data from a MySQL database into a backend
class MysqlLoader < LoaderBase
  # Number of rows buffered before each bulk insert into the backend
  CHUNK_SIZE = 1000

  def initialize(workload = nil, backend = nil)
    @logger = Logging.logger['nose::loader::mysqlloader']

    @workload = workload
    @backend = backend
  end

  # Load a generated set of indexes with data from MySQL
  # @param indexes [Array] indexes to populate; replaced in place by their
  #   ID graphs when the backend reports +by_id_graph+
  # @param config [Hash] connection settings read by {#new_client}
  #   (:host, :username, :password and :database)
  # @param show_progress [Boolean] log each index as it is processed
  # @param limit [Integer, nil] maximum number of rows fetched per index
  # @param skip_existing [Boolean] leave indexes which already hold data
  def load(indexes, config, show_progress = false, limit = nil,
           skip_existing = true)
    indexes.map!(&:to_id_graph).uniq! if @backend.by_id_graph

    # XXX Assuming backend is thread-safe
    Parallel.each(indexes, in_threads: 2) do |index|
      load_index index, config, show_progress, limit, skip_existing
    end
  end

  # Read all tables in the database and construct a workload object
  # @param config [Hash] connection settings read by {#new_client}
  # @return [Workload]
  def workload(config)
    client = new_client config

    workload = Workload.new
    array_rows(client, 'SHOW TABLES').each do |table, *|
      # TODO: Handle foreign keys
      workload << entity_for_table(client, table)
    end

    workload
  ensure
    # Do not leak the connection once the schema has been read
    client&.close
  end

  private

  # Create a new client from the given configuration, using mysql2 when
  # it is loaded and the mysql driver otherwise. As a side effect this
  # records which query options are usable with the selected driver.
  def new_client(config)
    if Object.const_defined?(:Mysql2)
      @query_options = { stream: true, cache_rows: false }
      @array_options = { as: :array }
      Mysql2::Client.new host: config[:host],
                         username: config[:username],
                         password: config[:password],
                         database: config[:database]
    else
      @query_options = false
      @array_options = false
      Mysql.connect config[:host], config[:username], config[:password],
                    config[:database]
    end
  end

  # Run a query and enumerate its rows, asking mysql2 for array rows;
  # the fallback driver is enumerated with its default row format
  def array_rows(client, sql)
    result = client.query sql
    @array_options ? result.each(**@array_options) : result.each
  end

  # Load a single index into the backend
  # @return [void]
  def load_index(index, config, show_progress, limit, skip_existing)
    # Skip this index if it's not empty (checked before connecting so
    # skipped indexes never open a connection)
    if skip_existing && !@backend.index_empty?(index)
      @logger.info "Skipping index #{index.inspect}" if show_progress
      return
    end
    @logger.info index.inspect if show_progress

    client = new_client config
    sql, fields = index_sql index, limit
    results = if @query_options
                client.query(sql, **@query_options)
              else
                client.query(sql).map { |row| hash_from_row row, fields }
              end

    # each_slice also yields the final partial chunk and never an empty one
    results.each_slice(CHUNK_SIZE) do |result_chunk|
      @backend.index_insert_chunk index, result_chunk
    end
  ensure
    # One connection is opened per index, so always release it
    client&.close
  end

  # Construct a hash from the given row returned by the client
  # @return [Hash] values keyed by field ID
  def hash_from_row(row, fields)
    row_hash = {}
    fields.each_with_index do |field, i|
      value = field.class.value_from_string row[i]
      row_hash[field.id] = value
    end

    row_hash
  end

  # Get all the fields selected by this index
  # @return [Array] the fields followed by the matching list of
  #   "Entity.field AS Entity_field" select expressions
  def index_sql_select(index)
    fields = index.hash_fields.to_a + index.order_fields + index.extra.to_a

    [fields, fields.map do |field|
      "#{field.parent.name}.#{field.name} AS " \
      "#{field.parent.name}_#{field.name}"
    end]
  end

  # Get the list of tables along with the join condition
  # for a query to fetch index data
  # @return [String]
  def index_sql_tables(index)
    # Create JOIN statements
    tables = index.graph.entities.map(&:name).join ' JOIN '
    return tables if index.graph.size == 1

    tables << ' WHERE '
    tables << index.path.each_cons(2).map do |_prev_key, key|
      key = key.reverse if key.relationship == :many
      "#{key.parent.name}.#{key.name}=" \
        "#{key.entity.name}.#{key.entity.id_field.name}"
    end.join(' AND ')

    tables
  end

  # Construct a SQL statement to fetch the data to populate this index
  # @return [Array] the SQL string followed by the selected fields
  def index_sql(index, limit = nil)
    # Get all the necessary fields
    fields, select = index_sql_select index

    # Construct the join condition
    tables = index_sql_tables index

    query = "SELECT #{select.join ', '} FROM #{tables}"
    query += " LIMIT #{limit}" unless limit.nil?

    @logger.debug query
    [query, fields]
  end

  # Generate an entity definition from a given table
  # @return [Entity]
  def entity_for_table(client, table)
    entity = Entity.new table

    # The single result row may be a Hash or an Array depending on the
    # driver, so unwrap either form before storing the count
    count = client.query("SELECT COUNT(*) FROM #{table}").first
    count = count.values.first if count.is_a? Hash
    count = count.first if count.is_a? Array
    entity.count = Integer(count)

    array_rows(client, "DESCRIBE #{table}").each do |name, type, _, key|
      klass = key == 'PRI' ? Fields::IDField : field_class(type)
      entity << klass.new(name)
    end

    entity
  end

  # Produce the Ruby class used to represent a MySQL type
  # @return [Class]
  # @raise [ArgumentError] if the column type has no matching field class
  def field_class(type)
    case type
    when /datetime/
      Fields::DateField
    when /float/
      Fields::FloatField
    when /text/
      # TODO: Get length
      Fields::StringField
    when /varchar\(([0-9]+)\)/
      # TODO: Use length
      Fields::StringField
    when /(tiny)?int/
      Fields::IntegerField
    else
      # Fail with a clear message instead of returning nil to the caller
      fail ArgumentError, "unsupported MySQL column type: #{type}"
    end
  end
end
198
+ end
199
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ module Loader
5
# Loader which fills indexes with randomly generated rows
# (mostly useful for testing)
class RandomLoader < LoaderBase
  def initialize(workload = nil, backend = nil)
    @workload = workload
    @backend = backend

    @logger = Logging.logger['nose::loader::randomloader']
  end

  # Populate each of the given indexes with random rows
  # @param limit [Integer, nil] rows to generate per index, one if nil
  # @return [void]
  def load(indexes, config, show_progress = false, limit = nil,
           skip_existing = true)
    row_count = limit.nil? ? 1 : limit

    if @backend.by_id_graph
      indexes.map!(&:to_id_graph)
      indexes.uniq!
    end

    indexes.uniq.each do |target|
      load_index target, config, show_progress, row_count, skip_existing
    end
  end

  private

  # Generate +limit+ random rows and insert them into a single index
  # @return [void]
  def load_index(index, _config, show_progress, limit, skip_existing)
    # Indexes which already contain data are left alone on request
    populated = skip_existing && !@backend.index_empty?(index)
    if populated
      @logger.info "Skipping index #{index.inspect}" if show_progress
      return
    end
    @logger.info index.inspect if show_progress

    rows = Array.new(limit) do
      index.all_fields.map { |field| [field.id, field.random_value] }.to_h
    end

    @backend.index_insert_chunk index, rows
  end
end
47
+ end
48
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sequel'
4
+
5
+ module NoSE
6
+ module Loader
7
# Load data from a SQL database (connected to through Sequel) into a backend
class SqlLoader < LoaderBase
  # Number of rows buffered before each bulk insert into the backend
  CHUNK_SIZE = 100

  def initialize(workload = nil, backend = nil)
    @logger = Logging.logger['nose::loader::sqlloader']

    @workload = workload
    @backend = backend
  end

  # Load a generated set of indexes with data from the SQL database
  # @param indexes [Array] indexes to populate; replaced in place by their
  #   ID graphs when the backend reports +by_id_graph+
  # @param config [Hash] must provide the connection string under :uri
  # @param show_progress [Boolean] log each index as it is processed
  # @param limit [Integer, nil] maximum number of rows fetched per index
  # @param skip_existing [Boolean] leave indexes which already hold data
  def load(indexes, config, show_progress = false, limit = nil,
           skip_existing = true)
    indexes.map!(&:to_id_graph).uniq! if @backend.by_id_graph

    # XXX Assuming backend is thread-safe
    Parallel.each(indexes, in_threads: 2) do |index|
      load_index index, config, show_progress, limit, skip_existing
    end
  end

  private

  # Create a new client from the given configuration
  def new_client(config)
    Sequel.connect config[:uri]
  end

  # Load a single index into the backend
  # @return [void]
  def load_index(index, config, show_progress, limit, skip_existing)
    # Skip this index if it's not empty (checked before connecting so
    # skipped indexes never open a connection)
    if skip_existing && !@backend.index_empty?(index)
      @logger.info "Skipping index #{index.inspect}" if show_progress
      return
    end
    @logger.info index.inspect if show_progress

    client = new_client config
    index_sql(client, index, limit).each_slice(CHUNK_SIZE) do |rows|
      # Sequel keys each row by symbol but the backend is given string keys
      chunk = rows.map { |row| row.map { |k, v| [k.to_s, v] }.to_h }
      @backend.index_insert_chunk index, chunk
    end
  ensure
    # One database handle is opened per index, so always release it
    client&.disconnect
  end

  # Get all the fields selected by this index
  # @return [Array] one aliased, table-qualified column expression per
  #   field ("Entity"."field" AS "Entity_field")
  def index_sql_select(index)
    fields = index.hash_fields.to_a + index.order_fields + index.extra.to_a

    # Build the expressions explicitly rather than relying on the
    # :table__column___alias symbol splitting which newer Sequel
    # releases no longer perform by default
    fields.map do |field|
      Sequel.qualify(field.parent.name.to_sym, field.name.to_sym)
            .as(:"#{field.parent.name}_#{field.name}")
    end
  end

  # Get the list of tables along with the join condition
  # for a query to fetch index data
  # @return [Array] the table names followed by one join condition
  #   hash per step along the index path
  def index_sql_tables(index)
    # Create JOIN statements
    tables = index.graph.entities.map { |entity| entity.name.to_sym }
    return [tables, []] if index.graph.size == 1

    keys = index.path.each_cons(2).map do |_prev_key, key|
      is_many = key.relationship == :many
      key = key.reverse if is_many
      fields = [key.entity.id_field.name.to_sym, key.name.to_sym]
      fields = fields.reverse if is_many
      Hash[[fields]]
    end

    [tables, keys]
  end

  # Construct a dataset to fetch the data to populate this index
  # @return [Object] the dataset built from +client+, not yet executed
  def index_sql(client, index, limit = nil)
    # Get all the necessary fields
    select = index_sql_select index

    # Construct the join condition
    tables, keys = index_sql_tables index

    query = client[tables.first]
    keys.each_with_index do |key, i|
      query = query.join tables[i + 1], key
    end

    query = query.select(*select)
    query = query.limit limit unless limit.nil?

    @logger.debug { query.sql }
    query
  end
end
104
+ end
105
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
+ # Loaders which insert data into indexes from external sources
5
+ module Loader
6
# Common parent of every data loader; it only stores the workload and
# backend and leaves all real work to subclasses
class LoaderBase
  def initialize(workload = nil, backend = nil)
    @workload, @backend = workload, backend
  end

  # :nocov:
  # @abstract Subclasses should build and return a workload
  # @raise [NotImplementedError] always, unless overridden
  def workload(_config)
    raise NotImplementedError
  end
  # :nocov:

  # :nocov:
  # @abstract Subclasses should load data for the given list of indexes
  # @return [void]
  # @raise [NotImplementedError] always, unless overridden
  def load(_indexes, _config, _show_progress = false, _limit = nil,
           _skip_existing = true)
    raise NotImplementedError
  end
  # :nocov:

  # @abstract Subclasses should generate a model from the external source
  # @raise [NotImplementedError] always, unless overridden
  # :nocov:
  def model(_config)
    raise NotImplementedError
  end
  # :nocov:
end
37
+ end
38
+ end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
# A representation of an object in the conceptual data model
class Entity
  attr_reader :fields, :foreign_keys, :name
  attr_accessor :count

  # @param name [String] the name which identifies the entity
  # @yield an optional block evaluated against an {EntityDSL}
  def initialize(name, &block)
    @name = name
    @fields = {}
    @foreign_keys = {}
    @count = 1

    # Precompute the hash
    hash

    # Apply the DSL
    EntityDSL.new(self).instance_eval(&block) if block_given?
  end

  # :nocov:
  # @return [String]
  def to_color
    "[light_blue]#{@name}[/] [#{fields.each_key.map(&:to_color).join ', '}]"
  end
  # :nocov:

  # Compare by name; anything which is not an entity is never equal,
  # even if it happens to carry a matching name
  # @return [Boolean]
  def ==(other)
    other.is_a?(Entity) && @name == other.name
  end
  alias eql? ==

  # The hash is based only on the name of the entity
  # @return [Integer]
  def hash
    @hash ||= @name.hash
  end

  # Get the first field flagged as a primary key
  # @return [Fields::IDField, nil] nil if no field is a primary key
  def id_field
    fields.each_value.find(&:primary_key?)
  end

  # Adds a {Fields::Field} to the entity, storing foreign keys
  # separately from the other fields
  # @param freeze [Boolean] whether to freeze the field once attached
  # @return [self] the current entity to allow chaining
  def <<(field, freeze: true)
    if field.is_a? Fields::ForeignKeyField
      @foreign_keys[field.name] = field
    else
      @fields[field.name] = field
    end

    # Attach the field and compute its hash before it may be frozen
    field.instance_variable_set(:@parent, self)
    field.hash
    field.freeze if freeze

    self
  end

  # Shortcut for {#count=}
  # @return [Entity]
  # @raise [TypeError] if the count is not an integer
  def *(other)
    fail TypeError, 'count must be an integer' unless other.is_a? Integer
    @count = other

    self
  end

  # Get the field or foreign key on the entity with the given name
  # @return [Field]
  # @raise [FieldNotFound] if nothing with that name exists
  def [](field_name)
    field = @fields[field_name] || @foreign_keys[field_name]
    if field.nil?
      fail FieldNotFound,
           "no field #{field_name.inspect} on entity #{@name}"
    end

    field
  end

  # Return true if the entity contains a field with the given name
  # (foreign keys are not consulted)
  def field?(field)
    @fields.key? field
  end

  # Generate a hash with random values for fields in the entity
  # @param prefix_entity [Boolean] prefix each key with the entity name
  # @return [Hash]
  def random_entity(prefix_entity = true)
    @fields.each_with_object({}) do |(name, field), row|
      key = prefix_entity ? "#{@name}_#{name}" : name
      row[key] = field.random_value
    end
  end
end
97
+
98
# Receiver for the block given to {Entity}, so DSL methods
# stay off the entity itself
class EntityDSL
  def initialize(entity)
    @entity = entity
  end

  # rubocop:disable MethodName

  # Make the named fields the primary key of the entity, clearing the
  # flag on any field which currently has it
  def PrimaryKey(*names)
    # Fields attached to the entity are frozen, so every change is
    # applied to a copy which then replaces the original
    @entity.fields.each_value do |existing|
      if existing.primary_key?
        demoted = existing.dup
        demoted.primary_key = false
        @entity.fields[demoted.name] = demoted
        demoted.freeze
      end
    end

    names.each do |key_name|
      promoted = @entity[key_name].dup
      promoted.primary_key = true
      @entity.fields[key_name] = promoted
      promoted.freeze
    end
  end

  # rubocop:enable MethodName

  # Add a hash field named '**' of the given size to the entity
  def etc(size = 1)
    catch_all = Fields::HashField.new('**', size)
    @entity << catch_all
  end
end
132
+
133
# Error signalling that a lookup asked an entity for a field
# which the entity does not have
class FieldNotFound < StandardError; end
136
+ end