nose 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
data/lib/nose/loader/mysql.rb
@@ -0,0 +1,199 @@
+ # frozen_string_literal: true
+
+ # This is optional so other things can run under JRuby;
+ # however, this loader won't work there, so we need to use MRI
+ begin
+   require 'mysql2'
+ rescue LoadError
+   require 'mysql'
+ end
+
+ module NoSE
+   module Loader
+     # Load data from a MySQL database into a backend
+     class MysqlLoader < LoaderBase
+       def initialize(workload = nil, backend = nil)
+         @logger = Logging.logger['nose::loader::mysqlloader']
+
+         @workload = workload
+         @backend = backend
+       end
+
+       # Load a generated set of indexes with data from MySQL
+       def load(indexes, config, show_progress = false, limit = nil,
+                skip_existing = true)
+         indexes.map!(&:to_id_graph).uniq! if @backend.by_id_graph
+
+         # XXX Assuming backend is thread-safe
+         Parallel.each(indexes, in_threads: 2) do |index|
+           load_index index, config, show_progress, limit, skip_existing
+         end
+       end
+
+       # Read all tables in the database and construct a workload object
+       def workload(config)
+         client = new_client config
+
+         workload = Workload.new
+         results = if @array_options
+                     client.query('SHOW TABLES').each(**@array_options)
+                   else
+                     client.query('SHOW TABLES').each
+                   end
+
+         results.each do |table, *|
+           # TODO: Handle foreign keys
+           workload << entity_for_table(client, table)
+         end
+
+         workload
+       end
+
+       private
+
+       # Create a new client from the given configuration
+       def new_client(config)
+         if Object.const_defined?(:Mysql2)
+           @query_options = { stream: true, cache_rows: false }
+           @array_options = { as: :array }
+           Mysql2::Client.new host: config[:host],
+                              username: config[:username],
+                              password: config[:password],
+                              database: config[:database]
+         else
+           @query_options = false
+           @array_options = false
+           Mysql.connect config[:host], config[:username], config[:password],
+                         config[:database]
+         end
+       end
+
+       # Load a single index into the backend
+       # @return [void]
+       def load_index(index, config, show_progress, limit, skip_existing)
+         client = new_client config
+
+         # Skip this index if it's not empty
+         if skip_existing && !@backend.index_empty?(index)
+           @logger.info "Skipping index #{index.inspect}" if show_progress
+           return
+         end
+         @logger.info index.inspect if show_progress
+
+         sql, fields = index_sql index, limit
+         results = if @query_options
+                     client.query(sql, **@query_options)
+                   else
+                     client.query(sql).map { |row| hash_from_row row, fields }
+                   end
+
+         result_chunk = []
+         results.each do |result|
+           result_chunk.push result
+           next if result_chunk.length < 1000
+
+           @backend.index_insert_chunk index, result_chunk
+           result_chunk = []
+         end
+         @backend.index_insert_chunk index, result_chunk \
+           unless result_chunk.empty?
+       end
+
+       # Construct a hash from the given row returned by the client
+       # @return [Hash]
+       def hash_from_row(row, fields)
+         row_hash = {}
+         fields.each_with_index do |field, i|
+           value = field.class.value_from_string row[i]
+           row_hash[field.id] = value
+         end
+
+         row_hash
+       end
+
+       # Get all the fields selected by this index
+       def index_sql_select(index)
+         fields = index.hash_fields.to_a + index.order_fields + index.extra.to_a
+
+         [fields, fields.map do |field|
+           "#{field.parent.name}.#{field.name} AS " \
+           "#{field.parent.name}_#{field.name}"
+         end]
+       end
+
+       # Get the list of tables along with the join condition
+       # for a query to fetch index data
+       # @return [String]
+       def index_sql_tables(index)
+         # Create JOIN statements
+         tables = index.graph.entities.map(&:name).join ' JOIN '
+         return tables if index.graph.size == 1
+
+         tables << ' WHERE '
+         tables << index.path.each_cons(2).map do |_prev_key, key|
+           key = key.reverse if key.relationship == :many
+           "#{key.parent.name}.#{key.name}=" \
+           "#{key.entity.name}.#{key.entity.id_field.name}"
+         end.join(' AND ')
+
+         tables
+       end
+
+       # Construct a SQL statement to fetch the data to populate this index
+       # @return [String]
+       def index_sql(index, limit = nil)
+         # Get all the necessary fields
+         fields, select = index_sql_select index
+
+         # Construct the join condition
+         tables = index_sql_tables index
+
+         query = "SELECT #{select.join ', '} FROM #{tables}"
+         query += " LIMIT #{limit}" unless limit.nil?
+
+         @logger.debug query
+         [query, fields]
+       end
+
+       # Generate an entity definition from a given table
+       # @return [Entity]
+       def entity_for_table(client, table)
+         entity = Entity.new table
+         count = client.query("SELECT COUNT(*) FROM #{table}").first
+         entity.count = count.is_a?(Hash) ? count.values.first : count
+
+         describe = if @array_options
+                      client.query("DESCRIBE #{table}").each(**@array_options)
+                    else
+                      client.query("DESCRIBE #{table}").each
+                    end
+
+         describe.each do |name, type, _, key|
+           field_class = key == 'PRI' ? Fields::IDField : field_class(type)
+           entity << field_class.new(name)
+         end
+
+         entity
+       end
+
+       # Produce the Ruby class used to represent a MySQL type
+       # @return [Class]
+       def field_class(type)
+         case type
+         when /datetime/
+           Fields::DateField
+         when /float/
+           Fields::FloatField
+         when /text/
+           # TODO: Get length
+           Fields::StringField
+         when /varchar\(([0-9]+)\)/
+           # TODO: Use length
+           Fields::StringField
+         when /(tiny)?int/
+           Fields::IntegerField
+         end
+       end
+     end
+   end
+ end
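
For orientation, here is a minimal sketch of how this loader might be driven. It is not part of the gem: the connection values and database name are invented, and backend/indexes stand in for objects produced elsewhere in NoSE (a backend instance and a set of chosen indexes).

    require 'nose'

    # Hypothetical connection settings; MysqlLoader reads only the
    # :host, :username, :password and :database keys shown here.
    config = {
      host:     'localhost',
      username: 'nose',
      password: 'secret',
      database: 'example_db'
    }

    # Reverse-engineer a Workload with one Entity per table (via SHOW TABLES
    # and DESCRIBE), as implemented by the workload method above
    workload = NoSE::Loader::MysqlLoader.new.workload(config)

    # With a backend and a set of chosen indexes (placeholders here), data
    # could then be bulk-loaded in 1000-row chunks, capped at 10,000 rows:
    #
    #   NoSE::Loader::MysqlLoader.new(workload, backend)
    #                            .load(indexes, config, true, 10_000)
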
data/lib/nose/loader/random.rb
@@ -0,0 +1,48 @@
+ # frozen_string_literal: true
+
+ module NoSE
+   module Loader
+     # Load some random data (mostly useful for testing)
+     class RandomLoader < LoaderBase
+       def initialize(workload = nil, backend = nil)
+         @logger = Logging.logger['nose::loader::randomloader']
+
+         @workload = workload
+         @backend = backend
+       end
+
+       # Load a generated set of indexes with randomly generated data
+       # @return [void]
+       def load(indexes, config, show_progress = false, limit = nil,
+                skip_existing = true)
+         limit = 1 if limit.nil?
+
+         indexes.map!(&:to_id_graph).uniq! if @backend.by_id_graph
+         indexes.uniq.each do |index|
+           load_index index, config, show_progress, limit, skip_existing
+         end
+       end
+
+       private
+
+       # Load a single index into the backend
+       # @return [void]
+       def load_index(index, _config, show_progress, limit, skip_existing)
+         # Skip this index if it's not empty
+         if skip_existing && !@backend.index_empty?(index)
+           @logger.info "Skipping index #{index.inspect}" if show_progress
+           return
+         end
+         @logger.info index.inspect if show_progress
+
+         chunk = Array.new(limit) do
+           Hash[index.all_fields.map do |field|
+             [field.id, field.random_value]
+           end]
+         end
+
+         @backend.index_insert_chunk index, chunk
+       end
+     end
+   end
+ end
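
The random loader follows the same calling convention. A brief sketch, again with backend and indexes as placeholders for objects built elsewhere:

    # Seed each index with 100 rows of random field values for testing.
    # The config hash is ignored by this loader, so an empty one suffices.
    loader = NoSE::Loader::RandomLoader.new(nil, backend)
    loader.load indexes, {}, true, 100
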
data/lib/nose/loader/sql.rb
@@ -0,0 +1,105 @@
+ # frozen_string_literal: true
+
+ require 'sequel'
+
+ module NoSE
+   module Loader
+     # Load data from a SQL database (via Sequel) into a backend
+     class SqlLoader < LoaderBase
+       def initialize(workload = nil, backend = nil)
+         @logger = Logging.logger['nose::loader::sqlloader']
+
+         @workload = workload
+         @backend = backend
+       end
+
+       # Load a generated set of indexes with data from the source database
+       def load(indexes, config, show_progress = false, limit = nil,
+                skip_existing = true)
+         indexes.map!(&:to_id_graph).uniq! if @backend.by_id_graph
+
+         # XXX Assuming backend is thread-safe
+         Parallel.each(indexes, in_threads: 2) do |index|
+           client = new_client config
+
+           # Skip this index if it's not empty
+           if skip_existing && !@backend.index_empty?(index)
+             @logger.info "Skipping index #{index.inspect}" if show_progress
+             next
+           end
+           @logger.info index.inspect if show_progress
+
+           query = index_sql client, index, limit
+
+           result_chunk = []
+           query.each do |result|
+             result = Hash[result.map { |k, v| [k.to_s, v] }]
+             result_chunk.push result
+             if result_chunk.length >= 100
+               @backend.index_insert_chunk index, result_chunk
+               result_chunk = []
+             end
+           end
+           @backend.index_insert_chunk index, result_chunk \
+             unless result_chunk.empty?
+         end
+       end
+
+       private
+
+       # Create a new client from the given configuration
+       def new_client(config)
+         Sequel.connect config[:uri]
+       end
+
+       # Get all the fields selected by this index
+       # @return [Array<Symbol>]
+       def index_sql_select(index)
+         fields = index.hash_fields.to_a + index.order_fields + index.extra.to_a
+
+         fields.map do |field|
+           "#{field.parent.name}__#{field.name}___" \
+           "#{field.parent.name}_#{field.name}".to_sym
+         end
+       end
+
+       # Get the list of tables along with the join condition
+       # for a query to fetch index data
+       def index_sql_tables(index)
+         # Create JOIN statements
+         tables = index.graph.entities.map { |entity| entity.name.to_sym }
+         return [tables, []] if index.graph.size == 1
+
+         keys = index.path.each_cons(2).map do |_prev_key, key|
+           is_many = key.relationship == :many
+           key = key.reverse if is_many
+           fields = [key.entity.id_field.name.to_sym, key.name.to_sym]
+           fields = fields.reverse if is_many
+           Hash[[fields]]
+         end
+
+         [tables, keys]
+       end
+
+       # Construct a SQL statement to fetch the data to populate this index
+       def index_sql(client, index, limit = nil)
+         # Get all the necessary fields
+         select = index_sql_select index
+
+         # Construct the join condition
+         tables, keys = index_sql_tables index
+
+         query = client[tables.first]
+         keys.map.with_index do |key, i|
+           query = query.join tables[i + 1], key
+         end
+
+         query = query.select(*select)
+         query = query.limit limit unless limit.nil?
+
+         @logger.debug { query.sql }
+         query
+       end
+     end
+   end
+ end
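
The Sequel-based loader builds its join by passing each element of keys (a one-pair hash mapping a column of the newly joined table to a column of the preceding one) to Dataset#join, then selecting aliased columns. A small standalone illustration of that join style using Sequel's mock adapter (not code from the gem; the table and column names are invented):

    require 'sequel'

    db = Sequel.mock  # mock adapter: generates SQL without a real database

    dataset = db[:users]
              .join(:posts, user_id: :id)  # posts.user_id = users.id
              .select(Sequel[:users][:id].as(:users_id))

    puts dataset.sql
    # => SELECT users.id AS users_id FROM users
    #    INNER JOIN posts ON (posts.user_id = users.id)
    #    (identifier quoting varies by adapter)
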
data/lib/nose/loader.rb
@@ -0,0 +1,38 @@
+ # frozen_string_literal: true
+
+ module NoSE
+   # Loaders which insert data into indexes from external sources
+   module Loader
+     # Superclass for all data loaders
+     class LoaderBase
+       def initialize(workload = nil, backend = nil)
+         @workload = workload
+         @backend = backend
+       end
+
+       # :nocov:
+       # @abstract Subclasses should produce a workload
+       # @return [Workload]
+       def workload(_config)
+         fail NotImplementedError
+       end
+       # :nocov:
+
+       # :nocov:
+       # @abstract Subclasses should load data for the given list of indexes
+       # @return [void]
+       def load(_indexes, _config, _show_progress = false, _limit = nil,
+                _skip_existing = true)
+         fail NotImplementedError
+       end
+       # :nocov:
+
+       # @abstract Subclasses should generate a model from the external source
+       # :nocov:
+       def model(_config)
+         fail NotImplementedError
+       end
+       # :nocov:
+     end
+   end
+ end
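
To show the surface a LoaderBase subclass is expected to provide, here is a hypothetical loader reading newline-delimited JSON files. The :directory config key, the file layout, and the index.key accessor used to name files are all assumptions made for this sketch; only index_empty? and index_insert_chunk come from the backends used above.

    require 'json'

    module NoSE
      module Loader
        # Hypothetical loader reading one <index key>.jsonl file per index
        class JsonLinesLoader < LoaderBase
          # @return [void]
          def load(indexes, config, _show_progress = false, limit = nil,
                   skip_existing = true)
            indexes.each do |index|
              # Same skip-if-already-populated convention as the other loaders
              next if skip_existing && !@backend.index_empty?(index)

              path = File.join(config[:directory], "#{index.key}.jsonl")
              rows = File.foreach(path).map { |line| JSON.parse(line) }
              rows = rows.first(limit) unless limit.nil?
              @backend.index_insert_chunk index, rows
            end
          end
        end
      end
    end
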
data/lib/nose/model/entity.rb
@@ -0,0 +1,136 @@
+ # frozen_string_literal: true
+
+ module NoSE
+   # A representation of an object in the conceptual data model
+   class Entity
+     attr_reader :fields
+     attr_reader :foreign_keys, :name
+     attr_accessor :count
+
+     def initialize(name, &block)
+       @name = name
+       @fields = {}
+       @foreign_keys = {}
+       @count = 1
+
+       # Precompute the hash
+       hash
+
+       # Apply the DSL
+       EntityDSL.new(self).instance_eval(&block) if block_given?
+     end
+
+     # :nocov:
+     # @return [String]
+     def to_color
+       "[light_blue]#{@name}[/] [#{fields.each_key.map(&:to_color).join ', '}]"
+     end
+     # :nocov:
+
+     # Compare by name
+     # @return [Boolean]
+     def ==(other)
+       @name == other.instance_variable_get(:@name)
+     end
+     alias eql? ==
+
+     # The hash is based on the name of the entity
+     # @return [Fixnum]
+     def hash
+       @hash ||= @name.hash
+     end
+
+     # Get the key field for the entity
+     # @return [Fields::IDField]
+     def id_field
+       fields.each_value.find(&:primary_key?)
+     end
+
+     # Adds a {Fields::Field} to the entity
+     # @return [self] the current entity to allow chaining
+     def <<(field, freeze: true)
+       if field.is_a? Fields::ForeignKeyField
+         @foreign_keys[field.name] = field
+       else
+         @fields[field.name] = field
+       end
+
+       field.instance_variable_set(:@parent, self)
+       field.hash
+       field.freeze if freeze
+
+       self
+     end
+
+     # Shortcut for {#count=}
+     # @return [Entity]
+     def *(other)
+       fail TypeError, 'count must be an integer' unless other.is_a? Integer
+       @count = other
+
+       self
+     end
+
+     # Get the field on the entity with the given name
+     # @return [Field]
+     def [](field_name)
+       field = @fields[field_name] || @foreign_keys[field_name]
+       fail FieldNotFound if field.nil?
+       field
+     end
+
+     # Return true if the entity contains a field with the given name
+     def field?(field)
+       @fields.key? field
+     end
+
+     # Generate a hash with random values for fields in the entity
+     # @return [Hash]
+     def random_entity(prefix_entity = true)
+       Hash[@fields.map do |name, field|
+         key = name
+         key = "#{@name}_#{name}" if prefix_entity
+         [key, field.random_value]
+       end]
+     end
+   end
+
+   # A helper class for DSL creation to avoid messing with {Entity}
+   class EntityDSL
+     def initialize(entity)
+       @entity = entity
+     end
+
+     # rubocop:disable MethodName
+
+     # Specify a list of field names for the primary key
+     def PrimaryKey(*names)
+       # Unset the old keys and set new ones,
+       # we dup because the fields are frozen
+       @entity.fields.each_value do |field|
+         next unless field.primary_key?
+         field = field.dup
+         field.primary_key = false
+         @entity.fields[field.name] = field
+         field.freeze
+       end
+
+       names.each do |name|
+         field = @entity[name].dup
+         field.primary_key = true
+         @entity.fields[name] = field
+         field.freeze
+       end
+     end
+
+     # rubocop:enable MethodName
+
+     def etc(size = 1)
+       @entity << Fields::HashField.new('**', size)
+     end
+   end
+
+   # Raised when looking up a field on an entity which does not exist
+   class FieldNotFound < StandardError
+   end
+ end
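
Finally, a short sketch of building a model entity by hand with the operators defined above. The field classes live in data/lib/nose/model/fields.rb (not shown in this diff), so their exact constructor signatures are assumed here.

    require 'nose'

    # * sets the expected number of rows (see the * operator above)
    user = NoSE::Entity.new('User') * 100_000

    user << NoSE::Fields::IDField.new('UserID')
    user << NoSE::Fields::StringField.new('Username')

    user.id_field.name   # => "UserID"
    user['Username']     # field lookup; raises FieldNotFound if missing
    user.random_entity   # => { "User_UserID" => ..., "User_Username" => ... }
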