nose 0.1.0pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
data/lib/nose/cost.rb ADDED
@@ -0,0 +1,68 @@
1
# frozen_string_literal: true

module NoSE
  # Cost models for steps of backend statement execution
  module Cost
    # Cost model for a backend database
    class Cost
      include Supertype

      def initialize(**options)
        @options = options
      end

      # The cost of filtering intermediate results
      # @return [Integer]
      def filter_cost(_step)
        # Assume this has no cost and the cost is captured in the fact that we
        # have to retrieve more data earlier. All this does is skip records.
        0
      end

      # The cost of limiting a result set
      # @return [Integer]
      def limit_cost(_step)
        # This is basically free since we just discard data
        0
      end

      # The cost of sorting a set of results
      # @return [Integer]
      def sort_cost(_step)
        # TODO: Find some estimate of sort cost
        # This could be partially captured by the fact that sort + limit
        # effectively removes the limit
        1
      end

      # The cost of performing a lookup via an index
      # (abstract — each backend supplies its own estimate)
      # @return [Integer]
      def index_lookup_cost(_step)
        fail NotImplementedError, 'Must be implemented in a subclass'
      end

      # The cost of performing a deletion from an index
      # (abstract — each backend supplies its own estimate)
      # @return [Integer]
      def delete_cost(_step)
        fail NotImplementedError, 'Must be implemented in a subclass'
      end

      # The cost of performing an insert into an index
      # (abstract — each backend supplies its own estimate)
      # @return [Integer]
      def insert_cost(_step)
        fail NotImplementedError, 'Must be implemented in a subclass'
      end

      # This is here for debugging purposes because we need a cost
      # @return [Integer]
      def pruned_cost(_step)
        0
      end
    end
  end
end

require_relative 'cost/cassandra'
require_relative 'cost/entity_count'
require_relative 'cost/field_size'
require_relative 'cost/request_count'
data/lib/nose/debug.rb ADDED
@@ -0,0 +1,45 @@
1
# frozen_string_literal: true
# rubocop:disable Lint/HandleExceptions
begin
  require 'binding_of_caller'
  require 'pry'
rescue LoadError
  # Ignore in case we are not in development mode
end
# rubocop:enable Lint/HandleExceptions

module NoSE
  # Various helpful debugging snippets
  module Debug
    # Convenience method to break in IndexLookupStep when
    # a particular set of indexes is reached when planning
    # @return [void]
    def self.break_on_indexes(*index_keys)
      # Reach into the caller's frame to grab the planning state
      # NOTE(review): assumes the caller has `parent` and `index` locals
      # in scope — only meaningful when invoked from index lookup planning
      apply = binding.of_caller(1)
      parent = apply.eval 'parent'
      index = apply.eval 'index'
      current_keys = parent.parent_steps.indexes.map(&:key) << index.key

      # rubocop:disable Lint/Debugger
      binding.pry if current_keys == index_keys
      # rubocop:enable Lint/Debugger
    end

    # Export entities in a model as global
    # variables for easier access when debugging
    # @return [void]
    def self.export_model(model)
      model.entities.each do |name, entity|
        # Expose each entity as a global named after the entity
        # rubocop:disable Lint/Eval
        eval("$#{name} = entity")
        # rubocop:enable Lint/Eval

        # Also expose each field as a method on the entity for quick access
        entity.fields.merge(entity.foreign_keys).each do |field_name, field|
          entity.define_singleton_method field_name.to_sym, -> { field }
        end
      end

      nil
    end
  end
end
@@ -0,0 +1,199 @@
1
# frozen_string_literal: true

require 'logging'

module NoSE
  # Produces potential indices to be used in schemas
  class IndexEnumerator
    # @param workload [Workload] the workload whose queries drive enumeration
    def initialize(workload)
      @logger = Logging.logger['nose::enumerator']

      @workload = workload
    end

    # Produce all possible indices for a given query
    # @return [Array<Index>]
    def indexes_for_query(query)
      @logger.debug "Enumerating indexes for query #{query.text}"

      # The range predicate (if any) must precede the ordering fields
      # since both are served by the clustering key
      range = if query.range_field.nil?
                query.order
              else
                [query.range_field] + query.order
              end

      eq = query.eq_fields.group_by(&:parent)
      # Entities with no equality predicates yield an empty list
      # (the proc does not store the default back into the hash)
      eq.default_proc = ->(*) { [] }

      range = range.group_by(&:parent)
      range.default_proc = ->(*) { [] }

      # Enumerate candidates for every subgraph of the query graph and
      # always include the full materialized view as a fallback plan
      query.graph.subgraphs.flat_map do |graph|
        indexes_for_graph graph, query.select, eq, range
      end.uniq << query.materialize_view
    end

    # Produce all possible indices for a given workload
    # @return [Set<Index>]
    def indexes_for_workload(additional_indexes = [], by_id_graph = false)
      queries = @workload.queries
      indexes = Parallel.map(queries) do |query|
        indexes_for_query(query).to_a
      end.inject(additional_indexes, &:+)

      # Add indexes generated for support queries; supporting indexes may
      # themselves need support queries, so expand one further level
      supporting = support_indexes indexes, by_id_graph
      supporting += support_indexes supporting, by_id_graph
      indexes += supporting

      # Deduplicate indexes, combine them and deduplicate again
      indexes.uniq!
      combine_indexes indexes
      indexes.uniq!

      @logger.debug do
        "Indexes for workload:\n" + indexes.map.with_index do |index, i|
          "#{i} #{index.inspect}"
        end.join("\n")
      end

      indexes
    end

    private

    # Produce the indexes necessary for support queries for these indexes
    # @return [Array<Index>]
    def support_indexes(indexes, by_id_graph)
      # If indexes are grouped by ID graph, convert them before updating
      # since other updates will be managed automatically by index maintenance
      indexes = indexes.map(&:to_id_graph).uniq if by_id_graph

      # Collect all possible support queries
      queries = indexes.flat_map do |index|
        @workload.updates.flat_map do |update|
          update.support_queries(index)
        end
      end

      # Enumerate indexes for each support query
      queries.uniq!
      queries.flat_map do |query|
        indexes_for_query(query).to_a
      end
    end

    # Combine the data of indices based on matching hash fields
    # Appends the combined indexes to the given array in place
    # @return [void]
    def combine_indexes(indexes)
      no_order_indexes = indexes.select do |index|
        index.order_fields.empty?
      end
      no_order_indexes = no_order_indexes.group_by do |index|
        [index.hash_fields, index.graph]
      end

      no_order_indexes.each do |(hash_fields, graph), hash_indexes|
        extra_choices = hash_indexes.map(&:extra).uniq

        # XXX More combos?
        # Only pairwise combinations of extra field sets are merged
        combos = extra_choices.combination(2)

        combos.map do |combo|
          indexes << Index.new(hash_fields, [], combo.inject(Set.new, &:+),
                               graph)
          @logger.debug "Enumerated combined index #{indexes.last.inspect}"
        end
      end
    end

    # Get all possible choices of fields to use for equality
    # @return [Array<Array>]
    def eq_choices(graph, eq)
      entity_choices = graph.entities.flat_map do |entity|
        # Get the fields for the entity and add in the IDs
        # NOTE(review): when `eq` already has an entry for this entity,
        # << and uniq! mutate the array stored in the hash across calls —
        # harmless here since only the ID field is added, but confirm
        entity_fields = eq[entity] << entity.id_field
        entity_fields.uniq!
        1.upto(entity_fields.count).flat_map do |n|
          entity_fields.permutation(n).to_a
        end
      end

      # Combine per-entity choices across entities in every possible order
      2.upto(graph.entities.length).flat_map do |n|
        entity_choices.permutation(n).map(&:flatten).to_a
      end + entity_choices
    end

    # Get fields which should be included in an index for the given graph
    # @return [Array<Array>]
    def extra_choices(graph, select, eq, range)
      choices = eq.values + range.values << select.to_a

      # NOTE(review): `select` returns a new array which is discarded here,
      # so no filtering actually happens — presumably `select!` was
      # intended to restrict each choice to fields on this graph; invalid
      # combinations are later rejected by Index validation
      choices.each do |choice|
        choice.select { |field| graph.entities.include?(field.parent) }
      end

      choices.reject(&:empty?) << []
    end

    # Get all possible indices which jump a given piece of a query graph
    # @return [Array<Index>]
    def indexes_for_graph(graph, select, eq, range)
      eq_choices = eq_choices graph, eq
      range_fields = graph.entities.map { |entity| range[entity] }.reduce(&:+)
      range_fields.uniq!
      # All permutations of all prefixes of the range fields, plus no order
      order_choices = range_fields.prefixes.flat_map do |fields|
        fields.permutation.to_a
      end.uniq << []
      extra_choices = extra_choices graph, select, eq, range
      extra_choices = 1.upto(extra_choices.length).flat_map do |n|
        extra_choices.combination(n).map(&:flatten).map(&:uniq)
      end.uniq

      # Generate all possible indices based on the field choices
      choices = eq_choices.product(extra_choices)
      indexes = choices.map! do |index, extra|
        indexes = []

        order_choices.each do |order|
          # Append the primary key of the entities in the graph if needed
          order += graph.entities.sort_by(&:name).map(&:id_field) -
                   (index + order)

          # Partition into the ordering portion
          index.partitions.each do |index_prefix, order_prefix|
            # Hash fields must all come from a single entity
            hash_fields = index_prefix.take_while do |field|
              field.parent == index.first.parent
            end
            order_fields = index_prefix[hash_fields.length..-1] + \
                           order_prefix + order
            extra_fields = extra - hash_fields - order_fields
            next if order_fields.empty? && extra_fields.empty?

            new_index = generate_index hash_fields, order_fields, extra_fields,
                                       graph
            indexes << new_index unless new_index.nil?
          end
        end

        indexes
      end.inject([], &:+)
      indexes.flatten!

      indexes
    end

    # Generate a new index and ignore if invalid
    # @return [Index]
    def generate_index(hash, order, extra, graph)
      begin
        index = Index.new hash, order.uniq, extra, graph
        @logger.debug { "Enumerated #{index.inspect}" }
      rescue InvalidIndexException
        # This combination of fields is not valid, that's ok
        index = nil
      end

      index
    end
  end
end
@@ -0,0 +1,239 @@
1
+ # frozen_string_literal: true
2
+
3
+ module NoSE
4
# A representation of materialized views over fields in an entity
class Index
  attr_reader :hash_fields, :order_fields, :extra, :all_fields, :path,
              :entries, :entry_size, :size, :hash_count, :per_hash_count,
              :graph

  # @param hash_fields  [Enumerable<Fields::Field>] partition key fields
  # @param order_fields [Enumerable<Fields::Field>] clustering key fields
  # @param extra        [Enumerable<Fields::Field>] additional stored fields
  # @param graph        [QueryGraph::Graph] the graph spanned by the index
  # @param saved_key    [String, nil] previously generated key to reuse
  def initialize(hash_fields, order_fields, extra, graph, saved_key = nil)
    order_set = order_fields.to_set
    @hash_fields = hash_fields.to_set
    # Drop order/extra fields already covered earlier in the key
    @order_fields = order_fields.delete_if { |e| hash_fields.include? e }
    @extra = extra.to_set.delete_if do |e|
      @hash_fields.include?(e) || order_set.include?(e)
    end
    @all_fields = Set.new(@hash_fields).merge(order_set).merge(@extra)

    validate_hash_fields

    # Store whether this index is an identity
    @identity = @hash_fields == [
      @hash_fields.first.parent.id_field
    ].to_set && graph.nodes.size == 1

    @graph = graph
    @path = graph.longest_path
    @path = nil unless @path.length == graph.size

    validate_graph

    build_hash saved_key
  end

  # Check if this index maps from the primary key to fields from one entity
  # @return [Boolean]
  def identity?
    @identity
  end

  # A simple key which uniquely identifies the index
  # @return [String]
  def key
    @key ||= "i#{Zlib.crc32 hash_str}"
  end

  # Look up a field in the index based on its ID
  # @return [Fields::Field]
  def [](field_id)
    @all_fields.find { |field| field.id == field_id }
  end

  # Check if this index is an ID graph
  # @return [Boolean]
  def id_graph?
    # Fixed: the original called `primary_key` (without `?`) on the order
    # fields, unlike the predicate used for the hash fields
    @hash_fields.all?(&:primary_key?) && @order_fields.all?(&:primary_key?)
  end

  # Produce an index with the same fields but keyed by entities in the graph
  def to_id_graph
    return self if id_graph?

    all_ids = (@hash_fields.to_a + @order_fields + @extra.to_a)
    all_ids.map! { |f| f.parent.id_field }.uniq!

    hash_fields = [all_ids.first]
    order_fields = all_ids[1..-1]
    extra = @all_fields - hash_fields - order_fields

    Index.new hash_fields, order_fields, extra, @graph
  end

  # :nocov:
  def to_color
    fields = [@hash_fields, @order_fields, @extra].map do |field_group|
      '[' + field_group.map(&:inspect).join(', ') + ']'
    end

    "[magenta]#{key}[/] #{fields[0]} #{fields[1]} → #{fields[2]}" \
      " [yellow]$#{size}[/]" \
      " [magenta]#{@graph.inspect}[/]"
  end
  # :nocov:

  # Two indices are equal if they contain the same fields
  # @return [Boolean]
  def ==(other)
    hash == other.hash
  end
  alias eql? ==

  # Hash based on the fields, their keys, and the graph
  # @return [String]
  def hash_str
    @hash_str ||= [
      @hash_fields.map(&:id).sort!,
      @order_fields.map(&:id),
      @extra.map(&:id).sort!,
      @graph.unique_edges.map(&:canonical_params).sort!
    ].to_s.freeze
  end

  def hash
    @hash ||= Zlib.crc32 hash_str
  end

  # Check if the index contains a given field
  # @return [Boolean]
  def contains_field?(field)
    @all_fields.include? field
  end

  private

  # Initialize the hash function and freeze ourselves
  # @return [void]
  def build_hash(saved_key)
    @key = saved_key

    # Force memoization of the hash and key before freezing
    hash
    key
    calculate_size
    freeze
  end

  # Check for valid hash fields in an index
  # @return [void]
  def validate_hash_fields
    fail InvalidIndexException, 'hash fields cannot be empty' \
      if @hash_fields.empty?

    fail InvalidIndexException, 'hash fields can only involve one entity' \
      if @hash_fields.map(&:parent).to_set.size > 1
  end

  # Ensure an index is nonempty
  # NOTE(review): this validation is never invoked by #initialize
  # @return [void]
  def validate_nonempty
    fail InvalidIndexException, 'must have fields other than hash fields' \
      if @order_fields.empty? && @extra.empty?
  end

  # Ensure an index and its fields correspond to a valid graph
  # @return [void]
  def validate_graph
    validate_graph_entities
    validate_graph_keys
  end

  # Ensure the graph of the index is valid
  # @return [void]
  def validate_graph_entities
    entities = @all_fields.map(&:parent).to_set
    # Fixed error message: the original read 'do match' despite firing
    # when the entities do NOT match
    fail InvalidIndexException, 'graph entities do not match index' \
      unless entities == @graph.entities.to_set
  end

  # We must have the primary keys of all the entities in the graph
  # @return [void]
  def validate_graph_keys
    fail InvalidIndexException, 'missing graph entity keys' \
      unless @graph.entities.map(&:id_field).all? do |field|
        @hash_fields.include?(field) || @order_fields.include?(field)
      end
  end

  # Precalculate the size of the index
  # @return [void]
  def calculate_size
    @hash_count = @hash_fields.product_by(&:cardinality)

    # XXX This only works if foreign keys span all possible keys
    # Take the maximum possible count at each join and multiply
    @entries = @graph.entities.map(&:count).max
    @per_hash_count = (@entries * 1.0 / @hash_count)

    @entry_size = @all_fields.sum_by(&:size)
    @size = @entries * @entry_size
  end
end
181
+
182
# Thrown when something tries to create an invalid index
# (e.g. empty hash fields, or fields spanning entities not in the graph)
class InvalidIndexException < StandardError
end
185
+
186
# Reopen Entity so entities can construct indexes over themselves
class Entity
  # Build a basic index mapping this entity's primary key
  # to all of its remaining fields
  # @return [Index]
  def simple_index
    key = id_field
    non_key_fields = fields.values - [key]
    Index.new [key], [], non_key_fields,
              QueryGraph::Graph.from_path([key]), name
  end
end
195
+
196
# Allow statements to materialize views
class Statement
  # Construct an index which acts as a materialized view for a query
  # @return [Index]
  def materialize_view
    eq = materialized_view_eq join_order.first
    order_fields = materialized_view_order(join_order.first) - eq

    Index.new(eq, order_fields,
              all_fields - (@eq_fields + @order).to_set, @graph)
  end

  private

  # Get the fields used as partition keys for a materialized view
  # based over a given entity
  # @return [Array<Fields::Field>]
  def materialized_view_eq(hash_entity)
    eq = @eq_fields.select { |field| field.parent == hash_entity }
    # Fall back to the ID of the last entity in the join order
    # when there are no equality predicates on the hash entity
    eq = [join_order.last.id_field] if eq.empty?

    eq
  end

  # Get the ordered keys for a materialized view
  # @return [Array<Fields::Field>]
  def materialized_view_order(hash_entity)
    # Start the ordered fields with the equality predicates
    # on other entities, followed by all of the attributes
    # used in ordering, then the range field
    order_fields = @eq_fields.select do |field|
      field.parent != hash_entity
    end + @order
    if @range_field && !@order.include?(@range_field)
      order_fields << @range_field
    end

    # Ensure we include IDs of the final entity
    order_fields += join_order.map(&:id_field)

    order_fields.uniq
  end
end
239
+ end
@@ -0,0 +1,99 @@
1
# frozen_string_literal: true

require 'formatador'
require 'smarter_csv'
require 'zlib'

module NoSE
  module Loader
    # Load data into an index from a set of CSV files
    class CsvLoader < LoaderBase
      def initialize(workload = nil, backend = nil)
        super

        @logger = Logging.logger['nose::loader::csvloader']
      end

      # Load data for all the indexes
      # @param indexes [Array<Index>] indexes to populate
      # @param config [Hash] loader configuration (reads :directory)
      # @param show_progress [Boolean] whether to display a progress bar
      # @param limit [Integer, nil] row limit used to size the progress bar
      # @param skip_existing [Boolean] skip indexes which already hold data
      def load(indexes, config, show_progress = false, limit = nil,
               skip_existing = true)
        indexes.map!(&:to_id_graph).uniq! if @backend.by_id_graph

        simple_indexes = find_simple_indexes indexes, skip_existing
        simple_indexes.each do |entity, simple_index_list|
          # Each entity's data lives in a CSV file named after the entity
          filename = File.join config[:directory], "#{entity.name}.csv"
          total_rows = (limit || 0) - 1 # account for header row
          # NOTE(review): when `limit` is given the file's line count is
          # still added on top of it, so the progress total may be
          # overstated — confirm intended behaviour
          File.open(filename) { |file| file.each_line { total_rows += 1 } }

          progress = initialize_progress entity, simple_index_list,
                                         total_rows if show_progress
          load_file_indexes filename, entity, simple_index_list, progress
        end
      end

      private

      # Find the simple indexes we should populate
      # (those spanning a single entity, optionally skipping non-empty ones)
      # @return [Hash<Entity, Index>]
      def find_simple_indexes(indexes, skip_existing)
        simple_indexes = indexes.select do |index|
          index.graph.size == 1 &&
            !(skip_existing && !@backend.index_empty?(index))
        end

        simple_indexes.group_by do |index|
          index.hash_fields.first.parent
        end
      end

      # Initialize a progress bar to reporting loading results
      # @return [Formatador::ProgressBar]
      def initialize_progress(entity, simple_index_list, total_rows)
        @logger.info "Loading simple indexes for #{entity.name}"
        @logger.info simple_index_list.map(&:key).join(', ')

        Formatador.new.redisplay_progressbar 0, total_rows
        Formatador::ProgressBar.new total_rows, started_at: Time.now.utc
      end

      # Load all indexes for a given file
      # @return [void]
      def load_file_indexes(filename, entity, simple_index_list, progress)
        # Stream the CSV in chunks, loading sub-chunks in parallel
        SmarterCSV.process(filename,
                           downcase_header: false,
                           chunk_size: 1000,
                           convert_values_to_numeric: false) do |chunk|
          Parallel.each(chunk.each_slice(100),
                        finish: (lambda do |_, _, _|
                          next if progress.nil?
                          # Cap the increment so we never exceed the total
                          inc = [progress.total - progress.current, 100].min
                          progress.increment inc
                        end)) do |minichunk|
            load_simple_chunk minichunk, entity, simple_index_list
          end
        end
      end

      # Load a chunk of data from a simple entity index
      # @return [void]
      def load_simple_chunk(chunk, entity, indexes)
        # Prefix all hash keys with the entity name and convert values
        chunk.map! do |row|
          index_row = {}
          row.each_key do |key|
            # Let the field class parse the raw CSV string into its type
            field_class = entity[key.to_s].class
            value = field_class.value_from_string row[key]
            index_row["#{entity.name}_#{key}"] = value
          end

          index_row
        end

        # Insert the batch into the index
        indexes.each do |index|
          @backend.index_insert_chunk index, chunk
        end
      end
    end
  end
end