nose 0.1.0pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +7 -0
  2. data/lib/nose/backend/cassandra.rb +390 -0
  3. data/lib/nose/backend/file.rb +185 -0
  4. data/lib/nose/backend/mongo.rb +242 -0
  5. data/lib/nose/backend.rb +557 -0
  6. data/lib/nose/cost/cassandra.rb +33 -0
  7. data/lib/nose/cost/entity_count.rb +27 -0
  8. data/lib/nose/cost/field_size.rb +31 -0
  9. data/lib/nose/cost/request_count.rb +32 -0
  10. data/lib/nose/cost.rb +68 -0
  11. data/lib/nose/debug.rb +45 -0
  12. data/lib/nose/enumerator.rb +199 -0
  13. data/lib/nose/indexes.rb +239 -0
  14. data/lib/nose/loader/csv.rb +99 -0
  15. data/lib/nose/loader/mysql.rb +199 -0
  16. data/lib/nose/loader/random.rb +48 -0
  17. data/lib/nose/loader/sql.rb +105 -0
  18. data/lib/nose/loader.rb +38 -0
  19. data/lib/nose/model/entity.rb +136 -0
  20. data/lib/nose/model/fields.rb +293 -0
  21. data/lib/nose/model.rb +113 -0
  22. data/lib/nose/parser.rb +202 -0
  23. data/lib/nose/plans/execution_plan.rb +282 -0
  24. data/lib/nose/plans/filter.rb +99 -0
  25. data/lib/nose/plans/index_lookup.rb +302 -0
  26. data/lib/nose/plans/limit.rb +42 -0
  27. data/lib/nose/plans/query_planner.rb +361 -0
  28. data/lib/nose/plans/sort.rb +49 -0
  29. data/lib/nose/plans/update.rb +60 -0
  30. data/lib/nose/plans/update_planner.rb +270 -0
  31. data/lib/nose/plans.rb +135 -0
  32. data/lib/nose/proxy/mysql.rb +275 -0
  33. data/lib/nose/proxy.rb +102 -0
  34. data/lib/nose/query_graph.rb +481 -0
  35. data/lib/nose/random/barbasi_albert.rb +48 -0
  36. data/lib/nose/random/watts_strogatz.rb +50 -0
  37. data/lib/nose/random.rb +391 -0
  38. data/lib/nose/schema.rb +89 -0
  39. data/lib/nose/search/constraints.rb +143 -0
  40. data/lib/nose/search/problem.rb +328 -0
  41. data/lib/nose/search/results.rb +200 -0
  42. data/lib/nose/search.rb +266 -0
  43. data/lib/nose/serialize.rb +747 -0
  44. data/lib/nose/statements/connection.rb +160 -0
  45. data/lib/nose/statements/delete.rb +83 -0
  46. data/lib/nose/statements/insert.rb +146 -0
  47. data/lib/nose/statements/query.rb +161 -0
  48. data/lib/nose/statements/update.rb +101 -0
  49. data/lib/nose/statements.rb +645 -0
  50. data/lib/nose/timing.rb +79 -0
  51. data/lib/nose/util.rb +305 -0
  52. data/lib/nose/workload.rb +244 -0
  53. data/lib/nose.rb +37 -0
  54. data/templates/workload.erb +42 -0
  55. metadata +700 -0
data/lib/nose/cost.rb ADDED
@@ -0,0 +1,68 @@
+ # frozen_string_literal: true
+
+ module NoSE
+   # Cost models for steps of backend statement execution
+   module Cost
+     # Cost model for a backend database
+     class Cost
+       include Supertype
+
+       def initialize(**options)
+         @options = options
+       end
+
+       # The cost of filtering intermediate results
+       # @return [Fixnum]
+       def filter_cost(_step)
+         # Assume this has no cost and the cost is captured in the fact that
+         # we have to retrieve more data earlier. All this does is skip
+         # records.
+         0
+       end
+
+       # The cost of limiting a result set
+       # @return [Fixnum]
+       def limit_cost(_step)
+         # This is basically free since we just discard data
+         0
+       end
+
+       # The cost of sorting a set of results
+       # @return [Fixnum]
+       def sort_cost(_step)
+         # TODO: Find some estimate of sort cost
+         # This could be partially captured by the fact that sort + limit
+         # effectively removes the limit
+         1
+       end
+
+       # The cost of performing a lookup via an index
+       # @return [Fixnum]
+       def index_lookup_cost(_step)
+         fail NotImplementedError, 'Must be implemented in a subclass'
+       end
+
+       # The cost of performing a deletion from an index
+       # @return [Fixnum]
+       def delete_cost(_step)
+         fail NotImplementedError, 'Must be implemented in a subclass'
+       end
+
+       # The cost of performing an insert into an index
+       # @return [Fixnum]
+       def insert_cost(_step)
+         fail NotImplementedError, 'Must be implemented in a subclass'
+       end
+
+       # This is here for debugging purposes because we need a cost
+       # @return [Fixnum]
+       def pruned_cost(_step)
+         0
+       end
+     end
+   end
+ end
+
+ require_relative 'cost/cassandra'
+ require_relative 'cost/entity_count'
+ require_relative 'cost/field_size'
+ require_relative 'cost/request_count'
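Concrete cost models live in the four files required above; each overrides the three abstract methods. For orientation, a minimal sketch of such a subclass (the FlatCost name is hypothetical; include Subtype is assumed as the counterpart of the Supertype mixin used above):

# Hypothetical cost model charging one unit per backend request
module NoSE
  module Cost
    class FlatCost < Cost
      include Subtype  # assumed counterpart of the Supertype mixin above

      # Every index lookup is a single request
      # @return [Fixnum]
      def index_lookup_cost(_step)
        1
      end

      # Deletes are a single request
      # @return [Fixnum]
      def delete_cost(_step)
        1
      end

      # Inserts are a single request
      # @return [Fixnum]
      def insert_cost(_step)
        1
      end
    end
  end
end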
data/lib/nose/debug.rb ADDED
@@ -0,0 +1,45 @@
+ # frozen_string_literal: true
+ # rubocop:disable Lint/HandleExceptions
+ begin
+   require 'binding_of_caller'
+   require 'pry'
+ rescue LoadError
+   # Ignore in case we are not in development mode
+ end
+ # rubocop:enable Lint/HandleExceptions
+
+ module NoSE
+   # Various helpful debugging snippets
+   module Debug
+     # Convenience method to break in IndexLookupStep when
+     # a particular set of indexes is reached when planning
+     # @return [void]
+     def self.break_on_indexes(*index_keys)
+       apply = binding.of_caller(1)
+       parent = apply.eval 'parent'
+       index = apply.eval 'index'
+       current_keys = parent.parent_steps.indexes.map(&:key) << index.key
+
+       # rubocop:disable Lint/Debugger
+       binding.pry if current_keys == index_keys
+       # rubocop:enable Lint/Debugger
+     end
+
+     # Export entities in a model as global
+     # variables for easier access when debugging
+     # @return [void]
+     def self.export_model(model)
+       model.entities.each do |name, entity|
+         # rubocop:disable Lint/Eval
+         eval("$#{name} = entity")
+         # rubocop:enable Lint/Eval
+
+         entity.fields.merge(entity.foreign_keys).each do |field_name, field|
+           entity.define_singleton_method field_name.to_sym, -> { field }
+         end
+       end
+
+       nil
+     end
+   end
+ end
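A sketch of how export_model might be used from a console session (the workload variable and the users entity are assumptions for illustration):

require 'nose'

# Assume a workload whose model contains a `users` entity is loaded
NoSE::Debug.export_model workload.model

# Each entity is now a global variable and each field a method on it
$users           # => the users entity
$users.username  # => the username field of that entity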
data/lib/nose/enumerator.rb ADDED
@@ -0,0 +1,199 @@
+ # frozen_string_literal: true
+
+ require 'logging'
+
+ module NoSE
+   # Produces potential indices to be used in schemas
+   class IndexEnumerator
+     def initialize(workload)
+       @logger = Logging.logger['nose::enumerator']
+
+       @workload = workload
+     end
+
+     # Produce all possible indices for a given query
+     # @return [Array<Index>]
+     def indexes_for_query(query)
+       @logger.debug "Enumerating indexes for query #{query.text}"
+
+       range = if query.range_field.nil?
+                 query.order
+               else
+                 [query.range_field] + query.order
+               end
+
+       eq = query.eq_fields.group_by(&:parent)
+       eq.default_proc = ->(*) { [] }
+
+       range = range.group_by(&:parent)
+       range.default_proc = ->(*) { [] }
+
+       query.graph.subgraphs.flat_map do |graph|
+         indexes_for_graph graph, query.select, eq, range
+       end.uniq << query.materialize_view
+     end
+
+     # Produce all possible indices for a given workload
+     # @return [Set<Index>]
+     def indexes_for_workload(additional_indexes = [], by_id_graph = false)
+       queries = @workload.queries
+       indexes = Parallel.map(queries) do |query|
+         indexes_for_query(query).to_a
+       end.inject(additional_indexes, &:+)
+
+       # Add indexes generated for support queries
+       supporting = support_indexes indexes, by_id_graph
+       supporting += support_indexes supporting, by_id_graph
+       indexes += supporting
+
+       # Deduplicate indexes, combine them and deduplicate again
+       indexes.uniq!
+       combine_indexes indexes
+       indexes.uniq!
+
+       @logger.debug do
+         "Indexes for workload:\n" + indexes.map.with_index do |index, i|
+           "#{i} #{index.inspect}"
+         end.join("\n")
+       end
+
+       indexes
+     end
+
+     private
+
+     # Produce the indexes necessary for support queries for these indexes
+     # @return [Array<Index>]
+     def support_indexes(indexes, by_id_graph)
+       # If indexes are grouped by ID graph, convert them before updating
+       # since other updates will be managed automatically by index
+       # maintenance
+       indexes = indexes.map(&:to_id_graph).uniq if by_id_graph
+
+       # Collect all possible support queries
+       queries = indexes.flat_map do |index|
+         @workload.updates.flat_map do |update|
+           update.support_queries(index)
+         end
+       end
+
+       # Enumerate indexes for each support query
+       queries.uniq!
+       queries.flat_map do |query|
+         indexes_for_query(query).to_a
+       end
+     end
+
+     # Combine the data of indices based on matching hash fields
+     def combine_indexes(indexes)
+       no_order_indexes = indexes.select do |index|
+         index.order_fields.empty?
+       end
+       no_order_indexes = no_order_indexes.group_by do |index|
+         [index.hash_fields, index.graph]
+       end
+
+       no_order_indexes.each do |(hash_fields, graph), hash_indexes|
+         extra_choices = hash_indexes.map(&:extra).uniq
+
+         # XXX More combos?
+         combos = extra_choices.combination(2)
+
+         combos.map do |combo|
+           indexes << Index.new(hash_fields, [], combo.inject(Set.new, &:+),
+                                graph)
+           @logger.debug "Enumerated combined index #{indexes.last.inspect}"
+         end
+       end
+     end
+
+     # Get all possible choices of fields to use for equality
+     # @return [Array<Array>]
+     def eq_choices(graph, eq)
+       entity_choices = graph.entities.flat_map do |entity|
+         # Get the fields for the entity and add in the IDs
+         entity_fields = eq[entity] << entity.id_field
+         entity_fields.uniq!
+         1.upto(entity_fields.count).flat_map do |n|
+           entity_fields.permutation(n).to_a
+         end
+       end
+
+       2.upto(graph.entities.length).flat_map do |n|
+         entity_choices.permutation(n).map(&:flatten).to_a
+       end + entity_choices
+     end
+
+     # Get fields which should be included in an index for the given graph
+     # @return [Array<Array>]
+     def extra_choices(graph, select, eq, range)
+       choices = eq.values + range.values << select.to_a
+
+       choices.each do |choice|
+         choice.select { |field| graph.entities.include?(field.parent) }
+       end
+
+       choices.reject(&:empty?) << []
+     end
+
+     # Get all possible indices which jump a given piece of a query graph
+     # @return [Array<Index>]
+     def indexes_for_graph(graph, select, eq, range)
+       eq_choices = eq_choices graph, eq
+       range_fields = graph.entities.map { |entity| range[entity] }.reduce(&:+)
+       range_fields.uniq!
+       order_choices = range_fields.prefixes.flat_map do |fields|
+         fields.permutation.to_a
+       end.uniq << []
+       extra_choices = extra_choices graph, select, eq, range
+       extra_choices = 1.upto(extra_choices.length).flat_map do |n|
+         extra_choices.combination(n).map(&:flatten).map(&:uniq)
+       end.uniq
+
+       # Generate all possible indices based on the field choices
+       choices = eq_choices.product(extra_choices)
+       indexes = choices.map! do |index, extra|
+         indexes = []
+
+         order_choices.each do |order|
+           # Append the primary key of the entities in the graph if needed
+           order += graph.entities.sort_by(&:name).map(&:id_field) -
+                    (index + order)
+
+           # Partition into the ordering portion
+           index.partitions.each do |index_prefix, order_prefix|
+             hash_fields = index_prefix.take_while do |field|
+               field.parent == index.first.parent
+             end
+             order_fields = index_prefix[hash_fields.length..-1] + \
+                            order_prefix + order
+             extra_fields = extra - hash_fields - order_fields
+             next if order_fields.empty? && extra_fields.empty?
+
+             new_index = generate_index hash_fields, order_fields,
+                                        extra_fields, graph
+             indexes << new_index unless new_index.nil?
+           end
+         end
+
+         indexes
+       end.inject([], &:+)
+       indexes.flatten!
+
+       indexes
+     end
+
+     # Generate a new index, ignoring it if invalid
+     # @return [Index]
+     def generate_index(hash, order, extra, graph)
+       begin
+         index = Index.new hash, order.uniq, extra, graph
+         @logger.debug { "Enumerated #{index.inspect}" }
+       rescue InvalidIndexException
+         # This combination of fields is not valid, that's ok
+         index = nil
+       end
+
+       index
+     end
+   end
+ end
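For context, a sketch of driving the enumerator by hand (loading a workload by name is an assumption here; the NoSE tooling normally performs these steps itself):

require 'nose'

# Assume a workload definition is available under this hypothetical name
workload = NoSE::Workload.load 'example_workload'

enumerator = NoSE::IndexEnumerator.new workload

# Candidate indexes for every query, including each query's materialized
# view and the indexes needed by support queries for updates
indexes = enumerator.indexes_for_workload
puts indexes.map(&:key)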
data/lib/nose/indexes.rb ADDED
@@ -0,0 +1,239 @@
+ # frozen_string_literal: true
+
+ module NoSE
+   # A representation of materialized views over fields in an entity
+   class Index
+     attr_reader :hash_fields, :order_fields, :extra, :all_fields, :path,
+                 :entries, :entry_size, :size, :hash_count, :per_hash_count,
+                 :graph
+
+     def initialize(hash_fields, order_fields, extra, graph, saved_key = nil)
+       order_set = order_fields.to_set
+       @hash_fields = hash_fields.to_set
+       @order_fields = order_fields.delete_if { |e| hash_fields.include? e }
+       @extra = extra.to_set.delete_if do |e|
+         @hash_fields.include?(e) || order_set.include?(e)
+       end
+       @all_fields = Set.new(@hash_fields).merge(order_set).merge(@extra)
+
+       validate_hash_fields
+
+       # Store whether this index is an identity
+       @identity = @hash_fields == [
+         @hash_fields.first.parent.id_field
+       ].to_set && graph.nodes.size == 1
+
+       @graph = graph
+       @path = graph.longest_path
+       @path = nil unless @path.length == graph.size
+
+       validate_graph
+
+       build_hash saved_key
+     end
+
+     # Check if this index maps from the primary key to fields from one
+     # entity
+     # @return [Boolean]
+     def identity?
+       @identity
+     end
+
+     # A simple key which uniquely identifies the index
+     # @return [String]
+     def key
+       @key ||= "i#{Zlib.crc32 hash_str}"
+     end
+
+     # Look up a field in the index based on its ID
+     # @return [Fields::Field]
+     def [](field_id)
+       @all_fields.find { |field| field.id == field_id }
+     end
+
+     # Check if this index is an ID graph
+     # @return [Boolean]
+     def id_graph?
+       @hash_fields.all?(&:primary_key?) && @order_fields.all?(&:primary_key?)
+     end
+
+     # Produce an index with the same fields but keyed by entities in the
+     # graph
+     def to_id_graph
+       return self if id_graph?
+
+       all_ids = (@hash_fields.to_a + @order_fields + @extra.to_a)
+       all_ids.map! { |f| f.parent.id_field }.uniq!
+
+       hash_fields = [all_ids.first]
+       order_fields = all_ids[1..-1]
+       extra = @all_fields - hash_fields - order_fields
+
+       Index.new hash_fields, order_fields, extra, @graph
+     end
+
+     # :nocov:
+     def to_color
+       fields = [@hash_fields, @order_fields, @extra].map do |field_group|
+         '[' + field_group.map(&:inspect).join(', ') + ']'
+       end
+
+       "[magenta]#{key}[/] #{fields[0]} #{fields[1]} → #{fields[2]}" \
+         " [yellow]$#{size}[/]" \
+         " [magenta]#{@graph.inspect}[/]"
+     end
+     # :nocov:
+
+     # Two indices are equal if they contain the same fields
+     # @return [Boolean]
+     def ==(other)
+       hash == other.hash
+     end
+     alias eql? ==
+
+     # Hash based on the fields, their keys, and the graph
+     # @return [String]
+     def hash_str
+       @hash_str ||= [
+         @hash_fields.map(&:id).sort!,
+         @order_fields.map(&:id),
+         @extra.map(&:id).sort!,
+         @graph.unique_edges.map(&:canonical_params).sort!
+       ].to_s.freeze
+     end
+
+     def hash
+       @hash ||= Zlib.crc32 hash_str
+     end
+
+     # Check if the index contains a given field
+     # @return [Boolean]
+     def contains_field?(field)
+       @all_fields.include? field
+     end
+
+     private
+
+     # Initialize the hash function and freeze ourselves
+     # @return [void]
+     def build_hash(saved_key)
+       @key = saved_key
+
+       hash
+       key
+       calculate_size
+       freeze
+     end
+
+     # Check for valid hash fields in an index
+     # @return [void]
+     def validate_hash_fields
+       fail InvalidIndexException, 'hash fields cannot be empty' \
+         if @hash_fields.empty?
+
+       fail InvalidIndexException, 'hash fields can only involve one entity' \
+         if @hash_fields.map(&:parent).to_set.size > 1
+     end
+
+     # Ensure an index is nonempty
+     # @return [void]
+     def validate_nonempty
+       fail InvalidIndexException, 'must have fields other than hash fields' \
+         if @order_fields.empty? && @extra.empty?
+     end
+
+     # Ensure an index and its fields correspond to a valid graph
+     # @return [void]
+     def validate_graph
+       validate_graph_entities
+       validate_graph_keys
+     end
+
+     # Ensure the graph of the index is valid
+     # @return [void]
+     def validate_graph_entities
+       entities = @all_fields.map(&:parent).to_set
+       fail InvalidIndexException, 'graph entities do not match index' \
+         unless entities == @graph.entities.to_set
+     end
+
+     # We must have the primary keys of all the entities in the graph
+     # @return [void]
+     def validate_graph_keys
+       fail InvalidIndexException, 'missing graph entity keys' \
+         unless @graph.entities.map(&:id_field).all? do |field|
+           @hash_fields.include?(field) || @order_fields.include?(field)
+         end
+     end
+
+     # Precalculate the size of the index
+     # @return [void]
+     def calculate_size
+       @hash_count = @hash_fields.product_by(&:cardinality)
+
+       # XXX This only works if foreign keys span all possible keys
+       # Take the maximum possible count at each join and multiply
+       @entries = @graph.entities.map(&:count).max
+       @per_hash_count = (@entries * 1.0 / @hash_count)
+
+       @entry_size = @all_fields.sum_by(&:size)
+       @size = @entries * @entry_size
+     end
+   end
+
+   # Thrown when something tries to create an invalid index
+   class InvalidIndexException < StandardError
+   end
+
+   # Allow entities to create their own indices
+   class Entity
+     # Create a simple index which maps entity keys to other fields
+     # @return [Index]
+     def simple_index
+       Index.new [id_field], [], fields.values - [id_field],
+                 QueryGraph::Graph.from_path([id_field]), name
+     end
+   end
+
+   # Allow statements to materialize views
+   class Statement
+     # Construct an index which acts as a materialized view for a query
+     # @return [Index]
+     def materialize_view
+       eq = materialized_view_eq join_order.first
+       order_fields = materialized_view_order(join_order.first) - eq
+
+       Index.new(eq, order_fields,
+                 all_fields - (@eq_fields + @order).to_set, @graph)
+     end
+
+     private
+
+     # Get the fields used as partition keys for a materialized view
+     # based over a given entity
+     # @return [Array<Fields::Field>]
+     def materialized_view_eq(hash_entity)
+       eq = @eq_fields.select { |field| field.parent == hash_entity }
+       eq = [join_order.last.id_field] if eq.empty?
+
+       eq
+     end
+
+     # Get the ordered keys for a materialized view
+     # @return [Array<Fields::Field>]
+     def materialized_view_order(hash_entity)
+       # Start the ordered fields with the equality predicates
+       # on other entities, followed by all of the attributes
+       # used in ordering, then the range field
+       order_fields = @eq_fields.select do |field|
+         field.parent != hash_entity
+       end + @order
+       if @range_field && !@order.include?(@range_field)
+         order_fields << @range_field
+       end
+
+       # Ensure we include IDs of the final entity
+       order_fields += join_order.map(&:id_field)
+
+       order_fields.uniq
+     end
+   end
+ end
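A sketch of constructing an Index by hand (the users entity and its field names are hypothetical, with UserId assumed to be the entity's ID field; simple_index above produces the same shape automatically):

# Hypothetical single-entity index over a `users` entity
users = workload.model['users']

index = NoSE::Index.new(
  [users['UserId']],    # hash fields (the partition key)
  [],                   # order fields (clustering keys)
  [users['Username']],  # extra fields stored but not part of the key
  NoSE::QueryGraph::Graph.from_path([users.id_field])
)

index.key   # => "i..." (CRC32 of hash_str, as in the key method above)
index.size  # => entries * entry_size, as computed in calculate_size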
data/lib/nose/loader/csv.rb ADDED
@@ -0,0 +1,99 @@
+ # frozen_string_literal: true
+
+ require 'formatador'
+ require 'smarter_csv'
+ require 'zlib'
+
+ module NoSE
+   module Loader
+     # Load data into an index from a set of CSV files
+     class CsvLoader < LoaderBase
+       def initialize(workload = nil, backend = nil)
+         super
+
+         @logger = Logging.logger['nose::loader::csvloader']
+       end
+
+       # Load data for all the indexes
+       def load(indexes, config, show_progress = false, limit = nil,
+                skip_existing = true)
+         indexes.map!(&:to_id_graph).uniq! if @backend.by_id_graph
+
+         simple_indexes = find_simple_indexes indexes, skip_existing
+         simple_indexes.each do |entity, simple_index_list|
+           filename = File.join config[:directory], "#{entity.name}.csv"
+           total_rows = (limit || 0) - 1 # account for header row
+           File.open(filename) { |file| file.each_line { total_rows += 1 } }
+
+           progress = initialize_progress entity, simple_index_list,
+                                          total_rows if show_progress
+           load_file_indexes filename, entity, simple_index_list, progress
+         end
+       end
+
+       private
+
+       # Find the simple indexes we should populate
+       # @return [Hash<Entity, Index>]
+       def find_simple_indexes(indexes, skip_existing)
+         simple_indexes = indexes.select do |index|
+           index.graph.size == 1 &&
+             !(skip_existing && !@backend.index_empty?(index))
+         end
+
+         simple_indexes.group_by do |index|
+           index.hash_fields.first.parent
+         end
+       end
+
+       # Initialize a progress bar to report loading progress
+       # @return [Formatador::ProgressBar]
+       def initialize_progress(entity, simple_index_list, total_rows)
+         @logger.info "Loading simple indexes for #{entity.name}"
+         @logger.info simple_index_list.map(&:key).join(', ')
+
+         Formatador.new.redisplay_progressbar 0, total_rows
+         Formatador::ProgressBar.new total_rows, started_at: Time.now.utc
+       end
+
+       # Load all indexes for a given file
+       # @return [void]
+       def load_file_indexes(filename, entity, simple_index_list, progress)
+         SmarterCSV.process(filename,
+                            downcase_header: false,
+                            chunk_size: 1000,
+                            convert_values_to_numeric: false) do |chunk|
+           Parallel.each(chunk.each_slice(100),
+                         finish: (lambda do |_, _, _|
+                           next if progress.nil?
+                           inc = [progress.total - progress.current, 100].min
+                           progress.increment inc
+                         end)) do |minichunk|
+             load_simple_chunk minichunk, entity, simple_index_list
+           end
+         end
+       end
+
+       # Load a chunk of data from a simple entity index
+       # @return [void]
+       def load_simple_chunk(chunk, entity, indexes)
+         # Prefix all hash keys with the entity name and convert values
+         chunk.map! do |row|
+           index_row = {}
+           row.each_key do |key|
+             field_class = entity[key.to_s].class
+             value = field_class.value_from_string row[key]
+             index_row["#{entity.name}_#{key}"] = value
+           end
+
+           index_row
+         end
+
+         # Insert the batch into the index
+         indexes.each do |index|
+           @backend.index_insert_chunk index, chunk
+         end
+       end
+     end
+   end
+ end
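Finally, a sketch of invoking the loader directly (the workload, backend, and index list are assumed to be set up elsewhere, much as the gem's command-line tooling does):

# `workload`, `backend`, and `indexes` are assumed to exist already
loader = NoSE::Loader::CsvLoader.new workload, backend

# Read one CSV file per entity (e.g. data/users.csv) and populate every
# single-entity ("simple") index, showing a progress bar
loader.load indexes, { directory: 'data' }, true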