dataflow-rb 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +7 -0
  2. data/.env.test.example +6 -0
  3. data/.gitignore +14 -0
  4. data/.rspec +2 -0
  5. data/.travis.yml +4 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE +21 -0
  8. data/README.md +46 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/setup +7 -0
  12. data/dataflow-rb.gemspec +42 -0
  13. data/lib/config/mongoid.yml +21 -0
  14. data/lib/dataflow/adapters/csv_adapter.rb +123 -0
  15. data/lib/dataflow/adapters/mongo_db_adapter.rb +307 -0
  16. data/lib/dataflow/adapters/mysql_adapter.rb +21 -0
  17. data/lib/dataflow/adapters/psql_adapter.rb +21 -0
  18. data/lib/dataflow/adapters/settings.rb +33 -0
  19. data/lib/dataflow/adapters/sql_adapter.rb +322 -0
  20. data/lib/dataflow/errors/invalid_configuration_error.rb +7 -0
  21. data/lib/dataflow/errors/not_implemented_error.rb +7 -0
  22. data/lib/dataflow/event_mixin.rb +77 -0
  23. data/lib/dataflow/extensions/mongo_driver.rb +21 -0
  24. data/lib/dataflow/extensions/msgpack.rb +19 -0
  25. data/lib/dataflow/logger.rb +27 -0
  26. data/lib/dataflow/node.rb +37 -0
  27. data/lib/dataflow/nodes/compute_node.rb +495 -0
  28. data/lib/dataflow/nodes/data_node.rb +331 -0
  29. data/lib/dataflow/nodes/export/to_csv_node.rb +54 -0
  30. data/lib/dataflow/nodes/filter/drop_while_node.rb +117 -0
  31. data/lib/dataflow/nodes/filter/newest_node.rb +66 -0
  32. data/lib/dataflow/nodes/filter/where_node.rb +44 -0
  33. data/lib/dataflow/nodes/join_node.rb +151 -0
  34. data/lib/dataflow/nodes/map_node.rb +50 -0
  35. data/lib/dataflow/nodes/merge_node.rb +33 -0
  36. data/lib/dataflow/nodes/mixin/add_internal_timestamp.rb +27 -0
  37. data/lib/dataflow/nodes/mixin/rename_dotted_fields.rb +63 -0
  38. data/lib/dataflow/nodes/select_keys_node.rb +39 -0
  39. data/lib/dataflow/nodes/snapshot_node.rb +77 -0
  40. data/lib/dataflow/nodes/sql_query_node.rb +50 -0
  41. data/lib/dataflow/nodes/transformation/to_time_node.rb +41 -0
  42. data/lib/dataflow/nodes/upsert_node.rb +68 -0
  43. data/lib/dataflow/properties_mixin.rb +35 -0
  44. data/lib/dataflow/schema_mixin.rb +134 -0
  45. data/lib/dataflow/version.rb +4 -0
  46. data/lib/dataflow-rb.rb +72 -0
  47. metadata +371 -0
data/lib/dataflow/nodes/join_node.rb
@@ -0,0 +1,151 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module Nodes
+     # Performs a join operation on 2 dependencies.
+     class JoinNode < ComputeNode
+       VALID_TYPES = %w(inner left).freeze
+       field :join_type, type: String, required_for_computing: true, values: VALID_TYPES, default: VALID_TYPES[0]
+       field :key1, type: String, required_for_computing: true
+       field :key2, type: String, required_for_computing: true
+       # Support joining on multiple keys by setting them in the other keys.
+       # other_keys1 and other_keys2 must match in length.
+       field :other_keys1, type: Array, default: []
+       field :other_keys2, type: Array, default: []
+       field :prefix1, type: String, default: ''
+       field :prefix2, type: String, default: ''
+
+       ensure_data_node_exists
+       ensure_dependencies exactly: 2
+
+       def valid_for_computation?
+         # We need an equal number of keys, as they will be matched with each other.
+         if other_keys1.count != other_keys2.count
+           errors.add(:other_keys2, "#{self.class} other_keys2 must match other_keys1's length")
+         end
+
+         super
+       end
+
+       def required_schema
+         return {} unless dependencies.count == 2
+
+         # merge both dependencies' schemas
+         sch = dependencies.first.schema || {}
+         sch.merge(dependencies.second.schema || {})
+       end
+
+       def compute_impl
+         all_same_postgresql = db_backend == :postgresql
+         all_same_postgresql &&= dependencies[1..-1].all? do |dep|
+           dep.db_backend == :postgresql && dep.db_name == db_name
+         end
+
+         if all_same_postgresql
+           # use a SQL join
+           execute_sql_join
+           self.updated_at = Time.now
+         else
+           # use a software join
+           super
+         end
+       end
+
+       private
+
+       def execute_sql_join
+         fields = required_schema.keys
+         select_keys = dependencies[0].schema.keys.map { |x| "d1.#{x}" } + (dependencies[1].schema.keys - dependencies[0].schema.keys).map { |x| "d2.#{x}" }
+         query = "INSERT INTO #{write_dataset_name} (#{fields.join(',')})
+                  SELECT #{select_keys.join(', ')}
+                  FROM #{dependencies[0].read_dataset_name} as d1
+                  INNER JOIN #{dependencies[1].read_dataset_name} as d2
+                  ON d1.#{key1} = d2.#{key2}"
+         p query
+         db_adapter.client[query].to_a
+       end
+
+       def compute_batch(records:)
+         join(n1_records: records)
+       end
+
+       def join(n1_records:)
+         tokens_key1 = record_dig_tokens(key: key1, use_sym: dependencies.first.use_symbols?)
+         tokens_key2 = record_dig_tokens(key: key2, use_sym: dependencies.second.use_symbols?)
+         other_tokens_key1 = (other_keys1 || []).map do |key|
+           record_dig_tokens(key: key, use_sym: dependencies.first.use_symbols?)
+         end
+         other_tokens_key2 = (other_keys2 || []).map do |key|
+           record_dig_tokens(key: key, use_sym: dependencies.second.use_symbols?)
+         end
+
+         # fetch the necessary records from node2
+         node2 = dependencies.second
+         n2_ids = n1_records.map { |x| x.dig(*tokens_key1) }.compact.uniq
+         n2_records = node2.all(where: { key2 => n2_ids })
+
+         # preload and index dataset2 by the key we want to look up
+         mapped_data2 = {}
+         if has_multiple_keys?
+           n2_records.each do |datum2|
+             lookup_value = datum2.dig(*tokens_key2)
+             mapped_data2[lookup_value] ||= []
+             mapped_data2[lookup_value] << datum2
+           end
+         else
+           n2_records.each do |datum2|
+             lookup_value = datum2.dig(*tokens_key2)
+             mapped_data2[lookup_value] = datum2
+           end
+         end
+
+         # for each datum in dataset1, find the corresponding datum in dataset2
+         n1_records.map do |d1|
+           join_value = d1.dig(*tokens_key1)
+           next if join_value.nil?
+
+           d2 = mapped_data2[join_value]
+           if has_multiple_keys? && !d2.nil?
+             # in this case it will be an array,
+             # so we need to search further for the correct datum
+             d2 = find_matching_record(d1, d2, other_tokens_key1, other_tokens_key2)
+           end
+
+           # if there is no d2, only continue based on the type of join we want
+           next if d2.blank? && join_type == 'inner'
+
+           # there might be the case that nothing was found after all
+           d2 ||= {}
+
+           # prefix if needed
+           d1 = Hash[d1.map { |k, v| ["#{prefix1}#{k}", v] }] if prefix1.present?
+           d2 = Hash[d2.map { |k, v| ["#{prefix2}#{k}", v] }] if prefix2.present?
+
+           d1.reverse_merge(d2)
+         end.compact
+       end
+
+       def has_multiple_keys?
+         other_keys1.present? && other_keys2.present?
+       end
+
+       # Find a record in d2_list that can be joined with d1 based on
+       # the values of the fields specified in other_keys1/2.
+       # @param d1 [Hash] a record
+       # @param d2_list [Array] an array of records that may match d1
+       # @param other_tokens1 [Array] an array of arrays (tokens) that will
+       #        be used to fetch the corresponding value in d1
+       # @param other_tokens2 [Array] an array of arrays (tokens) that will
+       #        be used to fetch the corresponding value in the d2_list
+       # @return [Hash] a record if found, nil otherwise.
+       def find_matching_record(d1, d2_list, other_tokens1, other_tokens2)
+         values1 = other_tokens1.map { |tokens| d1.dig(*tokens) }
+         d2_list.find do |d2|
+           values1.each_with_index.all? do |value1, idx|
+             # does this record match d1 on all the fields in other_keys1/2?
+             value1 == d2.dig(*(other_tokens2[idx]))
+           end
+         end
+       end
+     end
+   end
+ end
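To make the configuration above concrete, here is a hypothetical wiring sketch. The dependency_ids/data_node_id fields and the recompute entry point belong to ComputeNode and are not shown in this hunk, so treat them as assumptions:

    # Hypothetical wiring; assumes ComputeNode's dependency_ids/data_node_id
    # fields and #recompute entry point (not part of this hunk).
    users  = Dataflow::Nodes::DataNode.create(name: 'users',  db_name: 'app')
    orders = Dataflow::Nodes::DataNode.create(name: 'orders', db_name: 'app')
    output = Dataflow::Nodes::DataNode.create(name: 'users_orders', db_name: 'app')

    join = Dataflow::Nodes::JoinNode.create(
      name: 'join_users_orders',
      dependency_ids: [users.id, orders.id],
      data_node_id: output.id,
      join_type: 'left',   # one of VALID_TYPES: 'inner' or 'left'
      key1: 'id',          # join key on the first dependency
      key2: 'user_id',     # join key on the second dependency
      prefix2: 'order_'    # prefix dataset-2 fields to avoid collisions
    )
    join.recompute  # SQL join if both deps share one PostgreSQL db, software join otherwise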
data/lib/dataflow/nodes/map_node.rb
@@ -0,0 +1,50 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module Nodes
+     # Performs a map operation on 2 dependencies.
+     class MapNode < ComputeNode
+       ensure_data_node_exists
+       ensure_dependencies exactly: 2
+
+       private
+
+       def compute_batch(records:)
+         map(records: records, mapping_node: dependencies.second)
+       end
+
+       def map(records:, mapping_node:)
+         mapping_table = mapping_node.all
+
+         records.each do |record|
+           mapping_table.each { |mapping| map_record(record, mapping) }
+         end
+
+         records
+       end
+
+       def map_record(record, mapping)
+         original_key = mapping['key']
+         original_value = record_value(record: record, key: original_key)
+         mapped_key = mapping['mapped_key']
+         mapped_value = nil
+
+         if mapping['map'].present?
+           # re-map both the key and the value with a lambda(key, value)
+           result = eval(mapping['map']).call(original_key, original_value)
+           mapped_key = result.keys[0]
+           mapped_value = result.values[0]
+         elsif mapping['values'].is_a? Hash
+           # or translate the value through a hash-table lookup
+           mapped_value = mapping['values'][original_value]
+           mapped_value ||= mapping['default']
+         elsif mapping['values'].present?
+           # or map the current value with a lambda(value)
+           mapped_value = eval(mapping['values']).call(original_value)
+         end
+
+         mapped_key ||= original_key
+         record[mapped_key] = mapped_value || original_value
+       end
+     end
+   end
+ end
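The records of the mapping node (the second dependency) drive map_record above through the 'key', 'mapped_key', 'values', 'map' and 'default' fields. An illustrative mapping table; the field names come from the code, the data is made up:

    [
      # translate values through a lookup hash, with a fallback default
      { 'key' => 'country', 'mapped_key' => 'country_name',
        'values' => { 'JP' => 'Japan', 'FR' => 'France' }, 'default' => 'Unknown' },
      # transform the value with a lambda(value), stored as a string and eval'd
      { 'key' => 'price', 'values' => '->(v) { v.to_f * 100 }' },
      # re-map key and value at once with a lambda(key, value) returning a one-entry hash
      { 'key' => 'name', 'map' => '->(k, v) { { "#{k}_upcase" => v.to_s.upcase } }' }
    ]

Note that 'map' and 'values' lambdas are eval'd strings, so the mapping node's data has to be trusted.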
data/lib/dataflow/nodes/merge_node.rb
@@ -0,0 +1,33 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module Nodes
+     # Performs a merge operation on 2 dependencies.
+     class MergeNode < ComputeNode
+       field :merge_key, type: String, default: ''
+       field :merge_values, type: Array, default: []
+
+       ensure_data_node_exists
+       ensure_dependencies exactly: 2
+
+       private
+
+       def compute_impl
+         process_parallel(node: dependencies.first) do |records|
+           merge_records(records: records, index: 0)
+         end
+
+         process_parallel(node: dependencies.second) do |records|
+           merge_records(records: records, index: 1)
+         end
+       end
+
+       def merge_records(records:, index:)
+         records.each do |record|
+           # add the merge key with the corresponding value if necessary
+           record[merge_key] = merge_values[index] if merge_key.present?
+         end
+         records
+       end
+     end
+   end
+ end
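A sketch of the typical use: concatenating two datasets while tagging each record with its origin. Records from dependency 0 receive merge_values[0] and records from dependency 1 receive merge_values[1]; the create fields are assumed from ComputeNode:

    merge = Dataflow::Nodes::MergeNode.create(
      name: 'all_events',
      dependency_ids: [web_events.id, mobile_events.id],  # assumed ComputeNode fields
      data_node_id: merged_output.id,
      merge_key: 'source',
      merge_values: %w(web mobile)
    )
    # every record written out carries 'source' => 'web' or 'source' => 'mobile'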
data/lib/dataflow/nodes/mixin/add_internal_timestamp.rb
@@ -0,0 +1,27 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module Nodes
+     module Mixin
+       # Adds an internal updated_at timestamp to the records.
+       module AddInternalTimestamp
+         def self.included(base)
+           base.class_eval do
+             field :use_internal_timestamp, type: Boolean, default: true
+             field :internal_timestamp_key, type: String, default: '_mojaco_updated_at'
+           end
+         end
+
+         # Add an internal updated_at timestamp to the records
+         def add_internal_timestamp(records:)
+           return unless use_internal_timestamp
+           return unless internal_timestamp_key.present?
+
+           updated_at = Time.now
+           records.each do |record|
+             record[internal_timestamp_key] = updated_at
+           end
+         end
+       end # module AddInternalTimestamp
+     end # module Mixin
+   end # module Nodes
+ end # module Dataflow
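The effect on the data, in short (node stands for any instance of a class that includes the mixin; with the defaults above the key is '_mojaco_updated_at'):

    records = [{ 'id' => 1 }, { 'id' => 2 }]
    node.add_internal_timestamp(records: records)
    records.first['_mojaco_updated_at']  # => the Time.now captured during the call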
data/lib/dataflow/nodes/mixin/rename_dotted_fields.rb
@@ -0,0 +1,63 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module Nodes
+     module Mixin
+       # Supports traversing the record and renaming fields that contain a dot '.'.
+       module RenameDottedFields
+         # Add a mixin-specific field to the node
+         def self.included(base)
+           base.class_eval do
+             field :rename_dotted_fields_in, type: Array
+           end
+         end
+
+         # Rename the specified dotted fields
+         def rename_dotted_fields(records:)
+           return if rename_dotted_fields_in.blank?
+
+           traverse_whole_record = rename_dotted_fields_in.include?('.')
+
+           records.each do |record|
+             if traverse_whole_record
+               traverse_and_rename_dotted_fields(record)
+             else
+               rename_dotted_fields_in.each do |field|
+                 value = record[field]
+                 if value.is_a?(Array)
+                   traverse_and_rename_dotted_fields_in_array(value)
+                 elsif value.is_a?(Hash)
+                   traverse_and_rename_dotted_fields(value)
+                 end
+               end
+             end
+           end
+         end
+
+         # Traverse a hash and look for the fields to rename
+         def traverse_and_rename_dotted_fields(hash)
+           return if hash.blank?
+
+           hash.keys.each do |k|
+             value = hash[k]
+             if value.is_a?(Array)
+               traverse_and_rename_dotted_fields_in_array(value)
+             elsif value.is_a?(Hash)
+               traverse_and_rename_dotted_fields(value)
+             end
+
+             next unless k.include?('.')
+             hash[k.tr('.', '_')] = value
+             hash.delete(k)
+           end
+         end
+
+         # Looks for hashes in the array that may require a transformation
+         def traverse_and_rename_dotted_fields_in_array(array)
+           array.each do |v|
+             traverse_and_rename_dotted_fields(v) if v.is_a?(Hash)
+           end
+         end
+       end # module RenameDottedFields
+     end # module Mixin
+   end # module Nodes
+ end # module Dataflow
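Dotted keys typically have to go because MongoDB rejects '.' in field names. A behavior sketch; note that including '.' in rename_dotted_fields_in makes the mixin traverse whole records:

    record = { 'user.name' => 'Ada', 'tags' => [{ 'a.b' => 1 }] }
    node.traverse_and_rename_dotted_fields(record)
    record  # => { 'tags' => [{ 'a_b' => 1 }], 'user_name' => 'Ada' }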
data/lib/dataflow/nodes/select_keys_node.rb
@@ -0,0 +1,39 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module Nodes
+     # Performs a select operation on its dependency.
+     class SelectKeysNode < ComputeNode
+       field :keys, type: Array, required_for_computing: true
+
+       ensure_data_node_exists
+       ensure_dependencies exactly: 1
+
+       def export(connection_opts: { db_backend: :csv }, keys: nil)
+         super(connection_opts: connection_opts, keys: keys || self.keys)
+       end
+
+       private
+
+       def compute_batch(records:)
+         k = keys
+         k = k.map(&:to_sym) if dependencies.first.use_symbols?
+         select_keys(records: records, keys: k)
+       end
+
+       def select_keys(records:, keys:)
+         records.map do |base_record|
+           new_record = {}
+           keys.each do |key|
+             value = record_value(record: base_record, key: key)
+             next unless value.present?
+
+             add_value_to_record(record: new_record, key: key, value: value)
+           end
+
+           next unless new_record.present?
+           new_record
+         end.compact
+       end
+     end
+   end
+ end
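In effect, select_keys projects each record down to the requested keys and drops records that end up empty:

    # select_keys(records: [{ 'id' => 1, 'email' => 'a@b.co', 'x' => 9 }, { 'x' => 2 }],
    #             keys: %w(id email))
    # => [{ 'id' => 1, 'email' => 'a@b.co' }]  # the second record had none of the keys

Note that `next unless value.present?` also drops keys whose values are false or empty, which may be surprising for boolean fields.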
data/lib/dataflow/nodes/snapshot_node.rb
@@ -0,0 +1,77 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module Nodes
+     # Represents a node that captures changes over time.
+     # TODO: extend the unique node?
+     class SnapshotNode < DataNode
+       include Mixin::RenameDottedFields
+       include Mixin::AddInternalTimestamp
+
+       field :index_key, type: String, required_for_computing: true
+       field :updated_at_key, type: String, required_for_computing: true
+
+       validates_presence_of :index_key
+       validates_presence_of :updated_at_key
+
+       def set_defaults
+         super
+
+         self.indexes ||= []
+         # get rid of the symbol/string key confusion
+         self.indexes = JSON.parse(self.indexes.to_json)
+
+         # add indexes for the index key, the updated_at key and a unique compound of both
+         self.indexes += [{ 'key' => index_key }] if index_key
+         self.indexes += [{ 'key' => updated_at_key }] if updated_at_key
+         self.indexes += [{ 'key' => [index_key, updated_at_key], 'unique' => true }] if index_key && updated_at_key
+         self.indexes.uniq!
+
+         self.updated_at ||= Time.now
+       end
+
+       def add(records:)
+         # TODO: create a chain of behavior "before add"
+         rename_dotted_fields(records: records)
+         add_internal_timestamp(records: records)
+
+         records.delete_if do |record|
+           convert_update_at_key(record)
+           is_record_redundant?(record: record)
+         end.compact
+         super(records: records)
+       end
+
+       private
+
+       # If this record already exists and only the updated_at
+       # key changed while the rest of the content is the same,
+       # we consider it redundant.
+       def is_record_redundant?(record:)
+         id = record[index_key]
+         previous_record = db_adapter.find(where: { index_key => id },
+                                           sort: { updated_at_key => -1 })
+         return false if previous_record.blank?
+
+         has_same_content = previous_record.keys == record.keys
+         has_same_content &&= previous_record.keys.all? do |k|
+           # the updated_at key and the mojaco timestamp are allowed to change
+           next true if k == updated_at_key || k == internal_timestamp_key
+           # but the rest of the content should be the same
+           record[k] == previous_record[k]
+         end
+
+         has_same_content
+       end
+
+       def convert_update_at_key(record)
+         return if record[updated_at_key].is_a?(Time)
+
+         # try to parse as a string
+         record[updated_at_key] = Time.parse(record[updated_at_key])
+       rescue TypeError
+         # try to parse as a timestamp
+         record[updated_at_key] = Time.at(record[updated_at_key])
+       end
+     end
+   end
+ end
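A behavior sketch, assuming the create/db_name API of DataNode (not shown in this hunk): successive snapshots of an entity are stored only when something other than the updated_at key changes.

    snap = Dataflow::Nodes::SnapshotNode.create(
      name: 'user_snapshots', db_name: 'app',  # db_name assumed from DataNode
      index_key: 'user_id', updated_at_key: 'seen_at'
    )
    snap.add(records: [{ 'user_id' => 1, 'plan' => 'free', 'seen_at' => '2017-01-01' }]) # stored
    snap.add(records: [{ 'user_id' => 1, 'plan' => 'free', 'seen_at' => '2017-01-02' }]) # redundant, dropped
    snap.add(records: [{ 'user_id' => 1, 'plan' => 'pro',  'seen_at' => '2017-01-03' }]) # stored: content changed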
data/lib/dataflow/nodes/sql_query_node.rb
@@ -0,0 +1,50 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module Nodes
+     # Executes a templated SQL query directly on the data node's SQL backend.
+     class SqlQueryNode < ComputeNode
+       ensure_data_node_exists
+       ensure_dependencies min: 0 # dependencies are not necessarily needed
+       field :query, type: String, required_for_computing: true
+
+       def valid_for_computation?
+         unless (data_node&.db_backend.to_s =~ /sql/).present?
+           errors.add(:db_backend, 'Must have a SQL based backend.')
+         end
+
+         begin
+           computed_query
+         rescue StandardError => e
+           errors.add(:query, "Specified query has errors: #{e.message}")
+         end
+
+         super
+       end
+
+       def computed_query
+         # 1. replace <node> with the current write dataset's name
+         q = query.gsub('<node>', write_dataset_name)
+
+         # 2. replace <i> placeholders with the dependencies' (read) dataset names
+         q.gsub(/<[0-9]+>/) do |match|
+           # [1..-2] removes the surrounding '<' and '>'
+           dep_index = match[1..-2].to_i
+           raise "Specified dependency #{match} does not exist. There are only #{dependencies.count} dependencies." if dep_index >= dependencies.count
+           dependencies[dep_index].read_dataset_name
+         end
+       end
+
+       def execute_query
+         data_node.send(:db_adapter).client[computed_query].to_a
+       end
+
+       private
+
+       # Overrides the base implementation.
+       # This node leaves all the work to the DB.
+       def compute_impl
+         execute_query
+       end
+     end
+   end
+ end
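The placeholder expansion in computed_query is the core of this node: '<node>' becomes the write dataset, and '<0>', '<1>', ... become the dependencies' read datasets. A sketch (the create fields are assumptions; the expanded names depend on the read/write dataset naming scheme):

    sql = Dataflow::Nodes::SqlQueryNode.create(
      name: 'active_users',
      data_node_id: output.id,     # the data node must have a SQL-based backend
      dependency_ids: [users.id],  # assumed ComputeNode fields
      query: 'INSERT INTO <node> SELECT * FROM <0> WHERE active = true'
    )
    sql.computed_query
    # => e.g. "INSERT INTO active_users SELECT * FROM users WHERE active = true"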
data/lib/dataflow/nodes/transformation/to_time_node.rb
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module Nodes
+     module Transformation
+       # Transforms the given keys' values to Time.
+       class ToTimeNode < ComputeNode
+         field :keys, type: Array, required_for_computing: true, default: []
+
+         ensure_data_node_exists
+         ensure_dependencies exactly: 1
+
+         def valid_for_computation?
+           # It does not make sense to use this node without any keys specified.
+           if (keys || []).count.zero?
+             errors.add(:keys, "#{self.class} keys must contain at least 1 value")
+           end
+
+           super
+         end
+
+         def compute_batch(records:)
+           key_tokens = keys.map do |key|
+             record_dig_tokens(key: key, use_sym: dependencies.first.use_symbols?)
+           end
+
+           records.each do |record|
+             key_tokens.each_with_index do |tokens, index|
+               value = record.dig(*tokens)
+               next unless value.present?
+
+               value = value.to_time
+               add_value_to_record(record: record, key: keys[index], value: value)
+             end
+           end
+
+           records
+         end
+       end
+     end
+   end
+ end
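Per record, the conversion happens in place. String#to_time comes from ActiveSupport, which the gem already leans on (e.g. for present?):

    # before: { 'created_at' => '2017-03-01 10:00' }
    # after:  { 'created_at' => 2017-03-01 10:00:00 }  # a Time instance; zone depends on configuration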
data/lib/dataflow/nodes/upsert_node.rb
@@ -0,0 +1,68 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module Nodes
+     # Represents a node with a unique index and upsert behavior:
+     # if an existing record matches on that index, it gets replaced;
+     # if not, the record simply gets added.
+     class UpsertNode < DataNode
+       include Mixin::RenameDottedFields
+       include Mixin::AddInternalTimestamp
+
+       before_save :transform_index_key
+
+       field :index_key, required_for_computing: true
+       validates_presence_of :index_key
+
+       def set_defaults
+         super
+
+         self.indexes ||= []
+         # get rid of the symbol/string key confusion
+         self.indexes = JSON.parse(self.indexes.to_json)
+
+         # if there is no index_key, take the first unique index
+         if index_key.blank?
+           first_unique_index = self.indexes.find { |x| x['unique'] }
+           self.index_key = (first_unique_index || {})['key']
+         end
+
+         # add indexes for the unique index keys
+         if index_key.present?
+           auto_generated_indexes = [{ 'key' => index_key, 'unique' => true }]
+
+           if index_key.is_a? Array
+             # generate a non-unique index for each key of a compound index
+             auto_generated_indexes += index_key.map { |idx| { 'key' => idx } }
+           end
+           self.indexes += auto_generated_indexes
+           self.indexes.uniq!
+         end
+
+         self.updated_at ||= Time.now
+       end
+
+       def add(records:)
+         return if records.blank?
+
+         # TODO: create a chain of behavior "before add"
+         rename_dotted_fields(records: records)
+         add_internal_timestamp(records: records)
+
+         db_adapter.save(records: records, replace_by: index_key)
+         self.updated_at = Time.now
+         save!
+       end
+
+       private
+
+       def transform_index_key
+         return unless index_key.is_a?(String)
+
+         # try to split the comma-separated string
+         keys = index_key.split(',')
+         # if there was no comma, leave it as-is
+         self.index_key = keys if keys.count > 1
+       end
+     end
+   end
+ end
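In short: add replaces on the unique index and inserts otherwise, and a comma-separated index_key string is split into a compound key before save. A sketch (db_name assumed from DataNode):

    users = Dataflow::Nodes::UpsertNode.create(
      name: 'users', db_name: 'app', index_key: 'id'
    )
    users.add(records: [{ 'id' => 1, 'plan' => 'free' }])
    users.add(records: [{ 'id' => 1, 'plan' => 'pro' }])  # replaces the first record

    multi = Dataflow::Nodes::UpsertNode.create(
      name: 'events', db_name: 'app', index_key: 'day,source'
    )
    multi.index_key  # => ['day', 'source'] after the before_save hook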
data/lib/dataflow/properties_mixin.rb
@@ -0,0 +1,35 @@
+ # frozen_string_literal: true
+ module Dataflow
+   module PropertiesMixin
+     extend ActiveSupport::Concern
+
+     module ClassMethods
+       # Override the mongoid `field` method to produce a list of
+       # properties for each node.
+       def field(name, opts = {})
+         add_property(name, opts)
+         # make sure we only pass mongoid-supported keys to the superclass
+         opts.delete(:editable)
+         opts.delete(:required_for_computing)
+         opts.delete(:values)
+         super
+       end
+
+       def add_property(name, opts)
+         # skip properties whose name starts with an underscore
+         return if name.to_s =~ /^_/
+         @properties ||= {}
+         @properties[name] ||= {}
+         @properties[name].merge!(opts)
+       end
+
+       def properties
+         @properties ||= {}
+         @properties.merge(superclass.properties)
+       rescue NoMethodError
+         # handle the case where we're already at the top of the hierarchy
+         @properties
+       end
+     end
+   end
+ end
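The net effect is introspection: every field declaration is recorded (including the custom keys stripped before Mongoid sees them, since merge! copies them before the deletes), and properties merges the list up the class hierarchy. Given the JoinNode fields earlier in this diff:

    Dataflow::Nodes::JoinNode.properties[:join_type]
    # => { type: String, required_for_computing: true,
    #      values: %w(inner left), default: 'inner' }  (illustrative shape)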