remi 0.2.42 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +7 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +13 -26
- data/README.md +1 -1
- data/features/step_definitions/remi_step.rb +33 -13
- data/features/sub_job_example.feature +24 -0
- data/features/sub_transform_example.feature +35 -0
- data/features/sub_transform_many_to_many.feature +49 -0
- data/features/support/env_app.rb +1 -1
- data/jobs/all_jobs_shared.rb +19 -16
- data/jobs/copy_source_job.rb +11 -9
- data/jobs/csv_file_target_job.rb +10 -9
- data/jobs/json_job.rb +18 -14
- data/jobs/metadata_job.rb +33 -28
- data/jobs/parameters_job.rb +14 -11
- data/jobs/sample_job.rb +106 -77
- data/jobs/sftp_file_target_job.rb +14 -13
- data/jobs/sub_job_example_job.rb +86 -0
- data/jobs/sub_transform_example_job.rb +43 -0
- data/jobs/sub_transform_many_to_many_job.rb +46 -0
- data/jobs/transforms/concatenate_job.rb +16 -12
- data/jobs/transforms/data_frame_sieve_job.rb +24 -19
- data/jobs/transforms/date_diff_job.rb +15 -11
- data/jobs/transforms/nvl_job.rb +16 -12
- data/jobs/transforms/parse_date_job.rb +17 -14
- data/jobs/transforms/partitioner_job.rb +27 -19
- data/jobs/transforms/prefix_job.rb +13 -10
- data/jobs/transforms/truncate_job.rb +14 -10
- data/jobs/transforms/truthy_job.rb +11 -8
- data/lib/remi.rb +25 -11
- data/lib/remi/data_frame.rb +4 -4
- data/lib/remi/data_frame/daru.rb +1 -37
- data/lib/remi/data_subject.rb +234 -48
- data/lib/remi/data_subjects/csv_file.rb +171 -0
- data/lib/remi/data_subjects/data_frame.rb +106 -0
- data/lib/remi/data_subjects/file_system.rb +115 -0
- data/lib/remi/data_subjects/local_file.rb +109 -0
- data/lib/remi/data_subjects/none.rb +31 -0
- data/lib/remi/data_subjects/postgres.rb +186 -0
- data/lib/remi/data_subjects/s3_file.rb +84 -0
- data/lib/remi/data_subjects/salesforce.rb +211 -0
- data/lib/remi/data_subjects/sftp_file.rb +196 -0
- data/lib/remi/data_subjects/sub_job.rb +50 -0
- data/lib/remi/dsl.rb +74 -0
- data/lib/remi/encoder.rb +45 -0
- data/lib/remi/extractor.rb +21 -0
- data/lib/remi/field_symbolizers.rb +1 -0
- data/lib/remi/job.rb +279 -113
- data/lib/remi/job/parameters.rb +90 -0
- data/lib/remi/job/sub_job.rb +35 -0
- data/lib/remi/job/transform.rb +165 -0
- data/lib/remi/loader.rb +22 -0
- data/lib/remi/monkeys/daru.rb +4 -0
- data/lib/remi/parser.rb +44 -0
- data/lib/remi/testing/business_rules.rb +17 -23
- data/lib/remi/testing/data_stub.rb +2 -2
- data/lib/remi/version.rb +1 -1
- data/remi.gemspec +3 -0
- data/spec/data_subject_spec.rb +475 -11
- data/spec/data_subjects/csv_file_spec.rb +69 -0
- data/spec/data_subjects/data_frame_spec.rb +52 -0
- data/spec/{extractor → data_subjects}/file_system_spec.rb +0 -0
- data/spec/{extractor → data_subjects}/local_file_spec.rb +0 -0
- data/spec/data_subjects/none_spec.rb +41 -0
- data/spec/data_subjects/postgres_spec.rb +80 -0
- data/spec/{extractor → data_subjects}/s3_file_spec.rb +0 -0
- data/spec/data_subjects/salesforce_spec.rb +117 -0
- data/spec/{extractor → data_subjects}/sftp_file_spec.rb +16 -0
- data/spec/data_subjects/sub_job_spec.rb +33 -0
- data/spec/encoder_spec.rb +38 -0
- data/spec/extractor_spec.rb +11 -0
- data/spec/fixtures/sf_bulk_helper_stubs.rb +443 -0
- data/spec/job/transform_spec.rb +257 -0
- data/spec/job_spec.rb +507 -0
- data/spec/loader_spec.rb +11 -0
- data/spec/parser_spec.rb +38 -0
- data/spec/sf_bulk_helper_spec.rb +117 -0
- data/spec/testing/data_stub_spec.rb +5 -3
- metadata +109 -27
- data/features/aggregate.feature +0 -42
- data/jobs/aggregate_job.rb +0 -31
- data/jobs/transforms/transform_jobs.rb +0 -4
- data/lib/remi/data_subject/csv_file.rb +0 -162
- data/lib/remi/data_subject/data_frame.rb +0 -52
- data/lib/remi/data_subject/postgres.rb +0 -134
- data/lib/remi/data_subject/salesforce.rb +0 -136
- data/lib/remi/data_subject/sftp_file.rb +0 -65
- data/lib/remi/extractor/file_system.rb +0 -92
- data/lib/remi/extractor/local_file.rb +0 -43
- data/lib/remi/extractor/s3_file.rb +0 -57
- data/lib/remi/extractor/sftp_file.rb +0 -83
- data/spec/data_subject/csv_file_spec.rb +0 -79
- data/spec/data_subject/data_frame.rb +0 -27
@@ -0,0 +1,90 @@
|
|
1
|
+
module Remi
  class Job
    # A job parameter adds flexibility to defining job templates. An
    # instance of Parameters contains a collection of parameters that
    # are evaluated in the context of a job. It functions very
    # similarly to Rspec's #let, in that it can be defined using a
    # block of code that is only evaluated the first time it is used,
    # and cached for later use.
    #
    # Parameters should only be used in the context of a job.
    # @example
    #   class MyJob < Remi::Job
    #     param(:my_param) { 'some parameter' }
    #     param :my_calculated_param do
    #       1.upto(1000).size
    #     end
    #
    #     transform :something do
    #       puts "my_param is #{job.params[:my_param]}"
    #       puts "my_calculated_param is #{job.params[:my_calculated_param]}"
    #     end
    #   end
    #
    #   job1 = MyJob.new
    #   job1.execute
    #   #=> my_param is some parameter
    #   #=> my_calculated_param is 1000
    #
    #   job2 = MyJob.new
    #   job2.params[:my_param] = 'override'
    #   job2.execute
    #   #=> my_param is override
    #   #=> my_calculated_param is 1000
    #
    #   job3 = MyJob.new(my_param: 'constructor override', my_calculated_param: 322)
    #   job3.execute
    #   #=> my_param is constructor override
    #   #=> my_calculated_param is 322
    class Parameters
      # @param context [Object] the context in which parameter blocks will be evaluated
      def initialize(context=nil)
        @context = context
        @params = {}
      end

      # @return [Object] The context in which parameter blocks will be evaluated
      attr_accessor :context

      # Get the value of a parameter
      #
      # @param name [Symbol] The name of the parameter
      #
      # @return [Object] The value of the parameter
      # @raise [ArgumentError] if the parameter has not been defined
      def [](name)
        return send(name) if respond_to?(name)
        raise ArgumentError, "Job parameter #{name} is not defined"
      end


      # Set the value of a parameter
      #
      # @param name [Symbol] The name of the parameter
      # @param value [Object] The new value of the parameter
      #
      # @return [Object] The new value of the parameter
      def []=(name, value)
        # Define an accessor first so #[] (which uses respond_to?) can find it.
        __define__(name) { value } unless respond_to? name
        @params[name] = value
      end

      # @return [Hash] The parameters as a hash
      def to_h
        @params
      end

      # @return [Job::Parameters] A clone of this parameter set
      def clone
        the_clone = super
        # Dup the backing hash so the clone's cached values diverge independently.
        the_clone.instance_variable_set(:@params, @params.dup)
        the_clone
      end

      # Defines a lazily-evaluated parameter.  The block is evaluated in
      # the job context the first time the parameter is read, then cached.
      #
      # @param name [Symbol] The name of the parameter to define
      # @param block [Proc] Block returning the parameter's value
      def __define__(name, &block)
        @params[name] = nil
        define_singleton_method name do
          # ||= caches the first evaluation (NOTE(review): a block that
          # legitimately returns nil/false will be re-evaluated each read).
          @params[name] ||= Remi::Dsl.dsl_return(self, @context, &block)
        end
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Remi
  class Job
    # A SubJob wraps a block of code that must return a Remi::Job
    # instance.  It allows one job to reference, execute, and read field
    # metadata from another job, deferring construction of the wrapped
    # job until it is first needed.
    class SubJob
      # @param context [Object] the context in which the sub-job block is evaluated
      # @param name [String] an optional name for this sub-job
      # @param block [Proc] a block that must return a Remi::Job
      def initialize(context=nil, name: 'UNDEFINED SubJob', **kargs, &block)
        @context = context
        @name = name
        @block = block
      end

      # @return [Object] the context / @return [String] the sub-job name
      attr_accessor :context, :name

      # Evaluates the sub-job block in the context.
      #
      # @return [Remi::Job] the job returned by the block
      # @raise [ArgumentError] if the block does not return a Remi::Job
      def dsl_return
        sub_job = Dsl.dsl_return(self, @context, &@block)
        raise ArgumentError, "SubJob DSL must return a Remi::Job" unless sub_job.is_a? Job
        sub_job
      end

      # @return [Remi::Job] the wrapped job (evaluated once and cached)
      def job
        @job ||= dsl_return
      end

      # Looks up field metadata for one of the wrapped job's data subjects.
      #
      # @param data_subject [Symbol] name of a data subject method on the wrapped job
      # @return [Object] the fields of that data subject
      def fields(data_subject)
        job.send(data_subject).dsl_eval.fields
      end

      # Executes the wrapped job in full.
      def execute
        job.execute
      end

      # Executes only the transforms of the wrapped job.
      def execute_transforms
        job.execute(:transforms)
      end
    end
  end
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
module Remi
  class Job
    # A Transform contains a block of code that is executed in a context.
    # Transforms are usually defined in a Job, according to the Job DSL.
    #
    # Transforms may optionally have a mapping defined that links a
    # local definition of a data frame to a definition of the data
    # frame in the associated context.
    # @example
    #
    #   # Transforms should typically be defined using the Job DSL
    #   job = MyJob.new
    #   tform = Job::Transform.new(job) do
    #     # ... stuff to do in the context of the job
    #   end
    #   tform.execute
    class Transform

      # Links a source data subject to a target data subject with a
      # hash mapping field names from one to the other.
      FieldMap = Struct.new(:from_subject, :to_subject, :field_from_to)

      # Initializes a transform
      #
      # @param context [Object, Job] sets the context in which the block will be executed
      # @param name [String, Symbol] optionally gives the transform a name
      # @param kargs [Hash] any keyword arguments are accessible within the block as `#params` (e.g., `params[:my_custom_param]`)
      # @param block [Proc] a block of code to execute in the context
      def initialize(context, name: 'NOT DEFINED', **kargs, &block)
        @context = context
        @name = name
        @block = block
        params.merge! kargs

        @sources = []
        @targets = []

        @field_maps = { sources: {}, targets: {} }
      end

      attr_accessor :context, :name, :sources, :targets, :field_maps

      # Executes the transform block
      # @return [Object] the context of the transform after executing
      def execute
        context.logger.info "Running transformation #{@name}"
        Dsl.dsl_eval(self, @context, &@block)
      end

      # @return [Hash] the parameters defined during initialization of the transform
      def params
        # Accessing an undefined parameter raises rather than returning nil.
        @params ||= Hash.new { |_, key| raise ArgumentError, "Transform parameter #{key} is not defined" }
      end

      # Validates that a data source used in the transform has been defined
      # @param name [Symbol] the name of a data source used in the transform
      # @param fields [Array<Symbol>] a list of fields used by the transform for this data source
      # @raise [ArgumentError] if the transform source is not defined
      def source(name, fields)
        raise NoMethodError, "Need to define a source mapping for #{name}" unless sources.include? name
        raise ArgumentError, "Need to map fields to source #{name} (#{fields})" unless (fields - field_maps[:sources][name].field_from_to.values).empty?
      end

      # Validates that a data target used in the transform has been defined
      # @param name [Symbol] the name of a data target used in the transform
      # @param fields [Array<Symbol>] a list of fields used by the transform for this data target
      # @raise [ArgumentError] if the transform target is not defined
      def target(name, fields)
        raise NoMethodError, "Need to define a target mapping for #{name}" unless targets.include? name
        raise ArgumentError, "Need to map fields to target #{name} (#{fields})" unless (fields - field_maps[:targets][name].field_from_to.keys).empty?
      end

      # Maps data sources and fields from the transform context to the local transform
      # @param from_source [Symbol] name of the source data in the context
      # @param to_source [Symbol] name of the source data local to the transform
      # @param field_map [Hash] mapping of the key names from the context source to the local source
      def map_source_fields(from_source, to_source, field_map)
        sources << to_source unless sources.include? to_source

        job_ds = context.send(from_source)
        sub_trans_ds = Remi::DataSubject.new(name: to_source)
        # Expose the local data subject as a method on this transform instance.
        define_singleton_method(to_source) { sub_trans_ds }

        field_maps[:sources][to_source] = FieldMap.new(job_ds, send(to_source), field_map)
      end

      # Maps data targets and fields from the local transform to the transform context
      # @param from_target [Symbol] name of the target data local to the transform
      # @param to_target [Symbol] name of the target data in the context
      # @param field_map [Hash] mapping of the key names from the local transform target to the context target
      def map_target_fields(from_target, to_target, field_map)
        targets << from_target unless targets.include? from_target

        job_ds = context.send(to_target)
        sub_trans_ds = Remi::DataSubject.new
        # Expose the local data subject as a method on this transform instance.
        define_singleton_method(from_target) { sub_trans_ds }

        field_maps[:targets][from_target] = FieldMap.new(send(from_target), job_ds, field_map)
      end

      # Imports another transform to be executed as part of this transform. The block
      # is used to perform any source/target field mapping.
      #
      # @param sub_transform [Job::Transform] the transform to import into this one
      # @param block [Proc] a block of code to be executed prior to the execution of the
      #   imported transform. This is where field mapping would be defined.
      # @example
      #
      #   sub_transform = Job::Transform.new('arbitrary') do
      #     source :sub_transform_source, [] # validate that this source has been defined
      #     # do stuff to sub_transform_source here
      #   end
      #
      #   job = MyJob.new
      #   my_transform = Job::Transform.new(job) do
      #     import sub_transform do
      #       map_source_fields :some_method_in_my_job, :sub_transform_source, { :job_id => :sub_transform_id }
      #     end
      #   end
      def import(sub_transform, **kargs, &block)
        sub_transform.context = context
        sub_transform.params.merge! kargs
        # The mapping block runs first so sources/targets exist before execution.
        Dsl.dsl_eval(sub_transform, context, &block)

        sub_transform.map_inputs
        sub_transform.execute
        sub_transform.map_outputs
      end



      protected

      # Copies field metadata and dataframe vectors from each mapped
      # context source into the corresponding local data subject.
      def map_inputs
        sources.each do |source_input|
          field_map = field_maps[:sources][source_input]
          job_ds = field_map.from_subject
          sub_trans_ds = field_map.to_subject
          fields_to_map = field_map.field_from_to.keys

          fields_to_map.each do |job_field|
            sub_trans_field = field_map.field_from_to[job_field]
            sub_trans_ds.fields[sub_trans_field] = job_ds.fields[job_field]

            sub_trans_ds.df[sub_trans_field] = job_ds.df[job_field]
          end
        end
      end

      # Copies field metadata and dataframe vectors from each local
      # target back out to the corresponding context target.
      def map_outputs
        targets.each do |target_output|
          field_map = field_maps[:targets][target_output]
          job_ds = field_map.to_subject
          sub_trans_ds = field_map.from_subject
          fields_to_map = field_map.field_from_to.keys

          fields_to_map.each do |sub_trans_field|
            job_field = field_map.field_from_to[sub_trans_field]
            # merge! preserves existing context metadata while layering on local metadata.
            job_ds.fields[job_field].merge! sub_trans_ds.fields[sub_trans_field]
            job_ds.df[job_field] = sub_trans_ds.df[sub_trans_field]
          end
        end
      end

    end
  end
end
|
data/lib/remi/loader.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
module Remi
  # Abstract parent for all loaders.  A loader pushes a dataframe out to
  # some external system; concrete subclasses supply the system-specific
  # behavior by overriding #load.
  class Loader

    # @param logger [Object] logger used to report loader activity
    #   (defaults to the application-wide Remi::Settings.logger)
    def initialize(*args, logger: Remi::Settings.logger, **kargs, &block)
      @logger = logger
    end

    # @return [Object] the logger used by this loader
    attr_accessor :logger

    # Abstract: subclasses must override to load data into their target
    # system.
    # @param data [Remi::DataFrame] data encoded appropriately for the target
    # @return [true] On success
    # @raise [NoMethodError] always, unless overridden by a subclass
    def load(data)
      message = "#{__method__} not defined for #{self.class.name}"
      raise NoMethodError, message
    end

  end
end
|
data/lib/remi/parser.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
module Remi
  # A parser is an object that converts data returned from an
  # Remi::Extractor into a dataframe. This is a parent class meant to be
  # inherited by child classes that define specific ways to parse
  # data.
  class Parser

    # @param context [Object] The context (e.g., DataSource) for the parser (default: `nil`)
    # @param field_symbolizer [Proc] The field symbolizer to use for this parser
    # @param fields [Remi::Fields] A hash of field metadata to be used by the parser
    # @param logger [Object] The logger used to report parser activity
    def initialize(*args, context: nil, field_symbolizer: Remi::FieldSymbolizers[:standard], fields: Remi::Fields.new({}), logger: Remi::Settings.logger, **kargs, &block)
      @context = context
      @field_symbolizer = field_symbolizer

      @fields = fields
      @logger = logger
    end

    attr_accessor :context
    attr_accessor :logger
    attr_writer :field_symbolizer
    attr_writer :fields

    # Any child classes need to define a parse method that converts extracted data
    # into a dataframe.
    # @param data [Object] Extracted data that needs to be parsed
    # @return [Remi::DataFrame] The data converted into a dataframe
    # @raise [NoMethodError] always, unless overridden by a subclass
    def parse(data)
      raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
    end

    # @return [Proc] The field symbolizer (uses the context field symbolizer if defined)
    def field_symbolizer
      return context.field_symbolizer if context.respond_to? :field_symbolizer
      @field_symbolizer
    end

    # @return [Remi::Fields] The fields (uses the context fields if defined)
    def fields
      # Fixed: original had a doubled condition ("if context if
      # context.respond_to? :fields"); respond_to? on nil is false, so a
      # single guard is sufficient and matches #field_symbolizer above.
      return context.fields if context.respond_to? :fields
      @fields
    end
  end
end
|
@@ -175,7 +175,7 @@ module Remi::Testing::BusinessRules
|
|
175
175
|
end
|
176
176
|
|
177
177
|
def run_transforms
|
178
|
-
@job.
|
178
|
+
@job.execute(:transforms)
|
179
179
|
end
|
180
180
|
end
|
181
181
|
|
@@ -262,7 +262,6 @@ module Remi::Testing::BusinessRules
|
|
262
262
|
end
|
263
263
|
|
264
264
|
attr_reader :name
|
265
|
-
attr_reader :data_subject
|
266
265
|
|
267
266
|
def add_field(field_name)
|
268
267
|
@fields.add_field(self, field_name)
|
@@ -277,17 +276,17 @@ module Remi::Testing::BusinessRules
|
|
277
276
|
end
|
278
277
|
|
279
278
|
def size
|
280
|
-
|
279
|
+
data_subject.df.size
|
281
280
|
end
|
282
281
|
|
283
|
-
def
|
284
|
-
@data_subject.
|
282
|
+
def data_subject
|
283
|
+
@data_subject.dsl_eval
|
285
284
|
end
|
286
285
|
|
287
286
|
# Public: Converts the data subject to a hash where the keys are the table
|
288
287
|
# columns and the values are an array for the value of column for each row.
|
289
288
|
def column_hash
|
290
|
-
|
289
|
+
data_subject.df.to_h.reduce({}) do |h, (k,v)|
|
291
290
|
h[k.symbolize] = v.to_a
|
292
291
|
h
|
293
292
|
end
|
@@ -297,7 +296,7 @@ module Remi::Testing::BusinessRules
|
|
297
296
|
# Need more robust duping to make that feasible.
|
298
297
|
# Don't use results for anything more than size.
|
299
298
|
def where(field_name, operation)
|
300
|
-
|
299
|
+
data_subject.df.where(data_subject.df[field_name.symbolize(data_subject.field_symbolizer)].recode { |v| operation.call(v) })
|
301
300
|
end
|
302
301
|
|
303
302
|
def where_is(field_name, value)
|
@@ -323,11 +322,11 @@ module Remi::Testing::BusinessRules
|
|
323
322
|
|
324
323
|
|
325
324
|
def stub_data
|
326
|
-
|
325
|
+
data_subject.stub_df if data_subject.respond_to? :stub_df
|
327
326
|
end
|
328
327
|
|
329
328
|
def example_to_df(example)
|
330
|
-
df = example.to_df(
|
329
|
+
df = example.to_df(data_subject.df.row[0].to_h, field_symbolizer: data_subject.field_symbolizer)
|
331
330
|
data_subject.fields.each do |vector, metadata|
|
332
331
|
if metadata[:type] == :json
|
333
332
|
df[vector].recode! { |v| JSON.parse(v) rescue v }
|
@@ -338,20 +337,20 @@ module Remi::Testing::BusinessRules
|
|
338
337
|
|
339
338
|
def stub_data_with(example)
|
340
339
|
stub_data
|
341
|
-
|
340
|
+
data_subject.df = example_to_df(example)
|
342
341
|
end
|
343
342
|
|
344
343
|
def append_data_with(example)
|
345
|
-
|
344
|
+
data_subject.df = data_subject.df.concat example_to_df(example)
|
346
345
|
end
|
347
346
|
|
348
347
|
|
349
348
|
def replicate_rows(n_rows)
|
350
|
-
replicated_df = Daru::DataFrame.new([], order:
|
351
|
-
|
349
|
+
replicated_df = Daru::DataFrame.new([], order: data_subject.df.vectors.to_a)
|
350
|
+
data_subject.df.each do |vector|
|
352
351
|
replicated_df[vector.name] = vector.to_a * n_rows
|
353
352
|
end
|
354
|
-
|
353
|
+
data_subject.df = replicated_df
|
355
354
|
end
|
356
355
|
|
357
356
|
def cumulative_dist_from_freq_table(table, freq_field: 'frequency')
|
@@ -383,28 +382,23 @@ module Remi::Testing::BusinessRules
|
|
383
382
|
|
384
383
|
def distribute_values(table)
|
385
384
|
cumulative_dist = cumulative_dist_from_freq_table(table)
|
386
|
-
generated_data = generate_values_from_cumulative_dist(
|
385
|
+
generated_data = generate_values_from_cumulative_dist(data_subject.df.size, cumulative_dist)
|
387
386
|
|
388
387
|
generated_data.each do |field_name, data_array|
|
389
388
|
vector_name = fields[field_name].field_name
|
390
|
-
|
389
|
+
data_subject.df[vector_name] = Daru::Vector.new(data_array, index: data_subject.df.index)
|
391
390
|
end
|
392
391
|
end
|
393
392
|
|
394
393
|
def freq_by(*field_names)
|
395
|
-
|
394
|
+
data_subject.df.group_by(field_names).size * 1.0 / data_subject.df.size
|
396
395
|
end
|
397
396
|
|
398
397
|
def unique_integer_field(field_name)
|
399
398
|
vector_name = fields[field_name].field_name
|
400
399
|
i = 0
|
401
|
-
|
400
|
+
data_subject.df[vector_name].recode! { |v| i += 1 }
|
402
401
|
end
|
403
|
-
|
404
|
-
def csv_options
|
405
|
-
@data_subject.csv_options
|
406
|
-
end
|
407
|
-
|
408
402
|
end
|
409
403
|
|
410
404
|
|