remi 0.2.42 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +7 -0
- data/Gemfile +1 -1
- data/Gemfile.lock +13 -26
- data/README.md +1 -1
- data/features/step_definitions/remi_step.rb +33 -13
- data/features/sub_job_example.feature +24 -0
- data/features/sub_transform_example.feature +35 -0
- data/features/sub_transform_many_to_many.feature +49 -0
- data/features/support/env_app.rb +1 -1
- data/jobs/all_jobs_shared.rb +19 -16
- data/jobs/copy_source_job.rb +11 -9
- data/jobs/csv_file_target_job.rb +10 -9
- data/jobs/json_job.rb +18 -14
- data/jobs/metadata_job.rb +33 -28
- data/jobs/parameters_job.rb +14 -11
- data/jobs/sample_job.rb +106 -77
- data/jobs/sftp_file_target_job.rb +14 -13
- data/jobs/sub_job_example_job.rb +86 -0
- data/jobs/sub_transform_example_job.rb +43 -0
- data/jobs/sub_transform_many_to_many_job.rb +46 -0
- data/jobs/transforms/concatenate_job.rb +16 -12
- data/jobs/transforms/data_frame_sieve_job.rb +24 -19
- data/jobs/transforms/date_diff_job.rb +15 -11
- data/jobs/transforms/nvl_job.rb +16 -12
- data/jobs/transforms/parse_date_job.rb +17 -14
- data/jobs/transforms/partitioner_job.rb +27 -19
- data/jobs/transforms/prefix_job.rb +13 -10
- data/jobs/transforms/truncate_job.rb +14 -10
- data/jobs/transforms/truthy_job.rb +11 -8
- data/lib/remi.rb +25 -11
- data/lib/remi/data_frame.rb +4 -4
- data/lib/remi/data_frame/daru.rb +1 -37
- data/lib/remi/data_subject.rb +234 -48
- data/lib/remi/data_subjects/csv_file.rb +171 -0
- data/lib/remi/data_subjects/data_frame.rb +106 -0
- data/lib/remi/data_subjects/file_system.rb +115 -0
- data/lib/remi/data_subjects/local_file.rb +109 -0
- data/lib/remi/data_subjects/none.rb +31 -0
- data/lib/remi/data_subjects/postgres.rb +186 -0
- data/lib/remi/data_subjects/s3_file.rb +84 -0
- data/lib/remi/data_subjects/salesforce.rb +211 -0
- data/lib/remi/data_subjects/sftp_file.rb +196 -0
- data/lib/remi/data_subjects/sub_job.rb +50 -0
- data/lib/remi/dsl.rb +74 -0
- data/lib/remi/encoder.rb +45 -0
- data/lib/remi/extractor.rb +21 -0
- data/lib/remi/field_symbolizers.rb +1 -0
- data/lib/remi/job.rb +279 -113
- data/lib/remi/job/parameters.rb +90 -0
- data/lib/remi/job/sub_job.rb +35 -0
- data/lib/remi/job/transform.rb +165 -0
- data/lib/remi/loader.rb +22 -0
- data/lib/remi/monkeys/daru.rb +4 -0
- data/lib/remi/parser.rb +44 -0
- data/lib/remi/testing/business_rules.rb +17 -23
- data/lib/remi/testing/data_stub.rb +2 -2
- data/lib/remi/version.rb +1 -1
- data/remi.gemspec +3 -0
- data/spec/data_subject_spec.rb +475 -11
- data/spec/data_subjects/csv_file_spec.rb +69 -0
- data/spec/data_subjects/data_frame_spec.rb +52 -0
- data/spec/{extractor → data_subjects}/file_system_spec.rb +0 -0
- data/spec/{extractor → data_subjects}/local_file_spec.rb +0 -0
- data/spec/data_subjects/none_spec.rb +41 -0
- data/spec/data_subjects/postgres_spec.rb +80 -0
- data/spec/{extractor → data_subjects}/s3_file_spec.rb +0 -0
- data/spec/data_subjects/salesforce_spec.rb +117 -0
- data/spec/{extractor → data_subjects}/sftp_file_spec.rb +16 -0
- data/spec/data_subjects/sub_job_spec.rb +33 -0
- data/spec/encoder_spec.rb +38 -0
- data/spec/extractor_spec.rb +11 -0
- data/spec/fixtures/sf_bulk_helper_stubs.rb +443 -0
- data/spec/job/transform_spec.rb +257 -0
- data/spec/job_spec.rb +507 -0
- data/spec/loader_spec.rb +11 -0
- data/spec/parser_spec.rb +38 -0
- data/spec/sf_bulk_helper_spec.rb +117 -0
- data/spec/testing/data_stub_spec.rb +5 -3
- metadata +109 -27
- data/features/aggregate.feature +0 -42
- data/jobs/aggregate_job.rb +0 -31
- data/jobs/transforms/transform_jobs.rb +0 -4
- data/lib/remi/data_subject/csv_file.rb +0 -162
- data/lib/remi/data_subject/data_frame.rb +0 -52
- data/lib/remi/data_subject/postgres.rb +0 -134
- data/lib/remi/data_subject/salesforce.rb +0 -136
- data/lib/remi/data_subject/sftp_file.rb +0 -65
- data/lib/remi/extractor/file_system.rb +0 -92
- data/lib/remi/extractor/local_file.rb +0 -43
- data/lib/remi/extractor/s3_file.rb +0 -57
- data/lib/remi/extractor/sftp_file.rb +0 -83
- data/spec/data_subject/csv_file_spec.rb +0 -79
- data/spec/data_subject/data_frame.rb +0 -27
@@ -0,0 +1,90 @@
|
|
1
|
+
module Remi
|
2
|
+
class Job
|
3
|
+
# A job parameter adds flexibility to defining job templates. An
|
4
|
+
# instance of Parameters contains a collection of parameters that
|
5
|
+
# are evaluated in the context of a job. It functions very
|
6
|
+
# similarly to RSpec's #let, in that it can be defined using a
|
7
|
+
# block of code that is only evaluated the first time it is used,
|
8
|
+
# and cached for later use.
|
9
|
+
#
|
10
|
+
# Parameters should only be used in the context of a job.
|
11
|
+
# @example
|
12
|
+
# class MyJob < Remi::Job
|
13
|
+
# param(:my_param) { 'some parameter' }
|
14
|
+
# param :my_calculated_param do
|
15
|
+
# 1.upto(1000).size
|
16
|
+
# end
|
17
|
+
#
|
18
|
+
# transform :something do
|
19
|
+
# puts "my_param is #{job.params[:my_param]}"
|
20
|
+
# puts "my_calculated_param is #{job.params[:my_calculated_param]}"
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# job1 = MyJob.new
|
25
|
+
# job1.execute
|
26
|
+
# #=> my_param is some parameter
|
27
|
+
# #=> my_calculated_param is 1000
|
28
|
+
#
|
29
|
+
# job2 = MyJob.new
|
30
|
+
# job2.params[:my_param] = 'override'
|
31
|
+
# job2.execute
|
32
|
+
# #=> my_param is override
|
33
|
+
# #=> my_calculated_param is 1000
|
34
|
+
#
|
35
|
+
# job3 = MyJob.new(my_param: 'constructor override', my_calculated_param: 322)
|
36
|
+
# job3.execute
|
37
|
+
# #=> my_param is constructor override
|
38
|
+
# #=> my_calculated_param is 322
|
39
|
+
class Parameters
|
40
|
+
def initialize(context=nil)
|
41
|
+
@context = context
|
42
|
+
@params = {}
|
43
|
+
end
|
44
|
+
|
45
|
+
# @return [Object] The context in which parameter blocks will be evaluated
|
46
|
+
attr_accessor :context
|
47
|
+
|
48
|
+
# Get the value of a parameter
|
49
|
+
#
|
50
|
+
# @param name [Symbol] The name of the parameter
|
51
|
+
#
|
52
|
+
# @return [Object] The value of the parameter
|
53
|
+
def [](name)
|
54
|
+
return send(name) if respond_to?(name)
|
55
|
+
raise ArgumentError, "Job parameter #{name} is not defined"
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
# Set the value of a parameter
|
60
|
+
#
|
61
|
+
# @param name [Symbol] The name of the parameter
|
62
|
+
# @param value [Object] The new value of the parameter
|
63
|
+
#
|
64
|
+
# @return [Object] The new value of the parameter
|
65
|
+
def []=(name, value)
|
66
|
+
__define__(name) { value } unless respond_to? name
|
67
|
+
@params[name] = value
|
68
|
+
end
|
69
|
+
|
70
|
+
# @return [Hash] The parameters as a hash
|
71
|
+
def to_h
|
72
|
+
@params
|
73
|
+
end
|
74
|
+
|
75
|
+
# @return [Job::Parameters] A clone of this parameter set
|
76
|
+
def clone
|
77
|
+
the_clone = super
|
78
|
+
the_clone.instance_variable_set(:@params, @params.dup)
|
79
|
+
the_clone
|
80
|
+
end
|
81
|
+
|
82
|
+
def __define__(name, &block)
|
83
|
+
@params[name] = nil
|
84
|
+
define_singleton_method name do
|
85
|
+
@params[name] ||= Remi::Dsl.dsl_return(self, @context, &block)
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Remi
|
2
|
+
class Job
|
3
|
+
class SubJob
|
4
|
+
def initialize(context=nil, name: 'UNDEFINED SubJob', **kargs, &block)
|
5
|
+
@context = context
|
6
|
+
@name = name
|
7
|
+
@block = block
|
8
|
+
end
|
9
|
+
|
10
|
+
attr_accessor :context, :name
|
11
|
+
|
12
|
+
def dsl_return
|
13
|
+
sub_job = Dsl.dsl_return(self, @context, &@block)
|
14
|
+
raise ArgumentError, "SubJob DSL must return a Remi::Job" unless sub_job.is_a? Job
|
15
|
+
sub_job
|
16
|
+
end
|
17
|
+
|
18
|
+
def job
|
19
|
+
@job ||= dsl_return
|
20
|
+
end
|
21
|
+
|
22
|
+
def fields(data_subject)
|
23
|
+
job.send(data_subject).dsl_eval.fields
|
24
|
+
end
|
25
|
+
|
26
|
+
def execute
|
27
|
+
job.execute
|
28
|
+
end
|
29
|
+
|
30
|
+
def execute_transforms
|
31
|
+
job.execute(:transforms)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
module Remi
|
2
|
+
class Job
|
3
|
+
# A Transform contains a block of code that is executed in a context.
|
4
|
+
# Transforms are usually defined in a Job, according to the Job DSL.
|
5
|
+
#
|
6
|
+
# Transforms may optionally have a mapping defined that links a
|
7
|
+
# local definition of a data frame to a definition of the data
|
8
|
+
# frame in the associated context.
|
9
|
+
# @example
|
10
|
+
#
|
11
|
+
# # Transforms should typically be defined using the Job DSL
|
12
|
+
# job = MyJob.new
|
13
|
+
# tform = Job::Transform.new(job) do
|
14
|
+
# # ... stuff to do in the context of the job
|
15
|
+
# end
|
16
|
+
# tform.execute
|
17
|
+
class Transform
|
18
|
+
|
19
|
+
FieldMap = Struct.new(:from_subject, :to_subject, :field_from_to)
|
20
|
+
|
21
|
+
# Initializes a transform
|
22
|
+
#
|
23
|
+
# @param context [Object, Job] sets the context in which the block will be executed
|
24
|
+
# @param name [String, Symbol] optionally gives the transform a name
|
25
|
+
# @param kargs [Hash] any keyword arguments are accessible within the block as `#params` (e.g., `params[:my_custom_param]`)
|
26
|
+
# @param block [Proc] a block of code to execute in the context
|
27
|
+
def initialize(context, name: 'NOT DEFINED', **kargs, &block)
|
28
|
+
@context = context
|
29
|
+
@name = name
|
30
|
+
@block = block
|
31
|
+
params.merge! kargs
|
32
|
+
|
33
|
+
@sources = []
|
34
|
+
@targets = []
|
35
|
+
|
36
|
+
@field_maps = { sources: {}, targets: {} }
|
37
|
+
end
|
38
|
+
|
39
|
+
attr_accessor :context, :name, :sources, :targets, :field_maps
|
40
|
+
|
41
|
+
# Executes the transform block
|
42
|
+
# @return [Object] the context of the transform after executing
|
43
|
+
def execute
|
44
|
+
context.logger.info "Running transformation #{@name}"
|
45
|
+
Dsl.dsl_eval(self, @context, &@block)
|
46
|
+
end
|
47
|
+
|
48
|
+
# @return [Hash] the parameters defined during initialization of the transform
|
49
|
+
def params
|
50
|
+
@params ||= Hash.new { |_, key| raise ArgumentError, "Transform parameter #{key} is not defined" }
|
51
|
+
end
|
52
|
+
|
53
|
+
# Validates that a data source used in the transform has been defined
|
54
|
+
# @param name [Symbol] the name of a data source used in the transform
|
55
|
+
# @param fields [Array<Symbol>] a list of fields used by the transform for this data source
|
56
|
+
# @raise [NoMethodError, ArgumentError] NoMethodError if the transform source is not defined; ArgumentError if any of the given fields are not mapped to it
|
57
|
+
def source(name, fields)
|
58
|
+
raise NoMethodError, "Need to define a source mapping for #{name}" unless sources.include? name
|
59
|
+
raise ArgumentError, "Need to map fields to source #{name} (#{fields})" unless (fields - field_maps[:sources][name].field_from_to.values).empty?
|
60
|
+
end
|
61
|
+
|
62
|
+
# Validates that a data target used in the transform has been defined
|
63
|
+
# @param name [Symbol] the name of a data target used in the transform
|
64
|
+
# @param fields [Array<Symbol>] a list of fields used by the transform for this data target
|
65
|
+
# @raise [NoMethodError, ArgumentError] NoMethodError if the transform target is not defined; ArgumentError if any of the given fields are not mapped to it
|
66
|
+
def target(name, fields)
|
67
|
+
raise NoMethodError, "Need to define a target mapping for #{name}" unless targets.include? name
|
68
|
+
raise ArgumentError, "Need to map fields to target #{name} (#{fields})" unless (fields - field_maps[:targets][name].field_from_to.keys).empty?
|
69
|
+
end
|
70
|
+
|
71
|
+
# Maps data sources and fields from the transform context to the local transform
|
72
|
+
# @param from_source [Symbol] name of the source data in the context
|
73
|
+
# @param to_source [Symbol] name of the source data local to the transform
|
74
|
+
# @param field_map [Hash] mapping of the key names from the context source to the local source
|
75
|
+
def map_source_fields(from_source, to_source, field_map)
|
76
|
+
sources << to_source unless sources.include? to_source
|
77
|
+
|
78
|
+
job_ds = context.send(from_source)
|
79
|
+
sub_trans_ds = Remi::DataSubject.new(name: to_source)
|
80
|
+
define_singleton_method(to_source) { sub_trans_ds }
|
81
|
+
|
82
|
+
field_maps[:sources][to_source] = FieldMap.new(job_ds, send(to_source), field_map)
|
83
|
+
end
|
84
|
+
|
85
|
+
# Maps data targets and fields from the local transform to the transform context
|
86
|
+
# @param from_target [Symbol] name of the target data local to the transform
|
87
|
+
# @param to_target [Symbol] name of the target data in the context
|
88
|
+
# @param field_map [Hash] mapping of the key names from the local transform target to the context target
|
89
|
+
def map_target_fields(from_target, to_target, field_map)
|
90
|
+
targets << from_target unless targets.include? from_target
|
91
|
+
|
92
|
+
job_ds = context.send(to_target)
|
93
|
+
sub_trans_ds = Remi::DataSubject.new
|
94
|
+
define_singleton_method(from_target) { sub_trans_ds }
|
95
|
+
|
96
|
+
field_maps[:targets][from_target] = FieldMap.new(send(from_target), job_ds, field_map)
|
97
|
+
end
|
98
|
+
|
99
|
+
# Imports another transform to be executed as part of this transform. The block
|
100
|
+
# is used to perform any source/target field mapping.
|
101
|
+
#
|
102
|
+
# @param sub_transform [Job::Transform] the transform to import into this one
|
103
|
+
# @param block [Proc] a block of code to be executed prior to the execution of the
|
104
|
+
# imported transform. This is where field mapping would be defined.
|
105
|
+
# @example
|
106
|
+
#
|
107
|
+
# sub_transform = Job::Transform.new('arbitrary') do
|
108
|
+
# source :sub_transform_source, [] # validate that this source has been defined
|
109
|
+
# # do stuff to sub_transform_source here
|
110
|
+
# end
|
111
|
+
#
|
112
|
+
# job = MyJob.new
|
113
|
+
# my_transform = Job::Transform.new(job) do
|
114
|
+
# import sub_transform do
|
115
|
+
# map_source_fields :some_method_in_my_job, :sub_transform_source, { :job_id => :sub_transform_id }
|
116
|
+
# end
|
117
|
+
# end
|
118
|
+
def import(sub_transform, **kargs, &block)
|
119
|
+
sub_transform.context = context
|
120
|
+
sub_transform.params.merge! kargs
|
121
|
+
Dsl.dsl_eval(sub_transform, context, &block)
|
122
|
+
|
123
|
+
sub_transform.map_inputs
|
124
|
+
sub_transform.execute
|
125
|
+
sub_transform.map_outputs
|
126
|
+
end
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
protected
|
131
|
+
|
132
|
+
def map_inputs
|
133
|
+
sources.each do |source_input|
|
134
|
+
field_map = field_maps[:sources][source_input]
|
135
|
+
job_ds = field_map.from_subject
|
136
|
+
sub_trans_ds = field_map.to_subject
|
137
|
+
fields_to_map = field_map.field_from_to.keys
|
138
|
+
|
139
|
+
fields_to_map.each do |job_field|
|
140
|
+
sub_trans_field = field_map.field_from_to[job_field]
|
141
|
+
sub_trans_ds.fields[sub_trans_field] = job_ds.fields[job_field]
|
142
|
+
|
143
|
+
sub_trans_ds.df[sub_trans_field] = job_ds.df[job_field]
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
def map_outputs
|
149
|
+
targets.each do |target_output|
|
150
|
+
field_map = field_maps[:targets][target_output]
|
151
|
+
job_ds = field_map.to_subject
|
152
|
+
sub_trans_ds = field_map.from_subject
|
153
|
+
fields_to_map = field_map.field_from_to.keys
|
154
|
+
|
155
|
+
fields_to_map.each do |sub_trans_field|
|
156
|
+
job_field = field_map.field_from_to[sub_trans_field]
|
157
|
+
job_ds.fields[job_field].merge! sub_trans_ds.fields[sub_trans_field]
|
158
|
+
job_ds.df[job_field] = sub_trans_ds.df[sub_trans_field]
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
data/lib/remi/loader.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
module Remi
|
2
|
+
# A loader is an object meant to load data into some external system.
|
3
|
+
# This is a parent class meant to be inherited by child classes that
|
4
|
+
# define specific ways to load data.
|
5
|
+
class Loader
|
6
|
+
|
7
|
+
def initialize(*args, logger: Remi::Settings.logger, **kargs, &block)
|
8
|
+
@logger = logger
|
9
|
+
end
|
10
|
+
|
11
|
+
attr_accessor :logger
|
12
|
+
|
13
|
+
# Any child classes need to define a load method that loads data from
|
14
|
+
# the given dataframe into the target system.
|
15
|
+
# @param data [Remi::DataFrame] Data that has been encoded appropriately to be loaded into the target
|
16
|
+
# @return [true] On success
|
17
|
+
def load(data)
|
18
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
data/lib/remi/parser.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
module Remi
|
2
|
+
# A parser is an object that converts data returned from a
|
3
|
+
# Remi::Extractor into a dataframe. This is a parent class meant to be
|
4
|
+
# inherited by child classes that define specific ways to parse
|
5
|
+
# data.
|
6
|
+
class Parser
|
7
|
+
|
8
|
+
# @param context [Object] The context (e.g., DataSource) for the parser (default: `nil`)
|
9
|
+
# @param field_symbolizer [Proc] The field symbolizer to use for this parser
|
10
|
+
# @param fields [Remi::Fields] A hash of field metadata to be used by the parser
|
11
|
+
def initialize(*args, context: nil, field_symbolizer: Remi::FieldSymbolizers[:standard], fields: Remi::Fields.new({}), logger: Remi::Settings.logger, **kargs, &block)
|
12
|
+
@context = context
|
13
|
+
@field_symbolizer = field_symbolizer
|
14
|
+
|
15
|
+
@fields = fields
|
16
|
+
@logger = logger
|
17
|
+
end
|
18
|
+
|
19
|
+
attr_accessor :context
|
20
|
+
attr_accessor :logger
|
21
|
+
attr_writer :field_symbolizer
|
22
|
+
attr_writer :fields
|
23
|
+
|
24
|
+
# Any child classes need to define a parse method that converts extracted data
|
25
|
+
# into a dataframe.
|
26
|
+
# @param data [Object] Extracted data that needs to be parsed
|
27
|
+
# @return [Remi::DataFrame] The data converted into a dataframe
|
28
|
+
def parse(data)
|
29
|
+
raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
|
30
|
+
end
|
31
|
+
|
32
|
+
# @return [Proc] The field symbolizer (uses the context field symbolizer if defined)
|
33
|
+
def field_symbolizer
|
34
|
+
return context.field_symbolizer if context.respond_to? :field_symbolizer
|
35
|
+
@field_symbolizer
|
36
|
+
end
|
37
|
+
|
38
|
+
# @return [Remi::Fields] The fields (uses the context fields if defined)
|
39
|
+
def fields
|
40
|
+
return context.fields if context if context.respond_to? :fields
|
41
|
+
@fields
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -175,7 +175,7 @@ module Remi::Testing::BusinessRules
|
|
175
175
|
end
|
176
176
|
|
177
177
|
def run_transforms
|
178
|
-
@job.
|
178
|
+
@job.execute(:transforms)
|
179
179
|
end
|
180
180
|
end
|
181
181
|
|
@@ -262,7 +262,6 @@ module Remi::Testing::BusinessRules
|
|
262
262
|
end
|
263
263
|
|
264
264
|
attr_reader :name
|
265
|
-
attr_reader :data_subject
|
266
265
|
|
267
266
|
def add_field(field_name)
|
268
267
|
@fields.add_field(self, field_name)
|
@@ -277,17 +276,17 @@ module Remi::Testing::BusinessRules
|
|
277
276
|
end
|
278
277
|
|
279
278
|
def size
|
280
|
-
|
279
|
+
data_subject.df.size
|
281
280
|
end
|
282
281
|
|
283
|
-
def
|
284
|
-
@data_subject.
|
282
|
+
def data_subject
|
283
|
+
@data_subject.dsl_eval
|
285
284
|
end
|
286
285
|
|
287
286
|
# Public: Converts the data subject to a hash where the keys are the table
|
288
287
|
# columns and the values are an array for the value of column for each row.
|
289
288
|
def column_hash
|
290
|
-
|
289
|
+
data_subject.df.to_h.reduce({}) do |h, (k,v)|
|
291
290
|
h[k.symbolize] = v.to_a
|
292
291
|
h
|
293
292
|
end
|
@@ -297,7 +296,7 @@ module Remi::Testing::BusinessRules
|
|
297
296
|
# Need more robust duping to make that feasible.
|
298
297
|
# Don't use results for anything more than size.
|
299
298
|
def where(field_name, operation)
|
300
|
-
|
299
|
+
data_subject.df.where(data_subject.df[field_name.symbolize(data_subject.field_symbolizer)].recode { |v| operation.call(v) })
|
301
300
|
end
|
302
301
|
|
303
302
|
def where_is(field_name, value)
|
@@ -323,11 +322,11 @@ module Remi::Testing::BusinessRules
|
|
323
322
|
|
324
323
|
|
325
324
|
def stub_data
|
326
|
-
|
325
|
+
data_subject.stub_df if data_subject.respond_to? :stub_df
|
327
326
|
end
|
328
327
|
|
329
328
|
def example_to_df(example)
|
330
|
-
df = example.to_df(
|
329
|
+
df = example.to_df(data_subject.df.row[0].to_h, field_symbolizer: data_subject.field_symbolizer)
|
331
330
|
data_subject.fields.each do |vector, metadata|
|
332
331
|
if metadata[:type] == :json
|
333
332
|
df[vector].recode! { |v| JSON.parse(v) rescue v }
|
@@ -338,20 +337,20 @@ module Remi::Testing::BusinessRules
|
|
338
337
|
|
339
338
|
def stub_data_with(example)
|
340
339
|
stub_data
|
341
|
-
|
340
|
+
data_subject.df = example_to_df(example)
|
342
341
|
end
|
343
342
|
|
344
343
|
def append_data_with(example)
|
345
|
-
|
344
|
+
data_subject.df = data_subject.df.concat example_to_df(example)
|
346
345
|
end
|
347
346
|
|
348
347
|
|
349
348
|
def replicate_rows(n_rows)
|
350
|
-
replicated_df = Daru::DataFrame.new([], order:
|
351
|
-
|
349
|
+
replicated_df = Daru::DataFrame.new([], order: data_subject.df.vectors.to_a)
|
350
|
+
data_subject.df.each do |vector|
|
352
351
|
replicated_df[vector.name] = vector.to_a * n_rows
|
353
352
|
end
|
354
|
-
|
353
|
+
data_subject.df = replicated_df
|
355
354
|
end
|
356
355
|
|
357
356
|
def cumulative_dist_from_freq_table(table, freq_field: 'frequency')
|
@@ -383,28 +382,23 @@ module Remi::Testing::BusinessRules
|
|
383
382
|
|
384
383
|
def distribute_values(table)
|
385
384
|
cumulative_dist = cumulative_dist_from_freq_table(table)
|
386
|
-
generated_data = generate_values_from_cumulative_dist(
|
385
|
+
generated_data = generate_values_from_cumulative_dist(data_subject.df.size, cumulative_dist)
|
387
386
|
|
388
387
|
generated_data.each do |field_name, data_array|
|
389
388
|
vector_name = fields[field_name].field_name
|
390
|
-
|
389
|
+
data_subject.df[vector_name] = Daru::Vector.new(data_array, index: data_subject.df.index)
|
391
390
|
end
|
392
391
|
end
|
393
392
|
|
394
393
|
def freq_by(*field_names)
|
395
|
-
|
394
|
+
data_subject.df.group_by(field_names).size * 1.0 / data_subject.df.size
|
396
395
|
end
|
397
396
|
|
398
397
|
def unique_integer_field(field_name)
|
399
398
|
vector_name = fields[field_name].field_name
|
400
399
|
i = 0
|
401
|
-
|
400
|
+
data_subject.df[vector_name].recode! { |v| i += 1 }
|
402
401
|
end
|
403
|
-
|
404
|
-
def csv_options
|
405
|
-
@data_subject.csv_options
|
406
|
-
end
|
407
|
-
|
408
402
|
end
|
409
403
|
|
410
404
|
|