remi 0.2.42 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/.yardopts +7 -0
  3. data/Gemfile +1 -1
  4. data/Gemfile.lock +13 -26
  5. data/README.md +1 -1
  6. data/features/step_definitions/remi_step.rb +33 -13
  7. data/features/sub_job_example.feature +24 -0
  8. data/features/sub_transform_example.feature +35 -0
  9. data/features/sub_transform_many_to_many.feature +49 -0
  10. data/features/support/env_app.rb +1 -1
  11. data/jobs/all_jobs_shared.rb +19 -16
  12. data/jobs/copy_source_job.rb +11 -9
  13. data/jobs/csv_file_target_job.rb +10 -9
  14. data/jobs/json_job.rb +18 -14
  15. data/jobs/metadata_job.rb +33 -28
  16. data/jobs/parameters_job.rb +14 -11
  17. data/jobs/sample_job.rb +106 -77
  18. data/jobs/sftp_file_target_job.rb +14 -13
  19. data/jobs/sub_job_example_job.rb +86 -0
  20. data/jobs/sub_transform_example_job.rb +43 -0
  21. data/jobs/sub_transform_many_to_many_job.rb +46 -0
  22. data/jobs/transforms/concatenate_job.rb +16 -12
  23. data/jobs/transforms/data_frame_sieve_job.rb +24 -19
  24. data/jobs/transforms/date_diff_job.rb +15 -11
  25. data/jobs/transforms/nvl_job.rb +16 -12
  26. data/jobs/transforms/parse_date_job.rb +17 -14
  27. data/jobs/transforms/partitioner_job.rb +27 -19
  28. data/jobs/transforms/prefix_job.rb +13 -10
  29. data/jobs/transforms/truncate_job.rb +14 -10
  30. data/jobs/transforms/truthy_job.rb +11 -8
  31. data/lib/remi.rb +25 -11
  32. data/lib/remi/data_frame.rb +4 -4
  33. data/lib/remi/data_frame/daru.rb +1 -37
  34. data/lib/remi/data_subject.rb +234 -48
  35. data/lib/remi/data_subjects/csv_file.rb +171 -0
  36. data/lib/remi/data_subjects/data_frame.rb +106 -0
  37. data/lib/remi/data_subjects/file_system.rb +115 -0
  38. data/lib/remi/data_subjects/local_file.rb +109 -0
  39. data/lib/remi/data_subjects/none.rb +31 -0
  40. data/lib/remi/data_subjects/postgres.rb +186 -0
  41. data/lib/remi/data_subjects/s3_file.rb +84 -0
  42. data/lib/remi/data_subjects/salesforce.rb +211 -0
  43. data/lib/remi/data_subjects/sftp_file.rb +196 -0
  44. data/lib/remi/data_subjects/sub_job.rb +50 -0
  45. data/lib/remi/dsl.rb +74 -0
  46. data/lib/remi/encoder.rb +45 -0
  47. data/lib/remi/extractor.rb +21 -0
  48. data/lib/remi/field_symbolizers.rb +1 -0
  49. data/lib/remi/job.rb +279 -113
  50. data/lib/remi/job/parameters.rb +90 -0
  51. data/lib/remi/job/sub_job.rb +35 -0
  52. data/lib/remi/job/transform.rb +165 -0
  53. data/lib/remi/loader.rb +22 -0
  54. data/lib/remi/monkeys/daru.rb +4 -0
  55. data/lib/remi/parser.rb +44 -0
  56. data/lib/remi/testing/business_rules.rb +17 -23
  57. data/lib/remi/testing/data_stub.rb +2 -2
  58. data/lib/remi/version.rb +1 -1
  59. data/remi.gemspec +3 -0
  60. data/spec/data_subject_spec.rb +475 -11
  61. data/spec/data_subjects/csv_file_spec.rb +69 -0
  62. data/spec/data_subjects/data_frame_spec.rb +52 -0
  63. data/spec/{extractor → data_subjects}/file_system_spec.rb +0 -0
  64. data/spec/{extractor → data_subjects}/local_file_spec.rb +0 -0
  65. data/spec/data_subjects/none_spec.rb +41 -0
  66. data/spec/data_subjects/postgres_spec.rb +80 -0
  67. data/spec/{extractor → data_subjects}/s3_file_spec.rb +0 -0
  68. data/spec/data_subjects/salesforce_spec.rb +117 -0
  69. data/spec/{extractor → data_subjects}/sftp_file_spec.rb +16 -0
  70. data/spec/data_subjects/sub_job_spec.rb +33 -0
  71. data/spec/encoder_spec.rb +38 -0
  72. data/spec/extractor_spec.rb +11 -0
  73. data/spec/fixtures/sf_bulk_helper_stubs.rb +443 -0
  74. data/spec/job/transform_spec.rb +257 -0
  75. data/spec/job_spec.rb +507 -0
  76. data/spec/loader_spec.rb +11 -0
  77. data/spec/parser_spec.rb +38 -0
  78. data/spec/sf_bulk_helper_spec.rb +117 -0
  79. data/spec/testing/data_stub_spec.rb +5 -3
  80. metadata +109 -27
  81. data/features/aggregate.feature +0 -42
  82. data/jobs/aggregate_job.rb +0 -31
  83. data/jobs/transforms/transform_jobs.rb +0 -4
  84. data/lib/remi/data_subject/csv_file.rb +0 -162
  85. data/lib/remi/data_subject/data_frame.rb +0 -52
  86. data/lib/remi/data_subject/postgres.rb +0 -134
  87. data/lib/remi/data_subject/salesforce.rb +0 -136
  88. data/lib/remi/data_subject/sftp_file.rb +0 -65
  89. data/lib/remi/extractor/file_system.rb +0 -92
  90. data/lib/remi/extractor/local_file.rb +0 -43
  91. data/lib/remi/extractor/s3_file.rb +0 -57
  92. data/lib/remi/extractor/sftp_file.rb +0 -83
  93. data/spec/data_subject/csv_file_spec.rb +0 -79
  94. data/spec/data_subject/data_frame.rb +0 -27
data/lib/remi/job/parameters.rb
@@ -0,0 +1,90 @@
+module Remi
+  class Job
+    # A job parameter adds flexibility to defining job templates. An
+    # instance of Parameters contains a collection of parameters that
+    # are evaluated in the context of a job. It functions very
+    # similarly to RSpec's #let, in that it can be defined using a
+    # block of code that is only evaluated the first time it is used,
+    # and cached for later use.
+    #
+    # Parameters should only be used in the context of a job.
+    # @example
+    #   class MyJob < Remi::Job
+    #     param(:my_param) { 'some parameter' }
+    #     param :my_calculated_param do
+    #       1.upto(1000).size
+    #     end
+    #
+    #     transform :something do
+    #       puts "my_param is #{job.params[:my_param]}"
+    #       puts "my_calculated_param is #{job.params[:my_calculated_param]}"
+    #     end
+    #   end
+    #
+    #   job1 = MyJob.new
+    #   job1.execute
+    #   #=> my_param is some parameter
+    #   #=> my_calculated_param is 1000
+    #
+    #   job2 = MyJob.new
+    #   job2.params[:my_param] = 'override'
+    #   job2.execute
+    #   #=> my_param is override
+    #   #=> my_calculated_param is 1000
+    #
+    #   job3 = MyJob.new(my_param: 'constructor override', my_calculated_param: 322)
+    #   job3.execute
+    #   #=> my_param is constructor override
+    #   #=> my_calculated_param is 322
+    class Parameters
+      def initialize(context=nil)
+        @context = context
+        @params = {}
+      end
+
+      # @return [Object] The context in which parameter blocks will be evaluated
+      attr_accessor :context
+
+      # Get the value of a parameter
+      #
+      # @param name [Symbol] The name of the parameter
+      #
+      # @return [Object] The value of the parameter
+      def [](name)
+        return send(name) if respond_to?(name)
+        raise ArgumentError, "Job parameter #{name} is not defined"
+      end
+
+
+      # Set the value of a parameter
+      #
+      # @param name [Symbol] The name of the parameter
+      # @param value [Object] The new value of the parameter
+      #
+      # @return [Object] The new value of the parameter
+      def []=(name, value)
+        __define__(name) { value } unless respond_to? name
+        @params[name] = value
+      end
+
+      # @return [Hash] The parameters as a hash
+      def to_h
+        @params
+      end
+
+      # @return [Job::Parameters] A clone of this parameter set
+      def clone
+        the_clone = super
+        the_clone.instance_variable_set(:@params, @params.dup)
+        the_clone
+      end
+
+      def __define__(name, &block)
+        @params[name] = nil
+        define_singleton_method name do
+          @params[name] ||= Remi::Dsl.dsl_return(self, @context, &block)
+        end
+      end
+    end
+  end
+end
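Beyond the job-level `param` DSL shown in the class comment, `Parameters` can also be exercised directly. The following is a minimal sketch based only on the methods above; `:batch_size` is an arbitrary illustrative name:

# Minimal sketch (illustrative names only): []= defines a cached reader,
# [] returns the cached value, and unknown names raise ArgumentError.
params = Remi::Job::Parameters.new
params[:batch_size] = 500   # defines a singleton reader and stores the value
params[:batch_size]         #=> 500
params.to_h                 #=> { :batch_size => 500 }
params[:missing]            #=> raises ArgumentError, "Job parameter missing is not defined"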
data/lib/remi/job/sub_job.rb
@@ -0,0 +1,35 @@
+module Remi
+  class Job
+    class SubJob
+      def initialize(context=nil, name: 'UNDEFINED SubJob', **kargs, &block)
+        @context = context
+        @name = name
+        @block = block
+      end
+
+      attr_accessor :context, :name
+
+      def dsl_return
+        sub_job = Dsl.dsl_return(self, @context, &@block)
+        raise ArgumentError, "SubJob DSL must return a Remi::Job" unless sub_job.is_a? Job
+        sub_job
+      end
+
+      def job
+        @job ||= dsl_return
+      end
+
+      def fields(data_subject)
+        job.send(data_subject).dsl_eval.fields
+      end
+
+      def execute
+        job.execute
+      end
+
+      def execute_transforms
+        job.execute(:transforms)
+      end
+    end
+  end
+end
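As a rough sketch of how this wrapper might be used (the job DSL entry point lives elsewhere; `LoadContactsJob` and `:contacts_target` are assumed names for illustration):

# Hypothetical usage: wrap another job so it can be composed into this one.
sub = Remi::Job::SubJob.new(self, name: 'load_contacts') do
  LoadContactsJob.new           # the block must return a Remi::Job
end

sub.job                         # lazily evaluates and caches the wrapped job
sub.fields(:contacts_target)    # field metadata of one of its data subjects
sub.execute_transforms          # run only the wrapped job's transforms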
data/lib/remi/job/transform.rb
@@ -0,0 +1,165 @@
+module Remi
+  class Job
+    # A Transform contains a block of code that is executed in a context.
+    # Transforms are usually defined in a Job, according to the Job DSL.
+    #
+    # Transforms may optionally have a mapping defined that links a
+    # local definition of a data frame to a definition of the data
+    # frame in the associated context.
+    # @example
+    #
+    #   # Transforms should typically be defined using the Job DSL
+    #   job = MyJob.new
+    #   tform = Job::Transform.new(job) do
+    #     # ... stuff to do in the context of the job
+    #   end
+    #   tform.execute
+    class Transform
+
+      FieldMap = Struct.new(:from_subject, :to_subject, :field_from_to)
+
+      # Initializes a transform
+      #
+      # @param context [Object, Job] sets the context in which the block will be executed
+      # @param name [String, Symbol] optionally gives the transform a name
+      # @param kargs [Hash] any keyword arguments are accessible within the block as `#params` (e.g., `params[:my_custom_param]`)
+      # @param block [Proc] a block of code to execute in the context
+      def initialize(context, name: 'NOT DEFINED', **kargs, &block)
+        @context = context
+        @name = name
+        @block = block
+        params.merge! kargs
+
+        @sources = []
+        @targets = []
+
+        @field_maps = { sources: {}, targets: {} }
+      end
+
+      attr_accessor :context, :name, :sources, :targets, :field_maps
+
+      # Executes the transform block
+      # @return [Object] the context of the transform after executing
+      def execute
+        context.logger.info "Running transformation #{@name}"
+        Dsl.dsl_eval(self, @context, &@block)
+      end
+
+      # @return [Hash] the parameters defined during initialization of the transform
+      def params
+        @params ||= Hash.new { |_, key| raise ArgumentError, "Transform parameter #{key} is not defined" }
+      end
+
+      # Validates that a data source used in the transform has been defined
+      # @param name [Symbol] the name of a data source used in the transform
+      # @param fields [Array<Symbol>] a list of fields used by the transform for this data source
+      # @raise [NoMethodError, ArgumentError] if the source mapping is not defined or fields are not mapped
+      def source(name, fields)
+        raise NoMethodError, "Need to define a source mapping for #{name}" unless sources.include? name
+        raise ArgumentError, "Need to map fields to source #{name} (#{fields})" unless (fields - field_maps[:sources][name].field_from_to.values).empty?
+      end
+
+      # Validates that a data target used in the transform has been defined
+      # @param name [Symbol] the name of a data target used in the transform
+      # @param fields [Array<Symbol>] a list of fields used by the transform for this data target
+      # @raise [NoMethodError, ArgumentError] if the target mapping is not defined or fields are not mapped
+      def target(name, fields)
+        raise NoMethodError, "Need to define a target mapping for #{name}" unless targets.include? name
+        raise ArgumentError, "Need to map fields to target #{name} (#{fields})" unless (fields - field_maps[:targets][name].field_from_to.keys).empty?
+      end
+
+      # Maps data sources and fields from the transform context to the local transform
+      # @param from_source [Symbol] name of the source data in the context
+      # @param to_source [Symbol] name of the source data local to the transform
+      # @param field_map [Hash] mapping of the key names from the context source to the local source
+      def map_source_fields(from_source, to_source, field_map)
+        sources << to_source unless sources.include? to_source
+
+        job_ds = context.send(from_source)
+        sub_trans_ds = Remi::DataSubject.new(name: to_source)
+        define_singleton_method(to_source) { sub_trans_ds }
+
+        field_maps[:sources][to_source] = FieldMap.new(job_ds, send(to_source), field_map)
+      end
+
+      # Maps data targets and fields from the local transform to the transform context
+      # @param from_target [Symbol] name of the target data local to the transform
+      # @param to_target [Symbol] name of the target data in the context
+      # @param field_map [Hash] mapping of the key names from the local transform target to the context target
+      def map_target_fields(from_target, to_target, field_map)
+        targets << from_target unless targets.include? from_target
+
+        job_ds = context.send(to_target)
+        sub_trans_ds = Remi::DataSubject.new
+        define_singleton_method(from_target) { sub_trans_ds }
+
+        field_maps[:targets][from_target] = FieldMap.new(send(from_target), job_ds, field_map)
+      end
+
+      # Imports another transform to be executed as part of this transform. The block
+      # is used to perform any source/target field mapping.
+      #
+      # @param sub_transform [Job::Transform] the transform to import into this one
+      # @param block [Proc] a block of code to be executed prior to the execution of the
+      #   imported transform. This is where field mapping would be defined.
+      # @example
+      #
+      #   sub_transform = Job::Transform.new('arbitrary') do
+      #     source :sub_transform_source, [] # validate that this source has been defined
+      #     # do stuff to sub_transform_source here
+      #   end
+      #
+      #   job = MyJob.new
+      #   my_transform = Job::Transform.new(job) do
+      #     import sub_transform do
+      #       map_source_fields :some_method_in_my_job, :sub_transform_source, { :job_id => :sub_transform_id }
+      #     end
+      #   end
+      def import(sub_transform, **kargs, &block)
+        sub_transform.context = context
+        sub_transform.params.merge! kargs
+        Dsl.dsl_eval(sub_transform, context, &block)
+
+        sub_transform.map_inputs
+        sub_transform.execute
+        sub_transform.map_outputs
+      end
+
+
+
+      protected
+
+      def map_inputs
+        sources.each do |source_input|
+          field_map = field_maps[:sources][source_input]
+          job_ds = field_map.from_subject
+          sub_trans_ds = field_map.to_subject
+          fields_to_map = field_map.field_from_to.keys
+
+          fields_to_map.each do |job_field|
+            sub_trans_field = field_map.field_from_to[job_field]
+            sub_trans_ds.fields[sub_trans_field] = job_ds.fields[job_field]
+
+            sub_trans_ds.df[sub_trans_field] = job_ds.df[job_field]
+          end
+        end
+      end
+
+      def map_outputs
+        targets.each do |target_output|
+          field_map = field_maps[:targets][target_output]
+          job_ds = field_map.to_subject
+          sub_trans_ds = field_map.from_subject
+          fields_to_map = field_map.field_from_to.keys
+
+          fields_to_map.each do |sub_trans_field|
+            job_field = field_map.field_from_to[sub_trans_field]
+            job_ds.fields[job_field].merge! sub_trans_ds.fields[sub_trans_field]
+            job_ds.df[job_field] = sub_trans_ds.df[sub_trans_field]
+          end
+        end
+      end
+
+    end
+  end
+end
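Building on the `#import` example in the comments above, a reusable sub-transform would typically declare its sources and targets and let the importing job supply the field mappings. This is a hedged sketch only: `:raw` and `:cleaned` are arbitrary local names, `:source_data`/`:target_data` are assumed data subjects on the job, and the data-frame accessors simply mirror the ones used in `#map_inputs`/`#map_outputs`:

prefixer = Remi::Job::Transform.new('arbitrary', prefix: 'ID-') do
  source :raw, [:id]                   # validated against the mapping supplied on import
  target :cleaned, [:prefixed_id]

  cleaned.df[:prefixed_id] = raw.df[:id].recode { |v| "#{params[:prefix]}#{v}" }
end

job = MyJob.new
main = Remi::Job::Transform.new(job, name: :prefix_ids) do
  import prefixer do
    map_source_fields :source_data, :raw, { :id => :id }
    map_target_fields :cleaned, :target_data, { :prefixed_id => :external_id }
  end
end
main.execute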
data/lib/remi/loader.rb
@@ -0,0 +1,22 @@
+module Remi
+  # A loader is an object meant to load data into some external system.
+  # This is a parent class meant to be inherited by child classes that
+  # define specific ways to load data.
+  class Loader
+
+    def initialize(*args, logger: Remi::Settings.logger, **kargs, &block)
+      @logger = logger
+    end
+
+    attr_accessor :logger
+
+    # Any child class needs to define a load method that loads data from
+    # the given dataframe into the target system.
+    # @param data [Remi::DataFrame] Data that has been encoded appropriately to be loaded into the target
+    # @return [true] On success
+    def load(data)
+      raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
+    end
+
+  end
+end
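A subclass only needs to implement `#load`. The sketch below is hypothetical (an in-memory "target" rather than a real external system) and assumes `data` behaves like a Daru dataframe, as it does elsewhere in Remi:

# Illustrative loader that collects rows in memory instead of writing to an
# external system.
class ArrayLoader < Remi::Loader
  def initialize(*args, **kargs, &block)
    super
    @rows = []
  end

  attr_reader :rows

  def load(data)
    logger.info "Loading #{data.size} rows into memory"
    data.each(:row) { |row| @rows << row.to_h }
    true
  end
end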
data/lib/remi/monkeys/daru.rb
@@ -0,0 +1,4 @@
+# Needed to fix issue in Daru 0.1.4.1
+class Daru::DataFrame
+  remove_method :to_hash
+end
data/lib/remi/parser.rb
@@ -0,0 +1,44 @@
+module Remi
+  # A parser is an object that converts data returned from a
+  # Remi::Extractor into a dataframe. This is a parent class meant to be
+  # inherited by child classes that define specific ways to parse
+  # data.
+  class Parser
+
+    # @param context [Object] The context (e.g., DataSource) for the parser (default: `nil`)
+    # @param field_symbolizer [Proc] The field symbolizer to use for this parser
+    # @param fields [Remi::Fields] A hash of field metadata to be used by the parser
+    def initialize(*args, context: nil, field_symbolizer: Remi::FieldSymbolizers[:standard], fields: Remi::Fields.new({}), logger: Remi::Settings.logger, **kargs, &block)
+      @context = context
+      @field_symbolizer = field_symbolizer
+
+      @fields = fields
+      @logger = logger
+    end
+
+    attr_accessor :context
+    attr_accessor :logger
+    attr_writer :field_symbolizer
+    attr_writer :fields
+
+    # Any child class needs to define a parse method that converts extracted data
+    # into a dataframe.
+    # @param data [Object] Extracted data that needs to be parsed
+    # @return [Remi::DataFrame] The data converted into a dataframe
+    def parse(data)
+      raise NoMethodError, "#{__method__} not defined for #{self.class.name}"
+    end
+
+    # @return [Proc] The field symbolizer (uses the context field symbolizer if defined)
+    def field_symbolizer
+      return context.field_symbolizer if context.respond_to? :field_symbolizer
+      @field_symbolizer
+    end
+
+    # @return [Remi::Fields] The fields (uses the context fields if defined)
+    def fields
+      return context.fields if context.respond_to? :fields
+      @fields
+    end
+  end
+end
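A subclass implements `#parse` to turn whatever the extractor returned into a dataframe. The sketch below is hypothetical: it assumes the extracted data is an array of row hashes sharing the same keys, and a real parser would also honor `#field_symbolizer` and `#fields`:

# Illustrative parser that pivots an array of row hashes into columns and
# builds a Daru dataframe from them.
class HashArrayParser < Remi::Parser
  def parse(data)
    columns = Hash.new { |h, k| h[k] = [] }
    data.each do |row|
      row.each { |key, value| columns[key.to_sym] << value }
    end
    Daru::DataFrame.new(columns)
  end
end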
data/lib/remi/testing/business_rules.rb
@@ -175,7 +175,7 @@ module Remi::Testing::BusinessRules
     end
 
     def run_transforms
-      @job.run_all_transforms
+      @job.execute(:transforms)
     end
   end
 
@@ -262,7 +262,6 @@ module Remi::Testing::BusinessRules
     end
 
     attr_reader :name
-    attr_reader :data_subject
 
     def add_field(field_name)
       @fields.add_field(self, field_name)
@@ -277,17 +276,17 @@ module Remi::Testing::BusinessRules
     end
 
     def size
-      @data_subject.df.size
+      data_subject.df.size
     end
 
-    def get_attrib(name)
-      @data_subject.send(name)
+    def data_subject
+      @data_subject.dsl_eval
     end
 
     # Public: Converts the data subject to a hash where the keys are the table
     # columns and the values are an array for the value of column for each row.
     def column_hash
-      @data_subject.df.to_h.reduce({}) do |h, (k,v)|
+      data_subject.df.to_h.reduce({}) do |h, (k,v)|
        h[k.symbolize] = v.to_a
        h
      end
@@ -297,7 +296,7 @@ module Remi::Testing::BusinessRules
    # Need more robust duping to make that feasible.
    # Don't use results for anything more than size.
    def where(field_name, operation)
-      @data_subject.df.where(@data_subject.df[field_name.symbolize(@data_subject.field_symbolizer)].recode { |v| operation.call(v) })
+      data_subject.df.where(data_subject.df[field_name.symbolize(data_subject.field_symbolizer)].recode { |v| operation.call(v) })
    end
 
    def where_is(field_name, value)
@@ -323,11 +322,11 @@
 
 
    def stub_data
-      @data_subject.stub_df if @data_subject.respond_to? :stub_df
+      data_subject.stub_df if data_subject.respond_to? :stub_df
    end
 
    def example_to_df(example)
-      df = example.to_df(@data_subject.df.row[0].to_h, field_symbolizer: @data_subject.field_symbolizer)
+      df = example.to_df(data_subject.df.row[0].to_h, field_symbolizer: data_subject.field_symbolizer)
      data_subject.fields.each do |vector, metadata|
        if metadata[:type] == :json
          df[vector].recode! { |v| JSON.parse(v) rescue v }
@@ -338,20 +337,20 @@
 
    def stub_data_with(example)
      stub_data
-      @data_subject.df = example_to_df(example)
+      data_subject.df = example_to_df(example)
    end
 
    def append_data_with(example)
-      @data_subject.df = @data_subject.df.concat example_to_df(example)
+      data_subject.df = data_subject.df.concat example_to_df(example)
    end
 
 
    def replicate_rows(n_rows)
-      replicated_df = Daru::DataFrame.new([], order: @data_subject.df.vectors.to_a)
-      @data_subject.df.each do |vector|
+      replicated_df = Daru::DataFrame.new([], order: data_subject.df.vectors.to_a)
+      data_subject.df.each do |vector|
        replicated_df[vector.name] = vector.to_a * n_rows
      end
-      @data_subject.df = replicated_df
+      data_subject.df = replicated_df
    end
 
    def cumulative_dist_from_freq_table(table, freq_field: 'frequency')
@@ -383,28 +382,23 @@
 
    def distribute_values(table)
      cumulative_dist = cumulative_dist_from_freq_table(table)
-      generated_data = generate_values_from_cumulative_dist(@data_subject.df.size, cumulative_dist)
+      generated_data = generate_values_from_cumulative_dist(data_subject.df.size, cumulative_dist)
 
      generated_data.each do |field_name, data_array|
        vector_name = fields[field_name].field_name
-        @data_subject.df[vector_name] = Daru::Vector.new(data_array, index: @data_subject.df.index)
+        data_subject.df[vector_name] = Daru::Vector.new(data_array, index: data_subject.df.index)
      end
    end
 
    def freq_by(*field_names)
-      @data_subject.df.group_by(field_names).size * 1.0 / @data_subject.df.size
+      data_subject.df.group_by(field_names).size * 1.0 / data_subject.df.size
    end
 
    def unique_integer_field(field_name)
      vector_name = fields[field_name].field_name
      i = 0
-      @data_subject.df[vector_name].recode! { |v| i += 1 }
+      data_subject.df[vector_name].recode! { |v| i += 1 }
    end
-
-    def csv_options
-      @data_subject.csv_options
-    end
-
   end
 