cure 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +3 -0
  3. data/Gemfile.lock +1 -1
  4. data/README.md +16 -6
  5. data/exe/cure +7 -3
  6. data/lib/cure/config.rb +17 -3
  7. data/lib/cure/coordinator.rb +72 -0
  8. data/lib/cure/export/exporter.rb +32 -7
  9. data/lib/cure/extract/builder.rb +27 -0
  10. data/lib/cure/extract/csv_lookup.rb +32 -0
  11. data/lib/cure/extract/extractor.rb +123 -0
  12. data/lib/cure/generator/base_generator.rb +51 -0
  13. data/lib/cure/generator/case_generator.rb +25 -0
  14. data/lib/cure/generator/character_generator.rb +35 -0
  15. data/lib/cure/generator/faker_generator.rb +25 -0
  16. data/lib/cure/generator/guid_generator.rb +16 -0
  17. data/lib/cure/generator/hex_generator.rb +16 -0
  18. data/lib/cure/generator/imports.rb +12 -0
  19. data/lib/cure/generator/number_generator.rb +16 -0
  20. data/lib/cure/generator/placeholder_generator.rb +20 -0
  21. data/lib/cure/generator/redact_generator.rb +16 -0
  22. data/lib/cure/generator/variable_generator.rb +20 -0
  23. data/lib/cure/helpers/file_helpers.rb +40 -0
  24. data/lib/cure/helpers/object_helpers.rb +29 -0
  25. data/lib/cure/log.rb +3 -3
  26. data/lib/cure/main.rb +40 -31
  27. data/lib/cure/strategy/append_strategy.rb +24 -0
  28. data/lib/cure/strategy/base_strategy.rb +123 -0
  29. data/lib/cure/strategy/end_with_strategy.rb +46 -0
  30. data/lib/cure/strategy/full_strategy.rb +24 -0
  31. data/lib/cure/strategy/imports.rb +10 -0
  32. data/lib/cure/strategy/match_strategy.rb +43 -0
  33. data/lib/cure/strategy/regex_strategy.rb +49 -0
  34. data/lib/cure/strategy/split_strategy.rb +53 -0
  35. data/lib/cure/strategy/start_with_strategy.rb +47 -0
  36. data/lib/cure/template/dispatch.rb +30 -0
  37. data/lib/cure/template/extraction.rb +38 -0
  38. data/lib/cure/template/template.rb +28 -0
  39. data/lib/cure/template/transformations.rb +26 -0
  40. data/lib/cure/transformation/candidate.rb +23 -9
  41. data/lib/cure/transformation/transform.rb +33 -41
  42. data/lib/cure/validators.rb +71 -0
  43. data/lib/cure/version.rb +1 -1
  44. data/lib/cure.rb +9 -4
  45. data/templates/aws_cur_template.json +130 -128
  46. data/templates/example_template.json +46 -30
  47. metadata +36 -9
  48. data/lib/cure/csv_helpers.rb +0 -6
  49. data/lib/cure/file_helpers.rb +0 -38
  50. data/lib/cure/generator/base.rb +0 -148
  51. data/lib/cure/object_helpers.rb +0 -27
  52. data/lib/cure/strategy/base.rb +0 -223
@@ -1,143 +1,145 @@
1
1
  {
2
- "candidates" : [
3
- {
4
- "column" : "identity/LineItemId",
5
- "translations" : [{
6
- "strategy" : {
7
- "name": "full",
8
- "options" : {}
9
- },
10
- "generator" : {
11
- "name" : "character",
12
- "options" : {
13
- "length" : 52,
14
- "types" : [
15
- "lowercase", "number"
16
- ]
2
+ "transformations" : {
3
+ "candidates" : [
4
+ {
5
+ "column" : "identity/LineItemId",
6
+ "translations" : [{
7
+ "strategy" : {
8
+ "name": "full",
9
+ "options" : {}
10
+ },
11
+ "generator" : {
12
+ "name" : "character",
13
+ "options" : {
14
+ "length" : 52,
15
+ "types" : [
16
+ "lowercase", "number"
17
+ ]
18
+ }
17
19
  }
18
- }
19
- }]
20
- },
21
- {
22
- "column" : "bill/PayerAccountId",
23
- "translations" : [{
24
- "strategy" : {
25
- "name": "full",
26
- "options" : {}
27
- },
28
- "generator" : {
29
- "name" : "placeholder",
30
- "options" : {
31
- "name" : "$account_number"
32
- }
33
- }
34
- }]
35
- },
36
- {
37
- "column" : "lineItem/UsageAccountId",
38
- "translations" : [{
39
- "strategy" : {
40
- "name": "full",
41
- "options" : {}
42
- },
43
- "generator" : {
44
- "name" : "number",
45
- "options" : {
46
- "length" : 6
47
- }
48
- }
49
- }]
50
- },
51
- {
52
- "column" : "lineItem/ResourceId",
53
- "translations" : [{
54
- "strategy" : {
55
- "name": "regex",
56
- "options" : {
57
- "regex_cg" : "^i-(.*)"
58
- }
59
- },
60
- "generator" : {
61
- "name" : "hex",
62
- "options" : {
63
- "length" : 10
20
+ }]
21
+ },
22
+ {
23
+ "column" : "bill/PayerAccountId",
24
+ "translations" : [{
25
+ "strategy" : {
26
+ "name": "full",
27
+ "options" : {}
28
+ },
29
+ "generator" : {
30
+ "name" : "placeholder",
31
+ "options" : {
32
+ "name" : "$account_number"
33
+ }
64
34
  }
65
- }
66
- },{
67
- "strategy" : {
68
- "name": "regex",
69
- "options" : {
70
- "regex_cg" : "^vol-(.*)"
35
+ }]
36
+ },
37
+ {
38
+ "column" : "lineItem/UsageAccountId",
39
+ "translations" : [{
40
+ "strategy" : {
41
+ "name": "full",
42
+ "options" : {}
43
+ },
44
+ "generator" : {
45
+ "name" : "number",
46
+ "options" : {
47
+ "length" : 6
48
+ }
71
49
  }
72
- },
73
- "generator" : {
74
- "name" : "hex",
75
- "options" : {
76
- "length" : 10
50
+ }]
51
+ },
52
+ {
53
+ "column" : "lineItem/ResourceId",
54
+ "translations" : [{
55
+ "strategy" : {
56
+ "name": "regex",
57
+ "options" : {
58
+ "regex_cg" : "^i-(.*)"
59
+ }
60
+ },
61
+ "generator" : {
62
+ "name" : "hex",
63
+ "options" : {
64
+ "length" : 10
65
+ }
77
66
  }
78
- }
79
- },{
80
- "strategy" : {
81
- "name": "split",
82
- "options" : {
83
- "token": ":",
84
- "index": 4
67
+ },{
68
+ "strategy" : {
69
+ "name": "regex",
70
+ "options" : {
71
+ "regex_cg" : "^vol-(.*)"
72
+ }
73
+ },
74
+ "generator" : {
75
+ "name" : "hex",
76
+ "options" : {
77
+ "length" : 10
78
+ }
85
79
  }
86
- },
87
- "generator" : {
88
- "name" : "placeholder",
89
- "options" : {
90
- "name" : "$account_number"
80
+ },{
81
+ "strategy" : {
82
+ "name": "split",
83
+ "options" : {
84
+ "token": ":",
85
+ "index": 4
86
+ }
87
+ },
88
+ "generator" : {
89
+ "name" : "placeholder",
90
+ "options" : {
91
+ "name" : "$account_number"
92
+ }
91
93
  }
92
- }
93
- },{
94
- "strategy" : {
95
- "name": "split",
96
- "options" : {
97
- "token": ":",
98
- "index": -1
94
+ },{
95
+ "strategy" : {
96
+ "name": "split",
97
+ "options" : {
98
+ "token": ":",
99
+ "index": -1
100
+ }
101
+ },
102
+ "generator" : {
103
+ "name" : "faker",
104
+ "options" : {
105
+ "module" : "App",
106
+ "method" : "name"
107
+ }
99
108
  }
100
- },
101
- "generator" : {
102
- "name" : "faker",
103
- "options" : {
104
- "module" : "App",
105
- "method" : "name"
109
+ }],
110
+ "no_match_translation" : {
111
+ "strategy" : {
112
+ "name": "full",
113
+ "options" : {}
114
+ },
115
+ "generator" : {
116
+ "name" : "hex",
117
+ "options" : {
118
+ "length" : 10,
119
+ "prefix" : "s3_bucket_"
120
+ }
106
121
  }
107
122
  }
108
- }],
109
- "no_match_translation" : {
110
- "strategy" : {
111
- "name": "full",
112
- "options" : {}
113
- },
114
- "generator" : {
115
- "name" : "hex",
116
- "options" : {
117
- "length" : 10,
118
- "prefix" : "s3_bucket_"
123
+ },
124
+ {
125
+ "column" : "resourceTags/aws:createdBy",
126
+ "translations" : [{
127
+ "strategy" : {
128
+ "name": "full",
129
+ "options" : {}
130
+ },
131
+ "generator" : {
132
+ "name" : "faker",
133
+ "options" : {
134
+ "module" : "Faker::Movies::StarWars",
135
+ "method" : "character"
136
+ }
119
137
  }
120
- }
138
+ }]
121
139
  }
122
- },
123
- {
124
- "column" : "resourceTags/aws:createdBy",
125
- "translations" : [{
126
- "strategy" : {
127
- "name": "full",
128
- "options" : {}
129
- },
130
- "generator" : {
131
- "name" : "faker",
132
- "options" : {
133
- "module" : "Faker::Movies::StarWars",
134
- "method" : "character"
135
- }
136
- }
137
- }]
140
+ ],
141
+ "placeholders" : {
142
+ "$account_number" : "1234567891234"
138
143
  }
139
- ],
140
- "placeholders" : {
141
- "$account_number" : "1234567891234"
142
144
  }
143
145
  }
@@ -1,38 +1,54 @@
1
1
  {
2
- "replacements" : [
3
- {
4
- "column" : "bill/PayerAccountId",
5
- "strategy" : {
6
- "name": "full",
7
- "options" : {}
8
- },
9
- "generator" : {
10
- "name" : "placeholder",
11
- "options" : {
12
- "name" : "$account_number"
13
- }
14
- }
15
- },{
16
- "column" : "lineItem/ResourceId",
17
- "strategy" : {
18
- "name": "regex",
19
- "options" : {
20
- "regex" : ""
2
+ "extraction": {
3
+ "named_ranges": [{
4
+ "name": "main",
5
+ "section": "B2:F2"
6
+ }],
7
+ "variables" : [{
8
+ "name" : "date",
9
+ "type" : "single_field",
10
+ "location" : "B2"
11
+ }]
12
+ },
13
+ "transformations": {
14
+ "candidates": [
15
+ {
16
+ "column": "bill/PayerAccountId",
17
+ "strategy": {
18
+ "name": "full",
19
+ "options": {}
20
+ },
21
+ "generator": {
22
+ "name": "placeholder",
23
+ "options": {
24
+ "name": "$account_number"
25
+ }
21
26
  }
22
- },
23
- "generator" : {
24
- "name": "number",
25
- "options" : {
26
- "length" : 12
27
+ },{
28
+ "column": "lineItem/ResourceId",
29
+ "strategy": {
30
+ "name": "regex",
31
+ "options": {
32
+ "regex": ""
33
+ }
34
+ },
35
+ "generator": {
36
+ "name": "number",
37
+ "options": {
38
+ "length": 12
39
+ }
27
40
  }
28
41
  }
42
+ ],
43
+ "placeholders": {
44
+ "$account_number": "1234567891234"
29
45
  }
30
- ],
31
- "placeholders" : {
32
- "$account_number" : "1234567891234"
33
46
  },
34
- "column_export" : {
35
- "type" : "blacklist",
36
- "columns" : []
47
+ "export": {
48
+ "sections" : {
49
+ "named_range" : "main",
50
+ "file_name" : "main-file",
51
+ "type" : "csv"
52
+ }
37
53
  }
38
54
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cure
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - william
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-08-31 00:00:00.000000000 Z
11
+ date: 2022-10-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: simplecov
@@ -52,7 +52,8 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
- description: Anonymize private data inside CSV files
55
+ description: Transform, select, anonymize or manipulate data inside CSV files with
56
+ templates.
56
57
  email:
57
58
  - me@williamthom.as
58
59
  executables:
@@ -72,16 +73,42 @@ files:
72
73
  - exe/cure
73
74
  - lib/cure.rb
74
75
  - lib/cure/config.rb
75
- - lib/cure/csv_helpers.rb
76
+ - lib/cure/coordinator.rb
76
77
  - lib/cure/export/exporter.rb
77
- - lib/cure/file_helpers.rb
78
- - lib/cure/generator/base.rb
78
+ - lib/cure/extract/builder.rb
79
+ - lib/cure/extract/csv_lookup.rb
80
+ - lib/cure/extract/extractor.rb
81
+ - lib/cure/generator/base_generator.rb
82
+ - lib/cure/generator/case_generator.rb
83
+ - lib/cure/generator/character_generator.rb
84
+ - lib/cure/generator/faker_generator.rb
85
+ - lib/cure/generator/guid_generator.rb
86
+ - lib/cure/generator/hex_generator.rb
87
+ - lib/cure/generator/imports.rb
88
+ - lib/cure/generator/number_generator.rb
89
+ - lib/cure/generator/placeholder_generator.rb
90
+ - lib/cure/generator/redact_generator.rb
91
+ - lib/cure/generator/variable_generator.rb
92
+ - lib/cure/helpers/file_helpers.rb
93
+ - lib/cure/helpers/object_helpers.rb
79
94
  - lib/cure/log.rb
80
95
  - lib/cure/main.rb
81
- - lib/cure/object_helpers.rb
82
- - lib/cure/strategy/base.rb
96
+ - lib/cure/strategy/append_strategy.rb
97
+ - lib/cure/strategy/base_strategy.rb
98
+ - lib/cure/strategy/end_with_strategy.rb
99
+ - lib/cure/strategy/full_strategy.rb
100
+ - lib/cure/strategy/imports.rb
101
+ - lib/cure/strategy/match_strategy.rb
102
+ - lib/cure/strategy/regex_strategy.rb
103
+ - lib/cure/strategy/split_strategy.rb
104
+ - lib/cure/strategy/start_with_strategy.rb
105
+ - lib/cure/template/dispatch.rb
106
+ - lib/cure/template/extraction.rb
107
+ - lib/cure/template/template.rb
108
+ - lib/cure/template/transformations.rb
83
109
  - lib/cure/transformation/candidate.rb
84
110
  - lib/cure/transformation/transform.rb
111
+ - lib/cure/validators.rb
85
112
  - lib/cure/version.rb
86
113
  - templates/aws_cur_template.json
87
114
  - templates/example_template.json
@@ -107,5 +134,5 @@ requirements: []
107
134
  rubygems_version: 3.2.22
108
135
  signing_key:
109
136
  specification_version: 4
110
- summary: Anonymize private data inside CSV files
137
+ summary: Cure provides the ability to transform CSVs using descriptive templates.
111
138
  test_files: []
@@ -1,6 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Cure
4
- module CSVHelpers
5
- end
6
- end
@@ -1,38 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "fileutils"
4
-
5
- module Cure
6
- module FileHelpers
7
- def with_file(path, extension, &block)
8
- dir = File.dirname(path)
9
-
10
- FileUtils.mkdir_p(dir) unless File.directory?(dir)
11
-
12
- path = "#{path}.#{extension}"
13
- File.open(path, "w", &block)
14
- end
15
-
16
- def clean_dir(path)
17
- dir = File.file?(path) ? File.dirname(path) : path
18
-
19
- FileUtils.remove_dir(dir) if File.directory?(dir)
20
- end
21
-
22
- def read_file(file_location)
23
- result = file_location.start_with?("/") ? file_location : File.join(File.dirname(__FILE__), file_location)
24
-
25
- raise "No file found at [#{file_location}]" unless File.exist? result
26
-
27
- File.read(result)
28
- end
29
-
30
- def with_temp_dir(temp_dir, &_block)
31
- return unless block_given?
32
-
33
- clean_dir(temp_dir)
34
- yield
35
- clean_dir(temp_dir)
36
- end
37
- end
38
- end
@@ -1,148 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Cure
4
- module Generator
5
- class Base
6
- # @return [Hash]
7
- attr_accessor :options
8
-
9
- def initialize(options={})
10
- @options = options
11
- end
12
-
13
- # @param [Object/Nil] source_value
14
- # @return [String]
15
- def generate(source_value=nil)
16
- translated = _generate(source_value)
17
- translated = "#{prefix}#{translated}" if prefix
18
- translated = "#{translated}#{suffix}" if suffix
19
- translated
20
- end
21
-
22
- private
23
-
24
- # @param [Object/Nil] _source_value
25
- # @return [String]
26
- def _generate(_source_value)
27
- raise NotImplementedError, "#{self.class} has not implemented method '#{__method__}'"
28
- end
29
-
30
- def prefix(default=nil)
31
- extract_property("prefix", default)
32
- end
33
-
34
- def suffix(default=nil)
35
- extract_property("suffix", default)
36
- end
37
-
38
- def length(default=nil)
39
- extract_property("length", default)
40
- end
41
-
42
- def property_name(default=nil)
43
- extract_property("name", default)
44
- end
45
-
46
- def extract_property(property, default_val)
47
- @options.fetch(property, default_val)
48
- end
49
- end
50
-
51
- class HexGenerator < Base
52
- private
53
-
54
- # @param [Object] _source_value
55
- def _generate(_source_value)
56
- 1.upto(length(rand(0..9))).map { rand(0..15).to_s(16) }.join("")
57
- end
58
- end
59
-
60
- class NumberGenerator < Base
61
- private
62
-
63
- # @param [Object] _source_value
64
- def _generate(_source_value)
65
- 1.upto(length(rand(0..9))).map { rand(1..9) }.join("").to_i
66
- end
67
- end
68
-
69
- class RedactGenerator < Base
70
- private
71
-
72
- # @param [Object] source_value
73
- def _generate(source_value)
74
- 1.upto(length(source_value&.length || 5)).map { "X" }.join("")
75
- end
76
- end
77
-
78
- class PlaceholderGenerator < Base
79
- include Configuration
80
-
81
- private
82
-
83
- # @param [Object] _source_value
84
- def _generate(_source_value)
85
- value = config.placeholders[property_name]
86
- value || raise("Missing placeholder value. Available candidates: [#{config.placeholders.join(", ")}]")
87
- end
88
- end
89
-
90
- require "securerandom"
91
-
92
- class GuidGenerator < Base
93
- private
94
-
95
- # @param [Object] _source_value
96
- def _generate(_source_value)
97
- SecureRandom.uuid.to_s
98
- end
99
- end
100
-
101
- require "faker"
102
-
103
- class FakerGenerator < Base
104
- private
105
-
106
- # @param [Object] _source_value
107
- def _generate(_source_value)
108
- mod_code = extract_property("module", nil)
109
- mod = Faker.const_get(mod_code)
110
-
111
- raise "No Faker module found for [#{mod_code}]" unless mod
112
-
113
- meth_code = extract_property("method", nil)&.to_sym
114
- raise "No Faker module found for [#{meth_code}]" unless mod.methods.include?(meth_code)
115
-
116
- mod.send(meth_code)
117
- end
118
- end
119
-
120
- class CharacterGenerator < Base
121
-
122
- def initialize(options=nil)
123
- super(options)
124
- end
125
-
126
- private
127
-
128
- # @param [Object] source_value
129
- def _generate(source_value)
130
- arr = build_options.map(&:to_a).flatten
131
- (0...length(source_value&.length || 5)).map { arr[rand(arr.length)] }.join
132
- end
133
-
134
- def build_options
135
- return [("a".."z"), ("A".."Z"), (0..9)] unless @options.key?("types")
136
-
137
- type_array = @options["types"]
138
-
139
- arr = []
140
- arr << ("a".."z") if type_array.include? "lowercase"
141
- arr << ("A".."Z") if type_array.include? "uppercase"
142
- arr << (0..9) if type_array.include? "number"
143
- arr << ("!".."+") if type_array.include? "symbol"
144
- arr
145
- end
146
- end
147
- end
148
- end
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "cure"
4
- require "json"
5
-
6
- module Cure
7
- module ObjectHelpers
8
- def attributes=(hash)
9
- hash.each do |key, value|
10
- send("#{key}=", value)
11
- rescue NoMethodError
12
- Cure.logger.warn("Error deserializing object: No property for #{key}")
13
- end
14
- end
15
-
16
- def from_json(json)
17
- return from_hash(json) if json.is_a?(Hash) # Just a guard in case serialisation is done
18
-
19
- from_hash(JSON.parse(json))
20
- end
21
-
22
- def from_hash(hash)
23
- self.attributes = hash
24
- self
25
- end
26
- end
27
- end