activerecord-data_classification 0.1.0.pre.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: fec4de7b27cfc673e959455b96af6a64532ee748132d492c1efd1ea4d297ecc7
4
+ data.tar.gz: ee6fdc5d5f131ee7a461756e43c6f9c2237f9e2f7b494f0ef2b4109548da4952
5
+ SHA512:
6
+ metadata.gz: 32cf7e5c9e49e20c15354503ca24bb3b0600557cef24e1de59e4fa9b4ac87dcb2b321620ace1b4324f0cae59d434414afe236f4bc622118ae9f5cf50b8fd6e7a
7
+ data.tar.gz: ebf1cd76fe551605fc854a6efe96fc4d5ea0242e9f0e18eabd9d81d481304d2d2468b359e73a09860afacc3535d70ad640c91a3a95341cf54415382a5e397d00
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2023 Invoca Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # ActiveRecord::DataClassification
2
+
3
+ TODO: Delete this and the text below, and describe your gem
4
+
5
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/active_record/data_classification.rb`. To experiment with that code, run `bin/console` for an interactive prompt.
6
+
7
+ ## Installation
8
+
9
+ TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_PRIOR_TO_RELEASE_TO_RUBYGEMS_ORG` with your gem name right after releasing it to RubyGems.org. Please do not do it earlier due to security reasons. Alternatively, replace this section with instructions to install your gem from git if you don't plan to release to RubyGems.org.
10
+
11
+ Install the gem and add to the application's Gemfile by executing:
12
+
13
+ $ bundle add activerecord-data_classification
14
+
15
+ If bundler is not being used to manage dependencies, install the gem by executing:
16
+
17
+ $ gem install activerecord-data_classification
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Development
24
+
25
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
26
+
27
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
28
+
29
+ ## Contributing
30
+
31
+ Bug reports and pull requests are welcome on GitHub at https://github.com/invoca/activerecord-data_classification.
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecord
  module DataClassification
    module Aggregate
      # Concern that overrides `columns` on the including class so the result also
      # contains one Column per attribute kept in the model's aggregate storage field.
      # It is included into ModelReflection when the Aggregate library is defined.
      module Extension
        extend ActiveSupport::Concern

        # Lookup from handler class to type label: the inverse of Aggregate's
        # BUILTIN_TYPES table plus an entry for foreign keys.
        # NOTE(review): presumably BUILTIN_TYPES maps type name => handler class; confirm.
        AGGREGATE_BUILTINS =
          ::Aggregate::AttributeHandler::BUILTIN_TYPES
            .invert
            .merge(::Aggregate::Attribute::ForeignKey => "foreign_key")
            .freeze

        # rubocop:disable Metrics/BlockLength
        included do
          # Keep the including class's own `columns` reachable before redefining it.
          alias_method :columns_without_aggregates, :columns

          # Returns the original columns followed by the aggregate columns. Aggregate
          # columns are only looked up when @model reports an aggregate_storage_field;
          # their names are prefixed with `<storage_field>->"$.`.
          def columns
            storage_field = @model.try(:aggregate_storage_field)
            agg_columns = storage_field ? aggregate_columns(@model, "#{storage_field}->\"$.", @models) : []

            [*columns_without_aggregates, *agg_columns].flatten.compact
          end

          private

          # Collects the aggregate attributes of every model, grouped by attribute key,
          # and builds the columns for each key. The first handler seen for a key is the
          # one used; every model declaring the key is recorded against it.
          def aggregate_columns(_klass, prefix, all_models)
            models_by_field = {}
            handler_by_field = {}

            all_models.each do |model|
              model.aggregated_attribute_handlers.each do |key, handler|
                (models_by_field[key] ||= []) << model
                handler_by_field[key] ||= handler
              end
            end

            models_by_field.map do |field, models|
              columns_for_aggregate_attribute(handler_by_field[field], field, models, prefix)
            end
          end

          # Builds the columns for every attribute of a nested aggregate class, adding
          # that class to the list of models reported for each column.
          def nested_aggregate_columns(klass, prefix, models)
            klass.aggregated_attribute_handlers.map do |field, handler|
              columns_for_aggregate_attribute(handler, field, models + [klass], prefix)
            end
          end

          # Type label for a handler: the built-in label when known, else the class name.
          def aggregate_type_name(_field, handler)
            AGGREGATE_BUILTINS[handler.class] || handler.class.name
          end

          # Dispatches on the handler's class to build the column (or nested columns)
          # for a single aggregate attribute. Raises for an unrecognised handler.
          def columns_for_aggregate_attribute(handler, key, models, prefix)
            case handler
            when *AGGREGATE_BUILTINS.keys
              aggregate_column("#{prefix}#{key}\"", AGGREGATE_BUILTINS[handler.class], models)
            when ::Aggregate::Attribute::List
              columns_for_aggregate_has_many(handler, key, models, prefix)
            when ::Aggregate::Attribute::NestedAggregate
              nested_aggregate_columns(handler.class_name.constantize, "#{prefix}#{key}.", models)
            when ::Aggregate::Attribute::SchemaVersion
              aggregate_column("#{prefix}schema_version\"", "schema_version", models)
            else
              raise "Unexpected handler #{handler.inspect}"
            end
          end

          # Builds the column(s) for a list attribute: a single "list of <type>" column
          # for built-in element types, or the nested columns (with a `.*.` path segment)
          # when the elements are nested aggregates.
          def columns_for_aggregate_has_many(handler, key, models, prefix)
            element = handler.element_helper

            case element
            when *AGGREGATE_BUILTINS.keys
              aggregate_column("#{prefix}#{key}\"", "list of #{AGGREGATE_BUILTINS[element.class]}", models)
            when ::Aggregate::Attribute::NestedAggregate
              nested_aggregate_columns(element.class_name.constantize, "#{prefix}#{key}.*.", models)
            else
              raise "Unexpected element_helper #{handler.element_helper.inspect}"
            end
          end

          # Builds a Column whose classification comes from the models alone
          # (aggregate attributes pass no field-level sensitivity).
          def aggregate_column(name, type, models)
            Column.new(
              name,
              type,
              sensitivity_friendly(models, nil),
              sensitivity_source(models, nil),
              anonymization_type,
              models
            )
          end
        end
        # rubocop:enable Metrics/BlockLength
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecord
  module DataClassification
    # One row of classification data for a single column: its name, SQL/type label,
    # sensitivity and where that sensitivity came from, the anonymization applied,
    # and the models the column belongs to. The Struct class itself is frozen.
    Column = Struct.new(
      *%i[column type sensitivity sensitivity_source anonymization models]
    ).freeze
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecord
  module DataClassification
    # Raised when the gem is configured incorrectly.
    class ConfigurationError < StandardError; end

    # Holds the settings read by the report generator.
    class Configuration
      # default_classification   - value reported for columns with no sensitivity
      # ignored_tables           - tables allowed to exist without an associated model
      # confidential_class_names - class names marked Confidential before reporting
      # excluded_table_patterns  - patterns; matching tables are left out of the report
      attr_accessor :default_classification,
                    :ignored_tables,
                    :confidential_class_names,
                    :excluded_table_patterns

      attr_reader :load_models_block,
                  :table_name_transformer_block

      def initialize
        @default_classification = :Private
        @load_models_block = -> { }
        @table_name_transformer_block = ->(table_name) { table_name }
        @confidential_class_names = []
        @excluded_table_patterns = []
        @ignored_tables = [
          "schema_migrations",
          "ar_internal_metadata",
        ]
      end

      # Registers the block that is called (with no arguments) to load the models.
      #
      # @raise [ConfigurationError] when called without a block
      # @return [Proc] the registered block
      def load_models(&block)
        @load_models_block = require_block(block, :load_models)
      end

      # Registers the block that maps a table name to the name shown in the report.
      #
      # @raise [ConfigurationError] when called without a block
      # @return [Proc] the registered block
      def transform_table_name(&block)
        @table_name_transformer_block = require_block(block, :transform_table_name)
      end

      private

      # Fails fast on a missing block: storing nil would only surface later as a
      # NoMethodError when the stored block is called.
      def require_block(block, method_name)
        raise ConfigurationError, "#{method_name} requires a block" unless block

        block
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecord
  module DataClassification
    module DeclareSchema
      # Reader methods for selected entries of the host object's @options hash.
      # ModelAnnotation prepends this module onto ::DeclareSchema::Model::FieldSpec.
      module Extension
        %i[ruby_default data_security anonymize_using].each do |option_name|
          define_method(option_name) { @options[option_name] }
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
# Adds the ability to specify data security to a model.
module ActiveRecord
  module DataClassification
    module ModelAnnotation
      extend ActiveSupport::Concern

      # The only classification values accepted by `data_security`.
      CLASSIFICATIONS = %i[Public Private Confidential].freeze

      module ClassMethods
        # Reads, or sets and then reads, the class's data security classification.
        #
        # @param type [Symbol, nil] one of CLASSIFICATIONS to set it; omit to only read
        # @raise [ArgumentError] when type is given but is not in CLASSIFICATIONS
        # @return [Symbol, nil] the current classification
        def data_security(type = nil)
          return _data_security_type unless type

          unless type.in?(CLASSIFICATIONS)
            raise ArgumentError, "Unknown classification type #{type}"
          end

          self._data_security_type = type
          _data_security_type
        end

        # True when the class is classified as :Confidential.
        def confidential?
          _data_security_type == :Confidential
        end
      end

      included do
        class_attribute :_data_security_type

        # When DeclareSchema is loaded, expose field-level options on its FieldSpec,
        # taking care not to prepend the extension a second time.
        if defined?(::DeclareSchema)
          require_relative 'declare_schema/extension'

          field_spec = ::DeclareSchema::Model::FieldSpec
          field_spec.prepend(DeclareSchema::Extension) unless field_spec < DeclareSchema::Extension
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecord
  module DataClassification
    # Pairs one database table with the model classes mapped to it and reports the
    # classification of each of the table's columns.
    class ModelReflection
      attr_reader :server_type, :table_name, :model_name

      # @param table_name [String] name of the database table
      # @param models [Array<Class>] the classes mapped to that table
      def initialize(table_name, models)
        @models = models
        # Prefer the model that is its own base_class; otherwise take the first one.
        @model = @models.find { |m| m.try(:base_class) == m } || @models.first

        @server_type = find_server_type(table_name)

        @model_name = @model.name
        @table_name = table_name
      end

      # Builds one Column per database column of the table. A model-level
      # classification takes precedence over the field-level one taken from the
      # model's field_specs (when the model provides them).
      #
      # @return [Array<Column>]
      def columns
        field_specs = @model.try(:field_specs)

        connection.columns(table_name).map do |column|
          column_spec = field_specs&.[](column.name)
          Column.new(
            column.name,
            column.sql_type,
            sensitivity_friendly(@models, column_spec&.data_security),
            sensitivity_source(@models, column_spec&.data_security),
            anonymization_type || column_spec&.anonymize_using,
            @models
          )
        end
      end

      private

      # "Truncated" when the models themselves are classified Confidential, else nil.
      def anonymization_type
        "Truncated" if sensitivity_friendly(@models, nil) == "Confidential"
      end

      # The first classification declared by any of the models, falling back to the
      # field-level value; nil when neither is set. Written with a plain loop so it
      # needs nothing beyond `try`.
      def sensitivity(models, field_sensitivity)
        models.each do |model|
          model_sensitivity = model.try(:data_security)
          return model_sensitivity if model_sensitivity
        end

        field_sensitivity || nil
      end

      # String form of `sensitivity`, or nil.
      def sensitivity_friendly(models, field_sensitivity)
        sensitivity(models, field_sensitivity)&.to_s
      end

      # Where the sensitivity came from: the name of the first model declaring one,
      # "Field" for a field-level value, or nil when there is none.
      def sensitivity_source(models, field_sensitivity)
        if (model = models.find { |m| m.try(:data_security) })
          model.name
        elsif field_sensitivity
          "Field"
        end
      end

      # Maps a table name to a server type label using fixed name patterns.
      def find_server_type(table_name)
        case table_name
        when /number_attributions_partitioned/
          "attr"
        when /\Acf_.*/
          "shard"
        else
          "common"
        end
      end

      def connection
        ActiveRecord::Base.connection
      end
    end
  end
end
77
+
78
# Add aggregate column support to ModelReflection only when the Aggregate constant
# is already defined at the time this file is loaded.
if defined?(Aggregate)
  require_relative 'aggregate/extension'

  reflection_class = ActiveRecord::DataClassification::ModelReflection
  reflection_class.include(ActiveRecord::DataClassification::Aggregate::Extension)
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecord
  module DataClassification
    # Discovers which loaded classes map to which database tables.
    module ModelSpace
      class << self
        # @return [Array<ModelReflection>] one reflection per table that has models
        def models
          models_by_table.map { |table, all_models| DataClassification::ModelReflection.new(table, all_models) }
        end

        # @return [Array<String>] names of the tables that have at least one model
        def all_mysql_model_tables
          models_by_table.keys
        end

        private

        # True when the table is one of the tables reported by TableSpace.
        def include_table?(table_name)
          DataClassification::TableSpace.all_tables.include?(table_name)
        end

        # The class's table name, or nil when it has none or asking for it raises.
        def table_for_model(model)
          model.try(:table_name)
        rescue
          # Some rails models raise an error when you call table name on them. Ignore these.
          nil
        end

        # Memoized hash of table name => classes, built by scanning every Class in
        # ObjectSpace and keeping those whose table is known to TableSpace.
        def models_by_table
          return @models_by_table if @models_by_table

          @models_by_table = {}
          ObjectSpace.each_object(Class) do |model|
            table_name = table_for_model(model)
            next unless include_table?(table_name)

            (@models_by_table[table_name] ||= []) << model
          end
          @models_by_table
        end
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+
5
module ActiveRecord
  module DataClassification
    # Generates the data classification report: one tab-separated row per column of
    # every reported table, preceded by a header row.
    module Report
      REPORT_COLUMNS = [
        "Server Type",
        "Table",
        "Column",
        "Type",
        "Sensitivity",
        "Clone Handling",
        "Models",
        "Sensitivity Source",
      ].freeze

      # Original (misspelled) constant name, kept so existing references still resolve.
      REPORT_COLUNMS = REPORT_COLUMNS

      class << self
        # Loads the models, verifies every database table has a model (or is
        # ignored), and renders the report.
        #
        # @raise [RuntimeError] when a table has no model and is not ignored
        # @return [String] the tab-separated report
        def generate
          load_all_models
          assert_all_tables

          generate_csv(DataClassification::ModelSpace.models)
        end

        private

        # Renders the header row followed by one row per column of each reported table.
        def generate_csv(models)
          CSV.generate(col_sep: "\t", headers: REPORT_COLUMNS) do |csv|
            csv << REPORT_COLUMNS
            reportable_models(models).each do |fixed_name, model|
              model.columns.each { |column| csv << csv_row(model, fixed_name, column) }
            end
          end
        end

        # Returns [report_table_name, model] pairs ordered by table name, without
        # excluded tables. When several tables transform to the same report name only
        # the first is kept (Array#uniq keeps the first occurrence).
        def reportable_models(models)
          models
            .sort_by(&:table_name)
            .select { |model| include_table?(model.table_name) }
            .map { |model| [fixed_table_name(model.table_name), model] }
            .uniq(&:first)
        end

        # Values for one report row, in REPORT_COLUMNS order. A column without its
        # own sensitivity is reported with the configured default classification.
        def csv_row(model, fixed_name, column)
          [
            model.server_type,
            fixed_name,
            column.column,
            column.type,
            column.sensitivity || ActiveRecord::DataClassification.config.default_classification,
            column.anonymization,
            column.models.map(&:name).compact.sort.join(","),
            column.sensitivity_source,
          ]
        end

        # False when the table name matches any configured exclusion pattern.
        def include_table?(table_name)
          ActiveRecord::DataClassification.config.excluded_table_patterns.none? { table_name.match?(_1) }
        end

        # Raises when the database has tables that neither have a model nor are ignored.
        def assert_all_tables
          missing_models = DataClassification::TableSpace.all_tables -
                           DataClassification::ModelSpace.all_mysql_model_tables -
                           ActiveRecord::DataClassification.config.ignored_tables

          return if missing_models.empty?

          raise "Found database tables without an associated model: #{missing_models.inspect}"
        end

        # The table name as it should appear in the report.
        def fixed_table_name(table_name)
          ActiveRecord::DataClassification.config.table_name_transformer_block.call(table_name)
        end

        # Runs the configured model loader, then marks the configured classes Confidential.
        def load_all_models
          config = ActiveRecord::DataClassification.config
          config.load_models_block.call
          config.confidential_class_names.each do |klass_name|
            make_confidential(klass_name.constantize)
          end
        end

        def make_confidential(klass)
          klass.include(DataClassification::ModelAnnotation)
          klass.data_security(:Confidential)
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecord
  module DataClassification
    # Lists the tables present in the connected database's schema.
    module TableSpace
      class << self
        # Distinct table names found in information_schema.columns for the current
        # schema. The result is memoized, so the query runs at most once.
        #
        # @return [Array<String>]
        def all_tables
          @all_tables ||= connection.select_rows(all_tables_sql).flatten
        end

        def connection
          ActiveRecord::Base.connection
        end

        # The database name taken from the connection's configuration.
        def schema
          connection.config[:database]
        end

        private

        # The schema name goes through the adapter's `quote` so that a name containing
        # a quote character cannot break (or alter) the statement.
        def all_tables_sql
          "select distinct(TABLE_NAME) from information_schema.columns where TABLE_SCHEMA=#{connection.quote(schema)}"
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rake'
4
+ require 'active_record/data_classification'
5
+
6
namespace :data_classification do
  #
  # Table, Field, Type, Security Profile
  #
  desc "Generate a report of all models"
  # Writes the generated report next to the current working directory's parent
  # (the path is relative) and prints where it went.
  task report: :environment do
    output_path = "../data_classification_report.csv"
    report = ActiveRecord::DataClassification::Report.generate

    File.write(output_path, report)
    puts "Report written to #{output_path}"
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module ActiveRecord
  module DataClassification
    # Version string of this gem (a pre-release of 0.1.0).
    VERSION = "0.1.0.pre.1"
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support'
4
+ require 'active_support/core_ext'
5
+
6
+ require_relative "data_classification/version"
7
+ require_relative "data_classification/column"
8
+ require_relative "data_classification/configuration"
9
+ require_relative "data_classification/report"
10
+ require_relative "data_classification/model_annotation"
11
+ require_relative "data_classification/model_reflection"
12
+ require_relative "data_classification/table_space"
13
+ require_relative "data_classification/model_space"
14
+
15
module ActiveRecord
  module DataClassification
    class << self
      # Builds a fresh Configuration, passes it to the block for customisation, then
      # freezes it and installs it as the active configuration.
      #
      # @yieldparam [Configuration] the configuration being built
      # @return [Configuration] the frozen configuration
      def configure(&block)
        configuration = Configuration.new
        configuration.tap(&block)
        @config = configuration.freeze
      end

      # The active configuration; a default (unfrozen) one is created on first use
      # when `configure` has not been called.
      #
      # @return [Configuration]
      def config
        @config ||= Configuration.new
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: activerecord-data_classification
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0.pre.1
5
+ platform: ruby
6
+ authors:
7
+ - Invoca Development
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2023-10-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activemodel
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '6.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '6.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: activerecord
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '6.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '6.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: activesupport
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '6.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '6.0'
55
+ description: An ActiveRecord extension for classifying models and fields as confidential
56
+ data
57
+ email:
58
+ - development@invoca.com
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - LICENSE
64
+ - README.md
65
+ - lib/active_record/data_classification.rb
66
+ - lib/active_record/data_classification/aggregate/extension.rb
67
+ - lib/active_record/data_classification/column.rb
68
+ - lib/active_record/data_classification/configuration.rb
69
+ - lib/active_record/data_classification/declare_schema/extension.rb
70
+ - lib/active_record/data_classification/model_annotation.rb
71
+ - lib/active_record/data_classification/model_reflection.rb
72
+ - lib/active_record/data_classification/model_space.rb
73
+ - lib/active_record/data_classification/report.rb
74
+ - lib/active_record/data_classification/table_space.rb
75
+ - lib/active_record/data_classification/tasks.rb
76
+ - lib/active_record/data_classification/version.rb
77
+ homepage: https://github.com/invoca/activerecord-data_classification
78
+ licenses:
79
+ - MIT
80
+ metadata:
81
+ source_code_uri: https://github.com/invoca/activerecord-data_classification
82
+ allowed_push_host: https://rubygems.org
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: 2.7.0
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">"
95
+ - !ruby/object:Gem::Version
96
+ version: 1.3.1
97
+ requirements: []
98
+ rubygems_version: 3.4.17
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: An ActiveRecord extension for classifying models and fields as confidential
102
+ data
103
+ test_files: []