dbtools 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +333 -0
- data/Thorfile +1 -0
- data/bin/dbtools +5 -0
- data/config/client_secret_dbtools.json +1 -0
- data/config/config.yml +1 -0
- data/config/database_config.yml +12 -0
- data/config/databases.txt +5 -0
- data/config/schedule.rb +8 -0
- data/dbtools.gemspec +37 -0
- data/lib/dbtools.rb +47 -0
- data/lib/dbtools/constants.rb +847 -0
- data/lib/dbtools/converter/csv2rdf_converter.rb +68 -0
- data/lib/dbtools/converter/csv_importer.rb +107 -0
- data/lib/dbtools/converter/excel2csv_converter.rb +40 -0
- data/lib/dbtools/converter/google_drive2_rdf_converter.rb +97 -0
- data/lib/dbtools/database/database_data.rb +146 -0
- data/lib/dbtools/database/db_connection.rb +236 -0
- data/lib/dbtools/database/mysql_connection.rb +78 -0
- data/lib/dbtools/database/postgresql_connection.rb +132 -0
- data/lib/dbtools/database/violation.rb +45 -0
- data/lib/dbtools/google_drive/google_drive_api.rb +211 -0
- data/lib/dbtools/google_drive/google_drive_entity.rb +22 -0
- data/lib/dbtools/google_drive/google_drive_file.rb +10 -0
- data/lib/dbtools/google_drive/google_drive_folder.rb +9 -0
- data/lib/dbtools/plsql_functions/connect_server.sql +30 -0
- data/lib/dbtools/plsql_functions/link.sql +17 -0
- data/lib/dbtools/plsql_functions/unlink.sql +15 -0
- data/lib/dbtools/rdf/rdf_reader.rb +136 -0
- data/lib/dbtools/version.rb +3 -0
- data/lib/rdf/geophy.rb +27 -0
- data/lib/tasks/aws.rb +43 -0
- data/lib/tasks/backup.rb +107 -0
- data/lib/tasks/check.rb +220 -0
- data/lib/tasks/ckan.rb +151 -0
- data/lib/tasks/convert.rb +139 -0
- data/lib/tasks/dump.rb +110 -0
- data/lib/tasks/googledrivetool.rb +252 -0
- data/lib/tasks/import.rb +142 -0
- data/lib/tasks/postgres.rb +29 -0
- metadata +307 -0
@@ -0,0 +1,236 @@
|
|
1
|
+
require 'active_record'
|
2
|
+
require 'ru_bee'
|
3
|
+
require 'dbtools/constants'
|
4
|
+
require 'dbtools/database/database_data'
|
5
|
+
require 'dbtools/database/violation'
|
6
|
+
|
7
|
+
module Dbtools
  module Database
    # Base class wrapping an ActiveRecord connection and providing generic
    # data-quality checks (completeness, reserved keywords, spelling, ...).
    # Adapter-specific subclasses (MysqlConnection, PostgresqlConnection)
    # are expected to implement #execute_query.
    class DbConnection
      attr_accessor :connection

      # Creates a connection to a database using the URL.
      def initialize(url)
        @connection = ActiveRecord::Base.establish_connection(url).connection
        @database = get_current_database
      end

      # Closes the underlying database connection.
      def close
        @connection.close
      end

      # Name of the database this connection currently points at.
      def database_name
        @connection.current_database
      end

      # Return an object representing the current database structure.
      # Text-typed columns are skipped, as are the adapters' system schemas.
      def get_current_database
        query = %{select c.table_catalog as "table_catalog", c.table_schema as "table_schema", c.table_name as "table_name", c.column_name as "column_name", c.data_type as "data_type"
                  from information_schema.columns as c
                  join information_schema.tables as t
                  on c.table_catalog = t.table_catalog
                  and c.table_schema = t.table_schema
                  and c.table_name = t.table_name
                  where c.data_type not like '%text%'
                  and c.table_schema not in ('information_schema', 'performance_schema', 'mysql', 'sys', 'pg_catalog')}
        database = DatabaseData.new(database_name)
        execute_query(query).each do |h|
          database.add_table(h['table_name'], h['table_schema'])
                  .add_column(h['column_name'], h['data_type'])
        end
        database
      end

      # Returns the names of all user-defined columns.
      def get_all_columns
        query = %{select c.COLUMN_NAME
                  from information_schema.COLUMNS as c
                  where c.TABLE_SCHEMA not in ('information_schema', 'performance_schema', 'mysql', 'sys', 'pg_catalog')}
        execute_query(query)
      end

      # Function to check if reserved keywords occur in the schema/table/column names.
      # Returns an array of Violation objects.
      def check_reserved_keywords
        # Build a SQL list like ('SELECT', 'TABLE', ...) in one go instead of
        # appending and trimming a trailing comma.
        reserved_keywords =
          "(#{Dbtools::Constants::RESERVED_KEYWORDS.map { |keyword| "'#{keyword}'" }.join(', ')})"
        sql = %{
          select c.TABLE_CATALOG as "table_catalog", c.TABLE_SCHEMA as "table_schema", c.TABLE_NAME as "table_name", c.COLUMN_NAME as "column_name"
          from information_schema.COLUMNS as c
          where c.TABLE_SCHEMA not in ('information_schema', 'performance_schema', 'mysql', 'sys', 'pg_catalog')
          and (upper(c.COLUMN_NAME) in #{reserved_keywords}
          or upper(c.TABLE_CATALOG) in #{reserved_keywords}
          or upper(c.TABLE_NAME) in #{reserved_keywords})
        }
        violations = []
        execute_query(sql).each do |h|
          violations << Violation.new(database: database_name,
                                      metric: "Reserved keywords",
                                      offender: "#{h['table_schema']}.#{h['table_name']}.#{h['column_name']}")
        end
        violations
      end

      # Get the completeness of the columns: flags columns with missing
      # (null or empty-string) entries.
      def get_completeness
        database = @database

        database.tables.values.each do |table|
          # This query counts all null entries (or entries containing '' for
          # string columns) in every column.
          query = table.query_empty_records
          execute_query(query).each do |res|
            table.columns.each { |k, col| col.missing_entries = res[k] }
          end
          # This query counts the total values in the table. It should only
          # return a single row; since the total records for all columns are
          # equal, taking just the first value is fine.
          query = table.query_total_records
          execute_query(query).each do |res|
            table.columns.each { |k, col| col.total_entries = res.values.first }
          end
        end

        violations = []
        database.tables.each do |_table_name, table|
          table.columns.each do |_col_name, col|
            next if col.missing_entries.to_i.zero?
            violations << Violation.new(database: database_name,
                                        metric: "Completeness",
                                        offender: col.full_name.delete('"'),
                                        violating_records: col.missing_entries.to_i,
                                        total_records: col.total_entries.to_i)
          end
        end
        violations
      end

      # Get the compression of the entries grouped by columns: flags columns
      # whose entries differ only by letter case.
      def get_syntax_compression
        database = @database

        database.tables.values.each do |table|
          # This query counts all lowercased distinct values in every column.
          query = table.query_distinct_lowercased_entries
          execute_query(query).each do |res|
            table.columns.each { |k, col| col.distinct_lower_entries = res[k] }
          end
          # This query counts all distinct values in every column.
          query = table.query_distinct_entries
          execute_query(query).each do |res|
            table.columns.each { |k, col| col.distinct_entries = res[k] }
          end
        end

        violations = []
        database.tables.each do |_table_name, table|
          table.columns.each do |_col_name, col|
            next if col.distinct_lower_entries == col.distinct_entries
            violations << Violation.new(database: database_name,
                                        metric: "Syntax compression",
                                        offender: col.full_name.delete('"'),
                                        violating_records: col.distinct_entries.to_i - col.distinct_lower_entries.to_i,
                                        total_records: col.distinct_entries.to_i)
          end
        end
        violations
      end

      # Calculate the inverse functional property of the database: for every
      # column, how many of its entries are distinct.
      def get_inverse_functional_property
        database = @database

        # Run the queries on every table.
        database.tables.values.each do |table|
          # This query counts all distinct values in every column.
          query = table.query_distinct_entries
          execute_query(query).each do |res|
            table.columns.each { |k, col| col.distinct_entries = res[k] }
          end
          # This query counts the total values in the table; all columns share
          # the same total, so the first value of the single result row suffices.
          query = table.query_total_records
          execute_query(query).each do |res|
            table.columns.each { |k, col| col.total_entries = res.values.first }
          end
        end

        violations = []
        database.tables.each do |_table_name, table|
          table.columns.each do |_col_name, col|
            violating_records = col.distinct_entries.to_i
            next if violating_records.zero?
            violations << Violation.new(database: database_name,
                                        metric: "Inverse functional property",
                                        offender: col.full_name.delete('"'),
                                        violating_records: violating_records,
                                        total_records: col.total_entries.to_i)
          end
        end
        violations
      end

      # Get the columns whose names are not fully lowercase, suggesting the
      # lowercased name as a solution.
      def get_uppercase_columns
        database = @database
        violations = []

        database.tables.each do |_table_name, table|
          table.columns.each do |_col_name, col|
            next if col.name.downcase.eql?(col.name)
            violations << Violation.new(database: database_name,
                                        metric: "Uppercase column names",
                                        offender: col.full_name.delete('"'),
                                        solution: "#{table.schema}.#{table.name}.#{col.name.downcase.delete('"')}")
          end
        end
        violations
      end

      # Checks the spelling of all column names.
      # NOTE(review): String#correct? is presumably provided by a
      # spell-checking dependency loaded elsewhere in dbtools — confirm.
      def check_spelling
        database = @database
        violations = []

        database.tables.each do |_table_name, table|
          table.columns.each do |_col_name, col|
            next if col.name.correct?
            violations << Violation.new(database: database_name,
                                        metric: "Spelling",
                                        offender: col.full_name.delete('"'))
          end
        end
        violations
      end

      # Creates a new database.
      def create_database(name)
        @connection.create_database(name)
      end

      # Runs all .sql files in the specified directory.
      def execute_files(directory)
        Dir.foreach(directory) do |file|
          # Skip directory entries and anything that is not a .sql file.
          next if file == '.' || file == '..' || File.extname(file) != '.sql'
          file_path = File.join(directory, file)
          execute_query(File.read(file_path))
        end
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'dbtools/database/db_connection'
|
2
|
+
|
3
|
+
module Dbtools
  module Database
    # MySQL-specific connection. Translates the double-quoted identifiers
    # used by the shared queries into MySQL backticks.
    class MysqlConnection < DbConnection
      # Executes a SQL statement on the connected database.
      # Returns an array of row hashes, or {} when the statement fails
      # (the MySQL ActiveRecord adapter throws an error when the query is empty).
      def execute_query(query)
        # Replace quotes with ticks so queries are compatible with MySQL.
        # Non-destructive tr: the previous gsub! mutated the caller's string.
        mysql_query = query.tr('"', '`')
        begin
          @connection.exec_query(mysql_query).to_hash
        rescue
          # Best-effort: swallow adapter errors and return an empty result.
          {}
        end
      end

      # Queries all *_id columns without an index and
      # outputs a query to create an index for each of them.
      def check_indexes
        sql = %{
          select cols.table_schema as "table_schema", cols.table_name as "table_name", cols.column_name as "column_name"
          from information_schema.columns as cols
          left join information_schema.statistics as stats
          on cols.table_schema = stats.table_schema
          and cols.table_name = stats.table_name
          and cols.column_name = stats.column_name
          where cols.table_schema not in ('information_schema', 'pg_catalog', 'performance_schema', 'mysql', 'sys')
          and cols.column_name like '%\_id'
          and stats.column_name IS NULL}

        violations = []
        execute_query(sql).each do |h|
          table_schema = h['table_schema']
          table_name = h['table_name']
          column_name = h['column_name']
          violations << Violation.new(database: database_name,
                                      metric: "Missing indexes",
                                      offender: "#{table_schema}.#{table_name}.#{column_name}",
                                      solution: "CREATE INDEX idx_#{table_schema}_#{table_name}_#{column_name} ON #{table_schema}.#{table_name} (#{column_name}); ")
        end
        violations
      end

      # Adds a description to a table by adding a comment.
      # MySQL syntax is COMMENT = '...'; the previous "COMMENT is" form is
      # rejected by the server. Single quotes are doubled so comments
      # containing apostrophes don't break the statement.
      def set_description_table(comment, object_name)
        query = %{ALTER TABLE `#{object_name}` COMMENT = '#{comment.to_s.gsub("'", "''")}'}
        execute_query(query)
      end

      # Returns all databases on the system as a Set of names.
      def get_all_databases
        sql = %q{show databases}
        execute_query(sql).flat_map(&:values).to_set
      end

      # Queries for all tables that don't have comment metadata.
      def get_tables_without_comments
        query = %{select t.table_schema as "table_schema", t.table_name as "table_name"
                  from information_schema.tables as t
                  where t.TABLE_SCHEMA not in ('information_schema', 'performance_schema', 'mysql', 'sys', 'pg_catalog')
                  and t.table_comment = ''}

        violations = []
        execute_query(query).each do |h|
          violations << Violation.new(database: database_name,
                                      metric: "Table without comments",
                                      offender: "#{h['table_schema']}.#{h['table_name']}")
        end
        violations
      end
    end
  end
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
require 'dbtools/database/db_connection'
|
2
|
+
|
3
|
+
module Dbtools
  module Database
    # PostgreSQL-specific connection.
    class PostgresqlConnection < DbConnection
      # Executes a SQL query on the connected database.
      # Returns an ActiveRecord::Result, or {} when the query fails.
      def execute_query(query)
        begin
          @connection.exec_query(query)
        rescue
          # Best-effort: swallow adapter errors and return an empty result.
          {}
        end
      end

      # Analyzes the tables and checks if there are more sequence scans
      # than index scans. Suggests where indexes could be created.
      def analyze_missing_indexes
        sql = %{
          SELECT relname, seq_scan-idx_scan AS too_much_seq, case when seq_scan-idx_scan>0 THEN 'Missing Index?' ELSE 'OK' END, pg_relation_size(relname::regclass) AS rel_size, seq_scan, idx_scan
          FROM pg_stat_all_tables
          WHERE schemaname ='public' AND pg_relation_size(relname::regclass)>80000 ORDER BY too_much_seq DESC;
        }
        execute_query(sql).each do |index|
          puts index
        end
      end

      # Queries all *_id columns that are not covered by an index and
      # outputs a query to create an index for each of them.
      def check_indexes
        sql = %{
          select cols.table_schema as "table_schema", cols.table_name as "table_name", cols.column_name as "column_name"
          from
          (select c.table_schema, c.table_name, c.column_name
          from information_schema.columns as c
          where c.column_name != 'ID' and c.column_name != 'id' and c.column_name like '%\\_id'
          and c.table_schema not in ('information_schema', 'pg_catalog', 'performance_schema', 'mysql', 'sys')) as cols
          left join
          (SELECT UNNEST(ARRAY(
          SELECT pg_get_indexdef(idx.indexrelid, k + 1, true)
          FROM generate_subscripts(idx.indkey, 1) as k
          ORDER BY k
          )) as indkey_names
          FROM pg_index as idx
          JOIN pg_class as i ON i.oid = idx.indexrelid
          JOIN pg_am as am ON i.relam = am.oid) as indexes
          on cols.column_name = indexes.indkey_names
          where indexes.indkey_names IS NULL}
        violations = []
        execute_query(sql).each do |h|
          table_schema = h['table_schema']
          table_name = h['table_name']
          column_name = h['column_name']
          violations << Violation.new(database: database_name,
                                      metric: "Missing indexes",
                                      offender: "#{table_schema}.#{table_name}.#{column_name}",
                                      solution: "CREATE INDEX idx_#{table_schema}_#{table_name}_#{column_name} ON #{table_schema}.#{table_name} (#{column_name}); ")
        end
        violations
      end

      # Adds a description to the current database by adding a comment.
      # Single quotes are doubled so comments containing apostrophes don't
      # break the statement. NOTE(review): the statement is still built by
      # interpolation — do not pass untrusted input.
      def set_description_database(comment)
        query = %{COMMENT ON DATABASE #{@connection.current_database} IS '#{comment.to_s.gsub("'", "''")}'}
        execute_query(query)
      end

      # Adds a description to a table by adding a comment.
      # Single quotes in the comment are escaped as above.
      def set_description_table(comment, object_name)
        query = %{COMMENT ON TABLE #{object_name} IS '#{comment.to_s.gsub("'", "''")}'}
        execute_query(query)
      end

      # Queries for all tables that don't have comment metadata.
      def get_tables_without_comments
        query = %{select t.table_catalog, t.table_schema, t.table_name, d.description
                  from information_schema.tables as t
                  join
                  pg_class as c on c.relname = t.table_name
                  join pg_namespace as n on c.relnamespace = n.oid
                  left join pg_description as d on c.oid = d.objoid
                  where t.table_schema not in ('information_schema', 'pg_catalog', 'performance_schema', 'mysql', 'sys')
                  and d.objoid is null}
        violations = []
        execute_query(query).each do |h|
          violations << Violation.new(database: database_name,
                                      metric: "Table without comments",
                                      offender: "#{h['table_catalog']}.#{h['table_schema']}.#{h['table_name']}")
        end
        violations
      end

      # Returns all non-template databases as a Set of names.
      def get_all_databases
        sql = %q{SELECT datname FROM pg_database WHERE datistemplate = false;}
        execute_query(sql).flat_map(&:values).to_set
      end

      # Queries for all databases that don't have comment metadata.
      def get_databases_without_comments
        query = %{select db.datname
                  from pg_database as db
                  left join pg_shdescription as sd on sd.objoid = db.oid
                  where sd.objoid is null}
        violations = []
        execute_query(query).each do |h|
          violations << Violation.new(database: database_name,
                                      metric: "Database without comment",
                                      offender: h['datname'])
        end
        violations
      end

      # Runs the COPY command using STDIN to bulk-load a CSV file
      # (expects a header row; +delimiter+ separates the fields).
      def copy_from_file(file, table_name, delimiter)
        raw_connection = @connection.raw_connection

        raw_connection.copy_data(%(COPY "#{table_name}" FROM STDIN DELIMITER '#{delimiter}' CSV HEADER;)) do
          File.foreach(file) do |line|
            raw_connection.put_copy_data(line)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'date'
require 'json'
require 'time' # provides Time#iso8601, used below
|
3
|
+
|
4
|
+
module Dbtools
  module Database
    # Value object describing a single data-quality violation found by one of
    # the connection checks. Serializes to JSON via #to_s.
    class Violation
      attr_reader :metric, :database, :offender, :violating_records, :total_records, :solution, :schema, :table, :column

      # metric::            name of the quality metric that was violated
      # database::          database the violation was found in
      # offender::          dotted "schema.table.column" path of the offender
      # schema/table/column:: fallbacks used when +offender+ has fewer than
      #                       three dot-separated parts
      # violating_records:: optional count of offending records
      # total_records::     optional total record count; together with
      #                     violating_records it yields a 'measure' ratio
      # solution::          optional suggested fix (e.g. a DDL statement)
      def initialize(metric:, database:, offender:,
                     schema: nil, table: nil, column: nil,
                     violating_records: nil, total_records: nil, solution: nil)
        @metric = metric
        @database = database
        @offender = offender
        @violating_records = violating_records
        @total_records = total_records
        @solution = solution
        # Time#iso8601 is provided by the 'time' stdlib extension.
        @timestamp = Time.now.utc.iso8601

        # Save everything in a Hash to make json serialization easily possible.
        @violation = {}
        @violation['metric'] = @metric
        @violation['database'] = @database
        @violation['offender'] = @offender

        # Derive schema/table/column from the offender path, falling back to
        # the explicitly passed keyword arguments for any missing part.
        @schema, @table, @column = @offender.split('.')
        @schema ||= schema
        @table ||= table
        @column ||= column

        @violation['schema'] = @schema
        @violation['table'] = @table
        @violation['column'] = @column

        @violation['violating_records'] = @violating_records unless violating_records.nil?
        @violation['total_records'] = @total_records unless total_records.nil?
        # Only compute the ratio when both counts are usable (avoids 0-division).
        @violation['measure'] = @violating_records.to_f / total_records.to_f unless (violating_records.nil? || total_records.to_i.zero?)
        @violation['solution'] = @solution unless solution.nil?
        @violation['timestamp'] = @timestamp
      end

      # JSON representation of the violation.
      def to_s
        @violation.to_json
      end
    end
  end
end
|