dbtools 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +333 -0
- data/Thorfile +1 -0
- data/bin/dbtools +5 -0
- data/config/client_secret_dbtools.json +1 -0
- data/config/config.yml +1 -0
- data/config/database_config.yml +12 -0
- data/config/databases.txt +5 -0
- data/config/schedule.rb +8 -0
- data/dbtools.gemspec +37 -0
- data/lib/dbtools.rb +47 -0
- data/lib/dbtools/constants.rb +847 -0
- data/lib/dbtools/converter/csv2rdf_converter.rb +68 -0
- data/lib/dbtools/converter/csv_importer.rb +107 -0
- data/lib/dbtools/converter/excel2csv_converter.rb +40 -0
- data/lib/dbtools/converter/google_drive2_rdf_converter.rb +97 -0
- data/lib/dbtools/database/database_data.rb +146 -0
- data/lib/dbtools/database/db_connection.rb +236 -0
- data/lib/dbtools/database/mysql_connection.rb +78 -0
- data/lib/dbtools/database/postgresql_connection.rb +132 -0
- data/lib/dbtools/database/violation.rb +45 -0
- data/lib/dbtools/google_drive/google_drive_api.rb +211 -0
- data/lib/dbtools/google_drive/google_drive_entity.rb +22 -0
- data/lib/dbtools/google_drive/google_drive_file.rb +10 -0
- data/lib/dbtools/google_drive/google_drive_folder.rb +9 -0
- data/lib/dbtools/plsql_functions/connect_server.sql +30 -0
- data/lib/dbtools/plsql_functions/link.sql +17 -0
- data/lib/dbtools/plsql_functions/unlink.sql +15 -0
- data/lib/dbtools/rdf/rdf_reader.rb +136 -0
- data/lib/dbtools/version.rb +3 -0
- data/lib/rdf/geophy.rb +27 -0
- data/lib/tasks/aws.rb +43 -0
- data/lib/tasks/backup.rb +107 -0
- data/lib/tasks/check.rb +220 -0
- data/lib/tasks/ckan.rb +151 -0
- data/lib/tasks/convert.rb +139 -0
- data/lib/tasks/dump.rb +110 -0
- data/lib/tasks/googledrivetool.rb +252 -0
- data/lib/tasks/import.rb +142 -0
- data/lib/tasks/postgres.rb +29 -0
- metadata +307 -0
data/lib/rdf/geophy.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'rdf'

module RDF
  # Single place for the RDF vocabulary used by the Spira models.
  # Every class method below hands back an RDF::URI rooted at the
  # Geophy Google Drive namespace.
  class Geophy
    # Base URI of the vocabulary.
    def self.vocab
      RDF::URI.new('http://geophy.io/google_drive#')
    end

    # URI identifying a generic Google Drive entity.
    def self.GoogleDriveEntity
      vocab.join('google_drive/entity')
    end

    # URI identifying a Google Drive file.
    def self.GoogleDriveFile
      vocab.join('google_drive/file')
    end

    # URI identifying a Google Drive folder.
    def self.GoogleDriveFolder
      vocab.join('google_drive/folder')
    end

    # URI under which the Drive changes page token is stored.
    def self.ChangesPageToken
      vocab.join('google_drive/changes_page_token')
    end
  end
end
data/lib/tasks/aws.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'thor'
require 'aws-sdk'

# Thor task collection for AWS operations (currently: uploading files to S3).
class Dbtools::Aws < Thor
  package_name "dbtools"

  # Warn at startup when no AWS credentials are discoverable, so later
  # task failures are easier to diagnose. Checks ~/.aws/credentials and
  # the standard environment variables.
  def initialize(*args)
    super
    credentials_path = File.join(Dir.home, '.aws', 'credentials')
    # File.exist? — File.exists? is deprecated and removed in Ruby 3.2.
    if !File.exist?(credentials_path) && (ENV['AWS_ACCESS_KEY_ID'].nil? || ENV['AWS_SECRET_ACCESS_KEY'].nil?)
      STDERR.puts "No credentials for AWS found. You might want to configure them. " +
        "Your credentials should be configured in ~/.aws/credentials, or in the environmental variables ENV['AWS_ACCESS_KEY_ID'] and ENV['AWS_SECRET_ACCESS_KEY']." +
        "\nSee https://aws.amazon.com/blogs/security/a-new-and-standardized-way-to-manage-credentials-in-the-aws-sdks/ for more info. "
    end
  end

  desc 'upload_to_s3 [file, bucket, key]', 'Uploads a file to S3. This requires your credentials to be configured in ~/.aws/credentials.'
  long_desc <<-LONGDESC
    `upload_to_s3 [file, bucket, key]` uploads a file to S3. You must specify the bucket name, along with
    the key.
    This task requires your credentials to be configured in ~/.aws/credentials,
    or in the following environmental variables: ENV['AWS_ACCESS_KEY_ID'] and ENV['AWS_SECRET_ACCESS_KEY']

    Example ~/.aws/credentials:

    [default]
    aws_access_key_id=ABCDEF123
    aws_secret_access_key=+aBcDeF123
    region=eu-central-1

    Example usage:
    \x5$ dbtools aws upload_to_s3 /tmp/file.txt bucket backups/something.txt
  LONGDESC
  # FIX: the original `method_option :prefix => :string, :default => nil`
  # passed a Hash as the option *name*; Thor's method_option expects the
  # name first and the settings hash second, so the --prefix option was
  # never declared correctly.
  method_option :prefix, :type => :string, :default => nil
  # Uploads +file+ to +bucket+ under +key+, optionally prepending the
  # --prefix option to the key.
  def upload_to_s3(file, bucket, key)
    client = Aws::S3::Client.new
    resource = Aws::S3::Resource.new(client: client)
    s3_bucket = resource.bucket(bucket)
    key = File.join(options[:prefix], key) if !options[:prefix].nil? && !options[:prefix].empty?
    obj = s3_bucket.object(key)
    obj.upload_file(file)
  end
end
|
data/lib/tasks/backup.rb
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'fileutils'
require 'tempfile' # FIX: Tempfile was used below without being required.
require 'thor'
require 'rdf'
require 'sparql/client'

module Dbtools
  # Thor tasks that back up RDF / Blazegraph data and upload the result
  # to Google Drive or AWS S3.
  class Backup < Thor
    package_name "dbtools"

    # Backs up a rdf graph at a sparql endpoint
    desc 'rdf [sparql_endpoint]', 'Backup a RDF store at a SPARQL endpoint.'
    long_desc <<-LONGDESC
      `rdf [sparql_endpoint]` will create a dump containing all ntriples
      located at the SPARQL endpoint, and upload it automatically to GoogleDrive or AWS S3.
      The backup location can be specified with the arguments --googledrive and --aws_s3.

      For Google Drive backups, the folder where the backup will be stored can be given with the --folder=folder_id argument.

      The name can be specified with the optional argument --filename. The default
      is "metatools_rdf_backup_#{Time.now.strftime('%Y%m%d-%H%M')}.nt".

      Argument --nagios will run the `geophy-nagios-report` command.

      Example:
      \x5$ dbtools backup rdf http://localhost:9999/blazegraph/namespace/test/sparql --googledrive
      \x5$ dbtools backup rdf http://localhost:9999/blazegraph/namespace/test/sparql --aws_s3 --bucket=example-bucket --key=example.txt
    LONGDESC
    method_option :filename, :type => :string, :default => "blazegraph_#{Time.now.strftime('%Y%m%d-%H%M')}.nt.gz"
    method_option :folder, :type => :array, :default => ['0Byv6wMVo_JE4MElLLVJUS1U1RE0']
    method_option :googledrive, :type => :boolean, :default => false
    method_option :aws_s3, :type => :boolean, :default => false
    method_option :nagios, :type => :boolean, :default => false
    method_options :bucket => :string, :key => :string
    # Dumps the store at +sparql_endpoint+ to a compressed temp file and
    # uploads it to the selected backup target(s). Reports status via
    # geophy-nagios-report when --nagios is given.
    def rdf(sparql_endpoint)
      if !options[:googledrive] && !options[:aws_s3]
        error_message = "You must choose either Google Drive or AWS S3 as backup location. " +
          "Pass --googledrive or --aws_s3 as an argument."
        STDERR.puts error_message
        `sudo /usr/local/bin/geophy-nagios-report -s blazegraph-backup -m "#{error_message}" -c "#{$PROGRAM_NAME}" ERROR` if options[:nagios]
        return
      end

      if options[:aws_s3]
        if options[:bucket].nil? || options[:key].nil?
          error_message = "Bucket and key must be specified. Use the arguments --bucket= and --key="
          STDERR.puts error_message
          `sudo /usr/local/bin/geophy-nagios-report -s blazegraph-backup -m "#{error_message}" -c "#{$PROGRAM_NAME}" ERROR` if options[:nagios]
          return
        end
      end

      filename = options[:filename]
      tempfile = Tempfile.new(['backup', '.nt.gz'])
      begin
        invoke "dbtools:dump:rdf", [sparql_endpoint, tempfile.path], :compress => true
        invoke "dbtools:google_drive:upload", [tempfile.path], :filename => filename, :folder => options[:folder] if options[:googledrive]
        invoke "dbtools:aws:upload_to_s3", [tempfile.path, options[:bucket], options[:key]], :attributes => false if options[:aws_s3]
        `sudo /usr/local/bin/geophy-nagios-report -s blazegraph-backup -m "Backup successful" -c "#{$PROGRAM_NAME}" OK` if options[:nagios]
      rescue StandardError => e
        # FIX: was `rescue Exception`, which also swallows SignalException
        # and SystemExit; StandardError is the correct net here.
        `sudo /usr/local/bin/geophy-nagios-report -s blazegraph-backup -m "#{e.message}" -c "#{$PROGRAM_NAME}" ERROR` if options[:nagios]
      ensure
        tempfile.close
        tempfile.unlink
      end
    end

    desc 'blazegraph [url]', 'Dumps a blazegraph database to a jnl file.'
    long_desc <<-LONGDESC
      `blazegraph [sparql_endpoint]` will create a backup of a blazegraph database,
      using the built in backup function, and upload it to GoogleDrive or AWS S3.
      The backup location can be specified with the arguments --googledrive and --aws_s3.

      For Google Drive backups, the folder of the backup can be given with the --folder=folder_id argument.
      The name can be specified with the optional argument --filename. The default
      is "metatools_rdf_backup_#{Time.now.strftime('%Y%m%d-%H%M')}.jnl.gz".

      Example:
      \x5$ dbtools backup rdf http://localhost:9999/blazegraph/backup --googledrive
      \x5$ dbtools backup rdf http://localhost:9999/blazegraph/backup --aws_s3 --bucket=example-bucket --key=example.txt
    LONGDESC
    option :filename, :type => :string, :default => "metatools_rdf_backup_#{Time.now.strftime('%Y%m%d-%H%M')}.jnl.gz"
    option :folder, :type => :array, :default => ['0Byv6wMVo_JE4MElLLVJUS1U1RE0']
    option :googledrive, :type => :boolean, :default => false
    option :aws_s3, :type => :boolean, :default => false
    options :bucket => :string, :key => :string
    # Backs up a Blazegraph instance via its built-in backup endpoint and
    # uploads the resulting jnl file to the selected backup target(s).
    def blazegraph(url)
      if !options[:googledrive] && !options[:aws_s3]
        STDERR.puts "You must choose either Google Drive or AWS S3 as backup location. " +
          "Pass --googledrive or --aws_s3 as an argument."
        return
      end
      # FIX: the original returned unconditionally inside this branch, so
      # --aws_s3 could never proceed even with a valid bucket and key.
      # Now it only aborts when bucket or key is actually missing.
      if options[:aws_s3] && (options[:bucket].nil? || options[:key].nil?)
        STDERR.puts "Bucket and key must be specified. Use the arguments --bucket= and --key=."
        return
      end

      filename = File.join(Dir.pwd, options[:filename])
      begin
        invoke "dbtools:dump:blazegraph", [url, filename], :compress => true
        invoke "dbtools:google_drive:upload", [filename], :folder => options[:folder]
        invoke "dbtools:aws:upload_to_s3", [filename, options[:bucket], options[:key]] if options[:aws_s3]
      ensure
        # File.exist? — File.exists? is deprecated and removed in Ruby 3.2.
        File.delete(filename) if File.exist?(filename)
      end
    end
  end
end
|
data/lib/tasks/check.rb
ADDED
@@ -0,0 +1,220 @@
|
|
1
|
+
require 'dbtools/database/db_connection'
require 'dbtools/database/postgresql_connection'
require 'dbtools/database/mysql_connection'
require 'thor'
require 'yaml' # FIX: YAML.load_file was used without requiring yaml.
# require 'slack-notifier'
require 'dbtools/constants'

module Dbtools
  # Thor tasks that run quality checks (indexes, keywords, completeness,
  # naming, comments, ...) against a MySQL or PostgreSQL database.
  class Check < Thor
    package_name "dbtools"

    desc "all [url]", "Run all tasks on this database."
    # Runs every check against the database at +url+ and prints/logs results.
    def all(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      output_result(db.check_indexes)
      output_result(db.check_reserved_keywords)
      output_result(db.get_uppercase_columns)
      output_result(db.get_completeness)
      output_result(db.get_syntax_compression)
      output_result(db.get_inverse_functional_property)
      db.close
    end

    desc "all_databases", "Run all tasks on all databases it finds. Specify the credentials in ~/.dbtools/database_config.yml"
    # Iterates over every configured server and runs #all on each database,
    # skipping databases listed in Dbtools::Constants::IGNORE_DATABASES.
    def all_databases()
      load_config
      @config.each do |k, db_credentials|
        begin
          db_connection = check_adapter(db_credentials)
          databases = db_connection.get_all_databases
          databases.each do |database|
            next if Dbtools::Constants::IGNORE_DATABASES.include?(database)
            db_credentials['database'] = database
            self.all(db_credentials)
          end
        rescue StandardError
          # FIX: was `rescue Exception` (also traps signals/exit).
          # Best-effort: skip servers we cannot reach and continue.
          nil
        end
      end
    end

    desc "indexes [URL]", "This task runs the function \'create_indexes\' on the database. Works on a mysql and postgres database. "
    def indexes(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      result = db.check_indexes
      output_result(result)
    end

    desc "output_indexes [URL]", "This task runs the function \'create_indexes\' on the database and outputs the result. Works on a mysql and postgres database. Outputs the queries."
    # Prints only the proposed fix queries for index violations.
    def output_indexes(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      violations = db.check_indexes
      queries = violations.map do |violation|
        violation.solution
      end
      puts queries.join("\n") unless queries.empty?
    end

    # Checks if column names or table names include reserved keywords.
    desc 'keywords [URL]', 'Checks if column names or table names include reserved keywords.'
    def keywords(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      result = db.check_reserved_keywords
      output_result(result)
    end

    desc 'completeness [URL]', 'Checks the amount of empty/null entries in the database.'
    def completeness(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      result = db.get_completeness
      output_result(result)
    end

    desc 'compression [URL]', 'Checks the amount of entries that can be compressed in the database.'
    def compression(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      result = db.get_syntax_compression
      output_result(result)
    end

    desc 'casing [URL]', 'Checks whether all column names are lowercase. '
    def casing(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      result = db.get_uppercase_columns
      output_result(result)
    end

    desc 'spelling [URL]', 'Checks whether all column names are correctly spelled. '
    def spelling(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      result = db.check_spelling
      output_result(result)
    end

    desc 'table_comments [URL]', 'Checks for table without comment metadata. '
    def table_comments(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      result = db.get_tables_without_comments
      output_result(result)
    end

    desc 'database_comments [URL]', 'Checks for databases without comment metadata. '
    def database_comments(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      result = db.get_databases_without_comments
      output_result(result)
    end

    desc 'inverse_functional_property [URL]', 'Gets the inverse functional property of the database. '
    def inverse_functional_property(url)
      @url = url
      db = check_adapter(url)
      return if db.nil?

      result = db.get_inverse_functional_property
      output_result(result)
    end

    private
    # Check if the url is a postgres or mysql connection.
    # Accepts either a credentials Hash (with an 'adapter' key) or a
    # connection URL string; returns a connection object or nil.
    def check_adapter(url)
      adapter = if url.is_a?(Hash)
        url['adapter']
      else
        url.match("^([a-zA-Z0-9]+):\/\/(.+)@(.+)\/(.+)").captures[0]
      end
      case adapter
      when "postgres", "postgresql"
        db = Dbtools::Database::PostgresqlConnection.new(url)
      when "mysql2"
        db = Dbtools::Database::MysqlConnection.new(url)
      else
        puts "Invalid url"
        return nil
      end
      return db
    end

    # Loads the config file
    def load_config()
      @config = YAML.load_file(Dbtools::Constants::DB_CONFIG_PATH)
    end

    # Prints the joined violations and appends them to the output file
    # when its directory is writable.
    def output_result(result)
      result = result.join("\n")
      #notifier = init_slack_notifier(@url)
      #notifier.ping(result) unless result.empty?
      unless result.empty?
        puts result
        # Only write if directory is writable
        if File.writable?(File.dirname(Dbtools::Constants::OUTPUT_FILE))
          File.open(Dbtools::Constants::OUTPUT_FILE, 'a') { |f| f.puts(result) }
        end
      end
    end
  end
  # Stub HTTP client that discards posts (used to silence notifiers).
  class NoOpHTTPClient
    def self.post uri, params={}
    end
  end

  # "Database name": [
  #   "metric": {
  #     "name": "name"
  #     "counter": 0
  #     "Violations": [
  #       "offender": {
  #         "total_records": 132
  #         "violating_records": 123
  #         "measure": 12
  #         "solution":
  #       }
  #     ]
  #   }]
  #
  # {
  #   "metrics": [{
  #     "metric": "metric name",
  #     "counter": "number of violations",
  #     "violations": {
  #       "database.schema?.table?.col?": {
  #         "total_records": "a number",
  #         "violating_records": "a number",
  #         "measure": "a number",
  #         "solution": " a query/text with a proposed solution"
  #       }
  #     }
  #   }]
  # }
end
|
data/lib/tasks/ckan.rb
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'yaml'
require 'thor'
require 'find'
require 'rdf'
require 'open-uri'
require 'dbtools/rdf/rdf_reader'
require 'dbtools/constants'
require 'dbtools/converter/csv_importer'
require 'dbtools/converter/excel2csv_converter'
require 'dbtools/database/mysql_connection'
require 'dbtools/database/postgresql_connection'
require 'dbtools/google_drive/google_drive_api'
require 'tasks/import'
require 'fileutils'

module Dbtools
  # Thor tasks for loading CKAN datasets/resources (stored on Google Drive,
  # described in an RDF graph) into local Postgres/MySQL databases.
  class Ckan < Thor
    package_name "dbtools"

    # Sets up the Google Drive API client, its RDF graph, and the Import
    # task helper used to load the downloaded files.
    def initialize(*args)
      super
      load_config
      @gdrive = Dbtools::Google_Drive::Google_drive_api.new
      @service = @gdrive.service
      @rdf_graph = @gdrive.rdf_graph
      @import = Import.new
    end

    desc 'load_dataset [dataset]', 'Loads a dataset from a CKAN source by querying the rdf graph. Accepts an url containing the UUID or the UUID itself. '
    # Loads every resource belonging to +dataset+.
    def load_dataset(dataset)
      datasets_metadata = @rdf_graph.get_metadata(dataset)
      raise "Dataset not found. " if datasets_metadata.empty?
      datasets_metadata.values.each do |metadata|
        load_dataset_resource(metadata)
      end
    end

    desc 'load_resource [dataset, resource]', 'Loads a single resource from a ckan dataset. Accepts urls containing the UUID or the UUID itself. '
    # Loads exactly one resource of +dataset+; raises when the resource is
    # missing or ambiguous.
    def load_resource(dataset, resource)
      datasets_metadata = @rdf_graph.get_metadata(dataset)
      puts datasets_metadata.inspect
      resource_metadata = datasets_metadata.select { |k, v| v['resource'].to_s.include?(resource) }

      raise "Resource not found." if resource_metadata.empty?
      raise "Multiple resources found." if resource_metadata.length > 1

      load_dataset_resource(resource_metadata.values.first)
    end

    #desc 'check_missing_databases', 'Checks for databases that are listed in the RDF, but not loaded on the system. Prints the result.'
    #def check_missing_databases
    #postgres_databases = PostgresqlConnection.new(@postgres_connection_url).get_all_databases
    #mysql_databases = MysqlConnection.new(@mysql_connection_url).get_all_databases
    #installed_databases = postgres_databases + mysql_databases
    #rdf_databases = @rdf_graph.get_available_databases.map {|k, v| [v['database_title'].gsub(/[^0-9a-zA-Z_]/,'_'), v]}.to_h
    #missing_databases = rdf_databases.keys.to_set - installed_databases
    #puts missing_databases.inspect
    #return missing_databases.map {|title| [title, rdf_databases[title]]}.to_h
    #end

    #desc 'load_missing_databases', 'Loads all databases that are listed in the RDF, but missing on the system.'
    #def load_missing_databases
    #missing_databases = check_missing_databases
    #missing_databases.each do |database_title, metadata|
    #load_dataset(metadata['dataset'])
    #end
    #end

    desc 'list_databases', 'Lists all databases by querying the rdf graph'
    # Interactive: prints every known dataset and loads the one selected.
    def list_databases
      databases = @rdf_graph.get_available_databases
      databases.each do |index, res|
        puts "#{index}. #{res['dataset_title']}"
      end
      selection = ask("Which data set do you want to load? ").to_i
      unless databases.key?(selection)
        puts 'Data set not found. '
        return
      end
      load_dataset(databases[selection]['dataset'])
      return databases
    end

    desc 'load_rdf_in_desc [target_database, ckan_dataset]', 'Loads the RDF metadata into the database description. '
    # Reads the dataset's turtle file and stores it as the database comment.
    def load_rdf_in_desc(target_database, dataset)
      begin
        # Open the rdf of the file.
        # NOTE(review): Kernel#open via open-uri — presumably +dataset+ is a
        # URL. On Ruby 3+ open-uri no longer patches Kernel#open, so this
        # would need URI.open for remote datasets — confirm before upgrading.
        description = open("#{dataset}.ttl").read

        # Put the rdf in the comments
        psql = Dbtools::Database::PostgresqlConnection.new(target_database)
        psql.set_description_database(description)
        psql.close
      rescue
        puts "Could not open rdf from dataset: #{dataset}"
      end

    end

    private
    # Downloads the resource described by +metadata+ from Google Drive into
    # /tmp/<database>/ and loads it into a database, attaching the dataset's
    # RDF as the database description.
    def load_dataset_resource(metadata)
      dataset = metadata['dataset'].to_s
      table_name = metadata['resource_title'].gsub(/[^0-9a-zA-Z_]/,'_')
      database_name = metadata['database_title'].gsub(/[^0-9a-zA-Z_]/,'_')
      format = metadata['format'].gsub(/[^0-9a-zA-Z_]/,'_')
      folder = "/tmp/#{database_name}"

      # Create folder if it doesn't exist
      FileUtils.mkdir_p(folder)

      # FIX: the original wrapped the following in a `begin ... end` with no
      # rescue/ensure, which had no effect; removed.
      file_id = @gdrive.get_file_id(metadata['access_url'])
      file_name = @service.get_file(file_id).name
      destination = File.join(folder, file_name)
      @service.get_file(file_id, download_dest: destination)
      connection = load_database(database_name, destination, format, table_name: table_name)
      load_rdf_in_desc(connection, dataset)
    end


    # Loads a database into either postgres or mysql, depending on the format.
    def load_database(database_name, file, format, table_name: '')
      return case format.downcase
      when /postgres/
        @import.postgres_dump(database_name, file)
      when /mysql/
        @import.mysql_dump(database_name, file)
      when /csv/, /txt/
        @import.csv_in_postgres(file, database_name, table_name)
      when /xls/
        @import.excel(database_name, file)
      else
        puts "Can't load #{format} file."
        return nil
      end
    end

    # Loads all configurations needed
    def load_config
      config = YAML.load_file(Dbtools::Constants::DB_TARGET_CONFIG_PATH)
      postgres_config = config['postgres']
      mysql_config = config['mysql']
      @postgres_connection_url = "postgres://#{postgres_config['username']}:#{postgres_config['password']}@#{postgres_config['host']}/"
      @mysql_connection_url = "mysql2://#{mysql_config['username']}:#{mysql_config['password']}@#{mysql_config['host']}/"
      @postgres_connection_options = "--username=#{postgres_config['username']} --host=#{postgres_config['host']} --port=#{postgres_config['port']}"
      @postgres_connection_command = "psql #{@postgres_connection_options}"
      @mysql_connection_command = "mysql -u #{mysql_config['username']} -p#{mysql_config['password']} -h #{mysql_config['host']}"
    end
  end
end
|