lexicon-common 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lexicon-common.gemspec +2 -0
- data/lib/lexicon-common.rb +3 -0
- data/lib/lexicon/common.rb +1 -0
- data/lib/lexicon/common/mixin/logger_aware.rb +2 -2
- data/lib/lexicon/common/mixin/nameable.rb +16 -0
- data/lib/lexicon/common/package/directory_package_loader.rb +56 -12
- data/lib/lexicon/common/package/package.rb +11 -43
- data/lib/lexicon/common/package/package_file.rb +50 -0
- data/lib/lexicon/common/package/v1/package.rb +83 -0
- data/lib/lexicon/common/package/v1/package_builder.rb +70 -0
- data/lib/lexicon/common/package/v1/source_file_set.rb +28 -0
- data/lib/lexicon/common/package/v2/package.rb +56 -0
- data/lib/lexicon/common/package/v2/package_builder.rb +72 -0
- data/lib/lexicon/common/package/v2/source_file_set.rb +26 -0
- data/lib/lexicon/common/production/datasource_loader.rb +119 -60
- data/lib/lexicon/common/production/table_locker.rb +58 -0
- data/lib/lexicon/common/psql.rb +47 -0
- data/lib/lexicon/common/remote/package_downloader.rb +61 -43
- data/lib/lexicon/common/remote/package_uploader.rb +37 -28
- data/lib/lexicon/common/remote/s3_client.rb +6 -0
- data/lib/lexicon/common/schema/validator_factory.rb +1 -1
- data/lib/lexicon/common/shell_executor.rb +0 -3
- data/lib/lexicon/common/version.rb +1 -1
- data/resources/lexicon.schema.json +116 -31
- metadata +39 -3
- data/lib/lexicon/common/package/package_builder.rb +0 -68
- data/lib/lexicon/common/package/source_file_set.rb +0 -24
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lexicon
|
4
|
+
module Common
|
5
|
+
module Package
|
6
|
+
module V1
|
7
|
+
# Value object describing one file set of a schema-version-1 package:
# an identifier, a display name, a structure file, a data file and the
# tables the set provides.
class SourceFileSet
  # NOTE(review): Mixin::Nameable is defined elsewhere; presumably it relies on #name - confirm.
  include Mixin::Nameable

  # @return [String]
  attr_reader :id
  # @return [String]
  attr_reader :name
  # @return [String] value received as +structure+
  attr_reader :structure_path
  # @return [String] value received as +data+
  attr_reader :data_path
  # @return [Array<String>]
  attr_reader :tables

  # @param [String] id
  # @param [String] name
  # @param [String] structure exposed as #structure_path
  # @param [String] data exposed as #data_path
  # @param [Array<String>] tables
  def initialize(id:, name:, structure:, data:, tables:)
    @id, @name = id, name
    @structure_path, @data_path = structure, data
    @tables = tables
  end
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lexicon
|
4
|
+
module Common
|
5
|
+
module Package
|
6
|
+
module V2
|
7
|
+
# Package using schema version 2: one structure file per file set plus any
# number of data files per table, all stored under the package's data directory.
class Package < Common::Package::Package
  # @return [Array<SourceFileSet>]
  attr_reader :file_sets

  # @param [Semantic::Version] version
  # @param [Pathname] spec_file
  # @param [Pathname] checksum_file
  # @param [Pathname] dir
  # @param [Array<SourceFileSet>] file_sets
  def initialize(version:, spec_file:, checksum_file:, dir:, file_sets:)
    super(
      version: version,
      schema_version: 2,
      dir: dir,
      spec_file: spec_file,
      checksum_file: checksum_file,
    )

    @file_sets = file_sets
  end

  # NOTE(review): this override only delegates to the parent implementation;
  # it is kept so the method's owner and visibility stay exactly as they were.
  def valid?
    super
  end

  # @return [Array<PackageFile>] structure and data files of every file set
  def files
    file_sets.flat_map(&method(:file_set_files))
  end

  # @return [Pathname] the 'data' directory inside the package directory
  def data_dir
    dir.join('data')
  end

  private

  # Builds the package files of one file set, structure file first.
  # Paths are relative to the package directory (they start with the
  # data directory's base name).
  #
  # @param [SourceFileSet] file_set
  # @return [Array<PackageFile>]
  def file_set_files(file_set)
    data_root = data_dir.basename
    structure = PackageFile.new_structure(data_root.join(file_set.structure))

    data_files = file_set.tables.values.flatten(1).map do |table_file|
      PackageFile.new_data(data_root.join(table_file))
    end

    [structure, *data_files]
  end
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lexicon
|
4
|
+
module Common
|
5
|
+
module Package
|
6
|
+
module V2
|
7
|
+
# Mutable variant of the version-2 Package used while assembling a package
# on disk: file sets are added one by one and their files are moved into the
# package's data directory.
class PackageBuilder < Package
  # Starts with no file sets and makes sure the data directory exists.
  #
  # @param [Semantic::Version] version
  # @param [Pathname] dir
  def initialize(version:, dir:)
    super(
      file_sets: [],
      version: version,
      dir: dir,
      checksum_file: dir.join(CHECKSUM_FILE_NAME),
      spec_file: dir.join(SPEC_FILE_NAME),
    )

    FileUtils.mkdir_p(data_dir)
  end

  # Registers a new file set and moves its files into the data directory.
  #
  # @param [String] id
  # @param [String] name
  # @param [Pathname] structure
  #   Takes ownership of the file (moves it to the correct folder)
  # @param [Hash{String=>Array<Pathname>}] tables
  #   Takes ownership of the files (moves them to the correct folder)
  # @return [Array<SourceFileSet>] the builder's file sets, including the new one
  def add_file_set(id, name:, structure:, tables:)
    structure_name = structure_file_name(id)
    FileUtils.mv(structure.to_s, data_dir.join(structure_name).to_s)

    table_data = tables.map do |table_name, files|
      [table_name, move_table_files(table_name, files)]
    end

    file_sets << SourceFileSet.new(
      id: id,
      name: name,
      structure: structure_name,
      tables: table_data.to_h
    )
  end

  # @return [Package] a plain package sharing this builder's directory, files and file sets
  def as_package
    Package.new(
      checksum_file: checksum_file,
      dir: dir,
      file_sets: file_sets,
      spec_file: spec_file,
      version: version,
    )
  end

  private

  # Moves the data files of one table into the data directory, naming them
  # "<table>_<position>.csv.gz" after their position in +files+.
  #
  # @param [String] table_name
  # @param [Array<Pathname>] files
  # @return [Array<String>] the new file names, in the same order as +files+
  def move_table_files(table_name, files)
    files.each_with_index.map do |file, index|
      file_name = "#{table_name}_#{index}.csv.gz"
      FileUtils.mv(file.to_s, data_dir.join(file_name))

      file_name
    end
  end

  # @param [String] id
  # @return [String] name of the structure file for the given file set id
  def structure_file_name(id)
    "#{id}__structure.sql"
  end
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lexicon
|
4
|
+
module Common
|
5
|
+
module Package
|
6
|
+
module V2
|
7
|
+
# Value object describing one file set of a schema-version-2 package:
# an identifier, a display name, a structure file and, per table, the
# list of data files to load into it.
class SourceFileSet
  # NOTE(review): Mixin::Nameable is defined elsewhere; presumably it relies on #name - confirm.
  include Mixin::Nameable

  # @return [String]
  attr_reader :id
  # @return [String]
  attr_reader :name
  # @return [String]
  attr_reader :structure
  # @return [Hash{String=>Array<String>}] frozen
  attr_reader :tables

  # @param [String] id
  # @param [String] name
  # @param [String] structure
  # @param [Hash{String=>Array<String>}] tables
  #   The given hash itself is frozen (not a copy).
  def initialize(id:, name:, structure:, tables:)
    @id, @name, @structure = id, name, structure
    @tables = tables.freeze
  end
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -1,61 +1,139 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
using Corindon::Result::Ext
|
4
|
+
|
3
5
|
module Lexicon
|
4
6
|
module Common
|
5
7
|
module Production
|
6
8
|
class DatasourceLoader
|
9
|
+
include Mixin::LoggerAware
|
7
10
|
include Mixin::SchemaNamer
|
11
|
+
|
8
12
|
# @param [ShellExecutor] shell
|
9
13
|
# @param [Database::Factory] database_factory
|
10
14
|
# @param [FileLoader] file_loader
|
11
15
|
# @param [String] database_url
|
12
|
-
|
16
|
+
# @param [TableLocker] table_locker
|
17
|
+
# @param [Psql] psql
|
18
|
+
def initialize(shell:, database_factory:, file_loader:, database_url:, table_locker:, psql:)
  # Collaborators are only stored here; nothing is connected or executed yet.
  @shell, @psql = shell, psql
  @database_factory, @database_url = database_factory, database_url
  @file_loader, @table_locker = file_loader, table_locker
end
|
18
26
|
|
19
27
|
# @param [Package::Package] package
|
20
28
|
# @param [Array<String>, nil] only
|
21
|
-
# @param [Array<String>] without
|
22
29
|
# If nil, all datasets are loaded.
|
23
30
|
# If present, only listed datasets are loaded.
|
24
31
|
# Structures are ALWAYS loaded
|
32
|
+
# @param [Array<String>] without
|
25
33
|
def load_package(package, only: nil, without: [])
  # Dispatch on the package's schema version; an unknown version is only logged.
  schema_version = package.schema_version

  case schema_version
  when 1 then load_v1(package, only: only, without: without)
  when 2 then load_v2(package, only: only, without: without)
  else log("Schema version #{schema_version} is not supported")
  end
end
|
43
|
+
|
44
|
+
private
|
30
45
|
|
31
|
-
|
32
|
-
|
46
|
+
# @param [Package::V1::Package] package
|
47
|
+
def load_v1(package, only: nil, without: [])
  # NOTE(review): unwrap! presumably raises when filtering failed - confirm against Corindon.
  # File sets without a data path have nothing to load and are dropped.
  loadable_sets = filter_file_sets(package.file_sets, only: only, without: without)
                  .unwrap!
                  .select(&:data_path)

  # Structure files of the whole package are loaded, whatever the filter selected.
  structure_paths = package.files.select(&:structure?).map(&:path)
  load_structure_files(structure_paths, schema: version_to_schema(package.version), dir: package.dir)

  # Names still being loaded; shared between the loader threads for progress output.
  remaining = ::Concurrent::Set.new(loadable_sets.map(&:name))

  # One thread per file set; all of them are joined before the tables get locked.
  workers = loadable_sets.map do |file_set|
    Thread.new do
      file_loader.load_file(package.data_path(file_set))
      remaining.delete(file_set.name)

      puts '[ OK ] '.green + file_set.name.yellow + ", #{remaining_message(remaining)}"
    end
  end
  workers.each(&:join)

  # Tables of every file set of the package are locked, not only the loaded ones.
  table_locker.lock_tables(package: package, tables: package.file_sets.flat_map(&:tables))
end
|
46
71
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
72
|
+
# Builds the progress suffix printed after each loaded item.
#
# @param [#size, #to_a] remaining names of the items still to be loaded
# @return [String] 'All done!' when nothing is left, a bare count when more
#   than five items are left, otherwise the sorted list of remaining names
def remaining_message(remaining)
  return 'All done!' if remaining.size.zero?
  return "#{remaining.size} remaining" if remaining.size > 5

  names = remaining.to_a.sort
  "Remaining: #{names.join(', ')}"
end
|
54
81
|
|
55
|
-
|
56
|
-
|
82
|
+
# @param [Package::V2::Package] package
|
83
|
+
# @param [Array<String>, nil] only
|
84
|
+
# @param [Array<String>] without
|
85
|
+
def load_v2(package, only: nil, without: [])
  # NOTE(review): unwrap! presumably raises when filtering failed - confirm against Corindon.
  # File sets without any table entry have no data to load and are dropped.
  selected_sets = filter_file_sets(package.file_sets, only: only, without: without)
                  .unwrap!
                  .select { |fs| fs.tables.any? }

  schema = version_to_schema(package.version)

  # Structure files of the whole package are loaded, whatever the filter selected.
  structure_paths = package.files.select(&:structure?).map(&:path)
  load_structure_files(structure_paths, schema: schema, dir: package.dir)

  # Data files still being loaded; shared between the loader threads for progress output.
  remaining = ::Concurrent::Set.new(selected_sets.flat_map { |fs| fs.tables.values.flatten(1) })

  # One thread per data file, each copying into the table the file belongs to.
  workers = []
  selected_sets.each do |fs|
    fs.tables.each do |table_name, files|
      files.each do |file|
        workers << Thread.new do
          load_csv(package.data_dir.join(file), into: table_name, schema: schema)
          remaining.delete(file)

          puts '[ OK ] '.green + file.to_s.yellow + ", #{remaining_message(remaining)}"
        end
      end
    end
  end

  # NOTE(review): unlike load_v1, no table locking happens here - confirm this is intended.
  workers.each(&:join)
end
|
111
|
+
|
112
|
+
# @param [Array<Package::Mixin::Nameable>] file_sets
|
113
|
+
# @param [Array<String>, nil] only
|
114
|
+
# @param [Array<String>] without
|
115
|
+
# @return [Corindon::Result::Result]
|
116
|
+
def filter_file_sets(file_sets, only:, without:)
  # Selection rules:
  #   * only == nil -> start from every file set
  #   * only given  -> start from the named file sets, in the order of +only+;
  #                    an unknown name prints a message and yields a Failure
  #   * without     -> names removed from the selection in every case
  #
  # No filtering on #data_path happens here: version-2 file sets do not
  # expose it, and load_v1 applies that filter itself after unwrapping.
  sets = file_sets

  unless only.nil?
    sets_by_name = file_sets.map { |fs| [fs.name, fs] }.to_h
    missing = only.reject { |name| sets_by_name.key?(name) }

    unless missing.empty?
      message = "Datasources #{missing.join(', ')} don't exist!"
      puts "[ NOK ] #{message}"

      return Failure(StandardError.new(message))
    end

    sets = sets_by_name.values_at(*only)
  end

  Success(sets.reject { |fs| without.include?(fs.name) })
end
|
59
137
|
|
60
138
|
# @return [Database::Factory]
|
61
139
|
attr_reader :database_factory
|
@@ -65,44 +143,25 @@ module Lexicon
|
|
65
143
|
attr_reader :file_loader
|
66
144
|
# @return [String]
|
67
145
|
attr_reader :database_url
|
146
|
+
# @return [TableLocker]
|
147
|
+
attr_reader :table_locker
|
148
|
+
# @return [Psql]
|
149
|
+
attr_reader :psql
|
68
150
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
151
|
+
# @param [Pathname] file
|
152
|
+
# @param [String] into
|
153
|
+
# @param [String] schema
|
154
|
+
def load_csv(file, into:, schema:)
  # psql meta-command: the file is decompressed with zcat and its output is
  # read as CSV into "schema"."into".
  copy_command = %(\\copy "#{schema}"."#{into}" FROM PROGRAM 'zcat #{file}' WITH csv\n)

  psql.execute_raw(copy_command)
end
|
77
159
|
|
78
|
-
|
79
|
-
def lock_tables(package)
|
160
|
+
# Runs the content of each structure file as a query on a fresh database
# instance, inside a prepend_search_path block for the given schema.
#
# @param [Array<Pathname, String>] files paths relative to +dir+
# @param [String] schema
# @param [Pathname] dir directory the file paths are resolved against
def load_structure_files(files, schema:, dir:)
  connection = database_factory.new_instance(url: database_url)

  connection.prepend_search_path(schema) do
    files.each { |file| connection.query(dir.join(file).read) }
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lexicon
|
4
|
+
module Common
|
5
|
+
module Production
|
6
|
+
# Makes loaded tables read-only by attaching a trigger that raises on any
# INSERT, UPDATE, DELETE or TRUNCATE statement.
class TableLocker
  # NOTE(review): version_to_schema is not defined in this class; presumably it comes from this mixin - confirm.
  include Mixin::SchemaNamer

  # @param [Database::Factory] database_factory
  # @param [String] database_url
  def initialize(database_factory:, database_url:)
    @database_factory, @database_url = database_factory, database_url
  end

  # Creates (or replaces) the deny_changes function in the package's schema,
  # then creates one deny_changes trigger per given table.
  #
  # @param [Package::Package] package
  # @param [Array<String>] tables
  def lock_tables(package:, tables: [])
    connection = database_factory.new_instance(url: database_url)
    schema = version_to_schema(package.version)

    connection.prepend_search_path(schema) do
      connection.query(deny_changes_function_sql(schema))

      tables.each do |table_name|
        connection.query(deny_changes_trigger_sql(schema, table_name))
      end
    end
  end

  private

  # @return [Database::Factory]
  attr_reader :database_factory
  # @return [String]
  attr_reader :database_url

  # @param [String] schema
  # @return [String] SQL defining the trigger function that rejects every change
  def deny_changes_function_sql(schema)
    <<~SQL
      CREATE OR REPLACE FUNCTION #{schema}.deny_changes()
      RETURNS TRIGGER
      AS $$
      BEGIN
      RAISE EXCEPTION '% denied on % (master data)', TG_OP, TG_RELNAME;
      END;
      $$
      LANGUAGE plpgsql;
    SQL
  end

  # @param [String] schema
  # @param [String] table_name
  # @return [String] SQL attaching the deny_changes trigger to the table
  def deny_changes_trigger_sql(schema, table_name)
    <<~SQL
      CREATE TRIGGER deny_changes
      BEFORE INSERT
      OR UPDATE
      OR DELETE
      OR TRUNCATE
      ON #{schema}.#{table_name}
      FOR EACH STATEMENT
      EXECUTE PROCEDURE #{schema}.deny_changes()
    SQL
  end
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|