lexicon-common 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lexicon-common.gemspec +2 -0
- data/lib/lexicon-common.rb +3 -0
- data/lib/lexicon/common.rb +1 -0
- data/lib/lexicon/common/mixin/logger_aware.rb +2 -2
- data/lib/lexicon/common/mixin/nameable.rb +16 -0
- data/lib/lexicon/common/package/directory_package_loader.rb +56 -12
- data/lib/lexicon/common/package/package.rb +11 -43
- data/lib/lexicon/common/package/package_file.rb +50 -0
- data/lib/lexicon/common/package/v1/package.rb +83 -0
- data/lib/lexicon/common/package/v1/package_builder.rb +70 -0
- data/lib/lexicon/common/package/v1/source_file_set.rb +28 -0
- data/lib/lexicon/common/package/v2/package.rb +56 -0
- data/lib/lexicon/common/package/v2/package_builder.rb +72 -0
- data/lib/lexicon/common/package/v2/source_file_set.rb +26 -0
- data/lib/lexicon/common/production/datasource_loader.rb +119 -60
- data/lib/lexicon/common/production/table_locker.rb +58 -0
- data/lib/lexicon/common/psql.rb +47 -0
- data/lib/lexicon/common/remote/package_downloader.rb +61 -43
- data/lib/lexicon/common/remote/package_uploader.rb +37 -28
- data/lib/lexicon/common/remote/s3_client.rb +6 -0
- data/lib/lexicon/common/schema/validator_factory.rb +1 -1
- data/lib/lexicon/common/shell_executor.rb +0 -3
- data/lib/lexicon/common/version.rb +1 -1
- data/resources/lexicon.schema.json +116 -31
- metadata +39 -3
- data/lib/lexicon/common/package/package_builder.rb +0 -68
- data/lib/lexicon/common/package/source_file_set.rb +0 -24
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lexicon
|
4
|
+
module Common
|
5
|
+
module Package
|
6
|
+
module V1
|
7
|
+
class SourceFileSet
  include Mixin::Nameable

  # @return [String]
  attr_reader :id
  # @return [String]
  attr_reader :name
  # @return [String] value received through the `structure:` keyword
  attr_reader :structure_path
  # @return [String] value received through the `data:` keyword
  attr_reader :data_path
  # @return [Array<String>]
  attr_reader :tables

  # Plain value holder describing one file set of a schema-version-1 package.
  # The arguments are stored as given; nothing is copied, frozen or validated.
  #
  # @param [String] id
  # @param [String] name
  # @param [String] structure exposed as {#structure_path}
  # @param [String] data exposed as {#data_path}
  # @param [Array<String>] tables
  def initialize(id:, name:, structure:, data:, tables:)
    @tables = tables
    @data_path = data
    @structure_path = structure
    @name = name
    @id = id
  end
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lexicon
|
4
|
+
module Common
|
5
|
+
module Package
|
6
|
+
module V2
|
7
|
+
class Package < Common::Package::Package
  # @return [Array<SourceFileSet>]
  attr_reader :file_sets

  # Builds a package description that always reports schema version 2.
  #
  # @param [Semantic::Version] version
  # @param [Pathname] spec_file
  # @param [Pathname] checksum_file
  # @param [Pathname] dir
  # @param [Array<SourceFileSet>] file_sets
  def initialize(version:, spec_file:, checksum_file:, dir:, file_sets:)
    super(
      version: version,
      schema_version: 2,
      spec_file: spec_file,
      dir: dir,
      checksum_file: checksum_file,
    )

    @file_sets = file_sets
  end

  # Delegates entirely to the parent implementation.
  def valid?
    super
  end

  # Every file referenced by the file sets of this package.
  #
  # @return [Array<PackageFile>]
  def files
    file_sets.flat_map(&method(:file_set_files))
  end

  # @return [Pathname] the 'data' entry below the package directory
  def data_dir
    dir.join('data')
  end

  private

  # Lists the structure file first, followed by every table file of the set.
  # Paths are prefixed with the last component of {#data_dir}, not with the
  # full package directory.
  #
  # @param [SourceFileSet] file_set
  # @return [Array<PackageFile>]
  def file_set_files(file_set)
    prefix = data_dir.basename

    entries = [PackageFile.new_structure(prefix.join(file_set.structure))]
    file_set.tables.values.flatten(1).each do |table_file|
      entries << PackageFile.new_data(prefix.join(table_file))
    end

    entries
  end
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lexicon
|
4
|
+
module Common
|
5
|
+
module Package
|
6
|
+
module V2
|
7
|
+
class PackageBuilder < Package
  # Starts an empty version-2 package rooted at +dir+ and makes sure its data
  # directory exists on disk.
  #
  # @param [Semantic::Version] version
  # @param [Pathname] dir
  def initialize(version:, dir:)
    super(
      file_sets: [],
      version: version,
      dir: dir,
      checksum_file: dir.join(CHECKSUM_FILE_NAME),
      spec_file: dir.join(SPEC_FILE_NAME),
    )

    FileUtils.mkdir_p(data_dir)
  end

  # Registers a new file set and moves its files into the data directory.
  #
  # @param [String] id
  # @param [String] name
  # @param [Pathname] structure
  #   Takes ownership of the file (moves it to the correct folder)
  # @param [Hash{String=>Array<Pathname>}] tables
  #   Takes ownership of the files (moves them to the correct folder)
  # @return [Array<SourceFileSet>] the file sets registered so far
  def add_file_set(id, name:, structure:, tables:)
    structure_name = structure_file_name(id)
    FileUtils.mv(structure.to_s, data_dir.join(structure_name).to_s)

    table_data = tables.map do |table_name, files|
      # Files of one table are renamed "<table>_<position>.csv.gz", numbered from 0.
      file_names = files.each_with_index.map do |file, index|
        file_name = "#{table_name}_#{index}.csv.gz"
        FileUtils.mv(file.to_s, data_dir.join(file_name).to_s)

        file_name
      end

      [table_name, file_names]
    end

    file_sets << SourceFileSet.new(
      id: id,
      name: name,
      structure: structure_name,
      tables: table_data.to_h
    )
  end

  # @return [Package] a plain package sharing this builder's paths and file sets
  def as_package
    Package.new(
      checksum_file: checksum_file,
      dir: dir,
      file_sets: file_sets,
      spec_file: spec_file,
      version: version,
    )
  end

  private

  # @param [String] id
  # @return [String] file name used for the structure file of file set +id+
  def structure_file_name(id)
    "#{id}__structure.sql"
  end
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lexicon
|
4
|
+
module Common
|
5
|
+
module Package
|
6
|
+
module V2
|
7
|
+
class SourceFileSet
  include Mixin::Nameable

  # @return [String]
  attr_reader :id
  # @return [String]
  attr_reader :name
  # @return [String]
  attr_reader :structure
  # @return [Hash{String=>Array<String>}] frozen by the constructor
  attr_reader :tables

  # Plain value holder describing one file set of a schema-version-2 package.
  #
  # @param [String] id
  # @param [String] name
  # @param [String] structure
  # @param [Hash{String=>Array<String>}] tables
  #   The hash passed in is frozen in place (no copy is made).
  def initialize(id:, name:, structure:, tables:)
    @tables = tables.freeze
    @structure = structure
    @name = name
    @id = id
  end
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -1,61 +1,139 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
using Corindon::Result::Ext
|
4
|
+
|
3
5
|
module Lexicon
|
4
6
|
module Common
|
5
7
|
module Production
|
6
8
|
class DatasourceLoader
|
9
|
+
include Mixin::LoggerAware
|
7
10
|
include Mixin::SchemaNamer
|
11
|
+
|
8
12
|
# @param [ShellExecutor] shell
|
9
13
|
# @param [Database::Factory] database_factory
|
10
14
|
# @param [FileLoader] file_loader
|
11
15
|
# @param [String] database_url
|
12
|
-
|
16
|
+
# @param [TableLocker] table_locker
|
17
|
+
# @param [Psql] psql
|
18
|
+
def initialize(shell:, database_factory:, file_loader:, database_url:, table_locker:, psql:)
  # Collaborators are only stored here; nothing is connected or validated.
  @psql = psql
  @table_locker = table_locker
  @database_url = database_url
  @file_loader = file_loader
  @database_factory = database_factory
  @shell = shell
end
|
18
26
|
|
19
27
|
# @param [Package::Package] package
|
20
28
|
# @param [Array<String>, nil] only
|
21
|
-
# @param [Array<String>] without
|
22
29
|
# If nil, all datasets are loaded.
|
23
30
|
# If present, only listed datasets are loaded.
|
24
31
|
# Structures are ALWAYS loaded
|
32
|
+
# @param [Array<String>] without
|
25
33
|
def load_package(package, only: nil, without: [])
  # Dispatch on the schema version declared by the package. Any other version
  # is only reported through the logger; nothing is loaded in that case.
  schema_version = package.schema_version

  return load_v1(package, only: only, without: without) if schema_version == 1
  return load_v2(package, only: only, without: without) if schema_version == 2

  log("Schema version #{schema_version} is not supported")
end
|
43
|
+
|
44
|
+
private
|
30
45
|
|
31
|
-
|
32
|
-
|
46
|
+
# @param [Package::V1::Package] package
|
47
|
+
def load_v1(package, only: nil, without: [])
  # Keep only the requested file sets that actually declare a data file.
  requested = filter_file_sets(package.file_sets, only: only, without: without).unwrap!
  loadable = requested.select(&:data_path)

  # Structure files are loaded for the whole package, whatever the filters are.
  structure_paths = package.files.select(&:structure?).map(&:path)
  load_structure_files(structure_paths, schema: version_to_schema(package.version), dir: package.dir)

  pending = ::Concurrent::Set.new(loadable.map(&:name))

  # One thread per file set; progress is printed as each one finishes.
  workers = loadable.map do |file_set|
    Thread.new do
      file_loader.load_file(package.data_path(file_set))
      pending.delete(file_set.name)

      puts '[ OK ] '.green + file_set.name.yellow + ", #{remaining_message(pending)}"
    end
  end
  workers.each(&:join)

  # NOTE(review): tables of every file set of the package are locked here,
  # including sets excluded by only/without - confirm this is intended.
  all_tables = package.file_sets.flat_map(&:tables)
  table_locker.lock_tables(package: package, tables: all_tables)
end
|
46
71
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
72
|
+
# Builds the progress suffix printed after each finished load.
#
# @param [#size, #to_a] remaining names still waiting to be loaded
# @return [String] 'All done!' when nothing is left, a bare count above five
#   entries, otherwise the sorted, comma-separated list of names
def remaining_message(remaining)
  count = remaining.size

  return 'All done!' if count.zero?
  return "#{count} remaining" if count > 5

  "Remaining: #{remaining.to_a.sort.join(', ')}"
end
|
54
81
|
|
55
|
-
|
56
|
-
|
82
|
+
# @param [Package::V2::Package] package
|
83
|
+
# @param [Array<String>, nil] only
|
84
|
+
# @param [Array<String>] without
|
85
|
+
def load_v2(package, only: nil, without: [])
  # Keep only the requested file sets that declare at least one table.
  requested = filter_file_sets(package.file_sets, only: only, without: without).unwrap!
  file_sets = requested.select { |file_set| file_set.tables.any? }

  schema = version_to_schema(package.version)

  # Structure files are loaded for the whole package, whatever the filters are.
  structure_paths = package.files.select(&:structure?).map(&:path)
  load_structure_files(structure_paths, schema: schema, dir: package.dir)

  pending = ::Concurrent::Set.new(file_sets.flat_map { |file_set| file_set.tables.values.flatten(1) })

  # Flatten the file sets into one (table, file) job per data file.
  jobs = file_sets.flat_map do |file_set|
    file_set.tables.flat_map do |table_name, files|
      files.map { |file| [table_name, file] }
    end
  end

  # One thread per data file; progress is printed as each one finishes.
  workers = jobs.map do |table_name, file|
    Thread.new do
      load_csv(package.data_dir.join(file), into: table_name, schema: schema)
      pending.delete(file)

      puts '[ OK ] '.green + file.to_s.yellow + ", #{remaining_message(pending)}"
    end
  end

  workers.each(&:join)
end
|
111
|
+
|
112
|
+
# @param [Array<Package::Mixin::Nameable>] file_sets
|
113
|
+
# @param [Array<String>, nil] only
|
114
|
+
# @param [Array<String>] without
|
115
|
+
# @return [Corindon::Result::Result]
|
116
|
+
# Restricts +file_sets+ to the names listed in +only+ (when given) and then
# drops the names listed in +without+.
#
# This method only relies on the +name+ of each file set, so it can be shared
# by every package schema version. Version-specific filtering (such as
# requiring a data path) is left to the caller.
#
# @param [Array<Package::Mixin::Nameable>] file_sets
# @param [Array<String>, nil] only
#   If nil, every file set is kept. Otherwise the result follows the order
#   of this list.
# @param [Array<String>] without
# @return [Corindon::Result::Result]
#   Success wrapping the selected file sets, or Failure wrapping a
#   StandardError when a name in +only+ matches no file set.
def filter_file_sets(file_sets, only:, without:)
  selected = file_sets

  unless only.nil?
    sets_by_name = file_sets.map { |fs| [fs.name, fs] }.to_h
    missing = only.reject { |name| sets_by_name.key?(name) }

    unless missing.empty?
      message = "Datasources #{missing.join(', ')} don't exist!"
      puts "[ NOK ] #{message}"

      return Failure(StandardError.new(message))
    end

    selected = sets_by_name.values_at(*only)
  end

  Success(selected.reject { |fs| without.include?(fs.name) })
end
|
59
137
|
|
60
138
|
# @return [Database::Factory]
|
61
139
|
attr_reader :database_factory
|
@@ -65,44 +143,25 @@ module Lexicon
|
|
65
143
|
attr_reader :file_loader
|
66
144
|
# @return [String]
|
67
145
|
attr_reader :database_url
|
146
|
+
# @return [TableLocker]
|
147
|
+
attr_reader :table_locker
|
148
|
+
# @return [Psql]
|
149
|
+
attr_reader :psql
|
68
150
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
151
|
+
# @param [Pathname] file
|
152
|
+
# @param [String] into
|
153
|
+
# @param [String] schema
|
154
|
+
def load_csv(file, into:, schema:)
  # The file is piped through zcat into a psql \copy targeting "schema"."into".
  # NOTE(review): +file+ is interpolated into the command without any
  # escaping - confirm paths can never contain quotes or whitespace.
  copy_command = <<~SQL
    \\copy "#{schema}"."#{into}" FROM PROGRAM 'zcat #{file}' WITH csv
  SQL

  psql.execute_raw(copy_command)
end
|
77
159
|
|
78
|
-
|
79
|
-
def lock_tables(package)
|
160
|
+
# Runs the content of each structure file as a query, in the given order,
# inside a prepend_search_path(schema) block on a fresh database instance.
#
# @param [Array<String, Pathname>] files paths resolved against +dir+
# @param [String] schema
# @param [Pathname] dir
def load_structure_files(files, schema:, dir:)
  connection = database_factory.new_instance(url: database_url)

  connection.prepend_search_path(schema) do
    files.each { |relative_path| connection.query(dir.join(relative_path).read) }
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lexicon
|
4
|
+
module Common
|
5
|
+
module Production
|
6
|
+
class TableLocker
  include Mixin::SchemaNamer

  # @param [Database::Factory] database_factory
  # @param [String] database_url
  def initialize(database_factory:, database_url:)
    @database_url = database_url
    @database_factory = database_factory
  end

  # Creates (or replaces) the deny_changes() function in the schema derived
  # from the package version, then creates a deny_changes trigger on each
  # listed table so INSERT, UPDATE, DELETE and TRUNCATE raise an exception.
  #
  # @param [Package::Package] package
  # @param [Array<String>] tables
  def lock_tables(package:, tables: [])
    connection = database_factory.new_instance(url: database_url)
    schema = version_to_schema(package.version)

    connection.prepend_search_path(schema) do
      connection.query(deny_changes_function_sql(schema))

      tables.each do |table_name|
        connection.query(deny_changes_trigger_sql(schema, table_name))
      end
    end
  end

  private

  # @return [Database::Factory]
  attr_reader :database_factory
  # @return [String]
  attr_reader :database_url

  # @param [String] schema
  # @return [String] SQL defining the trigger function that rejects changes
  def deny_changes_function_sql(schema)
    <<~SQL
      CREATE OR REPLACE FUNCTION #{schema}.deny_changes()
      RETURNS TRIGGER
      AS $$
      BEGIN
      RAISE EXCEPTION '% denied on % (master data)', TG_OP, TG_RELNAME;
      END;
      $$
      LANGUAGE plpgsql;
    SQL
  end

  # @param [String] schema
  # @param [String] table_name
  # @return [String] SQL attaching the deny_changes trigger to one table
  def deny_changes_trigger_sql(schema, table_name)
    <<~SQL
      CREATE TRIGGER deny_changes
      BEFORE INSERT
      OR UPDATE
      OR DELETE
      OR TRUNCATE
      ON #{schema}.#{table_name}
      FOR EACH STATEMENT
      EXECUTE PROCEDURE #{schema}.deny_changes()
    SQL
  end
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|