lexicon-common 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,28 @@
# frozen_string_literal: true

module Lexicon
  module Common
    module Package
      module V1
        # Read-only description of one file set of a version-1 package:
        # an identifier, a display name, the structure and data entries, and the table list.
        class SourceFileSet
          include Mixin::Nameable

          # @return [String]
          attr_reader :id
          # @return [String]
          attr_reader :name
          # @return [String] the value received as +structure:+
          attr_reader :structure_path
          # @return [String] the value received as +data:+
          attr_reader :data_path
          # @return [Array<String>]
          attr_reader :tables

          # @param [String] id
          # @param [String] name
          # @param [String] structure exposed afterwards as {#structure_path}
          # @param [String] data exposed afterwards as {#data_path}
          # @param [Array<String>] tables
          def initialize(id:, name:, structure:, data:, tables:)
            @id = id
            @name = name
            @tables = tables
            @structure_path = structure
            @data_path = data
          end
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
# frozen_string_literal: true

module Lexicon
  module Common
    module Package
      module V2
        # Version-2 package: a Common::Package::Package whose schema version is fixed to 2
        # and whose content is described by a list of file sets.
        #
        # +valid?+ is inherited as-is from the parent class (the former override only
        # delegated to +super+).
        class Package < Common::Package::Package
          # @return [Array<SourceFileSet>]
          attr_reader :file_sets

          # @param [Semantic::Version] version
          # @param [Pathname] spec_file
          # @param [Pathname] checksum_file
          # @param [Pathname] dir
          # @param [Array<SourceFileSet>] file_sets
          def initialize(version:, spec_file:, checksum_file:, dir:, file_sets:)
            super(
              checksum_file: checksum_file,
              dir: dir,
              spec_file: spec_file,
              schema_version: 2,
              version: version
            )

            @file_sets = file_sets
          end

          # Every file referenced by the file sets: for each set, its structure file
          # followed by its table data files.
          #
          # @return [Array<PackageFile>]
          def files
            file_sets.flat_map { |fs| file_set_files(fs) }
          end

          # @return [Pathname] the +data+ directory under the package directory
          def data_dir
            dir.join('data')
          end

          private

          # @param [SourceFileSet] file_set
          # @return [Array<PackageFile>] the structure file first, then one entry per table file
          def file_set_files(file_set)
            # Only the last path component ('data') is kept, so the resulting paths
            # are relative rather than prefixed with the package directory.
            relative_data_dir = data_dir.basename

            structure_file = PackageFile.new_structure(relative_data_dir.join(file_set.structure))
            table_files = file_set.tables.values.flatten(1).map do |table_file|
              PackageFile.new_data(relative_data_dir.join(table_file))
            end

            [structure_file, *table_files]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,72 @@
# frozen_string_literal: true

module Lexicon
  module Common
    module Package
      module V2
        # Builds a version-2 package incrementally: starts with no file sets,
        # creates the data directory, and moves added files into it.
        class PackageBuilder < Package
          # Creates the builder and makes sure the data directory exists.
          #
          # @param [Semantic::Version] version
          # @param [Pathname] dir
          def initialize(version:, dir:)
            super(
              file_sets: [],
              version: version,
              dir: dir,
              checksum_file: dir.join(CHECKSUM_FILE_NAME),
              spec_file: dir.join(SPEC_FILE_NAME)
            )

            FileUtils.mkdir_p(data_dir)
          end

          # Registers a new file set and moves its files into the data directory.
          #
          # @param [String] id
          # @param [String] name
          # @param [Pathname] structure
          #   Takes ownership of the file (moves it to the correct folder)
          # @param [Hash{String=>Array<Pathname>}] tables
          #   Takes ownership of the files (moves them to the correct folder)
          # @return [Array<SourceFileSet>] the builder's file sets, including the new one
          def add_file_set(id, name:, structure:, tables:)
            structure_name = structure_file_name(id)
            FileUtils.mv(structure.to_s, data_dir.join(structure_name).to_s)

            table_data = tables.map do |table_name, files|
              # Files of a table are renamed "<table>_<position>.csv.gz", position starting at 0.
              file_names = files.each_with_index.map do |file, index|
                file_name = "#{table_name}_#{index}.csv.gz"
                FileUtils.mv(file.to_s, data_dir.join(file_name).to_s)

                file_name
              end

              [table_name, file_names]
            end

            file_sets << SourceFileSet.new(
              id: id,
              name: name,
              structure: structure_name,
              tables: table_data.to_h
            )
          end

          # @return [Package] a plain package built from the builder's current state
          def as_package
            Package.new(
              checksum_file: checksum_file,
              dir: dir,
              file_sets: file_sets,
              spec_file: spec_file,
              version: version
            )
          end

          private

          # @param [String] id
          # @return [String] file name used for the structure file of the given file set
          def structure_file_name(id)
            "#{id}__structure.sql"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
# frozen_string_literal: true

module Lexicon
  module Common
    module Package
      module V2
        # Read-only description of one file set of a version-2 package:
        # an identifier, a display name, a structure entry, and the files of each table.
        class SourceFileSet
          include Mixin::Nameable

          # @return [String]
          attr_reader :id
          # @return [String]
          attr_reader :name
          # @return [String]
          attr_reader :structure
          # @return [Hash{String=>Array<String>}] frozen in the constructor
          attr_reader :tables

          # @param [String] id
          # @param [String] name
          # @param [String] structure
          # @param [Hash{String=>Array<String>}] tables
          #   The received object itself is frozen (the nested arrays are not).
          def initialize(id:, name:, structure:, tables:)
            @tables = tables.freeze
            @structure = structure
            @name = name
            @id = id
          end
        end
      end
    end
  end
end
@@ -1,61 +1,139 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ using Corindon::Result::Ext
4
+
3
5
  module Lexicon
4
6
  module Common
5
7
  module Production
6
8
  class DatasourceLoader
9
+ include Mixin::LoggerAware
7
10
  include Mixin::SchemaNamer
11
+
# Stores the collaborators used by the loading methods.
#
# @param [ShellExecutor] shell
# @param [Database::Factory] database_factory
# @param [FileLoader] file_loader
# @param [String] database_url
# @param [TableLocker] table_locker
# @param [Psql] psql
def initialize(shell:, database_factory:, file_loader:, database_url:, table_locker:, psql:)
  # Command-line / loading helpers
  @shell = shell
  @psql = psql
  @file_loader = file_loader

  # Database access
  @database_factory = database_factory
  @database_url = database_url
  @table_locker = table_locker
end
18
26
 
# Loads a package by dispatching on its schema version.
# An unsupported schema version is only reported through +log+; nothing is loaded.
#
# @param [Package::Package] package
# @param [Array<String>, nil] only
#   If nil, all datasets are loaded.
#   If present, only listed datasets are loaded.
#   Structures are ALWAYS loaded
# @param [Array<String>] without
def load_package(package, only: nil, without: [])
  schema_version = package.schema_version

  case schema_version
  when 1 then load_v1(package, only: only, without: without)
  when 2 then load_v2(package, only: only, without: without)
  else
    log("Schema version #{schema_version} is not supported")
  end
end
43
+
44
+ private
30
45
 
31
- missing, present = only.map { |name| [name, sets_by_name.fetch(name, nil)] }
32
- .partition { |(_name, value)| value.nil? }
# Loads a version-1 package: structure files first, then one thread per file set
# for the data, and finally locks the tables of every file set of the package.
#
# @param [Package::V1::Package] package
# @param [Array<String>, nil] only
# @param [Array<String>] without
def load_v1(package, only: nil, without: [])
  # NOTE(review): `unwrap!` presumably raises when the filter returned a Failure — confirm.
  filtered = filter_file_sets(package.file_sets, only: only, without: without).unwrap!
  file_sets = filtered.select(&:data_path)

  structure_paths = package.files.select(&:structure?).map(&:path)
  load_structure_files(structure_paths, schema: version_to_schema(package.version), dir: package.dir)

  # Names still being loaded; shared by the worker threads for progress output.
  remaining = ::Concurrent::Set.new(file_sets.map(&:name))

  workers = file_sets.map do |fs|
    Thread.new do
      file_loader.load_file(package.data_path(fs))
      remaining.delete(fs.name)

      puts '[ OK ] '.green + fs.name.yellow + ", #{remaining_message(remaining)}"
    end
  end
  workers.each(&:join)

  # Locking covers the tables of ALL file sets of the package, not only the filtered ones.
  table_locker.lock_tables(package: package, tables: package.file_sets.flat_map(&:tables))
end
46
71
 
47
- file_sets.map do |fs|
48
- Thread.new do
49
- puts "Loading #{fs.name}"
50
- file_loader.load_file(package.data_path(fs))
51
- puts '[ OK ] '.green + fs.name.yellow
# Builds the progress suffix printed after each loaded item.
#
# @param [#empty?, #size, #to_a] remaining items that are not loaded yet
# @return [String]
#   'All done!' when nothing is left, a simple count when more than 5 items are left,
#   otherwise the sorted, comma-separated list of the remaining items.
def remaining_message(remaining)
  return 'All done!' if remaining.empty?
  return "#{remaining.size} remaining" if remaining.size > 5

  "Remaining: #{remaining.to_a.sort.join(', ')}"
end
54
81
 
55
- lock_tables(package)
56
- end
# Loads a version-2 package: structure files first, then one thread per data file,
# each copying its file into the table it belongs to.
#
# NOTE(review): unlike load_v1, no table locking happens here — confirm this is intended.
#
# @param [Package::V2::Package] package
# @param [Array<String>, nil] only
# @param [Array<String>] without
def load_v2(package, only: nil, without: [])
  # NOTE(review): `unwrap!` presumably raises when the filter returned a Failure — confirm.
  filtered = filter_file_sets(package.file_sets, only: only, without: without).unwrap!
  file_sets = filtered.select { |fs| fs.tables.any? }

  schema = version_to_schema(package.version)

  structure_paths = package.files.select(&:structure?).map(&:path)
  load_structure_files(structure_paths, schema: schema, dir: package.dir)

  # One [table name, file] pair per data file, in file-set then table order.
  jobs = file_sets.flat_map do |fs|
    fs.tables.flat_map do |table_name, files|
      files.map { |file| [table_name, file] }
    end
  end

  # Files still being loaded; shared by the worker threads for progress output.
  remaining = ::Concurrent::Set.new(jobs.map(&:last))

  workers = jobs.map do |table_name, file|
    Thread.new do
      load_csv(package.data_dir.join(file), into: table_name, schema: schema)
      remaining.delete(file)

      puts '[ OK ] '.green + file.to_s.yellow + ", #{remaining_message(remaining)}"
    end
  end

  workers.each(&:join)
end
111
+
# Narrows a list of file sets by name.
#
# No data-related filtering is done here: callers apply their own
# (file sets of different schema versions do not expose the same readers,
# e.g. version-2 file sets have no +data_path+).
#
# @param [Array<Package::Mixin::Nameable>] file_sets
# @param [Array<String>, nil] only
#   If nil, every file set is kept; otherwise only the named ones, in the order of +only+.
# @param [Array<String>] without names removed from the result
# @return [Corindon::Result::Result]
#   Success wrapping the selected file sets, or Failure wrapping a StandardError
#   when a name listed in +only+ matches no file set (the problem is also printed).
def filter_file_sets(file_sets, only:, without:)
  selected =
    if only.nil?
      file_sets
    else
      sets_by_name = file_sets.map { |fs| [fs.name, fs] }.to_h
      missing = only.reject { |name| sets_by_name.key?(name) }

      if missing.any?
        message = "Datasources #{missing.join(', ')} don't exist!"
        puts "[ NOK ] #{message}"

        return Failure(StandardError.new(message))
      end

      only.map { |name| sets_by_name.fetch(name) }
    end

  Success(selected.reject { |fs| without.include?(fs.name) })
end
59
137
 
60
138
  # @return [Database::Factory]
61
139
  attr_reader :database_factory
@@ -65,44 +143,25 @@ module Lexicon
65
143
  attr_reader :file_loader
66
144
  # @return [String]
67
145
  attr_reader :database_url
146
+ # @return [TableLocker]
147
+ attr_reader :table_locker
148
+ # @return [Psql]
149
+ attr_reader :psql
68
150
 
69
- def load_structure_files(files, schema:)
70
- database = database_factory.new_instance(url: database_url)
71
- database.prepend_search_path(schema) do
72
- files.each do |file|
73
- database.query(file.read)
74
- end
75
- end
# Copies a gzipped CSV file into a table by sending a `\copy … FROM PROGRAM 'zcat …'`
# command through psql.
#
# NOTE(review): schema, table and file are interpolated without escaping; this assumes
# they never contain quote characters — confirm against callers.
#
# @param [Pathname] file
# @param [String] into name of the target table
# @param [String] schema
def load_csv(file, into:, schema:)
  copy_command = <<~SQL
    \\copy "#{schema}"."#{into}" FROM PROGRAM 'zcat #{file}' WITH csv
  SQL

  psql.execute_raw(copy_command)
end
77
159
 
78
- # @param [Package::Package] package
79
- def lock_tables(package)
# Runs every structure file against a fresh database instance,
# with the given schema prepended to the search path.
#
# @param [Array<Pathname, String>] files paths resolved against +dir+
# @param [String] schema
# @param [Pathname] dir
def load_structure_files(files, schema:, dir:)
  database = database_factory.new_instance(url: database_url)

  database.prepend_search_path(schema) do
    files.each { |relative_path| database.query(dir.join(relative_path).read) }
  end
end
@@ -0,0 +1,58 @@
# frozen_string_literal: true

module Lexicon
  module Common
    module Production
      # Makes tables read-only by attaching a statement-level trigger
      # that raises on INSERT, UPDATE, DELETE and TRUNCATE.
      class TableLocker
        include Mixin::SchemaNamer

        # @param [Database::Factory] database_factory
        # @param [String] database_url
        def initialize(database_factory:, database_url:)
          @database_factory = database_factory
          @database_url = database_url
        end

        # Creates (or replaces) the `deny_changes` function in the package's schema,
        # then creates a `deny_changes` trigger on each given table.
        #
        # NOTE(review): the trigger is created with a plain CREATE TRIGGER, so running this
        # twice on the same table presumably fails — confirm whether that matters.
        #
        # @param [Package::Package] package
        # @param [Array<String>] tables
        def lock_tables(package:, tables: [])
          database = database_factory.new_instance(url: database_url)
          schema = version_to_schema(package.version)

          database.prepend_search_path(schema) do
            database.query(deny_changes_function_sql(schema))

            tables.each do |table_name|
              database.query(deny_changes_trigger_sql(schema, table_name))
            end
          end
        end

        private

        # @return [Database::Factory]
        attr_reader :database_factory
        # @return [String]
        attr_reader :database_url

        # @param [String] schema
        # @return [String] SQL defining the trigger function that rejects any change
        def deny_changes_function_sql(schema)
          <<~SQL
            CREATE OR REPLACE FUNCTION #{schema}.deny_changes()
            RETURNS TRIGGER
            AS $$
            BEGIN
            RAISE EXCEPTION '% denied on % (master data)', TG_OP, TG_RELNAME;
            END;
            $$
            LANGUAGE plpgsql;
          SQL
        end

        # @param [String] schema
        # @param [String] table_name
        # @return [String] SQL attaching the `deny_changes` trigger to the given table
        def deny_changes_trigger_sql(schema, table_name)
          <<~SQL
            CREATE TRIGGER deny_changes
            BEFORE INSERT
            OR UPDATE
            OR DELETE
            OR TRUNCATE
            ON #{schema}.#{table_name}
            FOR EACH STATEMENT
            EXECUTE PROCEDURE #{schema}.deny_changes()
          SQL
        end
      end
    end
  end
end