lexicon-common 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lexicon
4
+ module Common
5
+ module Package
6
+ module V1
7
+ class SourceFileSet
8
+ include Mixin::Nameable
9
+
10
+ attr_reader :id, :name, :structure_path, :data_path, :tables
11
+
12
+ # @param [String] id stored as-is, exposed through #id
13
+ # @param [String] name stored as-is, exposed through #name
14
+ # @param [String] structure stored as-is, exposed through #structure_path
15
+ # @param [String] data exposed through #data_path (DatasourceLoader#load_v1 filters on its truthiness, so nil looks allowed — TODO confirm)
16
+ # @param [Array<String>] tables exposed through #tables; neither copied nor frozen
17
+ def initialize(id:, name:, structure:, data:, tables:)
18
+ @id = id
19
+ @name = name
20
+ @structure_path = structure
21
+ @data_path = data
22
+ @tables = tables
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lexicon
4
+ module Common
5
+ module Package
6
+ module V2
7
+ class Package < Common::Package::Package
8
+ # @param [Pathname] dir package root, forwarded to super (spec_file is forwarded the same way); #data_dir is its 'data' child
9
+ # @param [Pathname] checksum_file forwarded to super
10
+ # @param [Semantic::Version] version forwarded to super with schema_version fixed to 2; file_sets is kept on this instance
11
+ def initialize(version:, spec_file:, checksum_file:, dir:, file_sets:)
12
+ super(
13
+ checksum_file: checksum_file,
14
+ dir: dir,
15
+ spec_file: spec_file,
16
+ schema_version: 2,
17
+ version: version,
18
+ )
19
+
20
+ @file_sets = file_sets
21
+ end
22
+
23
+ def valid?
24
+ super
25
+ end
26
+
27
+ def files
28
+ file_sets.flat_map { |fs| file_set_files(fs) }
29
+ end
30
+
31
+ # @return [Array<SourceFileSet>] the collection handed to the constructor (same object, not a copy)
32
+ attr_reader :file_sets
33
+
34
+ def data_dir
35
+ dir.join('data')
36
+ end
37
+
38
+ private
39
+
40
+ # @param [SourceFileSet] file_set set whose #structure and #tables values are file names inside the data directory
41
+ # @return [Array<PackageFile>] the structure file first, then one data file per table file, all with paths prefixed by 'data'
42
+ def file_set_files(file_set)
43
+ relative_data_dir = data_dir.basename
44
+
45
+ structure_file = PackageFile.new_structure(relative_data_dir.join(file_set.structure))
46
+ table_files = file_set.tables
47
+ .values.flatten(1)
48
+ .map { |table_file| PackageFile.new_data(relative_data_dir.join(table_file)) }
49
+
50
+ [structure_file, *table_files]
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lexicon
4
+ module Common
5
+ module Package
6
+ module V2
7
+ class PackageBuilder < Package
8
+ def initialize(version:, dir:)
9
+ super(
10
+ file_sets: [],
11
+ version: version,
12
+ dir: dir,
13
+ checksum_file: dir.join(CHECKSUM_FILE_NAME),
14
+ spec_file: dir.join(SPEC_FILE_NAME),
15
+ )
16
+
17
+ FileUtils.mkdir_p(data_dir)
18
+ end
19
+
20
+ # @param [String] id
21
+ # @param [String] name
22
+ # @param [Pathname] structure
23
+ # Takes ownership of the file (moves it to the correct folder)
24
+ # @param [Hash{String=>Array<Pathname>}] tables
25
+ # Takes ownership of the files (moves them to the correct folder)
26
+ def add_file_set(id, name:, structure:, tables:)
27
+ # @type [Pathname] structure_file_path
28
+ structure_file_path = data_dir.join(structure_file_name(id))
29
+ FileUtils.mv(structure.to_s, structure_file_path.to_s)
30
+
31
+ table_data = tables.map do |table_name, files|
32
+ index = 0
33
+
34
+ file_names = files.map do |file|
35
+ file_name = "#{table_name}_#{index}.csv.gz"
36
+ FileUtils.mv(file.to_s, data_dir.join(file_name))
37
+ index += 1
38
+
39
+ file_name
40
+ end
41
+
42
+ [table_name, file_names]
43
+ end
44
+
45
+ file_sets << SourceFileSet.new(
46
+ id: id,
47
+ name: name,
48
+ structure: structure_file_name(id),
49
+ tables: table_data.to_h
50
+ )
51
+ end
52
+
53
+ def as_package
54
+ Package.new(
55
+ checksum_file: checksum_file,
56
+ dir: dir,
57
+ file_sets: file_sets,
58
+ spec_file: spec_file,
59
+ version: version,
60
+ )
61
+ end
62
+
63
+ private
64
+
65
+ def structure_file_name(id)
66
+ "#{id}__structure.sql"
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lexicon
4
+ module Common
5
+ module Package
6
+ module V2
7
+ class SourceFileSet
8
+ include Mixin::Nameable
9
+
10
+ attr_reader :id, :name, :structure, :tables
11
+
12
+ # @param [String] id stored as-is, exposed through #id
13
+ # @param [String] name stored as-is, exposed through #name
14
+ # @param [String] structure file name of the structure script; V2::Package joins it onto the data directory
15
+ # @param [Hash{String=>Array<String>}] tables table name => data file names; frozen in place, so the caller's Hash becomes frozen too
16
+ def initialize(id:, name:, structure:, tables:)
17
+ @id = id
18
+ @name = name
19
+ @structure = structure
20
+ @tables = tables.freeze
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -1,61 +1,139 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ using Corindon::Result::Ext
4
+
3
5
  module Lexicon
4
6
  module Common
5
7
  module Production
6
8
  class DatasourceLoader
9
+ include Mixin::LoggerAware
7
10
  include Mixin::SchemaNamer
11
+
8
12
  # @param [ShellExecutor] shell
9
13
  # @param [Database::Factory] database_factory
10
14
  # @param [FileLoader] file_loader
11
15
  # @param [String] database_url
12
- def initialize(shell:, database_factory:, file_loader:, database_url:)
16
+ # @param [TableLocker] table_locker
17
+ # @param [Psql] psql
18
+ def initialize(shell:, database_factory:, file_loader:, database_url:, table_locker:, psql:)
13
19
  @shell = shell
14
20
  @database_factory = database_factory
15
21
  @file_loader = file_loader
16
22
  @database_url = database_url
23
+ @table_locker = table_locker
24
+ @psql = psql
17
25
  end
18
26
 
19
27
  # @param [Package::Package] package
20
28
  # @param [Array<String>, nil] only
21
- # @param [Array<String>] without
22
29
  # If nil, all datasets are loaded.
23
30
  # If present, only listed datasets are loaded.
24
31
  # Structures are ALWAYS loaded
32
+ # @param [Array<String>] without
25
33
  def load_package(package, only: nil, without: [])
26
- file_sets = if only.nil?
27
- package.file_sets.select(&:data_path)
28
- else
29
- sets_by_name = package.file_sets.map { |fs| [fs.name, fs] }.to_h
34
+ case package.schema_version
35
+ when 1
36
+ load_v1(package, only: only, without: without)
37
+ when 2
38
+ load_v2(package, only: only, without: without)
39
+ else
40
+ log("Schema version #{package.schema_version} is not supported")
41
+ end
42
+ end
43
+
44
+ private
30
45
 
31
- missing, present = only.map { |name| [name, sets_by_name.fetch(name, nil)] }
32
- .partition { |(_name, value)| value.nil? }
46
+ # @param [Package::V1::Package] package
47
+ def load_v1(package, only: nil, without: [])
48
+ file_sets = filter_file_sets(package.file_sets, only: only, without: without)
49
+ .unwrap!
50
+ .select(&:data_path)
33
51
 
34
- if missing.any?
35
- puts "[ NOK ] Datasources #{missing.map(&:first).join(', ')} don't exist!"
36
- return
37
- end
52
+ load_structure_files(
53
+ package.files.select(&:structure?).map(&:path),
54
+ schema: version_to_schema(package.version),
55
+ dir: package.dir
56
+ )
38
57
 
39
- present.map(&:second)
40
- .select(&:data_path)
41
- end
58
+ remaining = ::Concurrent::Set.new(file_sets.map(&:name))
42
59
 
43
- file_sets = file_sets.reject { |fs| without.include?(fs.name) }
60
+ file_sets.map do |fs|
61
+ Thread.new do
62
+ file_loader.load_file(package.data_path(fs))
63
+ remaining.delete(fs.name)
64
+
65
+ puts '[ OK ] '.green + fs.name.yellow + ", #{remaining_message(remaining)}"
66
+ end
67
+ end.each(&:join)
44
68
 
45
- load_structure_files(package.structure_files, schema: version_to_schema(package.version))
69
+ table_locker.lock_tables(package: package, tables: package.file_sets.flat_map(&:tables))
70
+ end
46
71
 
47
- file_sets.map do |fs|
48
- Thread.new do
49
- puts "Loading #{fs.name}"
50
- file_loader.load_file(package.data_path(fs))
51
- puts '[ OK ] '.green + fs.name.yellow
72
+ def remaining_message(remaining)
73
+ if remaining.size.zero?
74
+ 'All done!'
75
+ elsif remaining.size > 5
76
+ "#{remaining.size} remaining"
77
+ else
78
+ "Remaining: #{remaining.to_a.sort.join(', ')}"
52
79
  end
53
- end.each(&:join)
80
+ end
54
81
 
55
- lock_tables(package)
56
- end
82
+ # @param [Package::V2::Package] package NOTE(review): unlike load_v1, no table_locker.lock_tables call follows the load — confirm this is intended
83
+ # @param [Array<String>, nil] only forwarded to filter_file_sets; the result is unwrapped with unwrap!
84
+ # @param [Array<String>] without forwarded to filter_file_sets; sets with no tables are skipped, and one thread is started per data file
85
+ def load_v2(package, only: nil, without: [])
86
+ file_sets = filter_file_sets(package.file_sets, only: only, without: without)
87
+ .unwrap!
88
+ .select { |fs| fs.tables.any? }
57
89
 
58
- private
90
+ schema = version_to_schema(package.version)
91
+
92
+ load_structure_files(package.files.select(&:structure?).map(&:path), schema: schema, dir: package.dir)
93
+
94
+ remaining = ::Concurrent::Set.new(file_sets.flat_map{|fs| fs.tables.values.flatten(1) })
95
+
96
+ threads = file_sets.flat_map do |fs|
97
+ fs.tables.flat_map do |name, files|
98
+ files.map do |file|
99
+ Thread.new do
100
+ load_csv(package.data_dir.join(file), into: name, schema: schema)
101
+ remaining.delete(file)
102
+
103
+ puts '[ OK ] '.green + file.to_s.yellow + ", #{remaining_message(remaining)}"
104
+ end
105
+ end
106
+ end
107
+ end
108
+
109
+ threads.each(&:join)
110
+ end
111
+
112
+ # @param [Array<Package::Mixin::Nameable>] file_sets
113
+ # @param [Array<String>, nil] only
114
+ # @param [Array<String>] without
115
+ # @return [Corindon::Result::Result]
116
+ def filter_file_sets(file_sets, only:, without:)
117
+ sets = if only.nil?
118
+ file_sets
119
+ else
120
+ sets_by_name = file_sets.map { |fs| [fs.name, fs] }.to_h
121
+
122
+ missing, present = only.map { |name| [name, sets_by_name.fetch(name, nil)] }
123
+ .partition { |(_name, value)| value.nil? }
124
+
125
+ if missing.any?
126
+ puts "[ NOK ] Datasources #{missing.map(&:first).join(', ')} don't exist!"
127
+
128
+ return Failure(StandardError.new("Datasources #{missing.map(&:first).join(', ')} don't exist!"))
129
+ end
130
+
131
+ present.map(&:second)
132
+ .select(&:data_path)
133
+ end
134
+
135
+ Success(sets.reject { |fs| without.include?(fs.name) })
136
+ end
59
137
 
60
138
  # @return [Database::Factory]
61
139
  attr_reader :database_factory
@@ -65,44 +143,25 @@ module Lexicon
65
143
  attr_reader :file_loader
66
144
  # @return [String]
67
145
  attr_reader :database_url
146
+ # @return [TableLocker]
147
+ attr_reader :table_locker
148
+ # @return [Psql]
149
+ attr_reader :psql
68
150
 
69
- def load_structure_files(files, schema:)
70
- database = database_factory.new_instance(url: database_url)
71
- database.prepend_search_path(schema) do
72
- files.each do |file|
73
- database.query(file.read)
74
- end
75
- end
151
+ # @param [Pathname] file read through `zcat` (so presumably gzip-compressed CSV); interpolated unescaped into the command
152
+ # @param [String] into destination table name, wrapped in double quotes but not escaped
153
+ # @param [String] schema schema of the destination table, wrapped in double quotes but not escaped
154
+ def load_csv(file, into:, schema:)
155
+ psql.execute_raw(<<~SQL)
156
+ \\copy "#{schema}"."#{into}" FROM PROGRAM 'zcat #{file}' WITH csv
157
+ SQL
76
158
  end
77
159
 
78
- # @param [Package::Package] package
79
- def lock_tables(package)
160
+ def load_structure_files(files, schema:, dir:)
80
161
  database = database_factory.new_instance(url: database_url)
81
-
82
- schema = version_to_schema(package.version)
83
-
84
- database.prepend_search_path schema do
85
- database.query <<~SQL
86
- CREATE OR REPLACE FUNCTION #{schema}.deny_changes()
87
- RETURNS TRIGGER
88
- AS $$
89
- BEGIN
90
- RAISE EXCEPTION '% denied on % (master data)', TG_OP, TG_RELNAME;
91
- END;
92
- $$
93
- LANGUAGE plpgsql;
94
- SQL
95
- package.file_sets.flat_map(&:tables).each do |table_name|
96
- database.query <<~SQL
97
- CREATE TRIGGER deny_changes
98
- BEFORE INSERT
99
- OR UPDATE
100
- OR DELETE
101
- OR TRUNCATE
102
- ON #{schema}.#{table_name}
103
- FOR EACH STATEMENT
104
- EXECUTE PROCEDURE #{schema}.deny_changes()
105
- SQL
162
+ database.prepend_search_path(schema) do
163
+ files.each do |file|
164
+ database.query(dir.join(file).read)
106
165
  end
107
166
  end
108
167
  end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lexicon
4
+ module Common
5
+ module Production
6
+ class TableLocker
7
+ include Mixin::SchemaNamer
8
+
9
+ # @param [Database::Factory] database_factory asked for a new instance on every #lock_tables call
10
+ # @param [String] database_url passed to the factory as +url:+
11
+ def initialize(database_factory:, database_url:)
12
+ @database_factory = database_factory
13
+ @database_url = database_url
14
+ end
15
+
16
+ # @param [Package::Package] package its version selects the target schema through version_to_schema
17
+ # @param [Array<String>] tables tables that get a statement-level deny_changes trigger; names are interpolated unquoted into the SQL
18
+ def lock_tables(package:, tables: [])
19
+ database = database_factory.new_instance(url: database_url)
20
+
21
+ schema = version_to_schema(package.version)
22
+
23
+ database.prepend_search_path schema do
24
+ database.query <<~SQL
25
+ CREATE OR REPLACE FUNCTION #{schema}.deny_changes()
26
+ RETURNS TRIGGER
27
+ AS $$
28
+ BEGIN
29
+ RAISE EXCEPTION '% denied on % (master data)', TG_OP, TG_RELNAME;
30
+ END;
31
+ $$
32
+ LANGUAGE plpgsql;
33
+ SQL
34
+ tables.each do |table_name|
35
+ database.query <<~SQL
36
+ CREATE TRIGGER deny_changes
37
+ BEFORE INSERT
38
+ OR UPDATE
39
+ OR DELETE
40
+ OR TRUNCATE
41
+ ON #{schema}.#{table_name}
42
+ FOR EACH STATEMENT
43
+ EXECUTE PROCEDURE #{schema}.deny_changes()
44
+ SQL
45
+ end
46
+ end
47
+ end
48
+
49
+ private
50
+
51
+ # @return [Database::Factory] factory given to the constructor
52
+ attr_reader :database_factory
53
+ # @return [String] connection URL given to the constructor
54
+ attr_reader :database_url
55
+ end
56
+ end
57
+ end
58
+ end