leda 0.0.1

@@ -0,0 +1,71 @@
+ require 'leda'
+
+ module Leda
+   ##
+   # Actually runs a dump or restore using the store info in a {Configuration}.
+   class Runner
+     attr_reader :current_env, :configuration
+
+     def initialize(current_env, configuration)
+       @current_env = current_env
+       @configuration = configuration
+     end
+
+     def directory(env, data_unit=nil, store=nil)
+       p = configuration.base_dir.join(env)
+       p = p.join(data_unit.name) if data_unit
+       p = p.join(store.name) if store
+       p
+     end
+
+     def relative_directory(env, data_unit=nil, store=nil)
+       directory(env, data_unit, store).
+         relative_path_from(configuration.project_root_dir)
+     end
+
+     ##
+     # Performs dumps for the configured stores. Can optionally be limited to
+     # one data unit and/or store type.
+     def dump(data_unit_name=nil, store_name=nil)
+       each_data_unit_store(data_unit_name, store_name).each do |data_unit, store|
+         dir = directory(@current_env, data_unit, store)
+         dir.mkpath
+         store.dump(dir)
+       end
+     end
+
+     def dump_relative_paths(data_unit_name=nil, store_name=nil)
+       each_data_unit_store(data_unit_name, store_name).flat_map do |data_unit, store|
+         relative_directory(@current_env, data_unit, store)
+       end
+     end
+
+     ##
+     # Performs restores for the configured stores. Can optionally be limited to
+     # one data unit and/or store type.
+     def restore_from(source_env, data_unit_name=nil, store_name=nil)
+       each_data_unit_store(data_unit_name, store_name).each do |data_unit, store|
+         store.restore_from(directory(source_env, data_unit, store))
+       end
+     end
+
+     private
+
+     def each_data_unit_store(data_unit_name=nil, store_name=nil)
+       Enumerator.new do |y|
+         yielded_any = false
+         configuration.data_units.each do |du|
+           if data_unit_name.nil? || du.name == data_unit_name
+             du.stores.each do |store|
+               if store_name.nil? || store.name == store_name
+                 yielded_any = true
+                 y << [du, store]
+               end
+             end
+           end
+         end
+         fail "No data configured that matches #{[data_unit_name, store_name].compact.join(':')}" unless yielded_any
+       end
+     end
+   end
+ end
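
For orientation, `Leda::Runner` walks the data units and stores described by a `Configuration` object (defined elsewhere in the gem, not shown in this diff). A minimal usage sketch, assuming `configuration` responds to `base_dir`, `project_root_dir`, and `data_units` as `Runner` expects:

    # Hypothetical usage; `configuration` comes from Leda's configuration
    # layer, which is not part of this diff.
    runner = Leda::Runner.new('development', configuration)

    runner.dump                         # dump every configured data unit and store
    runner.dump('users', 'postgresql')  # or limit to one data unit and/or store type
    runner.restore_from('staging')      # restore the current env from staging's dump files
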
@@ -0,0 +1,53 @@
+ require 'leda'
+ require 'active_support/core_ext/string/inflections'
+
+ module Leda
+   ##
+   # Mix-in for defining the set of data needed from a particular backing store
+   # in a data unit. E.g., for a relational database it might be a set of tables.
+   #
+   # A store must define the following methods:
+   #
+   #     # Dump the configured data to the specified directory
+   #     # @param [Pathname]
+   #     def dump(directory); end
+   #
+   #     # Restore from the data found in the given directory
+   #     # @param [Pathname]
+   #     def restore_from(directory); end
+   module Store
+     attr_reader :options
+
+     def initialize(options={})
+       @options = options.dup
+     end
+
+     def name
+       Store.default_name(self.class)
+     end
+
+     def self.default_name(clazz)
+       clazz.name.demodulize.underscore
+     end
+
+     def self.registered_stores
+       @registered_stores ||= {}
+     end
+
+     def self.included(included_into)
+       register_store(included_into, default_name(included_into))
+     end
+
+     def self.register_store(store_class, name)
+       registered_stores[name.to_s] = store_class
+     end
+
+     def self.find(store_name)
+       registered_stores[store_name.to_s]
+     end
+   end
+ end
+
+ # XXX: temporary
+ require 'leda/stores/postgresql'
+ require 'leda/stores/elasticsearch'
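
The `Store` mix-in above is the extension point: a class that includes it picks up the `options` constructor, gets registered under a default name derived from its class name (`demodulize` + `underscore`), and only has to supply `dump` and `restore_from`. A sketch of a hypothetical custom store (the `Redis` class is illustrative, not part of this gem):

    require 'leda'

    module Leda
      module Stores
        class Redis
          include Leda::Store   # registers this class under the name "redis"

          def dump(directory)
            # write the configured keys to files under `directory` (a Pathname)
          end

          def restore_from(directory)
            # reload those keys from the files written by #dump
          end
        end
      end
    end

    Leda::Store.find('redis')  # => Leda::Stores::Redis
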
@@ -0,0 +1,190 @@
+ require 'leda'
+
+ require 'oj'
+
+ module Leda
+   module Stores
+     class Elasticsearch
+       include Leda::Store
+
+       attr_reader :indices, :es_client
+
+       def initialize(*)
+         super
+
+         @indices = options[:indices] || options[:indexes]
+       end
+
+       def dump(directory)
+         Runner.new(directory, indices, es_client).dump
+       end
+
+       def restore_from(directory)
+         Runner.new(directory, indices, es_client).restore
+       end
+
+       private
+
+       def es_client
+         # TODO: make this configuration externalizable
+         ::Elasticsearch::Model.client
+       end
+
+       class Runner
+         attr_reader :directory, :indices, :es_client
+
+         def initialize(directory, indices, es_client)
+           @directory = directory
+           @indices = indices
+           @es_client = es_client
+         end
+
+         def dump
+           $stderr.puts "Exporting to #{echo_fn(directory)} ..."
+           indices.each do |index|
+             dump_index_metadata(index)
+             scan_all_records_into_bulk_format(index)
+           end
+           $stderr.puts "... export complete."
+         end
+
+         def restore
+           $stderr.puts "Importing from #{echo_fn(directory)} ..."
+           indices.each do |index|
+             replace_index(index)
+             bulk_load_records(index)
+           end
+           $stderr.puts "... import complete."
+         end
+
+         private
+
+         def echo_fn(pathname)
+           # TODO: an alternative
+           pathname.relative_path_from(Rails.root)
+         end
+
+         def mapping_filename(index)
+           directory.join("#{index}_mapping.json")
+         end
+
+         def settings_filename(index)
+           directory.join("#{index}_settings.json")
+         end
+
+         def bulk_records_filename(index)
+           directory.join("#{index}_bulk-records.json")
+         end
+
+         def dump_index_metadata(index)
+           dump_mapping(index)
+           dump_settings(index)
+         end
+
+         def dump_mapping(index)
+           fn = mapping_filename(index)
+           $stderr.puts " - Dumping mapping for #{index} to #{echo_fn(fn)}"
+           mapping = es_client.indices.get_mapping index: index
+           fn.open('w') { |f| f.puts JSON.pretty_generate(mapping) }
+         end
+
+         def dump_settings(index)
+           fn = settings_filename(index)
+           $stderr.puts " - Dumping settings for #{index} to #{echo_fn(fn)}"
+           settings = es_client.indices.get_settings index: index
+           fn.open('w') { |f| f.puts JSON.pretty_generate(settings) }
+         end
+
+         def scan_all_records_into_bulk_format(index)
+           fn = bulk_records_filename(index)
+           $stderr.puts " - Dumping records for #{index} to #{echo_fn(fn)} "
+
+           # start the scroll with a search
+           results = es_client.search index: index, search_type: 'scan', scroll: '5m', size: 500
+           total_ct = results['hits']['total']
+
+           written_ct = 0
+           fn.open('w:utf-8') do |f|
+             while results = es_client.scroll(scroll_id: results['_scroll_id'], scroll: '5m') and not results['hits']['hits'].empty?
+               results['hits']['hits'].each do |hit|
+                 f.puts convert_to_bulk_index_rows(hit)
+               end
+               written_ct += results['hits']['hits'].size
+               $stderr.print "\r #{written_ct} / #{total_ct} => %5.1f%% done" % (written_ct * 100.0 / total_ct)
+             end
+           end
+           $stderr.puts "\r #{written_ct} / #{total_ct} => all done."
+         end
+
+         def convert_to_bulk_index_rows(hit)
+           [
+             Oj.dump({ "index" => hit.slice("_index", "_type", "_id") }),
+             Oj.dump(hit['_source'])
+           ]
+         end
+
+         def replace_index(index)
+           map_fn = mapping_filename(index)
+           $stderr.puts " - Reading mapping from #{echo_fn(map_fn)}"
+           mappings = Oj.load(map_fn.read).values.first # assume only one index
+
+           set_fn = settings_filename(index)
+           $stderr.puts " - Reading settings from #{echo_fn(set_fn)}"
+           settings = Oj.load(set_fn.read).values.first # assume only one index
+
+           body = {}.merge!(mappings).merge!(settings)
+
+           begin
+             $stderr.print " - Deleting index #{index} ... "
+             es_client.indices.delete index: index
+             $stderr.puts "done"
+           rescue ::Elasticsearch::Transport::Transport::Errors::NotFound
+             $stderr.puts "not necessary"
+           end
+
+           $stderr.puts " - Creating index #{index} using settings and mapping"
+           es_client.indices.create index: index, body: body
+         end
+
+         RESTORE_BATCH_DISPATCH_TRIGGER=(1 << 19) # Stay below 1MB per request
+         # N.b.: Assumption that each bulk op is two lines. This is true
+         # so long as they are all index ops.
+         BULK_LINES_PER_RECORD=2
+
+         def bulk_load_records(index)
+           fn = bulk_records_filename(index)
+           $stderr.puts " - Reading records for #{index} from #{echo_fn(fn)} "
+
+           total_ct = 0
+           fn.each_line { |l| total_ct += 1 }
+           total_ct /= BULK_LINES_PER_RECORD
+
+           batch = ""
+           batch_line_ct = 0
+           loaded_ct = 0
+           fn.each_line do |line|
+             batch_line_ct += 1
+             batch << line
+             if batch_line_ct % BULK_LINES_PER_RECORD == 0 && batch.size > RESTORE_BATCH_DISPATCH_TRIGGER
+               bulk_load_batch(batch)
+               loaded_ct += batch_line_ct / BULK_LINES_PER_RECORD
+               $stderr.print "\r #{loaded_ct} / #{total_ct} => %5.1f%% done" % (loaded_ct * 100.0 / total_ct)
+               batch = ""
+               batch_line_ct = 0
+             end
+           end
+           unless batch.empty?
+             bulk_load_batch(batch)
+             loaded_ct += batch_line_ct / BULK_LINES_PER_RECORD
+           end
+           $stderr.puts "\r #{loaded_ct} / #{total_ct} => all done."
+         end
+
+         def bulk_load_batch(batch)
+           es_client.bulk body: batch
+         end
+
+       end
+     end
+   end
+ end
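
Per index, the Elasticsearch store writes `<index>_mapping.json`, `<index>_settings.json`, and `<index>_bulk-records.json` (newline-delimited bulk `index` ops, two lines per document). A usage sketch, assuming a Rails app where `Elasticsearch::Model.client` is configured and an `articles` index exists (the index name and directory are illustrative):

    require 'pathname'

    store = Leda::Stores::Elasticsearch.new(indices: %w[articles])

    dir = Pathname.new('tmp/leda/development/content/elasticsearch')
    dir.mkpath

    store.dump(dir)          # writes articles_mapping.json, articles_settings.json,
                             # and articles_bulk-records.json under dir
    store.restore_from(dir)  # deletes and recreates the index, then bulk-loads the records

Note that the dump relies on the old scan/scroll API (`search_type: 'scan'`), so it only works against Elasticsearch versions that still support it.
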
@@ -0,0 +1,134 @@
+ require 'leda'
+
+ require 'tempfile'
+ require 'shellwords'
+
+ module Leda
+   module Stores
+     ##
+     # Store for PostgreSQL. Uses PG command line utilities to dump and restore.
+     #
+     # Options:
+     #
+     # * `:tables`. An array of table names to dump/restore. The tables will be
+     #   restored in the order given in the array.
+     class Postgresql
+       include Leda::Store
+
+       attr_reader :tables
+
+       def initialize(*args)
+         super
+
+         @tables = options[:tables]
+         @filter_executable = options[:filter]
+       end
+
+       def filename(directory)
+         directory.join('dump.psql')
+       end
+
+       def dump(directory)
+         pgenv
+
+         fn = filename(directory).to_s
+         $stderr.puts "Exporting to #{fn} ..."
+         dump_cmd = (['pg_dump', '-a', '-Fp', '-O', '-x'] + tables.flat_map { |t| ['-t', t] }).shelljoin
+
+         # TODO:
+         filter_cmd = nil
+         if @filter_executable
+           filter_cmd = "| #{@filter_executable}"
+         end
+
+         out_cmd = "> " + [fn].shelljoin
+         if system([dump_cmd, filter_cmd, out_cmd].compact.join(' '))
+           $stderr.puts "... export complete."
+         else
+           fail "Export failed."
+         end
+       end
+
+       def restore_from(directory)
+         pgenv
+
+         source_file = filename(directory)
+
+         unless source_file.exist?
+           fail "Expected provider dump not found: #{source_file}"
+         end
+
+         begin
+           $stderr.puts "Importing from #{source_file}"
+           open('|psql -aq', 'w') do |psql|
+             psql.puts '\set ON_ERROR_STOP'
+             psql.puts "BEGIN;"
+             psql.puts "TRUNCATE #{tables.join(', ')} CASCADE;"
+             psql.puts source_file.read
+             psql.puts "COMMIT;"
+           end
+         rescue Errno::EPIPE => e
+           $stderr.puts "psql terminated early; check above for a reason"
+         end
+       end
+
+       private
+
+       def database_config
+         # TODO: make this agnostic
+         @database_config ||= ActiveRecord::Base.configurations[Rails.env].reverse_merge(
+           'host' => 'localhost'
+         )
+       end
+
+       ##
+       # Sets the libpq environment variables based on the current AR database config.
+       def pgenv
+         pgenv_values.each do |env_var, value|
+           ENV[env_var] = value.to_s if value
+         end
+         ENV['PGPASSFILE'] = temporary_pgpassfile
+       end
+
+       ##
+       # Computes, but does not set into the environment, the libpq env vars implied
+       # by the current AR database config. Does not include the password, which is
+       # supplied via a temporary PGPASSFILE instead.
+       def pgenv_values
+         @pgenv_values ||=
+           {
+             'host' => 'PGHOST',
+             'port' => 'PGPORT',
+             'username' => 'PGUSER',
+             'database' => 'PGDATABASE',
+           }.each_with_object({}) do |(param_name, env_var), values|
+             values[env_var] = database_config[param_name] if database_config[param_name]
+           end
+       end
+
+       ##
+       # Creates a temporary pgpass file based on the current AR database config.
+       # Returns the path to the file. This file will be automatically deleted when
+       # the interpreter shuts down, so don't share the path outside of the process
+       # and its children.
+       def temporary_pgpassfile
+         return @temporary_pgpassfile.path if @temporary_pgpassfile
+
+         pass = database_config['password']
+
+         # Tempfile.open with a block does not return the tempfile, so manage it by hand.
+         begin
+           # must maintain a reference to the tempfile object so that it doesn't
+           # get deleted until we're done with it.
+           @temporary_pgpassfile = Tempfile.open('dc')
+           @temporary_pgpassfile.chmod(0600)
+           @temporary_pgpassfile.puts [pgenv_values['PGHOST'], pgenv_values['PGPORT'] || '*', pgenv_values['PGDATABASE'], pgenv_values['PGUSER'], pass].join(':')
+         ensure
+           @temporary_pgpassfile.close
+         end
+
+         @temporary_pgpassfile.path
+       end
+     end
+   end
+ end
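
The PostgreSQL store shells out to `pg_dump` for dumps and pipes the dump file back through `psql` for restores, pointing both at the current ActiveRecord connection via libpq environment variables plus a temporary pgpass file. A configuration sketch (the table names and the optional `:filter` executable are illustrative):

    require 'pathname'

    store = Leda::Stores::Postgresql.new(
      tables: %w[users profiles],   # data-only dump of these tables, in this order
      filter: 'bin/scrub_pii'       # optional: pg_dump output is piped through this command
    )

    dir = Pathname.new('tmp/leda/development/accounts/postgresql')
    dir.mkpath

    store.dump(dir)          # runs pg_dump -a -Fp -O -x ... > dir/dump.psql
    store.restore_from(dir)  # TRUNCATEs the listed tables (CASCADE) in a transaction,
                             # then replays dump.psql through psql

Both calls assume a Rails/ActiveRecord environment, since the connection settings come from `ActiveRecord::Base.configurations[Rails.env]`.
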