pgsync 0.5.4 → 0.6.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pgsync might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/LICENSE.txt +1 -1
- data/README.md +123 -38
- data/config.yml +4 -0
- data/lib/pgsync.rb +5 -1
- data/lib/pgsync/client.rb +55 -53
- data/lib/pgsync/data_source.rb +78 -80
- data/lib/pgsync/init.rb +50 -6
- data/lib/pgsync/schema_sync.rb +83 -0
- data/lib/pgsync/sync.rb +95 -177
- data/lib/pgsync/table.rb +28 -0
- data/lib/pgsync/table_sync.rb +185 -186
- data/lib/pgsync/task.rb +329 -0
- data/lib/pgsync/task_resolver.rb +237 -0
- data/lib/pgsync/utils.rb +53 -13
- data/lib/pgsync/version.rb +1 -1
- metadata +6 -3
- data/lib/pgsync/table_list.rb +0 -141
@@ -0,0 +1,237 @@
|
|
1
|
+
module PgSync
  # Turns command-line arguments, options, and the "groups" section of the
  # config into a list of tasks. Each task is a Hash with a :table key and,
  # except for default tasks, a :sql key (which may be nil).
  class TaskResolver
    include Utils

    attr_reader :args, :opts, :source, :destination, :config, :first_schema, :notes

    # args          - positional arguments; args[0] is a comma-separated list of
    #                 groups and/or tables, args[1] is an optional SQL clause
    # opts          - option hash (:groups, :tables, :exclude, :schemas,
    #                 :all_schemas, :schema_only, :schema_first are read here)
    # source        - data source queried for tables and search_path
    # destination   - data source queried for tables
    # config        - config hash; config["groups"] maps group names to tables
    # first_schema  - stored and exposed through the reader
    def initialize(args:, opts:, source:, destination:, config:, first_schema:)
      @args = args
      @opts = opts
      @source = source
      @destination = destination
      @config = config
      @groups = config["groups"] || {}
      @first_schema = first_schema
      @notes = []
    end

    # Builds the full task list: groups first, then tables, then (only when
    # nothing was requested) every shared table. Tables given without a schema
    # are resolved against the source search path at the end.
    #
    # Raises Error when a group or table cannot be resolved.
    def tasks
      groups, tables = process_args

      tasks = groups.flat_map { |group| group_to_tasks(group) }
      tasks.concat(tables.flat_map { |table| table_to_tasks(table) })

      # get default if none given
      nothing_requested = !opts[:groups] && !opts[:tables] && args.empty?
      tasks.concat(default_tasks) if nothing_requested

      # resolve any tables that need it
      tasks.each do |task|
        task[:table] = fully_resolve(task[:table])
      end

      tasks
    end

    # True when the config defines a group with this exact name.
    def group?(group)
      @groups.key?(group)
    end

    private

    # Expands "group" or "group:param" into one task per table in the group.
    # A group entry may be a table name or a [table, sql] pair.
    def group_to_tasks(value)
      group, param = value.split(":", 2)
      raise Error, "Group not found: #{group}" unless group?(group)

      @groups[group].map do |entry|
        table, table_sql = entry.is_a?(Array) ? entry : [entry, nil]

        {
          table: to_table(table),
          sql: expand_sql(table_sql, param)
        }
      end
    end

    # Expands a table name, or a "*" wildcard matched against the shared
    # tables, into tasks. Parameters ("table:param") are rejected.
    def table_to_tasks(value)
      raise Error, "Cannot use parameters with tables" if value.include?(":")

      tables =
        if value.include?("*")
          regex = wildcard_regex(value)
          shared_tables.select { |t| wildcard_match?(regex, t) }
        else
          [to_table(value)]
        end

      tables.map do |table|
        {
          table: table,
          sql: sql_arg # doesn't support params
        }
      end
    end

    # treats identifiers as if they were quoted (Users == "Users")
    # this is different from Postgres (Users == "users")
    #
    # TODO add support for quoted identifiers like "my.schema"."my.table"
    # so it's possible to specify identifiers with "." in them
    def to_table(value)
      parts = value.split(".")
      case parts.size
      when 1
        # unknown schema
        Table.new(nil, parts[0])
      when 2
        Table.new(*parts)
      else
        raise Error, "Cannot resolve table: #{value}"
      end
    end

    # One task (without :sql) per shared table.
    def default_tasks
      shared_tables.map { |table| { table: table } }
    end

    # tables that exist in both source and destination
    # used when no tables specified, or a wildcard
    # removes excluded tables and filters by schema
    #
    # With :schema_only or :schema_first the destination is not consulted and
    # the filtered source tables are returned as-is. Otherwise, extra and
    # missing tables are recorded in notes.
    def shared_tables
      tables = filter_tables(source.tables)

      unless opts[:schema_only] || opts[:schema_first]
        from_tables = tables
        to_tables = filter_tables(destination.tables)

        extra_tables = to_tables - from_tables
        notes << "Extra tables: #{extra_tables.map { |t| friendly_name(t) }.join(", ")}" if extra_tables.any?

        missing_tables = from_tables - to_tables
        notes << "Missing tables: #{missing_tables.map { |t| friendly_name(t) }.join(", ")}" if missing_tables.any?

        tables &= to_tables
      end

      tables
    end

    # Returns a copy of tables restricted to the selected schemas (unless
    # :all_schemas is set) with every :exclude entry removed. Exclusions may be
    # plain names or "*" wildcards.
    def filter_tables(tables)
      tables = tables.dup

      unless opts[:all_schemas]
        # could support wildcard schemas as well
        schemas = Set.new(opts[:schemas] ? to_arr(opts[:schemas]) : source.search_path)
        tables.select! { |t| schemas.include?(t.schema) }
      end

      to_arr(opts[:exclude]).each do |value|
        if value.include?("*")
          regex = wildcard_regex(value)
          tables.reject! { |t| wildcard_match?(regex, t) }
        else
          # an excluded table that cannot be resolved is ignored, not an error
          tables -= [fully_resolve(to_table(value), error: false)].compact
        end
      end

      tables
    end

    # Anchored regex for a wildcard pattern: "*" matches any run of characters
    # other than ".", everything else is literal.
    def wildcard_regex(pattern)
      Regexp.new('\A' + Regexp.escape(pattern).gsub('\*', '[^\.]*') + '\z')
    end

    # A wildcard matches either the schema-qualified name or the bare name.
    def wildcard_match?(regex, table)
      regex.match(table.full_name) || regex.match(table.name)
    end

    # Splits the requested items into [groups, tables]. Items from args[0] are
    # classified by whether the part before ":" names a configured group.
    def process_args
      # dup: to_arr hands back the caller's own array when the option already
      # is one, and the lists are appended to below
      groups = to_arr(opts[:groups]).dup
      tables = to_arr(opts[:tables]).dup

      if args[0]
        # could be a group, table, or mix
        to_arr(args[0]).each do |value|
          target = group?(value.split(":", 2)[0]) ? groups : tables
          target << value
        end
      end

      [groups, tables]
    end

    # Memoized map of bare table name => the source table with that name whose
    # schema comes first in the search path (nil when none is in the path).
    def no_schema_tables
      @no_schema_tables ||= begin
        search_path_index = source.search_path.map.with_index.to_h
        source.tables.group_by(&:name).map do |name, candidates|
          in_path = candidates.select { |t| search_path_index[t.schema] }
          [name, in_path.min_by { |t| search_path_index[t.schema] }]
        end.to_h
      end
    end

    # for tables without a schema, find the table in the search path
    #
    # Returns the table unchanged when it already has a schema. Raises Error
    # for an unknown table unless error: false, in which case nil is returned.
    def fully_resolve(table, error: true)
      return table if table.schema

      resolved_table = no_schema_tables[table.name]
      raise Error, "Table not found in source: #{table.name}" if !resolved_table && error
      resolved_table
    end

    # parse command line arguments and YAML
    #
    # Arrays are returned as-is (same object); anything else is converted to a
    # string and split on commas.
    def to_arr(value)
      return value if value.is_a?(Array)

      # Split by commas, but don't use commas inside double quotes
      # https://stackoverflow.com/questions/21105360/regex-find-comma-not-inside-quotes
      value.to_s.split(/(?!\B"[^"]*),(?![^"]*"\B)/)
    end

    # SQL clause given on the command line, if any.
    def sql_arg
      args[1]
    end

    # Substitutes {id} and {1} in the SQL clause with the group parameter.
    # Returns nil when there is no SQL clause. Raises Error when the clause
    # references a variable that has no value.
    def expand_sql(sql, param)
      # command line option takes precedence over group option
      sql = sql_arg || sql

      return unless sql

      # vars must match \w
      missing_vars = sql.scan(/{\w+}/).map { |v| v[1..-2] }

      vars = {}
      if param
        vars["id"] = cast(param)
        vars["1"] = cast(param)
      end

      sql = sql.dup
      vars.each do |k, v|
        # only sub if in var list
        next unless missing_vars.delete(k)

        # block form: the value is inserted literally, so backslashes in it
        # are not interpreted as back-references
        sql.gsub!("{#{k}}") { cast(v) }
      end

      raise Error, "Missing variables: #{missing_vars.uniq.join(", ")}" if missing_vars.any?

      sql
    end

    # TODO quote vars in next major version
    #
    # Converts to a string and strips one leading and one trailing double quote.
    def cast(value)
      value.to_s.gsub(/\A\"|\"\z/, '')
    end
  end
end
|
data/lib/pgsync/utils.rb
CHANGED
@@ -18,29 +18,69 @@ module PgSync
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
+
# Logs the message colorized in yellow.
def warning(message)
  log colorize(message, :yellow)
end
|
24
|
+
|
25
|
+
# Emits a warning prefixed with "[DEPRECATED] ".
def deprecated(message)
  warning "[DEPRECATED] #{message}"
end
|
28
|
+
|
21
29
|
# Stream used for output: standard error.
def output
  $stderr
end
|
24
32
|
|
25
|
-
def
|
26
|
-
|
33
|
+
# Name of the config file for a given database: ".pgsync-<db>.yml".
def db_config_file(db)
  ".pgsync-#{db}.yml"
end
|
28
36
|
|
29
|
-
def
|
30
|
-
|
37
|
+
# Checks that the table of every task exists in the data source.
#
# data_source - object responding to table_exists?(table)
# tasks       - objects responding to #table
# description - label for the data source used in the error message
#
# Raises Error for the first table that is not found.
def confirm_tables_exist(data_source, tasks, description)
  tasks.map(&:table).each do |table|
    unless data_source.table_exists?(table)
      raise Error, "Table not found in #{description}: #{table}"
    end
  end
end
|
44
|
+
|
45
|
+
# First entry of the source search path that is not "pg_catalog" (memoized).
def first_schema
  @first_schema ||= source.search_path.find { |sp| sp != "pg_catalog" }
end
|
48
|
+
|
49
|
+
# Display name of a task: the friendly name of its table.
def task_name(task)
  friendly_name(task.table)
end
|
32
52
|
|
33
|
-
def
|
34
|
-
|
53
|
+
# Short display name for a table: the bare name when the table lives in
# first_schema, the full name otherwise.
def friendly_name(table)
  if table.schema == first_schema
    table.name
  else
    table.full_name
  end
end
|
35
60
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
path = File.dirname(path)
|
42
|
-
break if path == "/"
|
61
|
+
# Quotes an identifier. A Table becomes its quoted schema and quoted name
# joined by "."; anything else is quoted as a single identifier.
def quote_ident_full(ident)
  if ident.is_a?(Table)
    [quote_ident(ident.schema), quote_ident(ident.name)].join(".")
  else # temp table names are strings
    quote_ident(ident)
  end
end
|
68
|
+
|
69
|
+
# Quotes a single identifier by delegating to PG::Connection.quote_ident.
def quote_ident(value)
  PG::Connection.quote_ident(value)
end
|
72
|
+
|
73
|
+
# Turns a String into a single-quoted literal (escaped with quote_string);
# any other value is returned unchanged.
def escape(value)
  if value.is_a?(String)
    "'#{quote_string(value)}'"
  else
    value
  end
end
|
80
|
+
|
81
|
+
# activerecord
#
# Escapes a string for use inside a single-quoted literal: every backslash
# and every single quote is doubled.
def quote_string(s)
  s.gsub(/\\/, '\&\&').gsub(/'/, "''")
end
|
45
85
|
end
|
46
86
|
end
|
data/lib/pgsync/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgsync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: parallel
|
@@ -124,9 +124,12 @@ files:
|
|
124
124
|
- lib/pgsync/client.rb
|
125
125
|
- lib/pgsync/data_source.rb
|
126
126
|
- lib/pgsync/init.rb
|
127
|
+
- lib/pgsync/schema_sync.rb
|
127
128
|
- lib/pgsync/sync.rb
|
128
|
-
- lib/pgsync/
|
129
|
+
- lib/pgsync/table.rb
|
129
130
|
- lib/pgsync/table_sync.rb
|
131
|
+
- lib/pgsync/task.rb
|
132
|
+
- lib/pgsync/task_resolver.rb
|
130
133
|
- lib/pgsync/utils.rb
|
131
134
|
- lib/pgsync/version.rb
|
132
135
|
homepage: https://github.com/ankane/pgsync
|
data/lib/pgsync/table_list.rb
DELETED
@@ -1,141 +0,0 @@
|
|
1
|
-
module PgSync
|
2
|
-
class TableList
|
3
|
-
include Utils
|
4
|
-
|
5
|
-
attr_reader :args, :opts, :source, :config
|
6
|
-
|
7
|
-
def initialize(args, options, source, config)
|
8
|
-
@args = args
|
9
|
-
@opts = options
|
10
|
-
@source = source
|
11
|
-
@config = config
|
12
|
-
@groups = config["groups"] || {}
|
13
|
-
end
|
14
|
-
|
15
|
-
def group?(group)
|
16
|
-
@groups.key?(group)
|
17
|
-
end
|
18
|
-
|
19
|
-
def tables
|
20
|
-
tables = {}
|
21
|
-
sql = args[1]
|
22
|
-
|
23
|
-
groups = to_arr(opts[:groups])
|
24
|
-
tables2 = to_arr(opts[:tables])
|
25
|
-
|
26
|
-
if args[0]
|
27
|
-
# could be a group, table, or mix
|
28
|
-
to_arr(args[0]).each do |tag|
|
29
|
-
group, id = tag.split(":", 2)
|
30
|
-
if group?(group)
|
31
|
-
groups << tag
|
32
|
-
else
|
33
|
-
tables2 << tag
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
groups.each do |tag|
|
39
|
-
group, id = tag.split(":", 2)
|
40
|
-
raise Error, "Group not found: #{group}" unless group?(group)
|
41
|
-
|
42
|
-
# if id
|
43
|
-
# # TODO show group name and value
|
44
|
-
# log colorize("`pgsync group:value` is deprecated and will have a different function in 0.6.0.", :yellow)
|
45
|
-
# log colorize("Use `pgsync group --var 1=value` instead.", :yellow)
|
46
|
-
# end
|
47
|
-
|
48
|
-
@groups[group].each do |table|
|
49
|
-
table_sql = nil
|
50
|
-
if table.is_a?(Array)
|
51
|
-
table, table_sql = table
|
52
|
-
end
|
53
|
-
add_table(tables, table, id, sql || table_sql)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
tables2.each do |tag|
|
58
|
-
table, id = tag.split(":", 2)
|
59
|
-
raise Error, "Cannot use parameters with tables" if id
|
60
|
-
add_table(tables, table, id, sql)
|
61
|
-
end
|
62
|
-
|
63
|
-
if !opts[:groups] && !opts[:tables] && !args[0]
|
64
|
-
exclude = to_arr(opts[:exclude])
|
65
|
-
exclude = source.fully_resolve_tables(exclude).keys if exclude.any?
|
66
|
-
|
67
|
-
tabs = source.tables
|
68
|
-
unless opts[:all_schemas]
|
69
|
-
schemas = Set.new(opts[:schemas] ? to_arr(opts[:schemas]) : source.search_path)
|
70
|
-
tabs.select! { |t| schemas.include?(t.split(".", 2)[0]) }
|
71
|
-
end
|
72
|
-
|
73
|
-
(tabs - exclude).each do |k|
|
74
|
-
tables[k] = {}
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
source.fully_resolve_tables(tables)
|
79
|
-
end
|
80
|
-
|
81
|
-
private
|
82
|
-
|
83
|
-
def to_arr(value)
|
84
|
-
if value.is_a?(Array)
|
85
|
-
value
|
86
|
-
else
|
87
|
-
# Split by commas, but don't use commas inside double quotes
|
88
|
-
# https://stackoverflow.com/questions/21105360/regex-find-comma-not-inside-quotes
|
89
|
-
value.to_s.split(/(?!\B"[^"]*),(?![^"]*"\B)/)
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
def add_table(tables, table, id, sql)
|
94
|
-
tables2 =
|
95
|
-
if table.include?("*")
|
96
|
-
regex = Regexp.new('\A' + Regexp.escape(table).gsub('\*','[^\.]*') + '\z')
|
97
|
-
source.tables.select { |t| regex.match(t) || regex.match(t.split(".", 2).last) }
|
98
|
-
else
|
99
|
-
[table]
|
100
|
-
end
|
101
|
-
|
102
|
-
tables2.each do |tab|
|
103
|
-
tables[tab] = {}
|
104
|
-
tables[tab][:sql] = table_sql(sql, id) if sql
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
def table_sql(sql, id)
|
109
|
-
# vars must match \w
|
110
|
-
missing_vars = sql.scan(/{\w+}/).map { |v| v[1..-2] }
|
111
|
-
|
112
|
-
vars = {}
|
113
|
-
|
114
|
-
# legacy
|
115
|
-
if id
|
116
|
-
vars["id"] = cast(id)
|
117
|
-
vars["1"] = cast(id)
|
118
|
-
end
|
119
|
-
|
120
|
-
# opts[:var].each do |value|
|
121
|
-
# k, v = value.split("=", 2)
|
122
|
-
# vars[k] = v
|
123
|
-
# end
|
124
|
-
|
125
|
-
sql = sql.dup
|
126
|
-
vars.each do |k, v|
|
127
|
-
# only sub if in var list
|
128
|
-
sql.gsub!("{#{k}}", cast(v)) if missing_vars.delete(k)
|
129
|
-
end
|
130
|
-
|
131
|
-
raise Error, "Missing variables: #{missing_vars.uniq.join(", ")}" if missing_vars.any?
|
132
|
-
|
133
|
-
sql
|
134
|
-
end
|
135
|
-
|
136
|
-
# TODO quote vars in next major version
|
137
|
-
def cast(value)
|
138
|
-
value.to_s.gsub(/\A\"|\"\z/, '')
|
139
|
-
end
|
140
|
-
end
|
141
|
-
end
|