directory_diff 0.4.8 → 0.4.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/directory_diff/transformer/temp_table.rb +249 -115
- data/lib/directory_diff/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fa326af22cd07dc2ab6819d6f1221b030c98a849
|
4
|
+
data.tar.gz: f46469c536c45a3cbcea3b26932bcaac0a0280e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b539659c810d9bd19472074663bfda1c1ee2937349eafb44fca14503251860caa80743ddd3a3d11e9e902af4668f02380603f88990bc3ca3e53854f32f5077f
|
7
|
+
data.tar.gz: dd56c2f064b7a4e2e708c59bbdbb962bd72483bf2dbe490bb703bedaa42b4bc4ed37a5a6fca2596a42e66e441ca7e9b8304cf40d6276b4aa475b2a88661b4549
|
data/lib/directory_diff/transformer/temp_table.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "activerecord_pg_stuff"
|
2
4
|
|
3
5
|
Arel::Predications.module_eval do
|
@@ -23,44 +25,8 @@ module DirectoryDiff
|
|
23
25
|
# @param new_directory a table containing only the new records to compare
|
24
26
|
# against, most likely a temp table.
|
25
27
|
def into(new_directory, options = {})
|
26
|
-
|
27
|
-
|
28
|
-
lower(email) email,
|
29
|
-
coalesce(phone_number, '') phone_number,
|
30
|
-
array_remove(
|
31
|
-
regexp_split_to_array(
|
32
|
-
coalesce(assistants, ''),
|
33
|
-
'\s*,\s*'
|
34
|
-
)::varchar[],
|
35
|
-
''
|
36
|
-
) assistants
|
37
|
-
SQL
|
38
|
-
current_directory.select(projection).temporary_table do |temp_current_directory|
|
39
|
-
# Remove dupe email rows, keeping the last one
|
40
|
-
latest_unique_sql = <<-SQL
|
41
|
-
SELECT
|
42
|
-
DISTINCT ON (lower(email)) name,
|
43
|
-
lower(email) email,
|
44
|
-
coalesce(phone_number, '') phone_number,
|
45
|
-
array_remove(
|
46
|
-
regexp_split_to_array(
|
47
|
-
coalesce(assistants, ''),
|
48
|
-
'\s*,\s*'
|
49
|
-
)::varchar[],
|
50
|
-
''
|
51
|
-
) assistants,
|
52
|
-
extra,
|
53
|
-
ROW_NUMBER () OVER ()
|
54
|
-
FROM
|
55
|
-
#{new_directory.arel_table.name}
|
56
|
-
ORDER BY
|
57
|
-
lower(email),
|
58
|
-
row_number desc
|
59
|
-
SQL
|
60
|
-
|
61
|
-
new_directory.select('*')
|
62
|
-
.from(Arel.sql("(#{latest_unique_sql}) as t"))
|
63
|
-
.order("row_number").temporary_table do |deduped_csv|
|
28
|
+
temp_table(current_directory_relation) do |temp_current_directory|
|
29
|
+
new_directory_temp_table(new_directory) do |deduped_csv|
|
64
30
|
# Get Arel tables for referencing fields, table names
|
65
31
|
employees = temp_current_directory.table
|
66
32
|
csv = deduped_csv.table
|
@@ -68,8 +34,12 @@ module DirectoryDiff
|
|
68
34
|
# Reusable Arel predicates
|
69
35
|
csv_employee_join = csv[:email].eq(employees[:email])
|
70
36
|
attributes_unchanged = employees[:name].eq(csv[:name])
|
71
|
-
.and(
|
72
|
-
|
37
|
+
.and(
|
38
|
+
employees[:phone_number].eq(csv[:phone_number])
|
39
|
+
)
|
40
|
+
.and(
|
41
|
+
employees[:assistants].contains(csv[:assistants])
|
42
|
+
)
|
73
43
|
|
74
44
|
# Creates joins between the temp table and the csv table and
|
75
45
|
# vice versa
|
@@ -87,98 +57,44 @@ module DirectoryDiff
|
|
87
57
|
# left
|
88
58
|
employee_records = temp_current_directory.joins(employees_to_csv)
|
89
59
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
# 2. Assistant email is employee's own email. Remove the assistant
|
94
|
-
# email.
|
95
|
-
# TODO move this into the temp table creation above
|
96
|
-
# https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
|
97
|
-
cleanup_sql = <<-SQL
|
98
|
-
with
|
99
|
-
unnested_assistants as
|
100
|
-
(
|
101
|
-
select
|
102
|
-
email,
|
103
|
-
name,
|
104
|
-
unnest(assistants) assistant
|
105
|
-
from #{csv.name}
|
106
|
-
),
|
107
|
-
own_email_removed as
|
108
|
-
(
|
109
|
-
select
|
110
|
-
a.*
|
111
|
-
from unnested_assistants a
|
112
|
-
where a.email != a.assistant
|
113
|
-
),
|
114
|
-
missing_assistants_removed as
|
115
|
-
(
|
116
|
-
select
|
117
|
-
a.*
|
118
|
-
from own_email_removed a
|
119
|
-
left outer join #{csv.name} b on a.assistant = b.email
|
120
|
-
where
|
121
|
-
(a.assistant is null and b.email is null)
|
122
|
-
or (a.assistant is not null and b.email is not null)
|
123
|
-
),
|
124
|
-
only_valid_assistants as
|
125
|
-
(
|
126
|
-
select
|
127
|
-
a.email,
|
128
|
-
a.name,
|
129
|
-
array_remove(
|
130
|
-
array_agg(b.assistant),
|
131
|
-
null
|
132
|
-
) assistants
|
133
|
-
from #{csv.name} a
|
134
|
-
left outer join missing_assistants_removed b
|
135
|
-
using (email)
|
136
|
-
group by
|
137
|
-
a.email, a.name
|
138
|
-
)
|
139
|
-
update #{csv.name}
|
140
|
-
set assistants = only_valid_assistants.assistants
|
141
|
-
from only_valid_assistants
|
142
|
-
where #{csv.name}.email = only_valid_assistants.email
|
143
|
-
SQL
|
144
|
-
deduped_csv.connection.execute(cleanup_sql)
|
60
|
+
connection.execute(SQL.cleanup_sql(csv.name))
|
61
|
+
|
62
|
+
csv_fields = [:name, :email, :phone_number, :assistants, :extra]
|
145
63
|
|
146
64
|
# new records are records in the new directory that don't exist in
|
147
65
|
# the current directory
|
148
|
-
new_records = csv_records
|
149
|
-
.select(
|
66
|
+
new_records = csv_records
|
67
|
+
.select("'insert'::varchar operation, row_number")
|
68
|
+
.select(csv_fields)
|
150
69
|
.where({ employees.name => { email: nil } })
|
151
70
|
# deleted records are records in the current directory that don't
|
152
71
|
# exist in the new directory
|
153
|
-
deleted_records = employee_records
|
154
|
-
.select(
|
72
|
+
deleted_records = employee_records
|
73
|
+
.select("'delete'::varchar operation, row_number")
|
74
|
+
.select(csv_fields)
|
155
75
|
.where({ csv.name => { email: nil } })
|
156
76
|
# changed records are records that have difference in name, phone
|
157
77
|
# number and/or assistants
|
158
|
-
changed_records = csv_records
|
159
|
-
.select(
|
78
|
+
changed_records = csv_records
|
79
|
+
.select("'update'::varchar operation, row_number")
|
80
|
+
.select(csv_fields)
|
160
81
|
.where.not(attributes_unchanged)
|
161
82
|
# unchanged records are records that are exactly the same in both
|
162
83
|
# directories (without considering the extra field)
|
163
|
-
unchanged_records = csv_records
|
164
|
-
.select(
|
84
|
+
unchanged_records = csv_records
|
85
|
+
.select("'noop'::varchar operation, row_number")
|
86
|
+
.select(csv_fields)
|
165
87
|
.where(attributes_unchanged)
|
166
88
|
|
167
89
|
# create temp table for holding operations
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
rel = ActiveRecord::Relation.new dec, table: dec.arel_table
|
172
|
-
rel.readonly!
|
173
|
-
|
174
|
-
rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{deleted_records.to_sql}")
|
175
|
-
rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{changed_records.to_sql}")
|
176
|
-
|
90
|
+
temp_table(new_records.to_sql) do |operations|
|
91
|
+
insert_into_operations(operations, deleted_records.to_sql)
|
92
|
+
insert_into_operations(operations, changed_records.to_sql)
|
177
93
|
if options[:skip_noop] != true
|
178
|
-
|
94
|
+
insert_into_operations(operations, unchanged_records.to_sql)
|
179
95
|
end
|
180
96
|
|
181
|
-
|
97
|
+
operations.order(:row_number).each do |operation|
|
182
98
|
add_operation(operation)
|
183
99
|
end
|
184
100
|
end
|
@@ -190,6 +106,91 @@ module DirectoryDiff
|
|
190
106
|
|
191
107
|
private
|
192
108
|
|
109
|
+
def current_directory_relation(&block)
|
110
|
+
current_directory.select(SQL.current_directory_projection)
|
111
|
+
end
|
112
|
+
|
113
|
+
def new_directory_temp_table(source, &block)
|
114
|
+
convert_to_relation(source) do |relation|
|
115
|
+
relation = relation.select("*")
|
116
|
+
.from(Arel.sql("(#{SQL.latest_unique_sql(relation.table.name)}) as t"))
|
117
|
+
.order("row_number")
|
118
|
+
|
119
|
+
temp_table(relation, &block)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def convert_to_relation(source, &block)
|
124
|
+
return block.call(source) if source.is_a?(ActiveRecord::Relation)
|
125
|
+
|
126
|
+
temp_table do |relation|
|
127
|
+
table_name = relation.table.name
|
128
|
+
connection.change_table(table_name) do |t|
|
129
|
+
t.column :name, :string
|
130
|
+
t.column :email, :string
|
131
|
+
t.column :phone_number, :string
|
132
|
+
t.column :assistants, :string
|
133
|
+
t.column :extra, :string
|
134
|
+
end
|
135
|
+
insert_into_csv_table(table_name, source)
|
136
|
+
block.call(relation)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
# TODO chunk this into batch sizes
|
141
|
+
def insert_into_csv_table(table_name, records)
|
142
|
+
return if records.empty?
|
143
|
+
|
144
|
+
values = records.map do |row|
|
145
|
+
(name, email, phone_number, assistants, extra) = row
|
146
|
+
columns = [
|
147
|
+
connection.quote(name),
|
148
|
+
connection.quote(email),
|
149
|
+
connection.quote(phone_number),
|
150
|
+
connection.quote(assistants),
|
151
|
+
connection.quote(extra)
|
152
|
+
]
|
153
|
+
"(#{columns.join(", ")})"
|
154
|
+
end
|
155
|
+
|
156
|
+
connection.execute(SQL.insert_into_temp_csv_table(table_name, values))
|
157
|
+
end
|
158
|
+
|
159
|
+
def temp_table(source = nil, &block)
|
160
|
+
return source.temporary_table(&block) if source.is_a?(ActiveRecord::Relation)
|
161
|
+
|
162
|
+
create_temp_table(source) do |name|
|
163
|
+
klass = current_directory.klass
|
164
|
+
dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new(klass, name)
|
165
|
+
rel = ActiveRecord::Relation.new(dec, table: dec.arel_table)
|
166
|
+
rel.readonly!
|
167
|
+
block.call(rel)
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def create_temp_table(initial_sql=nil)
|
172
|
+
table_name = "temporary_#{(Time.now.to_f * 1000).to_i}"
|
173
|
+
|
174
|
+
if initial_sql
|
175
|
+
connection.with_temporary_table(table_name, initial_sql) do |name|
|
176
|
+
yield name
|
177
|
+
end
|
178
|
+
else
|
179
|
+
connection.transaction do
|
180
|
+
begin
|
181
|
+
connection.create_table(table_name, temporary: true)
|
182
|
+
yield table_name
|
183
|
+
ensure
|
184
|
+
connection.drop_table(table_name)
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
def insert_into_operations(relation, sql)
|
191
|
+
connection.execute(SQL.insert_into_operations(relation.table.name, sql))
|
192
|
+
end
|
193
|
+
|
193
194
|
def add_operation(operation)
|
194
195
|
op = [
|
195
196
|
operation.operation.to_sym,
|
@@ -223,11 +224,144 @@ module DirectoryDiff
|
|
223
224
|
(assistants || '').split(',').each do |assistant_email|
|
224
225
|
next if tail.include?(assistant_email)
|
225
226
|
assistant_operation = operations.find { |_, _, email| email == assistant_email }
|
226
|
-
process_operation(
|
227
|
+
process_operation(
|
228
|
+
assistant_operation,
|
229
|
+
operations,
|
230
|
+
prioritized_operations,
|
231
|
+
tail.add(email)
|
232
|
+
)
|
227
233
|
end
|
228
234
|
|
229
235
|
prioritized_operations << operation
|
230
236
|
end
|
237
|
+
|
238
|
+
def connection
|
239
|
+
current_directory.connection
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
module SQL
|
244
|
+
# Cleanup some bad records
|
245
|
+
# 1. Assistant email is set on an employee, but no assistant record
|
246
|
+
# in csv. Remove the assistant email.
|
247
|
+
# 2. Assistant email is employee's own email. Remove the assistant
|
248
|
+
# email.
|
249
|
+
# TODO move this into the temp table creation above
|
250
|
+
# https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
|
251
|
+
def self.cleanup_sql(table_name)
|
252
|
+
<<-SQL
|
253
|
+
with
|
254
|
+
unnested_assistants as
|
255
|
+
(
|
256
|
+
select
|
257
|
+
email,
|
258
|
+
name,
|
259
|
+
unnest(assistants) assistant
|
260
|
+
from #{table_name}
|
261
|
+
),
|
262
|
+
own_email_removed as
|
263
|
+
(
|
264
|
+
select
|
265
|
+
a.*
|
266
|
+
from unnested_assistants a
|
267
|
+
where a.email != a.assistant
|
268
|
+
),
|
269
|
+
missing_assistants_removed as
|
270
|
+
(
|
271
|
+
select
|
272
|
+
a.*
|
273
|
+
from own_email_removed a
|
274
|
+
left outer join #{table_name} b on a.assistant = b.email
|
275
|
+
where
|
276
|
+
(a.assistant is null and b.email is null)
|
277
|
+
or (a.assistant is not null and b.email is not null)
|
278
|
+
),
|
279
|
+
only_valid_assistants as
|
280
|
+
(
|
281
|
+
select
|
282
|
+
a.email,
|
283
|
+
a.name,
|
284
|
+
array_remove(
|
285
|
+
array_agg(b.assistant),
|
286
|
+
null
|
287
|
+
) assistants
|
288
|
+
from #{table_name} a
|
289
|
+
left outer join missing_assistants_removed b
|
290
|
+
using (email)
|
291
|
+
group by
|
292
|
+
a.email, a.name
|
293
|
+
)
|
294
|
+
update #{table_name}
|
295
|
+
set assistants = only_valid_assistants.assistants
|
296
|
+
from only_valid_assistants
|
297
|
+
where #{table_name}.email = only_valid_assistants.email
|
298
|
+
SQL
|
299
|
+
end
|
300
|
+
|
301
|
+
# Remove dupe email rows, keeping the last one
|
302
|
+
def self.latest_unique_sql(table_name)
|
303
|
+
<<-SQL
|
304
|
+
SELECT
|
305
|
+
DISTINCT ON (lower(email)) name,
|
306
|
+
lower(email) email,
|
307
|
+
coalesce(phone_number, '') phone_number,
|
308
|
+
array_remove(
|
309
|
+
regexp_split_to_array(
|
310
|
+
coalesce(assistants, ''),
|
311
|
+
'\s*,\s*'
|
312
|
+
)::varchar[],
|
313
|
+
''
|
314
|
+
) assistants,
|
315
|
+
extra,
|
316
|
+
ROW_NUMBER () OVER ()
|
317
|
+
FROM
|
318
|
+
#{table_name}
|
319
|
+
ORDER BY
|
320
|
+
lower(email),
|
321
|
+
row_number desc
|
322
|
+
SQL
|
323
|
+
end
|
324
|
+
|
325
|
+
def self.current_directory_projection
|
326
|
+
<<-SQL
|
327
|
+
name,
|
328
|
+
lower(email) email,
|
329
|
+
coalesce(phone_number, '') phone_number,
|
330
|
+
array_remove(
|
331
|
+
regexp_split_to_array(
|
332
|
+
coalesce(assistants, ''),
|
333
|
+
'\s*,\s*'
|
334
|
+
)::varchar[],
|
335
|
+
''
|
336
|
+
) assistants
|
337
|
+
SQL
|
338
|
+
end
|
339
|
+
|
340
|
+
def self.insert_into_operations(table_name, sql)
|
341
|
+
<<-SQL
|
342
|
+
insert into #{table_name}(
|
343
|
+
operation,
|
344
|
+
row_number,
|
345
|
+
name,
|
346
|
+
email,
|
347
|
+
phone_number,
|
348
|
+
assistants,
|
349
|
+
extra
|
350
|
+
) #{sql}
|
351
|
+
SQL
|
352
|
+
end
|
353
|
+
|
354
|
+
def self.insert_into_temp_csv_table(table_name, values)
|
355
|
+
<<-SQL
|
356
|
+
insert into #{table_name}(
|
357
|
+
name,
|
358
|
+
email,
|
359
|
+
phone_number,
|
360
|
+
assistants,
|
361
|
+
extra
|
362
|
+
) values #{values.join(", ")}
|
363
|
+
SQL
|
364
|
+
end
|
231
365
|
end
|
232
366
|
end
|
233
367
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: directory_diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.8
|
4
|
+
version: 0.4.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kamal Mahyuddin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -153,7 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
153
153
|
version: '0'
|
154
154
|
requirements: []
|
155
155
|
rubyforge_project:
|
156
|
-
rubygems_version: 2.
|
156
|
+
rubygems_version: 2.5.2
|
157
157
|
signing_key:
|
158
158
|
specification_version: 4
|
159
159
|
summary: Envoy employee directory diffing.
|