directory_diff 0.4.8 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/directory_diff/transformer/temp_table.rb +249 -115
- data/lib/directory_diff/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fa326af22cd07dc2ab6819d6f1221b030c98a849
|
4
|
+
data.tar.gz: f46469c536c45a3cbcea3b26932bcaac0a0280e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b539659c810d9bd19472074663bfda1c1ee2937349eafb44fca14503251860caa80743ddd3a3d11e9e902af4668f02380603f88990bc3ca3e53854f32f5077f
|
7
|
+
data.tar.gz: dd56c2f064b7a4e2e708c59bbdbb962bd72483bf2dbe490bb703bedaa42b4bc4ed37a5a6fca2596a42e66e441ca7e9b8304cf40d6276b4aa475b2a88661b4549
|
data/lib/directory_diff/transformer/temp_table.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "activerecord_pg_stuff"
|
2
4
|
|
3
5
|
Arel::Predications.module_eval do
|
@@ -23,44 +25,8 @@ module DirectoryDiff
|
|
23
25
|
# @param new_directory a table containing only the new records to compare
|
24
26
|
# against, most likely a temp table.
|
25
27
|
def into(new_directory, options = {})
|
26
|
-
|
27
|
-
|
28
|
-
lower(email) email,
|
29
|
-
coalesce(phone_number, '') phone_number,
|
30
|
-
array_remove(
|
31
|
-
regexp_split_to_array(
|
32
|
-
coalesce(assistants, ''),
|
33
|
-
'\s*,\s*'
|
34
|
-
)::varchar[],
|
35
|
-
''
|
36
|
-
) assistants
|
37
|
-
SQL
|
38
|
-
current_directory.select(projection).temporary_table do |temp_current_directory|
|
39
|
-
# Remove dupe email rows, keeping the last one
|
40
|
-
latest_unique_sql = <<-SQL
|
41
|
-
SELECT
|
42
|
-
DISTINCT ON (lower(email)) name,
|
43
|
-
lower(email) email,
|
44
|
-
coalesce(phone_number, '') phone_number,
|
45
|
-
array_remove(
|
46
|
-
regexp_split_to_array(
|
47
|
-
coalesce(assistants, ''),
|
48
|
-
'\s*,\s*'
|
49
|
-
)::varchar[],
|
50
|
-
''
|
51
|
-
) assistants,
|
52
|
-
extra,
|
53
|
-
ROW_NUMBER () OVER ()
|
54
|
-
FROM
|
55
|
-
#{new_directory.arel_table.name}
|
56
|
-
ORDER BY
|
57
|
-
lower(email),
|
58
|
-
row_number desc
|
59
|
-
SQL
|
60
|
-
|
61
|
-
new_directory.select('*')
|
62
|
-
.from(Arel.sql("(#{latest_unique_sql}) as t"))
|
63
|
-
.order("row_number").temporary_table do |deduped_csv|
|
28
|
+
temp_table(current_directory_relation) do |temp_current_directory|
|
29
|
+
new_directory_temp_table(new_directory) do |deduped_csv|
|
64
30
|
# Get Arel tables for referencing fields, table names
|
65
31
|
employees = temp_current_directory.table
|
66
32
|
csv = deduped_csv.table
|
@@ -68,8 +34,12 @@ module DirectoryDiff
|
|
68
34
|
# Reusable Arel predicates
|
69
35
|
csv_employee_join = csv[:email].eq(employees[:email])
|
70
36
|
attributes_unchanged = employees[:name].eq(csv[:name])
|
71
|
-
.and(
|
72
|
-
|
37
|
+
.and(
|
38
|
+
employees[:phone_number].eq(csv[:phone_number])
|
39
|
+
)
|
40
|
+
.and(
|
41
|
+
employees[:assistants].contains(csv[:assistants])
|
42
|
+
)
|
73
43
|
|
74
44
|
# Creates joins between the temp table and the csv table and
|
75
45
|
# vice versa
|
@@ -87,98 +57,44 @@ module DirectoryDiff
|
|
87
57
|
# left
|
88
58
|
employee_records = temp_current_directory.joins(employees_to_csv)
|
89
59
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
# 2. Assistant email is employee's own email. Remove the assistant
|
94
|
-
# email.
|
95
|
-
# TODO move this into the temp table creation above
|
96
|
-
# https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
|
97
|
-
cleanup_sql = <<-SQL
|
98
|
-
with
|
99
|
-
unnested_assistants as
|
100
|
-
(
|
101
|
-
select
|
102
|
-
email,
|
103
|
-
name,
|
104
|
-
unnest(assistants) assistant
|
105
|
-
from #{csv.name}
|
106
|
-
),
|
107
|
-
own_email_removed as
|
108
|
-
(
|
109
|
-
select
|
110
|
-
a.*
|
111
|
-
from unnested_assistants a
|
112
|
-
where a.email != a.assistant
|
113
|
-
),
|
114
|
-
missing_assistants_removed as
|
115
|
-
(
|
116
|
-
select
|
117
|
-
a.*
|
118
|
-
from own_email_removed a
|
119
|
-
left outer join #{csv.name} b on a.assistant = b.email
|
120
|
-
where
|
121
|
-
(a.assistant is null and b.email is null)
|
122
|
-
or (a.assistant is not null and b.email is not null)
|
123
|
-
),
|
124
|
-
only_valid_assistants as
|
125
|
-
(
|
126
|
-
select
|
127
|
-
a.email,
|
128
|
-
a.name,
|
129
|
-
array_remove(
|
130
|
-
array_agg(b.assistant),
|
131
|
-
null
|
132
|
-
) assistants
|
133
|
-
from #{csv.name} a
|
134
|
-
left outer join missing_assistants_removed b
|
135
|
-
using (email)
|
136
|
-
group by
|
137
|
-
a.email, a.name
|
138
|
-
)
|
139
|
-
update #{csv.name}
|
140
|
-
set assistants = only_valid_assistants.assistants
|
141
|
-
from only_valid_assistants
|
142
|
-
where #{csv.name}.email = only_valid_assistants.email
|
143
|
-
SQL
|
144
|
-
deduped_csv.connection.execute(cleanup_sql)
|
60
|
+
connection.execute(SQL.cleanup_sql(csv.name))
|
61
|
+
|
62
|
+
csv_fields = [:name, :email, :phone_number, :assistants, :extra]
|
145
63
|
|
146
64
|
# new records are records in the new directory that don't exist in
|
147
65
|
# the current directory
|
148
|
-
new_records = csv_records
|
149
|
-
.select(
|
66
|
+
new_records = csv_records
|
67
|
+
.select("'insert'::varchar operation, row_number")
|
68
|
+
.select(csv_fields)
|
150
69
|
.where({ employees.name => { email: nil } })
|
151
70
|
# deleted records are records in the current directory that don't
|
152
71
|
# exist in the new directory
|
153
|
-
deleted_records = employee_records
|
154
|
-
.select(
|
72
|
+
deleted_records = employee_records
|
73
|
+
.select("'delete'::varchar operation, row_number")
|
74
|
+
.select(csv_fields)
|
155
75
|
.where({ csv.name => { email: nil } })
|
156
76
|
# changed records are records that have difference in name, phone
|
157
77
|
# number and/or assistants
|
158
|
-
changed_records = csv_records
|
159
|
-
.select(
|
78
|
+
changed_records = csv_records
|
79
|
+
.select("'update'::varchar operation, row_number")
|
80
|
+
.select(csv_fields)
|
160
81
|
.where.not(attributes_unchanged)
|
161
82
|
# unchanged records are records that are exactly the same in both
|
162
83
|
# directories (without considering the extra field)
|
163
|
-
unchanged_records = csv_records
|
164
|
-
.select(
|
84
|
+
unchanged_records = csv_records
|
85
|
+
.select("'noop'::varchar operation, row_number")
|
86
|
+
.select(csv_fields)
|
165
87
|
.where(attributes_unchanged)
|
166
88
|
|
167
89
|
# create temp table for holding operations
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
rel = ActiveRecord::Relation.new dec, table: dec.arel_table
|
172
|
-
rel.readonly!
|
173
|
-
|
174
|
-
rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{deleted_records.to_sql}")
|
175
|
-
rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{changed_records.to_sql}")
|
176
|
-
|
90
|
+
temp_table(new_records.to_sql) do |operations|
|
91
|
+
insert_into_operations(operations, deleted_records.to_sql)
|
92
|
+
insert_into_operations(operations, changed_records.to_sql)
|
177
93
|
if options[:skip_noop] != true
|
178
|
-
|
94
|
+
insert_into_operations(operations, unchanged_records.to_sql)
|
179
95
|
end
|
180
96
|
|
181
|
-
|
97
|
+
operations.order(:row_number).each do |operation|
|
182
98
|
add_operation(operation)
|
183
99
|
end
|
184
100
|
end
|
@@ -190,6 +106,91 @@ module DirectoryDiff
|
|
190
106
|
|
191
107
|
private
|
192
108
|
|
109
|
+
def current_directory_relation(&block)
|
110
|
+
current_directory.select(SQL.current_directory_projection)
|
111
|
+
end
|
112
|
+
|
113
|
+
def new_directory_temp_table(source, &block)
|
114
|
+
convert_to_relation(source) do |relation|
|
115
|
+
relation = relation.select("*")
|
116
|
+
.from(Arel.sql("(#{SQL.latest_unique_sql(relation.table.name)}) as t"))
|
117
|
+
.order("row_number")
|
118
|
+
|
119
|
+
temp_table(relation, &block)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def convert_to_relation(source, &block)
|
124
|
+
return block.call(source) if source.is_a?(ActiveRecord::Relation)
|
125
|
+
|
126
|
+
temp_table do |relation|
|
127
|
+
table_name = relation.table.name
|
128
|
+
connection.change_table(table_name) do |t|
|
129
|
+
t.column :name, :string
|
130
|
+
t.column :email, :string
|
131
|
+
t.column :phone_number, :string
|
132
|
+
t.column :assistants, :string
|
133
|
+
t.column :extra, :string
|
134
|
+
end
|
135
|
+
insert_into_csv_table(table_name, source)
|
136
|
+
block.call(relation)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
# TODO chunk this into batch sizes
|
141
|
+
def insert_into_csv_table(table_name, records)
|
142
|
+
return if records.empty?
|
143
|
+
|
144
|
+
values = records.map do |row|
|
145
|
+
(name, email, phone_number, assistants, extra) = row
|
146
|
+
columns = [
|
147
|
+
connection.quote(name),
|
148
|
+
connection.quote(email),
|
149
|
+
connection.quote(phone_number),
|
150
|
+
connection.quote(assistants),
|
151
|
+
connection.quote(extra)
|
152
|
+
]
|
153
|
+
"(#{columns.join(", ")})"
|
154
|
+
end
|
155
|
+
|
156
|
+
connection.execute(SQL.insert_into_temp_csv_table(table_name, values))
|
157
|
+
end
|
158
|
+
|
159
|
+
def temp_table(source = nil, &block)
|
160
|
+
return source.temporary_table(&block) if source.is_a?(ActiveRecord::Relation)
|
161
|
+
|
162
|
+
create_temp_table(source) do |name|
|
163
|
+
klass = current_directory.klass
|
164
|
+
dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new(klass, name)
|
165
|
+
rel = ActiveRecord::Relation.new(dec, table: dec.arel_table)
|
166
|
+
rel.readonly!
|
167
|
+
block.call(rel)
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def create_temp_table(initial_sql=nil)
|
172
|
+
table_name = "temporary_#{(Time.now.to_f * 1000).to_i}"
|
173
|
+
|
174
|
+
if initial_sql
|
175
|
+
connection.with_temporary_table(table_name, initial_sql) do |name|
|
176
|
+
yield name
|
177
|
+
end
|
178
|
+
else
|
179
|
+
connection.transaction do
|
180
|
+
begin
|
181
|
+
connection.create_table(table_name, temporary: true)
|
182
|
+
yield table_name
|
183
|
+
ensure
|
184
|
+
connection.drop_table(table_name)
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
def insert_into_operations(relation, sql)
|
191
|
+
connection.execute(SQL.insert_into_operations(relation.table.name, sql))
|
192
|
+
end
|
193
|
+
|
193
194
|
def add_operation(operation)
|
194
195
|
op = [
|
195
196
|
operation.operation.to_sym,
|
@@ -223,11 +224,144 @@ module DirectoryDiff
|
|
223
224
|
(assistants || '').split(',').each do |assistant_email|
|
224
225
|
next if tail.include?(assistant_email)
|
225
226
|
assistant_operation = operations.find { |_, _, email| email == assistant_email }
|
226
|
-
process_operation(
|
227
|
+
process_operation(
|
228
|
+
assistant_operation,
|
229
|
+
operations,
|
230
|
+
prioritized_operations,
|
231
|
+
tail.add(email)
|
232
|
+
)
|
227
233
|
end
|
228
234
|
|
229
235
|
prioritized_operations << operation
|
230
236
|
end
|
237
|
+
|
238
|
+
def connection
|
239
|
+
current_directory.connection
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
module SQL
|
244
|
+
# Cleanup some bad records
|
245
|
+
# 1. Assistant email is set on an employee, but no assistant record
|
246
|
+
# in csv. Remove the assistant email.
|
247
|
+
# 2. Assistant email is employee's own email. Remove the assistant
|
248
|
+
# email.
|
249
|
+
# TODO move this into the temp table creation above
|
250
|
+
# https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
|
251
|
+
def self.cleanup_sql(table_name)
|
252
|
+
<<-SQL
|
253
|
+
with
|
254
|
+
unnested_assistants as
|
255
|
+
(
|
256
|
+
select
|
257
|
+
email,
|
258
|
+
name,
|
259
|
+
unnest(assistants) assistant
|
260
|
+
from #{table_name}
|
261
|
+
),
|
262
|
+
own_email_removed as
|
263
|
+
(
|
264
|
+
select
|
265
|
+
a.*
|
266
|
+
from unnested_assistants a
|
267
|
+
where a.email != a.assistant
|
268
|
+
),
|
269
|
+
missing_assistants_removed as
|
270
|
+
(
|
271
|
+
select
|
272
|
+
a.*
|
273
|
+
from own_email_removed a
|
274
|
+
left outer join #{table_name} b on a.assistant = b.email
|
275
|
+
where
|
276
|
+
(a.assistant is null and b.email is null)
|
277
|
+
or (a.assistant is not null and b.email is not null)
|
278
|
+
),
|
279
|
+
only_valid_assistants as
|
280
|
+
(
|
281
|
+
select
|
282
|
+
a.email,
|
283
|
+
a.name,
|
284
|
+
array_remove(
|
285
|
+
array_agg(b.assistant),
|
286
|
+
null
|
287
|
+
) assistants
|
288
|
+
from #{table_name} a
|
289
|
+
left outer join missing_assistants_removed b
|
290
|
+
using (email)
|
291
|
+
group by
|
292
|
+
a.email, a.name
|
293
|
+
)
|
294
|
+
update #{table_name}
|
295
|
+
set assistants = only_valid_assistants.assistants
|
296
|
+
from only_valid_assistants
|
297
|
+
where #{table_name}.email = only_valid_assistants.email
|
298
|
+
SQL
|
299
|
+
end
|
300
|
+
|
301
|
+
# Remove dupe email rows, keeping the last one
|
302
|
+
def self.latest_unique_sql(table_name)
|
303
|
+
<<-SQL
|
304
|
+
SELECT
|
305
|
+
DISTINCT ON (lower(email)) name,
|
306
|
+
lower(email) email,
|
307
|
+
coalesce(phone_number, '') phone_number,
|
308
|
+
array_remove(
|
309
|
+
regexp_split_to_array(
|
310
|
+
coalesce(assistants, ''),
|
311
|
+
'\s*,\s*'
|
312
|
+
)::varchar[],
|
313
|
+
''
|
314
|
+
) assistants,
|
315
|
+
extra,
|
316
|
+
ROW_NUMBER () OVER ()
|
317
|
+
FROM
|
318
|
+
#{table_name}
|
319
|
+
ORDER BY
|
320
|
+
lower(email),
|
321
|
+
row_number desc
|
322
|
+
SQL
|
323
|
+
end
|
324
|
+
|
325
|
+
def self.current_directory_projection
|
326
|
+
<<-SQL
|
327
|
+
name,
|
328
|
+
lower(email) email,
|
329
|
+
coalesce(phone_number, '') phone_number,
|
330
|
+
array_remove(
|
331
|
+
regexp_split_to_array(
|
332
|
+
coalesce(assistants, ''),
|
333
|
+
'\s*,\s*'
|
334
|
+
)::varchar[],
|
335
|
+
''
|
336
|
+
) assistants
|
337
|
+
SQL
|
338
|
+
end
|
339
|
+
|
340
|
+
def self.insert_into_operations(table_name, sql)
|
341
|
+
<<-SQL
|
342
|
+
insert into #{table_name}(
|
343
|
+
operation,
|
344
|
+
row_number,
|
345
|
+
name,
|
346
|
+
email,
|
347
|
+
phone_number,
|
348
|
+
assistants,
|
349
|
+
extra
|
350
|
+
) #{sql}
|
351
|
+
SQL
|
352
|
+
end
|
353
|
+
|
354
|
+
def self.insert_into_temp_csv_table(table_name, values)
|
355
|
+
<<-SQL
|
356
|
+
insert into #{table_name}(
|
357
|
+
name,
|
358
|
+
email,
|
359
|
+
phone_number,
|
360
|
+
assistants,
|
361
|
+
extra
|
362
|
+
) values #{values.join(", ")}
|
363
|
+
SQL
|
364
|
+
end
|
231
365
|
end
|
232
366
|
end
|
233
367
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: directory_diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.8
|
4
|
+
version: 0.4.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kamal Mahyuddin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -153,7 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
153
153
|
version: '0'
|
154
154
|
requirements: []
|
155
155
|
rubyforge_project:
|
156
|
-
rubygems_version: 2.
|
156
|
+
rubygems_version: 2.5.2
|
157
157
|
signing_key:
|
158
158
|
specification_version: 4
|
159
159
|
summary: Envoy employee directory diffing.
|