directory_diff 0.4.8 → 0.4.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: eb4b21702f675f87108eed7a2c64821b62326e065cdc297ab11f55da88888133
4
- data.tar.gz: de242958fd80fe890f12af3efc5ecc4c6cb8e91e4f77990a94ed6731a794d570
2
+ SHA1:
3
+ metadata.gz: fa326af22cd07dc2ab6819d6f1221b030c98a849
4
+ data.tar.gz: f46469c536c45a3cbcea3b26932bcaac0a0280e9
5
5
  SHA512:
6
- metadata.gz: 6f7f0c92e3119ec10a1976d9c2bb0aed90386917b2b654698a53a7344d0e401ac9fad194acb70b1b4814522583a8ec9aeba5b7265d1e89e9ac76ed391e32ca76
7
- data.tar.gz: 685a3747cc7b22702f0f595da3e533cf71e9a1726dcc6a0a5145144b4077db6289767d124531e53707f24f2973703eabc66280a8fbfe6df9d6cfa95c2d6311ce
6
+ metadata.gz: 0b539659c810d9bd19472074663bfda1c1ee2937349eafb44fca14503251860caa80743ddd3a3d11e9e902af4668f02380603f88990bc3ca3e53854f32f5077f
7
+ data.tar.gz: dd56c2f064b7a4e2e708c59bbdbb962bd72483bf2dbe490bb703bedaa42b4bc4ed37a5a6fca2596a42e66e441ca7e9b8304cf40d6276b4aa475b2a88661b4549
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "activerecord_pg_stuff"
2
4
 
3
5
  Arel::Predications.module_eval do
@@ -23,44 +25,8 @@ module DirectoryDiff
23
25
  # @param new_directory a table containing only the new records to compare
24
26
  # against, most likely a temp table.
25
27
  def into(new_directory, options = {})
26
- projection = <<-SQL
27
- name,
28
- lower(email) email,
29
- coalesce(phone_number, '') phone_number,
30
- array_remove(
31
- regexp_split_to_array(
32
- coalesce(assistants, ''),
33
- '\s*,\s*'
34
- )::varchar[],
35
- ''
36
- ) assistants
37
- SQL
38
- current_directory.select(projection).temporary_table do |temp_current_directory|
39
- # Remove dupe email rows, keeping the last one
40
- latest_unique_sql = <<-SQL
41
- SELECT
42
- DISTINCT ON (lower(email)) name,
43
- lower(email) email,
44
- coalesce(phone_number, '') phone_number,
45
- array_remove(
46
- regexp_split_to_array(
47
- coalesce(assistants, ''),
48
- '\s*,\s*'
49
- )::varchar[],
50
- ''
51
- ) assistants,
52
- extra,
53
- ROW_NUMBER () OVER ()
54
- FROM
55
- #{new_directory.arel_table.name}
56
- ORDER BY
57
- lower(email),
58
- row_number desc
59
- SQL
60
-
61
- new_directory.select('*')
62
- .from(Arel.sql("(#{latest_unique_sql}) as t"))
63
- .order("row_number").temporary_table do |deduped_csv|
28
+ temp_table(current_directory_relation) do |temp_current_directory|
29
+ new_directory_temp_table(new_directory) do |deduped_csv|
64
30
  # Get Arel tables for referencing fields, table names
65
31
  employees = temp_current_directory.table
66
32
  csv = deduped_csv.table
@@ -68,8 +34,12 @@ module DirectoryDiff
68
34
  # Reusable Arel predicates
69
35
  csv_employee_join = csv[:email].eq(employees[:email])
70
36
  attributes_unchanged = employees[:name].eq(csv[:name])
71
- .and(employees[:phone_number].eq(csv[:phone_number]))
72
- .and(employees[:assistants].contains(csv[:assistants]))
37
+ .and(
38
+ employees[:phone_number].eq(csv[:phone_number])
39
+ )
40
+ .and(
41
+ employees[:assistants].contains(csv[:assistants])
42
+ )
73
43
 
74
44
  # Creates joins between the temp table and the csv table and
75
45
  # vice versa
@@ -87,98 +57,44 @@ module DirectoryDiff
87
57
  # left
88
58
  employee_records = temp_current_directory.joins(employees_to_csv)
89
59
 
90
- # Cleanup some bad records
91
- # 1. Assistant email is set on an employee, but no assistant record
92
- # in csv. Remove the assistant email.
93
- # 2. Assistant email is employee's own email. Remove the assistant
94
- # email.
95
- # TODO move this into the temp table creation above
96
- # https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
97
- cleanup_sql = <<-SQL
98
- with
99
- unnested_assistants as
100
- (
101
- select
102
- email,
103
- name,
104
- unnest(assistants) assistant
105
- from #{csv.name}
106
- ),
107
- own_email_removed as
108
- (
109
- select
110
- a.*
111
- from unnested_assistants a
112
- where a.email != a.assistant
113
- ),
114
- missing_assistants_removed as
115
- (
116
- select
117
- a.*
118
- from own_email_removed a
119
- left outer join #{csv.name} b on a.assistant = b.email
120
- where
121
- (a.assistant is null and b.email is null)
122
- or (a.assistant is not null and b.email is not null)
123
- ),
124
- only_valid_assistants as
125
- (
126
- select
127
- a.email,
128
- a.name,
129
- array_remove(
130
- array_agg(b.assistant),
131
- null
132
- ) assistants
133
- from #{csv.name} a
134
- left outer join missing_assistants_removed b
135
- using (email)
136
- group by
137
- a.email, a.name
138
- )
139
- update #{csv.name}
140
- set assistants = only_valid_assistants.assistants
141
- from only_valid_assistants
142
- where #{csv.name}.email = only_valid_assistants.email
143
- SQL
144
- deduped_csv.connection.execute(cleanup_sql)
60
+ connection.execute(SQL.cleanup_sql(csv.name))
61
+
62
+ csv_fields = [:name, :email, :phone_number, :assistants, :extra]
145
63
 
146
64
  # new records are records in the new directory that don't exist in
147
65
  # the current directory
148
- new_records = csv_records.select("'insert'::varchar operation, row_number")
149
- .select(:name, :email, :phone_number, :assistants, :extra)
66
+ new_records = csv_records
67
+ .select("'insert'::varchar operation, row_number")
68
+ .select(csv_fields)
150
69
  .where({ employees.name => { email: nil } })
151
70
  # deleted records are records in the current directory that don't
152
71
  # exist in the new directory
153
- deleted_records = employee_records.select("'delete'::varchar operation, row_number")
154
- .select(:name, :email, :phone_number, :assistants, :extra)
72
+ deleted_records = employee_records
73
+ .select("'delete'::varchar operation, row_number")
74
+ .select(csv_fields)
155
75
  .where({ csv.name => { email: nil } })
156
76
  # changed records are records that have difference in name, phone
157
77
  # number and/or assistants
158
- changed_records = csv_records.select("'update'::varchar operation, row_number")
159
- .select(:name, :email, :phone_number, :assistants, :extra)
78
+ changed_records = csv_records
79
+ .select("'update'::varchar operation, row_number")
80
+ .select(csv_fields)
160
81
  .where.not(attributes_unchanged)
161
82
  # unchanged records are records that are exactly the same in both
162
83
  # directories (without considering the extra field)
163
- unchanged_records = csv_records.select("'noop'::varchar operation, row_number")
164
- .select(:name, :email, :phone_number, :assistants, :extra)
84
+ unchanged_records = csv_records
85
+ .select("'noop'::varchar operation, row_number")
86
+ .select(csv_fields)
165
87
  .where(attributes_unchanged)
166
88
 
167
89
  # create temp table for holding operations
168
- operations_temp_table = "temporary_operations_#{self.object_id}"
169
- deduped_csv.connection.with_temporary_table operations_temp_table, new_records.to_sql do |name|
170
- dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new csv_records.klass, name
171
- rel = ActiveRecord::Relation.new dec, table: dec.arel_table
172
- rel.readonly!
173
-
174
- rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{deleted_records.to_sql}")
175
- rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{changed_records.to_sql}")
176
-
90
+ temp_table(new_records.to_sql) do |operations|
91
+ insert_into_operations(operations, deleted_records.to_sql)
92
+ insert_into_operations(operations, changed_records.to_sql)
177
93
  if options[:skip_noop] != true
178
- rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{unchanged_records.to_sql}")
94
+ insert_into_operations(operations, unchanged_records.to_sql)
179
95
  end
180
96
 
181
- rel.order(:row_number).each do |operation|
97
+ operations.order(:row_number).each do |operation|
182
98
  add_operation(operation)
183
99
  end
184
100
  end
@@ -190,6 +106,91 @@ module DirectoryDiff
190
106
 
191
107
  private
192
108
 
109
+ def current_directory_relation(&block)
110
+ current_directory.select(SQL.current_directory_projection)
111
+ end
112
+
113
+ def new_directory_temp_table(source, &block)
114
+ convert_to_relation(source) do |relation|
115
+ relation = relation.select("*")
116
+ .from(Arel.sql("(#{SQL.latest_unique_sql(relation.table.name)}) as t"))
117
+ .order("row_number")
118
+
119
+ temp_table(relation, &block)
120
+ end
121
+ end
122
+
123
+ def convert_to_relation(source, &block)
124
+ return block.call(source) if source.is_a?(ActiveRecord::Relation)
125
+
126
+ temp_table do |relation|
127
+ table_name = relation.table.name
128
+ connection.change_table(table_name) do |t|
129
+ t.column :name, :string
130
+ t.column :email, :string
131
+ t.column :phone_number, :string
132
+ t.column :assistants, :string
133
+ t.column :extra, :string
134
+ end
135
+ insert_into_csv_table(table_name, source)
136
+ block.call(relation)
137
+ end
138
+ end
139
+
140
+ # TODO chunk this into batch sizes
141
+ def insert_into_csv_table(table_name, records)
142
+ return if records.empty?
143
+
144
+ values = records.map do |row|
145
+ (name, email, phone_number, assistants, extra) = row
146
+ columns = [
147
+ connection.quote(name),
148
+ connection.quote(email),
149
+ connection.quote(phone_number),
150
+ connection.quote(assistants),
151
+ connection.quote(extra)
152
+ ]
153
+ "(#{columns.join(", ")})"
154
+ end
155
+
156
+ connection.execute(SQL.insert_into_temp_csv_table(table_name, values))
157
+ end
158
+
159
+ def temp_table(source = nil, &block)
160
+ return source.temporary_table(&block) if source.is_a?(ActiveRecord::Relation)
161
+
162
+ create_temp_table(source) do |name|
163
+ klass = current_directory.klass
164
+ dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new(klass, name)
165
+ rel = ActiveRecord::Relation.new(dec, table: dec.arel_table)
166
+ rel.readonly!
167
+ block.call(rel)
168
+ end
169
+ end
170
+
171
+ def create_temp_table(initial_sql=nil)
172
+ table_name = "temporary_#{(Time.now.to_f * 1000).to_i}"
173
+
174
+ if initial_sql
175
+ connection.with_temporary_table(table_name, initial_sql) do |name|
176
+ yield name
177
+ end
178
+ else
179
+ connection.transaction do
180
+ begin
181
+ connection.create_table(table_name, temporary: true)
182
+ yield table_name
183
+ ensure
184
+ connection.drop_table(table_name)
185
+ end
186
+ end
187
+ end
188
+ end
189
+
190
+ def insert_into_operations(relation, sql)
191
+ connection.execute(SQL.insert_into_operations(relation.table.name, sql))
192
+ end
193
+
193
194
  def add_operation(operation)
194
195
  op = [
195
196
  operation.operation.to_sym,
@@ -223,11 +224,144 @@ module DirectoryDiff
223
224
  (assistants || '').split(',').each do |assistant_email|
224
225
  next if tail.include?(assistant_email)
225
226
  assistant_operation = operations.find { |_, _, email| email == assistant_email }
226
- process_operation(assistant_operation, operations, prioritized_operations, tail.add(email))
227
+ process_operation(
228
+ assistant_operation,
229
+ operations,
230
+ prioritized_operations,
231
+ tail.add(email)
232
+ )
227
233
  end
228
234
 
229
235
  prioritized_operations << operation
230
236
  end
237
+
238
+ def connection
239
+ current_directory.connection
240
+ end
241
+ end
242
+
243
+ module SQL
244
+ # Cleanup some bad records
245
+ # 1. Assistant email is set on an employee, but no assistant record
246
+ # in csv. Remove the assistant email.
247
+ # 2. Assistant email is employee's own email. Remove the assistant
248
+ # email.
249
+ # TODO move this into the temp table creation above
250
+ # https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
251
+ def self.cleanup_sql(table_name)
252
+ <<-SQL
253
+ with
254
+ unnested_assistants as
255
+ (
256
+ select
257
+ email,
258
+ name,
259
+ unnest(assistants) assistant
260
+ from #{table_name}
261
+ ),
262
+ own_email_removed as
263
+ (
264
+ select
265
+ a.*
266
+ from unnested_assistants a
267
+ where a.email != a.assistant
268
+ ),
269
+ missing_assistants_removed as
270
+ (
271
+ select
272
+ a.*
273
+ from own_email_removed a
274
+ left outer join #{table_name} b on a.assistant = b.email
275
+ where
276
+ (a.assistant is null and b.email is null)
277
+ or (a.assistant is not null and b.email is not null)
278
+ ),
279
+ only_valid_assistants as
280
+ (
281
+ select
282
+ a.email,
283
+ a.name,
284
+ array_remove(
285
+ array_agg(b.assistant),
286
+ null
287
+ ) assistants
288
+ from #{table_name} a
289
+ left outer join missing_assistants_removed b
290
+ using (email)
291
+ group by
292
+ a.email, a.name
293
+ )
294
+ update #{table_name}
295
+ set assistants = only_valid_assistants.assistants
296
+ from only_valid_assistants
297
+ where #{table_name}.email = only_valid_assistants.email
298
+ SQL
299
+ end
300
+
301
+ # Remove dupe email rows, keeping the last one
302
+ def self.latest_unique_sql(table_name)
303
+ <<-SQL
304
+ SELECT
305
+ DISTINCT ON (lower(email)) name,
306
+ lower(email) email,
307
+ coalesce(phone_number, '') phone_number,
308
+ array_remove(
309
+ regexp_split_to_array(
310
+ coalesce(assistants, ''),
311
+ '\s*,\s*'
312
+ )::varchar[],
313
+ ''
314
+ ) assistants,
315
+ extra,
316
+ ROW_NUMBER () OVER ()
317
+ FROM
318
+ #{table_name}
319
+ ORDER BY
320
+ lower(email),
321
+ row_number desc
322
+ SQL
323
+ end
324
+
325
+ def self.current_directory_projection
326
+ <<-SQL
327
+ name,
328
+ lower(email) email,
329
+ coalesce(phone_number, '') phone_number,
330
+ array_remove(
331
+ regexp_split_to_array(
332
+ coalesce(assistants, ''),
333
+ '\s*,\s*'
334
+ )::varchar[],
335
+ ''
336
+ ) assistants
337
+ SQL
338
+ end
339
+
340
+ def self.insert_into_operations(table_name, sql)
341
+ <<-SQL
342
+ insert into #{table_name}(
343
+ operation,
344
+ row_number,
345
+ name,
346
+ email,
347
+ phone_number,
348
+ assistants,
349
+ extra
350
+ ) #{sql}
351
+ SQL
352
+ end
353
+
354
+ def self.insert_into_temp_csv_table(table_name, values)
355
+ <<-SQL
356
+ insert into #{table_name}(
357
+ name,
358
+ email,
359
+ phone_number,
360
+ assistants,
361
+ extra
362
+ ) values #{values.join(", ")}
363
+ SQL
364
+ end
231
365
  end
232
366
  end
233
367
  end
@@ -1,3 +1,3 @@
1
1
  module DirectoryDiff
2
- VERSION = "0.4.8"
2
+ VERSION = "0.4.9"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: directory_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.8
4
+ version: 0.4.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kamal Mahyuddin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-10-05 00:00:00.000000000 Z
11
+ date: 2018-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -153,7 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
153
153
  version: '0'
154
154
  requirements: []
155
155
  rubyforge_project:
156
- rubygems_version: 2.7.7
156
+ rubygems_version: 2.5.2
157
157
  signing_key:
158
158
  specification_version: 4
159
159
  summary: Envoy employee directory diffing.