directory_diff 0.4.8 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: eb4b21702f675f87108eed7a2c64821b62326e065cdc297ab11f55da88888133
4
- data.tar.gz: de242958fd80fe890f12af3efc5ecc4c6cb8e91e4f77990a94ed6731a794d570
2
+ SHA1:
3
+ metadata.gz: fa326af22cd07dc2ab6819d6f1221b030c98a849
4
+ data.tar.gz: f46469c536c45a3cbcea3b26932bcaac0a0280e9
5
5
  SHA512:
6
- metadata.gz: 6f7f0c92e3119ec10a1976d9c2bb0aed90386917b2b654698a53a7344d0e401ac9fad194acb70b1b4814522583a8ec9aeba5b7265d1e89e9ac76ed391e32ca76
7
- data.tar.gz: 685a3747cc7b22702f0f595da3e533cf71e9a1726dcc6a0a5145144b4077db6289767d124531e53707f24f2973703eabc66280a8fbfe6df9d6cfa95c2d6311ce
6
+ metadata.gz: 0b539659c810d9bd19472074663bfda1c1ee2937349eafb44fca14503251860caa80743ddd3a3d11e9e902af4668f02380603f88990bc3ca3e53854f32f5077f
7
+ data.tar.gz: dd56c2f064b7a4e2e708c59bbdbb962bd72483bf2dbe490bb703bedaa42b4bc4ed37a5a6fca2596a42e66e441ca7e9b8304cf40d6276b4aa475b2a88661b4549
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "activerecord_pg_stuff"
2
4
 
3
5
  Arel::Predications.module_eval do
@@ -23,44 +25,8 @@ module DirectoryDiff
23
25
  # @param new_directory a table containing only the new records to compare
24
26
  # against, most likely a temp table.
25
27
  def into(new_directory, options = {})
26
- projection = <<-SQL
27
- name,
28
- lower(email) email,
29
- coalesce(phone_number, '') phone_number,
30
- array_remove(
31
- regexp_split_to_array(
32
- coalesce(assistants, ''),
33
- '\s*,\s*'
34
- )::varchar[],
35
- ''
36
- ) assistants
37
- SQL
38
- current_directory.select(projection).temporary_table do |temp_current_directory|
39
- # Remove dupe email rows, keeping the last one
40
- latest_unique_sql = <<-SQL
41
- SELECT
42
- DISTINCT ON (lower(email)) name,
43
- lower(email) email,
44
- coalesce(phone_number, '') phone_number,
45
- array_remove(
46
- regexp_split_to_array(
47
- coalesce(assistants, ''),
48
- '\s*,\s*'
49
- )::varchar[],
50
- ''
51
- ) assistants,
52
- extra,
53
- ROW_NUMBER () OVER ()
54
- FROM
55
- #{new_directory.arel_table.name}
56
- ORDER BY
57
- lower(email),
58
- row_number desc
59
- SQL
60
-
61
- new_directory.select('*')
62
- .from(Arel.sql("(#{latest_unique_sql}) as t"))
63
- .order("row_number").temporary_table do |deduped_csv|
28
+ temp_table(current_directory_relation) do |temp_current_directory|
29
+ new_directory_temp_table(new_directory) do |deduped_csv|
64
30
  # Get Arel tables for referencing fields, table names
65
31
  employees = temp_current_directory.table
66
32
  csv = deduped_csv.table
@@ -68,8 +34,12 @@ module DirectoryDiff
68
34
  # Reusable Arel predicates
69
35
  csv_employee_join = csv[:email].eq(employees[:email])
70
36
  attributes_unchanged = employees[:name].eq(csv[:name])
71
- .and(employees[:phone_number].eq(csv[:phone_number]))
72
- .and(employees[:assistants].contains(csv[:assistants]))
37
+ .and(
38
+ employees[:phone_number].eq(csv[:phone_number])
39
+ )
40
+ .and(
41
+ employees[:assistants].contains(csv[:assistants])
42
+ )
73
43
 
74
44
  # Creates joins between the temp table and the csv table and
75
45
  # vice versa
@@ -87,98 +57,44 @@ module DirectoryDiff
87
57
  # left
88
58
  employee_records = temp_current_directory.joins(employees_to_csv)
89
59
 
90
- # Cleanup some bad records
91
- # 1. Assistant email is set on an employee, but no assistant record
92
- # in csv. Remove the assistant email.
93
- # 2. Assistant email is employee's own email. Remove the assistant
94
- # email.
95
- # TODO move this into the temp table creation above
96
- # https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
97
- cleanup_sql = <<-SQL
98
- with
99
- unnested_assistants as
100
- (
101
- select
102
- email,
103
- name,
104
- unnest(assistants) assistant
105
- from #{csv.name}
106
- ),
107
- own_email_removed as
108
- (
109
- select
110
- a.*
111
- from unnested_assistants a
112
- where a.email != a.assistant
113
- ),
114
- missing_assistants_removed as
115
- (
116
- select
117
- a.*
118
- from own_email_removed a
119
- left outer join #{csv.name} b on a.assistant = b.email
120
- where
121
- (a.assistant is null and b.email is null)
122
- or (a.assistant is not null and b.email is not null)
123
- ),
124
- only_valid_assistants as
125
- (
126
- select
127
- a.email,
128
- a.name,
129
- array_remove(
130
- array_agg(b.assistant),
131
- null
132
- ) assistants
133
- from #{csv.name} a
134
- left outer join missing_assistants_removed b
135
- using (email)
136
- group by
137
- a.email, a.name
138
- )
139
- update #{csv.name}
140
- set assistants = only_valid_assistants.assistants
141
- from only_valid_assistants
142
- where #{csv.name}.email = only_valid_assistants.email
143
- SQL
144
- deduped_csv.connection.execute(cleanup_sql)
60
+ connection.execute(SQL.cleanup_sql(csv.name))
61
+
62
+ csv_fields = [:name, :email, :phone_number, :assistants, :extra]
145
63
 
146
64
  # new records are records in the new directory that don't exist in
147
65
  # the current directory
148
- new_records = csv_records.select("'insert'::varchar operation, row_number")
149
- .select(:name, :email, :phone_number, :assistants, :extra)
66
+ new_records = csv_records
67
+ .select("'insert'::varchar operation, row_number")
68
+ .select(csv_fields)
150
69
  .where({ employees.name => { email: nil } })
151
70
  # deleted records are records in the current directory that don't
152
71
  # exist in the new directory
153
- deleted_records = employee_records.select("'delete'::varchar operation, row_number")
154
- .select(:name, :email, :phone_number, :assistants, :extra)
72
+ deleted_records = employee_records
73
+ .select("'delete'::varchar operation, row_number")
74
+ .select(csv_fields)
155
75
  .where({ csv.name => { email: nil } })
156
76
  # changed records are records that have difference in name, phone
157
77
  # number and/or assistants
158
- changed_records = csv_records.select("'update'::varchar operation, row_number")
159
- .select(:name, :email, :phone_number, :assistants, :extra)
78
+ changed_records = csv_records
79
+ .select("'update'::varchar operation, row_number")
80
+ .select(csv_fields)
160
81
  .where.not(attributes_unchanged)
161
82
  # unchanged records are records that are exactly the same in both
162
83
  # directories (without considering the extra field)
163
- unchanged_records = csv_records.select("'noop'::varchar operation, row_number")
164
- .select(:name, :email, :phone_number, :assistants, :extra)
84
+ unchanged_records = csv_records
85
+ .select("'noop'::varchar operation, row_number")
86
+ .select(csv_fields)
165
87
  .where(attributes_unchanged)
166
88
 
167
89
  # create temp table for holding operations
168
- operations_temp_table = "temporary_operations_#{self.object_id}"
169
- deduped_csv.connection.with_temporary_table operations_temp_table, new_records.to_sql do |name|
170
- dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new csv_records.klass, name
171
- rel = ActiveRecord::Relation.new dec, table: dec.arel_table
172
- rel.readonly!
173
-
174
- rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{deleted_records.to_sql}")
175
- rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{changed_records.to_sql}")
176
-
90
+ temp_table(new_records.to_sql) do |operations|
91
+ insert_into_operations(operations, deleted_records.to_sql)
92
+ insert_into_operations(operations, changed_records.to_sql)
177
93
  if options[:skip_noop] != true
178
- rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{unchanged_records.to_sql}")
94
+ insert_into_operations(operations, unchanged_records.to_sql)
179
95
  end
180
96
 
181
- rel.order(:row_number).each do |operation|
97
+ operations.order(:row_number).each do |operation|
182
98
  add_operation(operation)
183
99
  end
184
100
  end
@@ -190,6 +106,91 @@ module DirectoryDiff
190
106
 
191
107
  private
192
108
 
109
+ def current_directory_relation(&block)
110
+ current_directory.select(SQL.current_directory_projection)
111
+ end
112
+
113
+ def new_directory_temp_table(source, &block)
114
+ convert_to_relation(source) do |relation|
115
+ relation = relation.select("*")
116
+ .from(Arel.sql("(#{SQL.latest_unique_sql(relation.table.name)}) as t"))
117
+ .order("row_number")
118
+
119
+ temp_table(relation, &block)
120
+ end
121
+ end
122
+
123
+ def convert_to_relation(source, &block)
124
+ return block.call(source) if source.is_a?(ActiveRecord::Relation)
125
+
126
+ temp_table do |relation|
127
+ table_name = relation.table.name
128
+ connection.change_table(table_name) do |t|
129
+ t.column :name, :string
130
+ t.column :email, :string
131
+ t.column :phone_number, :string
132
+ t.column :assistants, :string
133
+ t.column :extra, :string
134
+ end
135
+ insert_into_csv_table(table_name, source)
136
+ block.call(relation)
137
+ end
138
+ end
139
+
140
+ # TODO chunk this into batch sizes
141
+ def insert_into_csv_table(table_name, records)
142
+ return if records.empty?
143
+
144
+ values = records.map do |row|
145
+ (name, email, phone_number, assistants, extra) = row
146
+ columns = [
147
+ connection.quote(name),
148
+ connection.quote(email),
149
+ connection.quote(phone_number),
150
+ connection.quote(assistants),
151
+ connection.quote(extra)
152
+ ]
153
+ "(#{columns.join(", ")})"
154
+ end
155
+
156
+ connection.execute(SQL.insert_into_temp_csv_table(table_name, values))
157
+ end
158
+
159
+ def temp_table(source = nil, &block)
160
+ return source.temporary_table(&block) if source.is_a?(ActiveRecord::Relation)
161
+
162
+ create_temp_table(source) do |name|
163
+ klass = current_directory.klass
164
+ dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new(klass, name)
165
+ rel = ActiveRecord::Relation.new(dec, table: dec.arel_table)
166
+ rel.readonly!
167
+ block.call(rel)
168
+ end
169
+ end
170
+
171
+ def create_temp_table(initial_sql=nil)
172
+ table_name = "temporary_#{(Time.now.to_f * 1000).to_i}"
173
+
174
+ if initial_sql
175
+ connection.with_temporary_table(table_name, initial_sql) do |name|
176
+ yield name
177
+ end
178
+ else
179
+ connection.transaction do
180
+ begin
181
+ connection.create_table(table_name, temporary: true)
182
+ yield table_name
183
+ ensure
184
+ connection.drop_table(table_name)
185
+ end
186
+ end
187
+ end
188
+ end
189
+
190
+ def insert_into_operations(relation, sql)
191
+ connection.execute(SQL.insert_into_operations(relation.table.name, sql))
192
+ end
193
+
193
194
  def add_operation(operation)
194
195
  op = [
195
196
  operation.operation.to_sym,
@@ -223,11 +224,144 @@ module DirectoryDiff
223
224
  (assistants || '').split(',').each do |assistant_email|
224
225
  next if tail.include?(assistant_email)
225
226
  assistant_operation = operations.find { |_, _, email| email == assistant_email }
226
- process_operation(assistant_operation, operations, prioritized_operations, tail.add(email))
227
+ process_operation(
228
+ assistant_operation,
229
+ operations,
230
+ prioritized_operations,
231
+ tail.add(email)
232
+ )
227
233
  end
228
234
 
229
235
  prioritized_operations << operation
230
236
  end
237
+
238
+ def connection
239
+ current_directory.connection
240
+ end
241
+ end
242
+
243
+ module SQL
244
+ # Cleanup some bad records
245
+ # 1. Assistant email is set on an employee, but no assistant record
246
+ # in csv. Remove the assistant email.
247
+ # 2. Assistant email is employee's own email. Remove the assistant
248
+ # email.
249
+ # TODO move this into the temp table creation above
250
+ # https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
251
+ def self.cleanup_sql(table_name)
252
+ <<-SQL
253
+ with
254
+ unnested_assistants as
255
+ (
256
+ select
257
+ email,
258
+ name,
259
+ unnest(assistants) assistant
260
+ from #{table_name}
261
+ ),
262
+ own_email_removed as
263
+ (
264
+ select
265
+ a.*
266
+ from unnested_assistants a
267
+ where a.email != a.assistant
268
+ ),
269
+ missing_assistants_removed as
270
+ (
271
+ select
272
+ a.*
273
+ from own_email_removed a
274
+ left outer join #{table_name} b on a.assistant = b.email
275
+ where
276
+ (a.assistant is null and b.email is null)
277
+ or (a.assistant is not null and b.email is not null)
278
+ ),
279
+ only_valid_assistants as
280
+ (
281
+ select
282
+ a.email,
283
+ a.name,
284
+ array_remove(
285
+ array_agg(b.assistant),
286
+ null
287
+ ) assistants
288
+ from #{table_name} a
289
+ left outer join missing_assistants_removed b
290
+ using (email)
291
+ group by
292
+ a.email, a.name
293
+ )
294
+ update #{table_name}
295
+ set assistants = only_valid_assistants.assistants
296
+ from only_valid_assistants
297
+ where #{table_name}.email = only_valid_assistants.email
298
+ SQL
299
+ end
300
+
301
+ # Remove dupe email rows, keeping the last one
302
+ def self.latest_unique_sql(table_name)
303
+ <<-SQL
304
+ SELECT
305
+ DISTINCT ON (lower(email)) name,
306
+ lower(email) email,
307
+ coalesce(phone_number, '') phone_number,
308
+ array_remove(
309
+ regexp_split_to_array(
310
+ coalesce(assistants, ''),
311
+ '\s*,\s*'
312
+ )::varchar[],
313
+ ''
314
+ ) assistants,
315
+ extra,
316
+ ROW_NUMBER () OVER ()
317
+ FROM
318
+ #{table_name}
319
+ ORDER BY
320
+ lower(email),
321
+ row_number desc
322
+ SQL
323
+ end
324
+
325
+ def self.current_directory_projection
326
+ <<-SQL
327
+ name,
328
+ lower(email) email,
329
+ coalesce(phone_number, '') phone_number,
330
+ array_remove(
331
+ regexp_split_to_array(
332
+ coalesce(assistants, ''),
333
+ '\s*,\s*'
334
+ )::varchar[],
335
+ ''
336
+ ) assistants
337
+ SQL
338
+ end
339
+
340
+ def self.insert_into_operations(table_name, sql)
341
+ <<-SQL
342
+ insert into #{table_name}(
343
+ operation,
344
+ row_number,
345
+ name,
346
+ email,
347
+ phone_number,
348
+ assistants,
349
+ extra
350
+ ) #{sql}
351
+ SQL
352
+ end
353
+
354
+ def self.insert_into_temp_csv_table(table_name, values)
355
+ <<-SQL
356
+ insert into #{table_name}(
357
+ name,
358
+ email,
359
+ phone_number,
360
+ assistants,
361
+ extra
362
+ ) values #{values.join(", ")}
363
+ SQL
364
+ end
231
365
  end
232
366
  end
233
367
  end
@@ -1,3 +1,3 @@
1
1
  module DirectoryDiff
2
- VERSION = "0.4.8"
2
+ VERSION = "0.4.9"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: directory_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.8
4
+ version: 0.4.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kamal Mahyuddin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-10-05 00:00:00.000000000 Z
11
+ date: 2018-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -153,7 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
153
153
  version: '0'
154
154
  requirements: []
155
155
  rubyforge_project:
156
- rubygems_version: 2.7.7
156
+ rubygems_version: 2.5.2
157
157
  signing_key:
158
158
  specification_version: 4
159
159
  summary: Envoy employee directory diffing.