directory_diff 0.4.14 → 0.4.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c5895d2d77767028f8894d7b9148b487fc0f7a3d
4
- data.tar.gz: ce8f0d7b29b8028735c46959f88a39e628a786b6
2
+ SHA256:
3
+ metadata.gz: 2e2bf8a6d21cbf21189c07cbba63f6ba6011f722f432b1b9804e5b1abbb3683b
4
+ data.tar.gz: c2081478703a7d3843b2d8faf9db9487c6848c169350c60f6eea0168ff77720e
5
5
  SHA512:
6
- metadata.gz: 71c87ae049b8362daa88922991a8cec444713baa09f1de7a8ffd1a29aec2464e18828bde4d965871a457d95fba0e4d2845333f9c80f45d862331fd60032edf78
7
- data.tar.gz: d22c8496602c2ba38f8e48cda9738b679093d81fcb22fcbdadde91b9a01c689d08c0a6c597f2dab523fb1686edf1f05aceb2657728c7c0f26d8f171aa20a3ccf
6
+ metadata.gz: 284e7d9653a30f7029a402a352091fffc8aa82ac1b0dec0130b90f6c572c2f8936ce2ab46b1bdbbf190737a971935174c1c7c0bc26ef165bb955dc9a79d453af
7
+ data.tar.gz: 9296b7cac1a1100dbeef7d19bbee14cc8d22c6f5c250ecbb959b14a195157985bc4bb49c6269a27139bf206647c6b19bd2ac089fdd613bc5a45961a46c5f2a45
@@ -21,9 +21,8 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.add_dependency "activerecord", ">= 5.1"
23
23
  spec.add_dependency "pg", "~> 1.1.3"
24
- spec.add_dependency "activerecord_pg_stuff", "~> 0.2.1"
25
24
 
26
- spec.add_development_dependency "bundler", "~> 1.11"
25
+ spec.add_development_dependency "bundler", "~> 2"
27
26
  spec.add_development_dependency "rake", "~> 10.0"
28
27
  spec.add_development_dependency "rspec", "~> 3.0"
29
28
  spec.add_development_dependency "temping", "~> 3.10.0"
@@ -1,5 +1,4 @@
1
1
  require_relative "transformer/in_memory"
2
- require_relative "transformer/temp_table"
3
2
 
4
3
  module DirectoryDiff
5
4
  class Transform
@@ -10,21 +9,7 @@ module DirectoryDiff
10
9
  end
11
10
 
12
11
  def into(new_directory, options = {})
13
- processor_class = processor_for(options[:processor])
14
- processor_class.new(current_directory).into(new_directory, options)
15
- end
16
-
17
- private
18
-
19
- def processor_for(processor)
20
- case processor
21
- when nil, :in_memory
22
- Transformer::InMemory
23
- when :temp_table
24
- Transformer::TempTable
25
- else
26
- raise ArgumentError, "unsupported processor #{processor.inspect}"
27
- end
12
+ Transformer::InMemory.new(current_directory).into(new_directory, options)
28
13
  end
29
14
  end
30
15
  end
@@ -1,3 +1,3 @@
1
1
  module DirectoryDiff
2
- VERSION = "0.4.14"
2
+ VERSION = "0.4.15"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: directory_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.14
4
+ version: 0.4.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kamal Mahyuddin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-10-16 00:00:00.000000000 Z
11
+ date: 2021-10-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -38,34 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 1.1.3
41
- - !ruby/object:Gem::Dependency
42
- name: activerecord_pg_stuff
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: 0.2.1
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: 0.2.1
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: bundler
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
45
  - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: '1.11'
47
+ version: '2'
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
52
  - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '1.11'
54
+ version: '2'
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: rake
71
57
  requirement: !ruby/object:Gem::Requirement
@@ -149,7 +135,6 @@ files:
149
135
  - lib/directory_diff.rb
150
136
  - lib/directory_diff/transform.rb
151
137
  - lib/directory_diff/transformer/in_memory.rb
152
- - lib/directory_diff/transformer/temp_table.rb
153
138
  - lib/directory_diff/version.rb
154
139
  homepage: https://github.com/envoy/directory_diff
155
140
  licenses:
@@ -171,7 +156,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
156
  version: '0'
172
157
  requirements: []
173
158
  rubyforge_project:
174
- rubygems_version: 2.5.2
159
+ rubygems_version: 2.7.6
175
160
  signing_key:
176
161
  specification_version: 4
177
162
  summary: Envoy employee directory diffing.
@@ -1,385 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "activerecord_pg_stuff"
4
-
5
- module DirectoryDiff
6
- module Transformer
7
- class TempTable
8
- attr_reader :current_directory, :operations
9
-
10
- # @param current_directory a relation that filters out only the records
11
- # that represent the current directory. This is
12
- # most likely an Employee relation. This
13
- # relation will be pulled into a temporary table.
14
- def initialize(current_directory)
15
- @current_directory = current_directory
16
- @operations = []
17
- end
18
-
19
- # @param new_directory a table containing only the new records to compare
20
- # against, most likely a temp table.
21
- def into(new_directory, options = {})
22
- current_directory_temp_table do |temp_current_directory|
23
- new_directory_temp_table(new_directory) do |deduped_csv|
24
- # Get Arel tables for referencing fields, table names
25
- employees = temp_current_directory.table
26
- csv = deduped_csv.table
27
-
28
- # Reusable Arel predicates
29
- csv_employee_join = csv[:email].eq(employees[:email])
30
- attributes_unchanged = employees[:name].eq(csv[:name])
31
- .and(
32
- employees[:phone_number].eq(csv[:phone_number])
33
- .or(csv[:phone_number].eq(""))
34
- # ☝🏽 Comparing to an empty string because we cast
35
- # phone number to an empty string. The reason is
36
- # comparing NULL = NULL never yields true in SQL (the result is NULL)
37
- )
38
- .and(
39
- employees[:assistants].eq(csv[:assistants])
40
- .or(csv[:assistants].eq("{}"))
41
- )
42
-
43
- # Creates joins between the temp table and the csv table and
44
- # vice versa
45
- # Cribbed from https://gist.github.com/mildmojo/3724189
46
- csv_to_employees = csv.join(employees, Arel::Nodes::OuterJoin)
47
- .on(csv_employee_join)
48
- .join_sources
49
- employees_to_csv = employees.join(csv, Arel::Nodes::OuterJoin)
50
- .on(csv_employee_join)
51
- .join_sources
52
-
53
- # Representation of the joined csv-employees, with csv on the left
54
- csv_records = deduped_csv.joins(csv_to_employees).order('row_number asc')
55
- # Representation of the joined employees-csv, with employees on the
56
- # left
57
- employee_records = temp_current_directory.joins(employees_to_csv)
58
-
59
- connection.execute(SQL.cleanup_sql(csv.name))
60
-
61
- csv_fields = %I[name email phone_number assistants extra].map { |c| csv[c] }
62
- emp_fields = %I[name email phone_number assistants].map { |c| employees[c] }
63
-
64
- # new records are records in the new directory that don't exist in
65
- # the current directory
66
- new_records = csv_records
67
- .select("'insert'::varchar operation, row_number")
68
- .select(csv_fields)
69
- .where({ employees.name => { email: nil } })
70
- # deleted records are records in the current directory that don't
71
- # exist in the new directory
72
- deleted_records = employee_records
73
- .select("'delete'::varchar operation, row_number")
74
- .select(emp_fields)
75
- .select("null extra")
76
- .where({ csv.name => { email: nil } })
77
- # changed records are records that differ in name, phone
78
- # number and/or assistants
79
- changed_records = csv_records
80
- .select("'update'::varchar operation, row_number")
81
- .select(csv_fields)
82
- .where.not(attributes_unchanged)
83
- # unchanged records are records that are exactly the same in both
84
- # directories (without considering the extra field)
85
- unchanged_records = csv_records
86
- .select("'noop'::varchar operation, row_number")
87
- .select(csv_fields)
88
- .where(attributes_unchanged)
89
-
90
- # create temp table for holding operations
91
- temp_table(new_records.to_sql) do |operations|
92
- insert_into_operations(operations, deleted_records.to_sql)
93
- insert_into_operations(operations, changed_records.to_sql)
94
- if options[:skip_noop] != true
95
- insert_into_operations(operations, unchanged_records.to_sql)
96
- end
97
-
98
- operations.order(:row_number).each do |operation|
99
- add_operation(operation)
100
- end
101
- end
102
- end
103
- end
104
-
105
- prioritize_assistants(operations)
106
- end
107
-
108
- private
109
-
110
- def current_directory_temp_table(&block)
111
- # outer temp table is required so that the projection does not run into
112
- # ambiguous column issues
113
- temp_table(current_directory) do |rel|
114
- temp_table(rel.select(SQL.current_directory_projection), &block)
115
- end
116
- end
117
-
118
- def new_directory_temp_table(source, &block)
119
- convert_to_relation(source) do |relation|
120
- relation = relation.select("*")
121
- .from(Arel.sql("(#{SQL.latest_unique_sql(relation.table.name)}) as t"))
122
- .order("row_number")
123
-
124
- temp_table(relation, &block)
125
- end
126
- end
127
-
128
- def convert_to_relation(source, &block)
129
- return block.call(source) if source.is_a?(ActiveRecord::Relation)
130
-
131
- temp_table do |relation|
132
- table_name = relation.table.name
133
- connection.change_table(table_name) do |t|
134
- t.column :name, :string
135
- t.column :email, :string
136
- t.column :phone_number, :string
137
- t.column :assistants, :string
138
- t.column :extra, :string
139
- end
140
- insert_into_csv_table(table_name, source)
141
- block.call(relation)
142
- end
143
- end
144
-
145
- # TODO chunk this into batch sizes
146
- def insert_into_csv_table(table_name, records)
147
- return if records.empty?
148
-
149
- values = records.map do |row|
150
- (name, email, phone_number, assistants, extra) = row
151
- columns = [
152
- connection.quote(name),
153
- connection.quote(email),
154
- connection.quote(phone_number),
155
- connection.quote(assistants),
156
- connection.quote(extra)
157
- ]
158
- "(#{columns.join(", ")})"
159
- end
160
-
161
- connection.execute(SQL.insert_into_temp_csv_table(table_name, values))
162
- end
163
-
164
- def temp_table(source = nil, &block)
165
- return source.temporary_table(&block) if source.is_a?(ActiveRecord::Relation)
166
-
167
- create_temp_table(source) do |name|
168
- klass = current_directory.klass
169
- dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new(klass, name)
170
- if activerecord52?
171
- rel = ActiveRecord::Relation.new(dec)
172
- else
173
- rel = ActiveRecord::Relation.new(
174
- dec,
175
- dec.arel_table,
176
- dec.predicate_builder,
177
- {}
178
- )
179
- end
180
- rel.readonly!
181
- block.call(rel)
182
- end
183
- end
184
-
185
- def create_temp_table(initial_sql=nil)
186
- table_name = "temporary_#{(Time.now.to_f * 1000).to_i}"
187
-
188
- if initial_sql
189
- connection.with_temporary_table(table_name, initial_sql) do |name|
190
- yield name
191
- end
192
- else
193
- connection.transaction do
194
- begin
195
- connection.create_table(table_name, temporary: true)
196
- yield table_name
197
- ensure
198
- connection.drop_table(table_name)
199
- end
200
- end
201
- end
202
- end
203
-
204
- def insert_into_operations(relation, sql)
205
- connection.execute(SQL.insert_into_operations(relation.table.name, sql))
206
- end
207
-
208
- def add_operation(operation)
209
- op = [
210
- operation.operation.to_sym,
211
- operation.name,
212
- operation.email,
213
- operation.phone_number.presence,
214
- serialize_pg_array(operation.assistants)
215
- ]
216
- op << operation.extra unless operation[:extra].nil?
217
- operations << op
218
- end
219
-
220
- def serialize_pg_array(pg_array)
221
- return if pg_array.nil?
222
- pg_array = pg_array[1...-1] # remove the curly braces
223
- pg_array.presence
224
- end
225
-
226
- def prioritize_assistants(operations)
227
- prioritized_operations = []
228
- operations.each do |operation|
229
- process_operation(operation, operations, prioritized_operations, Set.new)
230
- end
231
- prioritized_operations
232
- end
233
-
234
- def process_operation(operation, operations, prioritized_operations, tail)
235
- (_, _, email, _, assistants) = operation
236
- return if prioritized_operations.find { |_, _, e| e == email }
237
-
238
- (assistants || '').split(',').each do |assistant_email|
239
- next if tail.include?(assistant_email)
240
- assistant_operation = operations.find { |_, _, email| email == assistant_email }
241
- process_operation(
242
- assistant_operation,
243
- operations,
244
- prioritized_operations,
245
- tail.add(email)
246
- )
247
- end
248
-
249
- prioritized_operations << operation
250
- end
251
-
252
- def connection
253
- current_directory.connection
254
- end
255
-
256
- def activerecord52?
257
- ActiveRecord.gem_version >= Gem::Version.new("5.2.x")
258
- end
259
- end
260
-
261
- module SQL
262
- # Cleanup some bad records
263
- # 1. Assistant email is set on an employee, but no assistant record
264
- # in csv. Remove the assistant email.
265
- # 2. Assistant email is employee's own email. Remove the assistant
266
- # email.
267
- # TODO move this into the temp table creation above
268
- # https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
269
- def self.cleanup_sql(table_name)
270
- <<-SQL
271
- with
272
- unnested_assistants as
273
- (
274
- select
275
- email,
276
- name,
277
- unnest(assistants) assistant
278
- from #{table_name}
279
- ),
280
- own_email_removed as
281
- (
282
- select
283
- a.*
284
- from unnested_assistants a
285
- where a.email != a.assistant
286
- ),
287
- missing_assistants_removed as
288
- (
289
- select
290
- a.*
291
- from own_email_removed a
292
- left outer join #{table_name} b on a.assistant = b.email
293
- where
294
- (a.assistant is null and b.email is null)
295
- or (a.assistant is not null and b.email is not null)
296
- ),
297
- only_valid_assistants as
298
- (
299
- select
300
- a.email,
301
- a.name,
302
- array_remove(
303
- array_agg(b.assistant),
304
- null
305
- ) assistants
306
- from #{table_name} a
307
- left outer join missing_assistants_removed b
308
- using (email)
309
- group by
310
- a.email, a.name
311
- )
312
- update #{table_name}
313
- set assistants = only_valid_assistants.assistants
314
- from only_valid_assistants
315
- where #{table_name}.email = only_valid_assistants.email
316
- SQL
317
- end
318
-
319
- # Remove dupe email rows, keeping the last one
320
- def self.latest_unique_sql(table_name)
321
- <<-SQL
322
- SELECT
323
- DISTINCT ON (lower(email)) name,
324
- lower(email) email,
325
- coalesce(phone_number, '') phone_number,
326
- array_remove(
327
- regexp_split_to_array(
328
- coalesce(assistants, ''),
329
- '\s*,\s*'
330
- )::varchar[],
331
- ''
332
- ) assistants,
333
- extra,
334
- ROW_NUMBER () OVER ()
335
- FROM
336
- #{table_name}
337
- ORDER BY
338
- lower(email),
339
- row_number desc
340
- SQL
341
- end
342
-
343
- def self.current_directory_projection
344
- <<-SQL
345
- name,
346
- lower(email) email,
347
- coalesce(phone_number, '') phone_number,
348
- array_remove(
349
- regexp_split_to_array(
350
- coalesce(assistants, ''),
351
- '\s*,\s*'
352
- )::varchar[],
353
- ''
354
- ) assistants
355
- SQL
356
- end
357
-
358
- def self.insert_into_operations(table_name, sql)
359
- <<-SQL
360
- insert into #{table_name}(
361
- operation,
362
- row_number,
363
- name,
364
- email,
365
- phone_number,
366
- assistants,
367
- extra
368
- ) #{sql}
369
- SQL
370
- end
371
-
372
- def self.insert_into_temp_csv_table(table_name, values)
373
- <<-SQL
374
- insert into #{table_name}(
375
- name,
376
- email,
377
- phone_number,
378
- assistants,
379
- extra
380
- ) values #{values.join(", ")}
381
- SQL
382
- end
383
- end
384
- end
385
- end