directory_diff 0.4.14 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c5895d2d77767028f8894d7b9148b487fc0f7a3d
4
- data.tar.gz: ce8f0d7b29b8028735c46959f88a39e628a786b6
2
+ SHA256:
3
+ metadata.gz: d8ee4b73f492c39e557743dfb1fe4c8b84cd10dc4807f11d4606dad2cecf3327
4
+ data.tar.gz: 918743f81b8d6bc18b5a7ba5d8722efc67a7af35fa718e7095b63b5ed6c91961
5
5
  SHA512:
6
- metadata.gz: 71c87ae049b8362daa88922991a8cec444713baa09f1de7a8ffd1a29aec2464e18828bde4d965871a457d95fba0e4d2845333f9c80f45d862331fd60032edf78
7
- data.tar.gz: d22c8496602c2ba38f8e48cda9738b679093d81fcb22fcbdadde91b9a01c689d08c0a6c597f2dab523fb1686edf1f05aceb2657728c7c0f26d8f171aa20a3ccf
6
+ metadata.gz: 4cf77d75a44f310640a5ce99e4b569a5a7e82e046647b6c70fabf7407c27e460dea47003d376935132bbabe2ca8b088dea9052c29c39f90b02e6dcd145d88714
7
+ data.tar.gz: 8590c90ab336c515bb9ab7633caf380f1ca9ed9dd7bf8ed30e8a72e85cc1c8bd857cd8ec6a9b1f2823129e5663242fccb7692e0171bb2abd29fcd30a58856abb
@@ -21,7 +21,6 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.add_dependency "activerecord", ">= 5.1"
23
23
  spec.add_dependency "pg", "~> 1.1.3"
24
- spec.add_dependency "activerecord_pg_stuff", "~> 0.2.1"
25
24
 
26
25
  spec.add_development_dependency "bundler", "~> 1.11"
27
26
  spec.add_development_dependency "rake", "~> 10.0"
@@ -1,5 +1,4 @@
1
1
  require_relative "transformer/in_memory"
2
- require_relative "transformer/temp_table"
3
2
 
4
3
  module DirectoryDiff
5
4
  class Transform
@@ -10,21 +9,7 @@ module DirectoryDiff
10
9
  end
11
10
 
12
11
  def into(new_directory, options = {})
13
- processor_class = processor_for(options[:processor])
14
- processor_class.new(current_directory).into(new_directory, options)
15
- end
16
-
17
- private
18
-
19
- def processor_for(processor)
20
- case processor
21
- when nil, :in_memory
22
- Transformer::InMemory
23
- when :temp_table
24
- Transformer::TempTable
25
- else
26
- raise ArgumentError, "unsupported processor #{processor.inspect}"
27
- end
12
+ Transformer::InMemory.new(current_directory).into(new_directory, options)
28
13
  end
29
14
  end
30
15
  end
@@ -1,6 +1,6 @@
1
1
  module DirectoryDiff
2
2
  module Transformer
3
- class InMemory
3
+ class InMemory
4
4
  attr_reader :current_directory, :new_directory
5
5
  attr_reader :transforms, :transforms_index
6
6
  attr_reader :options
@@ -59,7 +59,7 @@ module DirectoryDiff
59
59
  # the new record appears to be the same as the current record
60
60
  if assistant_emails.empty?
61
61
  original_assistant_value = nil
62
- new_employee[3] = old_employee&.fetch(3)
62
+ new_employee[3] = old_employee&.fetch(3)
63
63
  else
64
64
  original_assistant_value = new_employee[3]
65
65
  end
@@ -77,7 +77,7 @@ module DirectoryDiff
77
77
 
78
78
  if old_employee.nil?
79
79
  add_transform(:insert, new_employee)
80
- elsif new_employee[0, 4] == old_employee[0, 4]
80
+ elsif new_employee[0, 5] == old_employee[0, 5]
81
81
  # restore assistant value after cleanup like missing assistants and own email
82
82
  new_employee[3] = original_assistant_value
83
83
  # restore phone number value
@@ -1,3 +1,3 @@
1
1
  module DirectoryDiff
2
- VERSION = "0.4.14"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: directory_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.14
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kamal Mahyuddin
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-10-16 00:00:00.000000000 Z
11
+ date: 2023-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -38,20 +38,6 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 1.1.3
41
- - !ruby/object:Gem::Dependency
42
- name: activerecord_pg_stuff
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: 0.2.1
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: 0.2.1
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: bundler
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -149,13 +135,12 @@ files:
149
135
  - lib/directory_diff.rb
150
136
  - lib/directory_diff/transform.rb
151
137
  - lib/directory_diff/transformer/in_memory.rb
152
- - lib/directory_diff/transformer/temp_table.rb
153
138
  - lib/directory_diff/version.rb
154
139
  homepage: https://github.com/envoy/directory_diff
155
140
  licenses:
156
141
  - MIT
157
142
  metadata: {}
158
- post_install_message:
143
+ post_install_message:
159
144
  rdoc_options: []
160
145
  require_paths:
161
146
  - lib
@@ -170,9 +155,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
170
155
  - !ruby/object:Gem::Version
171
156
  version: '0'
172
157
  requirements: []
173
- rubyforge_project:
174
- rubygems_version: 2.5.2
175
- signing_key:
158
+ rubygems_version: 3.3.12
159
+ signing_key:
176
160
  specification_version: 4
177
161
  summary: Envoy employee directory diffing.
178
162
  test_files: []
@@ -1,385 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "activerecord_pg_stuff"
4
-
5
- module DirectoryDiff
6
- module Transformer
7
- class TempTable
8
- attr_reader :current_directory, :operations
9
-
10
- # @params current_directory a relation that filters out only the records
11
- # that represent the current directory. This is
12
- # mostly likely an Employee relation. This
13
- # relation will be pulled into a temporary table.
14
- def initialize(current_directory)
15
- @current_directory = current_directory
16
- @operations = []
17
- end
18
-
19
- # @param new_directory a table containing only the new records to compare
20
- # against, most likely a temp table.
21
- def into(new_directory, options = {})
22
- current_directory_temp_table do |temp_current_directory|
23
- new_directory_temp_table(new_directory) do |deduped_csv|
24
- # Get Arel tables for referencing fields, table names
25
- employees = temp_current_directory.table
26
- csv = deduped_csv.table
27
-
28
- # Reusable Arel predicates
29
- csv_employee_join = csv[:email].eq(employees[:email])
30
- attributes_unchanged = employees[:name].eq(csv[:name])
31
- .and(
32
- employees[:phone_number].eq(csv[:phone_number])
33
- .or(csv[:phone_number].eq(""))
34
- # ☝🏽 Comparing to an empty string because we cast
35
- # phone number to an empty string. The reason is
36
- # comparing NULL = NULL is always false in SQL
37
- )
38
- .and(
39
- employees[:assistants].eq(csv[:assistants])
40
- .or(csv[:assistants].eq("{}"))
41
- )
42
-
43
- # Creates joins between the temp table and the csv table and
44
- # vice versa
45
- # Cribbed from https://gist.github.com/mildmojo/3724189
46
- csv_to_employees = csv.join(employees, Arel::Nodes::OuterJoin)
47
- .on(csv_employee_join)
48
- .join_sources
49
- employees_to_csv = employees.join(csv, Arel::Nodes::OuterJoin)
50
- .on(csv_employee_join)
51
- .join_sources
52
-
53
- # Representation of the joined csv-employees, with csv on the left
54
- csv_records = deduped_csv.joins(csv_to_employees).order('row_number asc')
55
- # Representation of the joined employees-csv, with employees on the
56
- # left
57
- employee_records = temp_current_directory.joins(employees_to_csv)
58
-
59
- connection.execute(SQL.cleanup_sql(csv.name))
60
-
61
- csv_fields = %I[name email phone_number assistants extra].map { |c| csv[c] }
62
- emp_fields = %I[name email phone_number assistants].map { |c| employees[c] }
63
-
64
- # new records are records in the new directory that don't exist in
65
- # the current directory
66
- new_records = csv_records
67
- .select("'insert'::varchar operation, row_number")
68
- .select(csv_fields)
69
- .where({ employees.name => { email: nil } })
70
- # deleted records are records in the current directory that don't
71
- # exist in the new directory
72
- deleted_records = employee_records
73
- .select("'delete'::varchar operation, row_number")
74
- .select(emp_fields)
75
- .select("null extra")
76
- .where({ csv.name => { email: nil } })
77
- # changed records are records that have difference in name, phone
78
- # number and/or assistants
79
- changed_records = csv_records
80
- .select("'update'::varchar operation, row_number")
81
- .select(csv_fields)
82
- .where.not(attributes_unchanged)
83
- # unchanged records are records that are exactly the same in both
84
- # directories (without considering the extra field)
85
- unchanged_records = csv_records
86
- .select("'noop'::varchar operation, row_number")
87
- .select(csv_fields)
88
- .where(attributes_unchanged)
89
-
90
- # create temp table for holding operations
91
- temp_table(new_records.to_sql) do |operations|
92
- insert_into_operations(operations, deleted_records.to_sql)
93
- insert_into_operations(operations, changed_records.to_sql)
94
- if options[:skip_noop] != true
95
- insert_into_operations(operations, unchanged_records.to_sql)
96
- end
97
-
98
- operations.order(:row_number).each do |operation|
99
- add_operation(operation)
100
- end
101
- end
102
- end
103
- end
104
-
105
- prioritize_assistants(operations)
106
- end
107
-
108
- private
109
-
110
- def current_directory_temp_table(&block)
111
- # outer temp table is required so that the projection does not run into
112
- # ambiguous column issues
113
- temp_table(current_directory) do |rel|
114
- temp_table(rel.select(SQL.current_directory_projection), &block)
115
- end
116
- end
117
-
118
- def new_directory_temp_table(source, &block)
119
- convert_to_relation(source) do |relation|
120
- relation = relation.select("*")
121
- .from(Arel.sql("(#{SQL.latest_unique_sql(relation.table.name)}) as t"))
122
- .order("row_number")
123
-
124
- temp_table(relation, &block)
125
- end
126
- end
127
-
128
- def convert_to_relation(source, &block)
129
- return block.call(source) if source.is_a?(ActiveRecord::Relation)
130
-
131
- temp_table do |relation|
132
- table_name = relation.table.name
133
- connection.change_table(table_name) do |t|
134
- t.column :name, :string
135
- t.column :email, :string
136
- t.column :phone_number, :string
137
- t.column :assistants, :string
138
- t.column :extra, :string
139
- end
140
- insert_into_csv_table(table_name, source)
141
- block.call(relation)
142
- end
143
- end
144
-
145
- # TODO chunk this into batch sizes
146
- def insert_into_csv_table(table_name, records)
147
- return if records.empty?
148
-
149
- values = records.map do |row|
150
- (name, email, phone_number, assistants, extra) = row
151
- columns = [
152
- connection.quote(name),
153
- connection.quote(email),
154
- connection.quote(phone_number),
155
- connection.quote(assistants),
156
- connection.quote(extra)
157
- ]
158
- "(#{columns.join(", ")})"
159
- end
160
-
161
- connection.execute(SQL.insert_into_temp_csv_table(table_name, values))
162
- end
163
-
164
- def temp_table(source = nil, &block)
165
- return source.temporary_table(&block) if source.is_a?(ActiveRecord::Relation)
166
-
167
- create_temp_table(source) do |name|
168
- klass = current_directory.klass
169
- dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new(klass, name)
170
- if activerecord52?
171
- rel = ActiveRecord::Relation.new(dec)
172
- else
173
- rel = ActiveRecord::Relation.new(
174
- dec,
175
- dec.arel_table,
176
- dec.predicate_builder,
177
- {}
178
- )
179
- end
180
- rel.readonly!
181
- block.call(rel)
182
- end
183
- end
184
-
185
- def create_temp_table(initial_sql=nil)
186
- table_name = "temporary_#{(Time.now.to_f * 1000).to_i}"
187
-
188
- if initial_sql
189
- connection.with_temporary_table(table_name, initial_sql) do |name|
190
- yield name
191
- end
192
- else
193
- connection.transaction do
194
- begin
195
- connection.create_table(table_name, temporary: true)
196
- yield table_name
197
- ensure
198
- connection.drop_table(table_name)
199
- end
200
- end
201
- end
202
- end
203
-
204
- def insert_into_operations(relation, sql)
205
- connection.execute(SQL.insert_into_operations(relation.table.name, sql))
206
- end
207
-
208
- def add_operation(operation)
209
- op = [
210
- operation.operation.to_sym,
211
- operation.name,
212
- operation.email,
213
- operation.phone_number.presence,
214
- serialize_pg_array(operation.assistants)
215
- ]
216
- op << operation.extra unless operation[:extra].nil?
217
- operations << op
218
- end
219
-
220
- def serialize_pg_array(pg_array)
221
- return if pg_array.nil?
222
- pg_array = pg_array[1...-1] # remove the curly braces
223
- pg_array.presence
224
- end
225
-
226
- def prioritize_assistants(operations)
227
- prioritized_operations = []
228
- operations.each do |operation|
229
- process_operation(operation, operations, prioritized_operations, Set.new)
230
- end
231
- prioritized_operations
232
- end
233
-
234
- def process_operation(operation, operations, prioritized_operations, tail)
235
- (_, _, email, _, assistants) = operation
236
- return if prioritized_operations.find { |_, _, e| e == email }
237
-
238
- (assistants || '').split(',').each do |assistant_email|
239
- next if tail.include?(assistant_email)
240
- assistant_operation = operations.find { |_, _, email| email == assistant_email }
241
- process_operation(
242
- assistant_operation,
243
- operations,
244
- prioritized_operations,
245
- tail.add(email)
246
- )
247
- end
248
-
249
- prioritized_operations << operation
250
- end
251
-
252
- def connection
253
- current_directory.connection
254
- end
255
-
256
- def activerecord52?
257
- ActiveRecord.gem_version >= Gem::Version.new("5.2.x")
258
- end
259
- end
260
-
261
- module SQL
262
- # Cleanup some bad records
263
- # 1. Assistant email is set on an employee, but no assistant record
264
- # in csv. Remove the assistant email.
265
- # 2. Assistant email is employee's own email. Remove the assistant
266
- # email.
267
- # TODO move this into the temp table creation above
268
- # https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
269
- def self.cleanup_sql(table_name)
270
- <<-SQL
271
- with
272
- unnested_assistants as
273
- (
274
- select
275
- email,
276
- name,
277
- unnest(assistants) assistant
278
- from #{table_name}
279
- ),
280
- own_email_removed as
281
- (
282
- select
283
- a.*
284
- from unnested_assistants a
285
- where a.email != a.assistant
286
- ),
287
- missing_assistants_removed as
288
- (
289
- select
290
- a.*
291
- from own_email_removed a
292
- left outer join #{table_name} b on a.assistant = b.email
293
- where
294
- (a.assistant is null and b.email is null)
295
- or (a.assistant is not null and b.email is not null)
296
- ),
297
- only_valid_assistants as
298
- (
299
- select
300
- a.email,
301
- a.name,
302
- array_remove(
303
- array_agg(b.assistant),
304
- null
305
- ) assistants
306
- from #{table_name} a
307
- left outer join missing_assistants_removed b
308
- using (email)
309
- group by
310
- a.email, a.name
311
- )
312
- update #{table_name}
313
- set assistants = only_valid_assistants.assistants
314
- from only_valid_assistants
315
- where #{table_name}.email = only_valid_assistants.email
316
- SQL
317
- end
318
-
319
- # Remove dupe email rows, keeping the last one
320
- def self.latest_unique_sql(table_name)
321
- <<-SQL
322
- SELECT
323
- DISTINCT ON (lower(email)) name,
324
- lower(email) email,
325
- coalesce(phone_number, '') phone_number,
326
- array_remove(
327
- regexp_split_to_array(
328
- coalesce(assistants, ''),
329
- '\s*,\s*'
330
- )::varchar[],
331
- ''
332
- ) assistants,
333
- extra,
334
- ROW_NUMBER () OVER ()
335
- FROM
336
- #{table_name}
337
- ORDER BY
338
- lower(email),
339
- row_number desc
340
- SQL
341
- end
342
-
343
- def self.current_directory_projection
344
- <<-SQL
345
- name,
346
- lower(email) email,
347
- coalesce(phone_number, '') phone_number,
348
- array_remove(
349
- regexp_split_to_array(
350
- coalesce(assistants, ''),
351
- '\s*,\s*'
352
- )::varchar[],
353
- ''
354
- ) assistants
355
- SQL
356
- end
357
-
358
- def self.insert_into_operations(table_name, sql)
359
- <<-SQL
360
- insert into #{table_name}(
361
- operation,
362
- row_number,
363
- name,
364
- email,
365
- phone_number,
366
- assistants,
367
- extra
368
- ) #{sql}
369
- SQL
370
- end
371
-
372
- def self.insert_into_temp_csv_table(table_name, values)
373
- <<-SQL
374
- insert into #{table_name}(
375
- name,
376
- email,
377
- phone_number,
378
- assistants,
379
- extra
380
- ) values #{values.join(", ")}
381
- SQL
382
- end
383
- end
384
- end
385
- end