directory_diff 0.4.11 → 0.4.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 15e07baf8cd9794e7c7725ff856c366829f17de7
4
- data.tar.gz: 97966de2c4dbd7e6361fa6669fef1e57d8cbf68e
2
+ SHA256:
3
+ metadata.gz: 2e2bf8a6d21cbf21189c07cbba63f6ba6011f722f432b1b9804e5b1abbb3683b
4
+ data.tar.gz: c2081478703a7d3843b2d8faf9db9487c6848c169350c60f6eea0168ff77720e
5
5
  SHA512:
6
- metadata.gz: 7dad0393c22d490b7b9d777b1217183dc144da9d8feb36d4575e80e879a97bdaaf2aab77a5f9e0d0b98b06dffc0d3cd23fcbe661d1fb540c37d4710e6c151442
7
- data.tar.gz: 12f8cdb06594281b48664bb96ace66c7a671ff69493dae688a283e1c461fd3c9b125842047bb61fd98acac0cc2f7585a0783527677d3884efd016355e86f43b6
6
+ metadata.gz: 284e7d9653a30f7029a402a352091fffc8aa82ac1b0dec0130b90f6c572c2f8936ce2ab46b1bdbbf190737a971935174c1c7c0bc26ef165bb955dc9a79d453af
7
+ data.tar.gz: 9296b7cac1a1100dbeef7d19bbee14cc8d22c6f5c250ecbb959b14a195157985bc4bb49c6269a27139bf206647c6b19bd2ac089fdd613bc5a45961a46c5f2a45
@@ -19,13 +19,12 @@ Gem::Specification.new do |spec|
19
19
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
20
  spec.require_paths = ["lib"]
21
21
 
22
- spec.add_dependency "activerecord", ">= 5.1.4"
22
+ spec.add_dependency "activerecord", ">= 5.1"
23
23
  spec.add_dependency "pg", "~> 1.1.3"
24
- spec.add_dependency "temping", "~> 3.10.0"
25
- spec.add_dependency "activerecord_pg_stuff", "~> 0.2.1"
26
24
 
27
- spec.add_development_dependency "bundler", "~> 1.11"
25
+ spec.add_development_dependency "bundler", "~> 2"
28
26
  spec.add_development_dependency "rake", "~> 10.0"
29
27
  spec.add_development_dependency "rspec", "~> 3.0"
28
+ spec.add_development_dependency "temping", "~> 3.10.0"
30
29
  spec.add_development_dependency "appraisal"
31
30
  end
@@ -1,5 +1,4 @@
1
1
  require_relative "transformer/in_memory"
2
- require_relative "transformer/temp_table"
3
2
 
4
3
  module DirectoryDiff
5
4
  class Transform
@@ -10,21 +9,7 @@ module DirectoryDiff
10
9
  end
11
10
 
12
11
  def into(new_directory, options = {})
13
- processor_class = processor_for(options[:processor])
14
- processor_class.new(current_directory).into(new_directory, options)
15
- end
16
-
17
- private
18
-
19
- def processor_for(processor)
20
- case processor
21
- when nil, :in_memory
22
- Transformer::InMemory
23
- when :temp_table
24
- Transformer::TempTable
25
- else
26
- raise ArgumentError, "unsupported processor #{processor.inspect}"
27
- end
12
+ Transformer::InMemory.new(current_directory).into(new_directory, options)
28
13
  end
29
14
  end
30
15
  end
@@ -64,11 +64,24 @@ module DirectoryDiff
64
64
  original_assistant_value = new_employee[3]
65
65
  end
66
66
 
67
+ # phone_number may be nil. we only use the csv to *set* phone numbers if
68
+ # it has a value. if it was nil, we backfill from current employee so that
69
+ # the new record appears to be the same as the current record
70
+ phone_number = new_employee[2].presence
71
+ if phone_number.nil?
72
+ original_phone_number_value = nil
73
+ new_employee[2] = old_employee&.fetch(2)
74
+ else
75
+ original_phone_number_value = new_employee[2]
76
+ end
77
+
67
78
  if old_employee.nil?
68
79
  add_transform(:insert, new_employee)
69
80
  elsif new_employee[0, 4] == old_employee[0, 4]
70
81
  # restore assistant value after cleanup like missing assistants and own email
71
82
  new_employee[3] = original_assistant_value
83
+ # restore phone number value
84
+ new_employee[2] = original_phone_number_value
72
85
  add_transform(:noop, new_employee) unless options[:skip_noop]
73
86
  else
74
87
  add_transform(:update, new_employee)
@@ -1,3 +1,3 @@
1
1
  module DirectoryDiff
2
- VERSION = "0.4.11"
2
+ VERSION = "0.4.15"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: directory_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.11
4
+ version: 0.4.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kamal Mahyuddin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-10-11 00:00:00.000000000 Z
11
+ date: 2021-10-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 5.1.4
19
+ version: '5.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 5.1.4
26
+ version: '5.1'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: pg
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -39,75 +39,61 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: 1.1.3
41
41
  - !ruby/object:Gem::Dependency
42
- name: temping
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: 3.10.0
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: 3.10.0
55
- - !ruby/object:Gem::Dependency
56
- name: activerecord_pg_stuff
42
+ name: bundler
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
45
  - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: 0.2.1
62
- type: :runtime
47
+ version: '2'
48
+ type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
52
  - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: 0.2.1
54
+ version: '2'
69
55
  - !ruby/object:Gem::Dependency
70
- name: bundler
56
+ name: rake
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: '1.11'
61
+ version: '10.0'
76
62
  type: :development
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '1.11'
68
+ version: '10.0'
83
69
  - !ruby/object:Gem::Dependency
84
- name: rake
70
+ name: rspec
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
73
  - - "~>"
88
74
  - !ruby/object:Gem::Version
89
- version: '10.0'
75
+ version: '3.0'
90
76
  type: :development
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
80
  - - "~>"
95
81
  - !ruby/object:Gem::Version
96
- version: '10.0'
82
+ version: '3.0'
97
83
  - !ruby/object:Gem::Dependency
98
- name: rspec
84
+ name: temping
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
87
  - - "~>"
102
88
  - !ruby/object:Gem::Version
103
- version: '3.0'
89
+ version: 3.10.0
104
90
  type: :development
105
91
  prerelease: false
106
92
  version_requirements: !ruby/object:Gem::Requirement
107
93
  requirements:
108
94
  - - "~>"
109
95
  - !ruby/object:Gem::Version
110
- version: '3.0'
96
+ version: 3.10.0
111
97
  - !ruby/object:Gem::Dependency
112
98
  name: appraisal
113
99
  requirement: !ruby/object:Gem::Requirement
@@ -149,7 +135,6 @@ files:
149
135
  - lib/directory_diff.rb
150
136
  - lib/directory_diff/transform.rb
151
137
  - lib/directory_diff/transformer/in_memory.rb
152
- - lib/directory_diff/transformer/temp_table.rb
153
138
  - lib/directory_diff/version.rb
154
139
  homepage: https://github.com/envoy/directory_diff
155
140
  licenses:
@@ -171,7 +156,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
156
  version: '0'
172
157
  requirements: []
173
158
  rubyforge_project:
174
- rubygems_version: 2.5.2
159
+ rubygems_version: 2.7.6
175
160
  signing_key:
176
161
  specification_version: 4
177
162
  summary: Envoy employee directory diffing.
@@ -1,379 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "activerecord_pg_stuff"
4
-
5
- Arel::Predications.module_eval do
6
- def contains(other)
7
- Arel::Nodes::InfixOperation.new(:"@>", self, other)
8
- end
9
- end
10
-
11
- module DirectoryDiff
12
- module Transformer
13
- class TempTable
14
- attr_reader :current_directory, :operations
15
-
16
- # @param current_directory a relation that filters out only the records
17
- # that represent the current directory. This is
18
- # most likely an Employee relation. This
19
- # relation will be pulled into a temporary table.
20
- def initialize(current_directory)
21
- @current_directory = current_directory
22
- @operations = []
23
- end
24
-
25
- # @param new_directory a table containing only the new records to compare
26
- # against, most likely a temp table.
27
- def into(new_directory, options = {})
28
- current_directory_temp_table do |temp_current_directory|
29
- new_directory_temp_table(new_directory) do |deduped_csv|
30
- # Get Arel tables for referencing fields, table names
31
- employees = temp_current_directory.table
32
- csv = deduped_csv.table
33
-
34
- # Reusable Arel predicates
35
- csv_employee_join = csv[:email].eq(employees[:email])
36
- attributes_unchanged = employees[:name].eq(csv[:name])
37
- .and(
38
- employees[:phone_number].eq(csv[:phone_number])
39
- )
40
- .and(
41
- employees[:assistants].contains(csv[:assistants])
42
- )
43
-
44
- # Creates joins between the temp table and the csv table and
45
- # vice versa
46
- # Cribbed from https://gist.github.com/mildmojo/3724189
47
- csv_to_employees = csv.join(employees, Arel::Nodes::OuterJoin)
48
- .on(csv_employee_join)
49
- .join_sources
50
- employees_to_csv = employees.join(csv, Arel::Nodes::OuterJoin)
51
- .on(csv_employee_join)
52
- .join_sources
53
-
54
- # Representation of the joined csv-employees, with csv on the left
55
- csv_records = deduped_csv.joins(csv_to_employees).order('row_number asc')
56
- # Representation of the joined employees-csv, with employees on the
57
- # left
58
- employee_records = temp_current_directory.joins(employees_to_csv)
59
-
60
- connection.execute(SQL.cleanup_sql(csv.name))
61
-
62
- csv_fields = [:name, :email, :phone_number, :assistants, :extra]
63
-
64
- # new records are records in the new directory that don't exist in
65
- # the current directory
66
- new_records = csv_records
67
- .select("'insert'::varchar operation, row_number")
68
- .select(csv_fields)
69
- .where({ employees.name => { email: nil } })
70
- # deleted records are records in the current directory that don't
71
- # exist in the new directory
72
- deleted_records = employee_records
73
- .select("'delete'::varchar operation, row_number")
74
- .select(csv_fields)
75
- .where({ csv.name => { email: nil } })
76
- # changed records are records that have difference in name, phone
77
- # number and/or assistants
78
- changed_records = csv_records
79
- .select("'update'::varchar operation, row_number")
80
- .select(csv_fields)
81
- .where.not(attributes_unchanged)
82
- # unchanged records are records that are exactly the same in both
83
- # directories (without considering the extra field)
84
- unchanged_records = csv_records
85
- .select("'noop'::varchar operation, row_number")
86
- .select(csv_fields)
87
- .where(attributes_unchanged)
88
-
89
- # create temp table for holding operations
90
- temp_table(new_records.to_sql) do |operations|
91
- insert_into_operations(operations, deleted_records.to_sql)
92
- insert_into_operations(operations, changed_records.to_sql)
93
- if options[:skip_noop] != true
94
- insert_into_operations(operations, unchanged_records.to_sql)
95
- end
96
-
97
- operations.order(:row_number).each do |operation|
98
- add_operation(operation)
99
- end
100
- end
101
- end
102
- end
103
-
104
- prioritize_assistants(operations)
105
- end
106
-
107
- private
108
-
109
- def current_directory_temp_table(&block)
110
- # outer temp table is required so that the projection does not run into
111
- # ambiguous column issues
112
- temp_table(current_directory) do |rel|
113
- temp_table(rel.select(SQL.current_directory_projection), &block)
114
- end
115
- end
116
-
117
- def new_directory_temp_table(source, &block)
118
- convert_to_relation(source) do |relation|
119
- relation = relation.select("*")
120
- .from(Arel.sql("(#{SQL.latest_unique_sql(relation.table.name)}) as t"))
121
- .order("row_number")
122
-
123
- temp_table(relation, &block)
124
- end
125
- end
126
-
127
- def convert_to_relation(source, &block)
128
- return block.call(source) if source.is_a?(ActiveRecord::Relation)
129
-
130
- temp_table do |relation|
131
- table_name = relation.table.name
132
- connection.change_table(table_name) do |t|
133
- t.column :name, :string
134
- t.column :email, :string
135
- t.column :phone_number, :string
136
- t.column :assistants, :string
137
- t.column :extra, :string
138
- end
139
- insert_into_csv_table(table_name, source)
140
- block.call(relation)
141
- end
142
- end
143
-
144
- # TODO chunk this into batch sizes
145
- def insert_into_csv_table(table_name, records)
146
- return if records.empty?
147
-
148
- values = records.map do |row|
149
- (name, email, phone_number, assistants, extra) = row
150
- columns = [
151
- connection.quote(name),
152
- connection.quote(email),
153
- connection.quote(phone_number),
154
- connection.quote(assistants),
155
- connection.quote(extra)
156
- ]
157
- "(#{columns.join(", ")})"
158
- end
159
-
160
- connection.execute(SQL.insert_into_temp_csv_table(table_name, values))
161
- end
162
-
163
- def temp_table(source = nil, &block)
164
- return source.temporary_table(&block) if source.is_a?(ActiveRecord::Relation)
165
-
166
- create_temp_table(source) do |name|
167
- klass = current_directory.klass
168
- dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new(klass, name)
169
- if activerecord52?
170
- rel = ActiveRecord::Relation.new(dec)
171
- else
172
- rel = ActiveRecord::Relation.new(dec, dec.arel_table, dec.predicate_builder, {})
173
- end
174
- rel.readonly!
175
- block.call(rel)
176
- end
177
- end
178
-
179
- def create_temp_table(initial_sql=nil)
180
- table_name = "temporary_#{(Time.now.to_f * 1000).to_i}"
181
-
182
- if initial_sql
183
- connection.with_temporary_table(table_name, initial_sql) do |name|
184
- yield name
185
- end
186
- else
187
- connection.transaction do
188
- begin
189
- connection.create_table(table_name, temporary: true)
190
- yield table_name
191
- ensure
192
- connection.drop_table(table_name)
193
- end
194
- end
195
- end
196
- end
197
-
198
- def insert_into_operations(relation, sql)
199
- connection.execute(SQL.insert_into_operations(relation.table.name, sql))
200
- end
201
-
202
- def add_operation(operation)
203
- op = [
204
- operation.operation.to_sym,
205
- operation.name,
206
- operation.email,
207
- operation.phone_number.presence,
208
- serialize_pg_array(operation.assistants)
209
- ]
210
- op << operation.extra unless operation[:extra].nil?
211
- operations << op
212
- end
213
-
214
- def serialize_pg_array(pg_array)
215
- return if pg_array.nil?
216
- pg_array = pg_array[1...-1] # remove the curly braces
217
- pg_array.presence
218
- end
219
-
220
- def prioritize_assistants(operations)
221
- prioritized_operations = []
222
- operations.each do |operation|
223
- process_operation(operation, operations, prioritized_operations, Set.new)
224
- end
225
- prioritized_operations
226
- end
227
-
228
- def process_operation(operation, operations, prioritized_operations, tail)
229
- (_, _, email, _, assistants) = operation
230
- return if prioritized_operations.find { |_, _, e| e == email }
231
-
232
- (assistants || '').split(',').each do |assistant_email|
233
- next if tail.include?(assistant_email)
234
- assistant_operation = operations.find { |_, _, email| email == assistant_email }
235
- process_operation(
236
- assistant_operation,
237
- operations,
238
- prioritized_operations,
239
- tail.add(email)
240
- )
241
- end
242
-
243
- prioritized_operations << operation
244
- end
245
-
246
- def connection
247
- current_directory.connection
248
- end
249
-
250
- def activerecord52?
251
- ActiveRecord.gem_version >= Gem::Version.new("5.2.x")
252
- end
253
- end
254
-
255
- module SQL
256
- # Cleanup some bad records
257
- # 1. Assistant email is set on an employee, but no assistant record
258
- # in csv. Remove the assistant email.
259
- # 2. Assistant email is employee's own email. Remove the assistant
260
- # email.
261
- # TODO move this into the temp table creation above
262
- # https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
263
- def self.cleanup_sql(table_name)
264
- <<-SQL
265
- with
266
- unnested_assistants as
267
- (
268
- select
269
- email,
270
- name,
271
- unnest(assistants) assistant
272
- from #{table_name}
273
- ),
274
- own_email_removed as
275
- (
276
- select
277
- a.*
278
- from unnested_assistants a
279
- where a.email != a.assistant
280
- ),
281
- missing_assistants_removed as
282
- (
283
- select
284
- a.*
285
- from own_email_removed a
286
- left outer join #{table_name} b on a.assistant = b.email
287
- where
288
- (a.assistant is null and b.email is null)
289
- or (a.assistant is not null and b.email is not null)
290
- ),
291
- only_valid_assistants as
292
- (
293
- select
294
- a.email,
295
- a.name,
296
- array_remove(
297
- array_agg(b.assistant),
298
- null
299
- ) assistants
300
- from #{table_name} a
301
- left outer join missing_assistants_removed b
302
- using (email)
303
- group by
304
- a.email, a.name
305
- )
306
- update #{table_name}
307
- set assistants = only_valid_assistants.assistants
308
- from only_valid_assistants
309
- where #{table_name}.email = only_valid_assistants.email
310
- SQL
311
- end
312
-
313
- # Remove dupe email rows, keeping the last one
314
- def self.latest_unique_sql(table_name)
315
- <<-SQL
316
- SELECT
317
- DISTINCT ON (lower(email)) name,
318
- lower(email) email,
319
- coalesce(phone_number, '') phone_number,
320
- array_remove(
321
- regexp_split_to_array(
322
- coalesce(assistants, ''),
323
- '\s*,\s*'
324
- )::varchar[],
325
- ''
326
- ) assistants,
327
- extra,
328
- ROW_NUMBER () OVER ()
329
- FROM
330
- #{table_name}
331
- ORDER BY
332
- lower(email),
333
- row_number desc
334
- SQL
335
- end
336
-
337
- def self.current_directory_projection
338
- <<-SQL
339
- name,
340
- lower(email) email,
341
- coalesce(phone_number, '') phone_number,
342
- array_remove(
343
- regexp_split_to_array(
344
- coalesce(assistants, ''),
345
- '\s*,\s*'
346
- )::varchar[],
347
- ''
348
- ) assistants
349
- SQL
350
- end
351
-
352
- def self.insert_into_operations(table_name, sql)
353
- <<-SQL
354
- insert into #{table_name}(
355
- operation,
356
- row_number,
357
- name,
358
- email,
359
- phone_number,
360
- assistants,
361
- extra
362
- ) #{sql}
363
- SQL
364
- end
365
-
366
- def self.insert_into_temp_csv_table(table_name, values)
367
- <<-SQL
368
- insert into #{table_name}(
369
- name,
370
- email,
371
- phone_number,
372
- assistants,
373
- extra
374
- ) values #{values.join(", ")}
375
- SQL
376
- end
377
- end
378
- end
379
- end