directory_diff 0.4.11 → 0.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 15e07baf8cd9794e7c7725ff856c366829f17de7
4
- data.tar.gz: 97966de2c4dbd7e6361fa6669fef1e57d8cbf68e
2
+ SHA256:
3
+ metadata.gz: 2e2bf8a6d21cbf21189c07cbba63f6ba6011f722f432b1b9804e5b1abbb3683b
4
+ data.tar.gz: c2081478703a7d3843b2d8faf9db9487c6848c169350c60f6eea0168ff77720e
5
5
  SHA512:
6
- metadata.gz: 7dad0393c22d490b7b9d777b1217183dc144da9d8feb36d4575e80e879a97bdaaf2aab77a5f9e0d0b98b06dffc0d3cd23fcbe661d1fb540c37d4710e6c151442
7
- data.tar.gz: 12f8cdb06594281b48664bb96ace66c7a671ff69493dae688a283e1c461fd3c9b125842047bb61fd98acac0cc2f7585a0783527677d3884efd016355e86f43b6
6
+ metadata.gz: 284e7d9653a30f7029a402a352091fffc8aa82ac1b0dec0130b90f6c572c2f8936ce2ab46b1bdbbf190737a971935174c1c7c0bc26ef165bb955dc9a79d453af
7
+ data.tar.gz: 9296b7cac1a1100dbeef7d19bbee14cc8d22c6f5c250ecbb959b14a195157985bc4bb49c6269a27139bf206647c6b19bd2ac089fdd613bc5a45961a46c5f2a45
@@ -19,13 +19,12 @@ Gem::Specification.new do |spec|
19
19
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
20
  spec.require_paths = ["lib"]
21
21
 
22
- spec.add_dependency "activerecord", ">= 5.1.4"
22
+ spec.add_dependency "activerecord", ">= 5.1"
23
23
  spec.add_dependency "pg", "~> 1.1.3"
24
- spec.add_dependency "temping", "~> 3.10.0"
25
- spec.add_dependency "activerecord_pg_stuff", "~> 0.2.1"
26
24
 
27
- spec.add_development_dependency "bundler", "~> 1.11"
25
+ spec.add_development_dependency "bundler", "~> 2"
28
26
  spec.add_development_dependency "rake", "~> 10.0"
29
27
  spec.add_development_dependency "rspec", "~> 3.0"
28
+ spec.add_development_dependency "temping", "~> 3.10.0"
30
29
  spec.add_development_dependency "appraisal"
31
30
  end
@@ -1,5 +1,4 @@
1
1
  require_relative "transformer/in_memory"
2
- require_relative "transformer/temp_table"
3
2
 
4
3
  module DirectoryDiff
5
4
  class Transform
@@ -10,21 +9,7 @@ module DirectoryDiff
10
9
  end
11
10
 
12
11
  def into(new_directory, options = {})
13
- processor_class = processor_for(options[:processor])
14
- processor_class.new(current_directory).into(new_directory, options)
15
- end
16
-
17
- private
18
-
19
- def processor_for(processor)
20
- case processor
21
- when nil, :in_memory
22
- Transformer::InMemory
23
- when :temp_table
24
- Transformer::TempTable
25
- else
26
- raise ArgumentError, "unsupported processor #{processor.inspect}"
27
- end
12
+ Transformer::InMemory.new(current_directory).into(new_directory, options)
28
13
  end
29
14
  end
30
15
  end
@@ -64,11 +64,24 @@ module DirectoryDiff
64
64
  original_assistant_value = new_employee[3]
65
65
  end
66
66
 
67
+ # phone_number may be nil. we only use the csv to *set* phone numbers if
68
+ # it has a value. if it was nil, we backfill from current employee so that
69
+ # the new record appears to be the same as the current record
70
+ phone_number = new_employee[2].presence
71
+ if phone_number.nil?
72
+ original_phone_number_value = nil
73
+ new_employee[2] = old_employee&.fetch(2)
74
+ else
75
+ original_phone_number_value = new_employee[2]
76
+ end
77
+
67
78
  if old_employee.nil?
68
79
  add_transform(:insert, new_employee)
69
80
  elsif new_employee[0, 4] == old_employee[0, 4]
70
81
  # restore assistant value after cleanup like missing assistants and own email
71
82
  new_employee[3] = original_assistant_value
83
+ # restore phone number value
84
+ new_employee[2] = original_phone_number_value
72
85
  add_transform(:noop, new_employee) unless options[:skip_noop]
73
86
  else
74
87
  add_transform(:update, new_employee)
@@ -1,3 +1,3 @@
1
1
  module DirectoryDiff
2
- VERSION = "0.4.11"
2
+ VERSION = "0.4.15"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: directory_diff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.11
4
+ version: 0.4.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kamal Mahyuddin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-10-11 00:00:00.000000000 Z
11
+ date: 2021-10-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 5.1.4
19
+ version: '5.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 5.1.4
26
+ version: '5.1'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: pg
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -39,75 +39,61 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: 1.1.3
41
41
  - !ruby/object:Gem::Dependency
42
- name: temping
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: 3.10.0
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: 3.10.0
55
- - !ruby/object:Gem::Dependency
56
- name: activerecord_pg_stuff
42
+ name: bundler
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
45
  - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: 0.2.1
62
- type: :runtime
47
+ version: '2'
48
+ type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
52
  - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: 0.2.1
54
+ version: '2'
69
55
  - !ruby/object:Gem::Dependency
70
- name: bundler
56
+ name: rake
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
59
  - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: '1.11'
61
+ version: '10.0'
76
62
  type: :development
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
66
  - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '1.11'
68
+ version: '10.0'
83
69
  - !ruby/object:Gem::Dependency
84
- name: rake
70
+ name: rspec
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
73
  - - "~>"
88
74
  - !ruby/object:Gem::Version
89
- version: '10.0'
75
+ version: '3.0'
90
76
  type: :development
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
80
  - - "~>"
95
81
  - !ruby/object:Gem::Version
96
- version: '10.0'
82
+ version: '3.0'
97
83
  - !ruby/object:Gem::Dependency
98
- name: rspec
84
+ name: temping
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
87
  - - "~>"
102
88
  - !ruby/object:Gem::Version
103
- version: '3.0'
89
+ version: 3.10.0
104
90
  type: :development
105
91
  prerelease: false
106
92
  version_requirements: !ruby/object:Gem::Requirement
107
93
  requirements:
108
94
  - - "~>"
109
95
  - !ruby/object:Gem::Version
110
- version: '3.0'
96
+ version: 3.10.0
111
97
  - !ruby/object:Gem::Dependency
112
98
  name: appraisal
113
99
  requirement: !ruby/object:Gem::Requirement
@@ -149,7 +135,6 @@ files:
149
135
  - lib/directory_diff.rb
150
136
  - lib/directory_diff/transform.rb
151
137
  - lib/directory_diff/transformer/in_memory.rb
152
- - lib/directory_diff/transformer/temp_table.rb
153
138
  - lib/directory_diff/version.rb
154
139
  homepage: https://github.com/envoy/directory_diff
155
140
  licenses:
@@ -171,7 +156,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
156
  version: '0'
172
157
  requirements: []
173
158
  rubyforge_project:
174
- rubygems_version: 2.5.2
159
+ rubygems_version: 2.7.6
175
160
  signing_key:
176
161
  specification_version: 4
177
162
  summary: Envoy employee directory diffing.
@@ -1,379 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "activerecord_pg_stuff"
4
-
5
- Arel::Predications.module_eval do
6
- def contains(other)
7
- Arel::Nodes::InfixOperation.new(:"@>", self, other)
8
- end
9
- end
10
-
11
- module DirectoryDiff
12
- module Transformer
13
- class TempTable
14
- attr_reader :current_directory, :operations
15
-
16
- # @params current_directory a relation that filters out only the records
17
- # that represent the current directory. This is
18
- # most likely an Employee relation. This
19
- # relation will be pulled into a temporary table.
20
- def initialize(current_directory)
21
- @current_directory = current_directory
22
- @operations = []
23
- end
24
-
25
- # @param new_directory a table containing only the new records to compare
26
- # against, most likely a temp table.
27
- def into(new_directory, options = {})
28
- current_directory_temp_table do |temp_current_directory|
29
- new_directory_temp_table(new_directory) do |deduped_csv|
30
- # Get Arel tables for referencing fields, table names
31
- employees = temp_current_directory.table
32
- csv = deduped_csv.table
33
-
34
- # Reusable Arel predicates
35
- csv_employee_join = csv[:email].eq(employees[:email])
36
- attributes_unchanged = employees[:name].eq(csv[:name])
37
- .and(
38
- employees[:phone_number].eq(csv[:phone_number])
39
- )
40
- .and(
41
- employees[:assistants].contains(csv[:assistants])
42
- )
43
-
44
- # Creates joins between the temp table and the csv table and
45
- # vice versa
46
- # Cribbed from https://gist.github.com/mildmojo/3724189
47
- csv_to_employees = csv.join(employees, Arel::Nodes::OuterJoin)
48
- .on(csv_employee_join)
49
- .join_sources
50
- employees_to_csv = employees.join(csv, Arel::Nodes::OuterJoin)
51
- .on(csv_employee_join)
52
- .join_sources
53
-
54
- # Representation of the joined csv-employees, with csv on the left
55
- csv_records = deduped_csv.joins(csv_to_employees).order('row_number asc')
56
- # Representation of the joined employees-csv, with employees on the
57
- # left
58
- employee_records = temp_current_directory.joins(employees_to_csv)
59
-
60
- connection.execute(SQL.cleanup_sql(csv.name))
61
-
62
- csv_fields = [:name, :email, :phone_number, :assistants, :extra]
63
-
64
- # new records are records in the new directory that don't exist in
65
- # the current directory
66
- new_records = csv_records
67
- .select("'insert'::varchar operation, row_number")
68
- .select(csv_fields)
69
- .where({ employees.name => { email: nil } })
70
- # deleted records are records in the current directory that don't
71
- # exist in the new directory
72
- deleted_records = employee_records
73
- .select("'delete'::varchar operation, row_number")
74
- .select(csv_fields)
75
- .where({ csv.name => { email: nil } })
76
- # changed records are records that have difference in name, phone
77
- # number and/or assistants
78
- changed_records = csv_records
79
- .select("'update'::varchar operation, row_number")
80
- .select(csv_fields)
81
- .where.not(attributes_unchanged)
82
- # unchanged records are records that are exactly the same in both
83
- # directories (without considering the extra field)
84
- unchanged_records = csv_records
85
- .select("'noop'::varchar operation, row_number")
86
- .select(csv_fields)
87
- .where(attributes_unchanged)
88
-
89
- # create temp table for holding operations
90
- temp_table(new_records.to_sql) do |operations|
91
- insert_into_operations(operations, deleted_records.to_sql)
92
- insert_into_operations(operations, changed_records.to_sql)
93
- if options[:skip_noop] != true
94
- insert_into_operations(operations, unchanged_records.to_sql)
95
- end
96
-
97
- operations.order(:row_number).each do |operation|
98
- add_operation(operation)
99
- end
100
- end
101
- end
102
- end
103
-
104
- prioritize_assistants(operations)
105
- end
106
-
107
- private
108
-
109
- def current_directory_temp_table(&block)
110
- # outer temp table is required so that the projection does not run into
111
- # ambiguous column issues
112
- temp_table(current_directory) do |rel|
113
- temp_table(rel.select(SQL.current_directory_projection), &block)
114
- end
115
- end
116
-
117
- def new_directory_temp_table(source, &block)
118
- convert_to_relation(source) do |relation|
119
- relation = relation.select("*")
120
- .from(Arel.sql("(#{SQL.latest_unique_sql(relation.table.name)}) as t"))
121
- .order("row_number")
122
-
123
- temp_table(relation, &block)
124
- end
125
- end
126
-
127
- def convert_to_relation(source, &block)
128
- return block.call(source) if source.is_a?(ActiveRecord::Relation)
129
-
130
- temp_table do |relation|
131
- table_name = relation.table.name
132
- connection.change_table(table_name) do |t|
133
- t.column :name, :string
134
- t.column :email, :string
135
- t.column :phone_number, :string
136
- t.column :assistants, :string
137
- t.column :extra, :string
138
- end
139
- insert_into_csv_table(table_name, source)
140
- block.call(relation)
141
- end
142
- end
143
-
144
- # TODO chunk this into batch sizes
145
- def insert_into_csv_table(table_name, records)
146
- return if records.empty?
147
-
148
- values = records.map do |row|
149
- (name, email, phone_number, assistants, extra) = row
150
- columns = [
151
- connection.quote(name),
152
- connection.quote(email),
153
- connection.quote(phone_number),
154
- connection.quote(assistants),
155
- connection.quote(extra)
156
- ]
157
- "(#{columns.join(", ")})"
158
- end
159
-
160
- connection.execute(SQL.insert_into_temp_csv_table(table_name, values))
161
- end
162
-
163
- def temp_table(source = nil, &block)
164
- return source.temporary_table(&block) if source.is_a?(ActiveRecord::Relation)
165
-
166
- create_temp_table(source) do |name|
167
- klass = current_directory.klass
168
- dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new(klass, name)
169
- if activerecord52?
170
- rel = ActiveRecord::Relation.new(dec)
171
- else
172
- rel = ActiveRecord::Relation.new(dec, dec.arel_table, dec.predicate_builder, {})
173
- end
174
- rel.readonly!
175
- block.call(rel)
176
- end
177
- end
178
-
179
- def create_temp_table(initial_sql=nil)
180
- table_name = "temporary_#{(Time.now.to_f * 1000).to_i}"
181
-
182
- if initial_sql
183
- connection.with_temporary_table(table_name, initial_sql) do |name|
184
- yield name
185
- end
186
- else
187
- connection.transaction do
188
- begin
189
- connection.create_table(table_name, temporary: true)
190
- yield table_name
191
- ensure
192
- connection.drop_table(table_name)
193
- end
194
- end
195
- end
196
- end
197
-
198
- def insert_into_operations(relation, sql)
199
- connection.execute(SQL.insert_into_operations(relation.table.name, sql))
200
- end
201
-
202
- def add_operation(operation)
203
- op = [
204
- operation.operation.to_sym,
205
- operation.name,
206
- operation.email,
207
- operation.phone_number.presence,
208
- serialize_pg_array(operation.assistants)
209
- ]
210
- op << operation.extra unless operation[:extra].nil?
211
- operations << op
212
- end
213
-
214
- def serialize_pg_array(pg_array)
215
- return if pg_array.nil?
216
- pg_array = pg_array[1...-1] # remove the curly braces
217
- pg_array.presence
218
- end
219
-
220
- def prioritize_assistants(operations)
221
- prioritized_operations = []
222
- operations.each do |operation|
223
- process_operation(operation, operations, prioritized_operations, Set.new)
224
- end
225
- prioritized_operations
226
- end
227
-
228
- def process_operation(operation, operations, prioritized_operations, tail)
229
- (_, _, email, _, assistants) = operation
230
- return if prioritized_operations.find { |_, _, e| e == email }
231
-
232
- (assistants || '').split(',').each do |assistant_email|
233
- next if tail.include?(assistant_email)
234
- assistant_operation = operations.find { |_, _, email| email == assistant_email }
235
- process_operation(
236
- assistant_operation,
237
- operations,
238
- prioritized_operations,
239
- tail.add(email)
240
- )
241
- end
242
-
243
- prioritized_operations << operation
244
- end
245
-
246
- def connection
247
- current_directory.connection
248
- end
249
-
250
- def activerecord52?
251
- ActiveRecord.gem_version >= Gem::Version.new("5.2.x")
252
- end
253
- end
254
-
255
- module SQL
256
- # Cleanup some bad records
257
- # 1. Assistant email is set on an employee, but no assistant record
258
- # in csv. Remove the assistant email.
259
- # 2. Assistant email is employee's own email. Remove the assistant
260
- # email.
261
- # TODO move this into the temp table creation above
262
- # https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
263
- def self.cleanup_sql(table_name)
264
- <<-SQL
265
- with
266
- unnested_assistants as
267
- (
268
- select
269
- email,
270
- name,
271
- unnest(assistants) assistant
272
- from #{table_name}
273
- ),
274
- own_email_removed as
275
- (
276
- select
277
- a.*
278
- from unnested_assistants a
279
- where a.email != a.assistant
280
- ),
281
- missing_assistants_removed as
282
- (
283
- select
284
- a.*
285
- from own_email_removed a
286
- left outer join #{table_name} b on a.assistant = b.email
287
- where
288
- (a.assistant is null and b.email is null)
289
- or (a.assistant is not null and b.email is not null)
290
- ),
291
- only_valid_assistants as
292
- (
293
- select
294
- a.email,
295
- a.name,
296
- array_remove(
297
- array_agg(b.assistant),
298
- null
299
- ) assistants
300
- from #{table_name} a
301
- left outer join missing_assistants_removed b
302
- using (email)
303
- group by
304
- a.email, a.name
305
- )
306
- update #{table_name}
307
- set assistants = only_valid_assistants.assistants
308
- from only_valid_assistants
309
- where #{table_name}.email = only_valid_assistants.email
310
- SQL
311
- end
312
-
313
- # Remove dupe email rows, keeping the last one
314
- def self.latest_unique_sql(table_name)
315
- <<-SQL
316
- SELECT
317
- DISTINCT ON (lower(email)) name,
318
- lower(email) email,
319
- coalesce(phone_number, '') phone_number,
320
- array_remove(
321
- regexp_split_to_array(
322
- coalesce(assistants, ''),
323
- '\s*,\s*'
324
- )::varchar[],
325
- ''
326
- ) assistants,
327
- extra,
328
- ROW_NUMBER () OVER ()
329
- FROM
330
- #{table_name}
331
- ORDER BY
332
- lower(email),
333
- row_number desc
334
- SQL
335
- end
336
-
337
- def self.current_directory_projection
338
- <<-SQL
339
- name,
340
- lower(email) email,
341
- coalesce(phone_number, '') phone_number,
342
- array_remove(
343
- regexp_split_to_array(
344
- coalesce(assistants, ''),
345
- '\s*,\s*'
346
- )::varchar[],
347
- ''
348
- ) assistants
349
- SQL
350
- end
351
-
352
- def self.insert_into_operations(table_name, sql)
353
- <<-SQL
354
- insert into #{table_name}(
355
- operation,
356
- row_number,
357
- name,
358
- email,
359
- phone_number,
360
- assistants,
361
- extra
362
- ) #{sql}
363
- SQL
364
- end
365
-
366
- def self.insert_into_temp_csv_table(table_name, values)
367
- <<-SQL
368
- insert into #{table_name}(
369
- name,
370
- email,
371
- phone_number,
372
- assistants,
373
- extra
374
- ) values #{values.join(", ")}
375
- SQL
376
- end
377
- end
378
- end
379
- end