directory_diff 0.4.14 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d8ee4b73f492c39e557743dfb1fe4c8b84cd10dc4807f11d4606dad2cecf3327
|
4
|
+
data.tar.gz: 918743f81b8d6bc18b5a7ba5d8722efc67a7af35fa718e7095b63b5ed6c91961
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4cf77d75a44f310640a5ce99e4b569a5a7e82e046647b6c70fabf7407c27e460dea47003d376935132bbabe2ca8b088dea9052c29c39f90b02e6dcd145d88714
|
7
|
+
data.tar.gz: 8590c90ab336c515bb9ab7633caf380f1ca9ed9dd7bf8ed30e8a72e85cc1c8bd857cd8ec6a9b1f2823129e5663242fccb7692e0171bb2abd29fcd30a58856abb
|
data/directory_diff.gemspec
CHANGED
@@ -21,7 +21,6 @@ Gem::Specification.new do |spec|
|
|
21
21
|
|
22
22
|
spec.add_dependency "activerecord", ">= 5.1"
|
23
23
|
spec.add_dependency "pg", "~> 1.1.3"
|
24
|
-
spec.add_dependency "activerecord_pg_stuff", "~> 0.2.1"
|
25
24
|
|
26
25
|
spec.add_development_dependency "bundler", "~> 1.11"
|
27
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
@@ -1,5 +1,4 @@
|
|
1
1
|
require_relative "transformer/in_memory"
|
2
|
-
require_relative "transformer/temp_table"
|
3
2
|
|
4
3
|
module DirectoryDiff
|
5
4
|
class Transform
|
@@ -10,21 +9,7 @@ module DirectoryDiff
|
|
10
9
|
end
|
11
10
|
|
12
11
|
def into(new_directory, options = {})
|
13
|
-
|
14
|
-
processor_class.new(current_directory).into(new_directory, options)
|
15
|
-
end
|
16
|
-
|
17
|
-
private
|
18
|
-
|
19
|
-
def processor_for(processor)
|
20
|
-
case processor
|
21
|
-
when nil, :in_memory
|
22
|
-
Transformer::InMemory
|
23
|
-
when :temp_table
|
24
|
-
Transformer::TempTable
|
25
|
-
else
|
26
|
-
raise ArgumentError, "unsupported processor #{processor.inspect}"
|
27
|
-
end
|
12
|
+
Transformer::InMemory.new(current_directory).into(new_directory, options)
|
28
13
|
end
|
29
14
|
end
|
30
15
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module DirectoryDiff
|
2
2
|
module Transformer
|
3
|
-
class InMemory
|
3
|
+
class InMemory
|
4
4
|
attr_reader :current_directory, :new_directory
|
5
5
|
attr_reader :transforms, :transforms_index
|
6
6
|
attr_reader :options
|
@@ -59,7 +59,7 @@ module DirectoryDiff
|
|
59
59
|
# the new record appears to be the same as the current record
|
60
60
|
if assistant_emails.empty?
|
61
61
|
original_assistant_value = nil
|
62
|
-
new_employee[3] = old_employee&.fetch(3)
|
62
|
+
new_employee[3] = old_employee&.fetch(3)
|
63
63
|
else
|
64
64
|
original_assistant_value = new_employee[3]
|
65
65
|
end
|
@@ -77,7 +77,7 @@ module DirectoryDiff
|
|
77
77
|
|
78
78
|
if old_employee.nil?
|
79
79
|
add_transform(:insert, new_employee)
|
80
|
-
elsif new_employee[0,
|
80
|
+
elsif new_employee[0, 5] == old_employee[0, 5]
|
81
81
|
# restore assistant value after cleanup like missing assistants and own email
|
82
82
|
new_employee[3] = original_assistant_value
|
83
83
|
# restore phone number value
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: directory_diff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.14
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kamal Mahyuddin
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -38,20 +38,6 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.1.3
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: activerecord_pg_stuff
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: 0.2.1
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: 0.2.1
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
42
|
name: bundler
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -149,13 +135,12 @@ files:
|
|
149
135
|
- lib/directory_diff.rb
|
150
136
|
- lib/directory_diff/transform.rb
|
151
137
|
- lib/directory_diff/transformer/in_memory.rb
|
152
|
-
- lib/directory_diff/transformer/temp_table.rb
|
153
138
|
- lib/directory_diff/version.rb
|
154
139
|
homepage: https://github.com/envoy/directory_diff
|
155
140
|
licenses:
|
156
141
|
- MIT
|
157
142
|
metadata: {}
|
158
|
-
post_install_message:
|
143
|
+
post_install_message:
|
159
144
|
rdoc_options: []
|
160
145
|
require_paths:
|
161
146
|
- lib
|
@@ -170,9 +155,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
170
155
|
- !ruby/object:Gem::Version
|
171
156
|
version: '0'
|
172
157
|
requirements: []
|
173
|
-
|
174
|
-
|
175
|
-
signing_key:
|
158
|
+
rubygems_version: 3.3.12
|
159
|
+
signing_key:
|
176
160
|
specification_version: 4
|
177
161
|
summary: Envoy employee directory diffing.
|
178
162
|
test_files: []
|
@@ -1,385 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "activerecord_pg_stuff"
|
4
|
-
|
5
|
-
module DirectoryDiff
|
6
|
-
module Transformer
|
7
|
-
class TempTable
|
8
|
-
attr_reader :current_directory, :operations
|
9
|
-
|
10
|
-
# @params current_directory a relation that filters out only the records
|
11
|
-
# that represent the current directory. This is
|
12
|
-
# mostly likely an Employee relation. This
|
13
|
-
# relation will be pulled into a temporary table.
|
14
|
-
def initialize(current_directory)
|
15
|
-
@current_directory = current_directory
|
16
|
-
@operations = []
|
17
|
-
end
|
18
|
-
|
19
|
-
# @param new_directory a table containing only the new records to compare
|
20
|
-
# against, most likely a temp table.
|
21
|
-
def into(new_directory, options = {})
|
22
|
-
current_directory_temp_table do |temp_current_directory|
|
23
|
-
new_directory_temp_table(new_directory) do |deduped_csv|
|
24
|
-
# Get Arel tables for referencing fields, table names
|
25
|
-
employees = temp_current_directory.table
|
26
|
-
csv = deduped_csv.table
|
27
|
-
|
28
|
-
# Reusable Arel predicates
|
29
|
-
csv_employee_join = csv[:email].eq(employees[:email])
|
30
|
-
attributes_unchanged = employees[:name].eq(csv[:name])
|
31
|
-
.and(
|
32
|
-
employees[:phone_number].eq(csv[:phone_number])
|
33
|
-
.or(csv[:phone_number].eq(""))
|
34
|
-
# ☝🏽 Comparing to an empty string because we cast
|
35
|
-
# phone number to an empty string. The reason is
|
36
|
-
# comparing NULL = NULL is always false in SQL
|
37
|
-
)
|
38
|
-
.and(
|
39
|
-
employees[:assistants].eq(csv[:assistants])
|
40
|
-
.or(csv[:assistants].eq("{}"))
|
41
|
-
)
|
42
|
-
|
43
|
-
# Creates joins between the temp table and the csv table and
|
44
|
-
# vice versa
|
45
|
-
# Cribbed from https://gist.github.com/mildmojo/3724189
|
46
|
-
csv_to_employees = csv.join(employees, Arel::Nodes::OuterJoin)
|
47
|
-
.on(csv_employee_join)
|
48
|
-
.join_sources
|
49
|
-
employees_to_csv = employees.join(csv, Arel::Nodes::OuterJoin)
|
50
|
-
.on(csv_employee_join)
|
51
|
-
.join_sources
|
52
|
-
|
53
|
-
# Representation of the joined csv-employees, with csv on the left
|
54
|
-
csv_records = deduped_csv.joins(csv_to_employees).order('row_number asc')
|
55
|
-
# Representation of the joined employees-csv, with employees on the
|
56
|
-
# left
|
57
|
-
employee_records = temp_current_directory.joins(employees_to_csv)
|
58
|
-
|
59
|
-
connection.execute(SQL.cleanup_sql(csv.name))
|
60
|
-
|
61
|
-
csv_fields = %I[name email phone_number assistants extra].map { |c| csv[c] }
|
62
|
-
emp_fields = %I[name email phone_number assistants].map { |c| employees[c] }
|
63
|
-
|
64
|
-
# new records are records in the new directory that don't exist in
|
65
|
-
# the current directory
|
66
|
-
new_records = csv_records
|
67
|
-
.select("'insert'::varchar operation, row_number")
|
68
|
-
.select(csv_fields)
|
69
|
-
.where({ employees.name => { email: nil } })
|
70
|
-
# deleted records are records in the current directory that don't
|
71
|
-
# exist in the new directory
|
72
|
-
deleted_records = employee_records
|
73
|
-
.select("'delete'::varchar operation, row_number")
|
74
|
-
.select(emp_fields)
|
75
|
-
.select("null extra")
|
76
|
-
.where({ csv.name => { email: nil } })
|
77
|
-
# changed records are records that have difference in name, phone
|
78
|
-
# number and/or assistants
|
79
|
-
changed_records = csv_records
|
80
|
-
.select("'update'::varchar operation, row_number")
|
81
|
-
.select(csv_fields)
|
82
|
-
.where.not(attributes_unchanged)
|
83
|
-
# unchanged records are records that are exactly the same in both
|
84
|
-
# directories (without considering the extra field)
|
85
|
-
unchanged_records = csv_records
|
86
|
-
.select("'noop'::varchar operation, row_number")
|
87
|
-
.select(csv_fields)
|
88
|
-
.where(attributes_unchanged)
|
89
|
-
|
90
|
-
# create temp table for holding operations
|
91
|
-
temp_table(new_records.to_sql) do |operations|
|
92
|
-
insert_into_operations(operations, deleted_records.to_sql)
|
93
|
-
insert_into_operations(operations, changed_records.to_sql)
|
94
|
-
if options[:skip_noop] != true
|
95
|
-
insert_into_operations(operations, unchanged_records.to_sql)
|
96
|
-
end
|
97
|
-
|
98
|
-
operations.order(:row_number).each do |operation|
|
99
|
-
add_operation(operation)
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
prioritize_assistants(operations)
|
106
|
-
end
|
107
|
-
|
108
|
-
private
|
109
|
-
|
110
|
-
def current_directory_temp_table(&block)
|
111
|
-
# outer temp table is required so that the projection does not run into
|
112
|
-
# ambiguous column issues
|
113
|
-
temp_table(current_directory) do |rel|
|
114
|
-
temp_table(rel.select(SQL.current_directory_projection), &block)
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
def new_directory_temp_table(source, &block)
|
119
|
-
convert_to_relation(source) do |relation|
|
120
|
-
relation = relation.select("*")
|
121
|
-
.from(Arel.sql("(#{SQL.latest_unique_sql(relation.table.name)}) as t"))
|
122
|
-
.order("row_number")
|
123
|
-
|
124
|
-
temp_table(relation, &block)
|
125
|
-
end
|
126
|
-
end
|
127
|
-
|
128
|
-
def convert_to_relation(source, &block)
|
129
|
-
return block.call(source) if source.is_a?(ActiveRecord::Relation)
|
130
|
-
|
131
|
-
temp_table do |relation|
|
132
|
-
table_name = relation.table.name
|
133
|
-
connection.change_table(table_name) do |t|
|
134
|
-
t.column :name, :string
|
135
|
-
t.column :email, :string
|
136
|
-
t.column :phone_number, :string
|
137
|
-
t.column :assistants, :string
|
138
|
-
t.column :extra, :string
|
139
|
-
end
|
140
|
-
insert_into_csv_table(table_name, source)
|
141
|
-
block.call(relation)
|
142
|
-
end
|
143
|
-
end
|
144
|
-
|
145
|
-
# TODO chunk this into batch sizes
|
146
|
-
def insert_into_csv_table(table_name, records)
|
147
|
-
return if records.empty?
|
148
|
-
|
149
|
-
values = records.map do |row|
|
150
|
-
(name, email, phone_number, assistants, extra) = row
|
151
|
-
columns = [
|
152
|
-
connection.quote(name),
|
153
|
-
connection.quote(email),
|
154
|
-
connection.quote(phone_number),
|
155
|
-
connection.quote(assistants),
|
156
|
-
connection.quote(extra)
|
157
|
-
]
|
158
|
-
"(#{columns.join(", ")})"
|
159
|
-
end
|
160
|
-
|
161
|
-
connection.execute(SQL.insert_into_temp_csv_table(table_name, values))
|
162
|
-
end
|
163
|
-
|
164
|
-
def temp_table(source = nil, &block)
|
165
|
-
return source.temporary_table(&block) if source.is_a?(ActiveRecord::Relation)
|
166
|
-
|
167
|
-
create_temp_table(source) do |name|
|
168
|
-
klass = current_directory.klass
|
169
|
-
dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new(klass, name)
|
170
|
-
if activerecord52?
|
171
|
-
rel = ActiveRecord::Relation.new(dec)
|
172
|
-
else
|
173
|
-
rel = ActiveRecord::Relation.new(
|
174
|
-
dec,
|
175
|
-
dec.arel_table,
|
176
|
-
dec.predicate_builder,
|
177
|
-
{}
|
178
|
-
)
|
179
|
-
end
|
180
|
-
rel.readonly!
|
181
|
-
block.call(rel)
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
def create_temp_table(initial_sql=nil)
|
186
|
-
table_name = "temporary_#{(Time.now.to_f * 1000).to_i}"
|
187
|
-
|
188
|
-
if initial_sql
|
189
|
-
connection.with_temporary_table(table_name, initial_sql) do |name|
|
190
|
-
yield name
|
191
|
-
end
|
192
|
-
else
|
193
|
-
connection.transaction do
|
194
|
-
begin
|
195
|
-
connection.create_table(table_name, temporary: true)
|
196
|
-
yield table_name
|
197
|
-
ensure
|
198
|
-
connection.drop_table(table_name)
|
199
|
-
end
|
200
|
-
end
|
201
|
-
end
|
202
|
-
end
|
203
|
-
|
204
|
-
def insert_into_operations(relation, sql)
|
205
|
-
connection.execute(SQL.insert_into_operations(relation.table.name, sql))
|
206
|
-
end
|
207
|
-
|
208
|
-
def add_operation(operation)
|
209
|
-
op = [
|
210
|
-
operation.operation.to_sym,
|
211
|
-
operation.name,
|
212
|
-
operation.email,
|
213
|
-
operation.phone_number.presence,
|
214
|
-
serialize_pg_array(operation.assistants)
|
215
|
-
]
|
216
|
-
op << operation.extra unless operation[:extra].nil?
|
217
|
-
operations << op
|
218
|
-
end
|
219
|
-
|
220
|
-
def serialize_pg_array(pg_array)
|
221
|
-
return if pg_array.nil?
|
222
|
-
pg_array = pg_array[1...-1] # remove the curly braces
|
223
|
-
pg_array.presence
|
224
|
-
end
|
225
|
-
|
226
|
-
def prioritize_assistants(operations)
|
227
|
-
prioritized_operations = []
|
228
|
-
operations.each do |operation|
|
229
|
-
process_operation(operation, operations, prioritized_operations, Set.new)
|
230
|
-
end
|
231
|
-
prioritized_operations
|
232
|
-
end
|
233
|
-
|
234
|
-
def process_operation(operation, operations, prioritized_operations, tail)
|
235
|
-
(_, _, email, _, assistants) = operation
|
236
|
-
return if prioritized_operations.find { |_, _, e| e == email }
|
237
|
-
|
238
|
-
(assistants || '').split(',').each do |assistant_email|
|
239
|
-
next if tail.include?(assistant_email)
|
240
|
-
assistant_operation = operations.find { |_, _, email| email == assistant_email }
|
241
|
-
process_operation(
|
242
|
-
assistant_operation,
|
243
|
-
operations,
|
244
|
-
prioritized_operations,
|
245
|
-
tail.add(email)
|
246
|
-
)
|
247
|
-
end
|
248
|
-
|
249
|
-
prioritized_operations << operation
|
250
|
-
end
|
251
|
-
|
252
|
-
def connection
|
253
|
-
current_directory.connection
|
254
|
-
end
|
255
|
-
|
256
|
-
def activerecord52?
|
257
|
-
ActiveRecord.gem_version >= Gem::Version.new("5.2.x")
|
258
|
-
end
|
259
|
-
end
|
260
|
-
|
261
|
-
module SQL
|
262
|
-
# Cleanup some bad records
|
263
|
-
# 1. Assistant email is set on an employee, but no assistant record
|
264
|
-
# in csv. Remove the assistant email.
|
265
|
-
# 2. Assistant email is employee's own email. Remove the assistant
|
266
|
-
# email.
|
267
|
-
# TODO move this into the temp table creation above
|
268
|
-
# https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
|
269
|
-
def self.cleanup_sql(table_name)
|
270
|
-
<<-SQL
|
271
|
-
with
|
272
|
-
unnested_assistants as
|
273
|
-
(
|
274
|
-
select
|
275
|
-
email,
|
276
|
-
name,
|
277
|
-
unnest(assistants) assistant
|
278
|
-
from #{table_name}
|
279
|
-
),
|
280
|
-
own_email_removed as
|
281
|
-
(
|
282
|
-
select
|
283
|
-
a.*
|
284
|
-
from unnested_assistants a
|
285
|
-
where a.email != a.assistant
|
286
|
-
),
|
287
|
-
missing_assistants_removed as
|
288
|
-
(
|
289
|
-
select
|
290
|
-
a.*
|
291
|
-
from own_email_removed a
|
292
|
-
left outer join #{table_name} b on a.assistant = b.email
|
293
|
-
where
|
294
|
-
(a.assistant is null and b.email is null)
|
295
|
-
or (a.assistant is not null and b.email is not null)
|
296
|
-
),
|
297
|
-
only_valid_assistants as
|
298
|
-
(
|
299
|
-
select
|
300
|
-
a.email,
|
301
|
-
a.name,
|
302
|
-
array_remove(
|
303
|
-
array_agg(b.assistant),
|
304
|
-
null
|
305
|
-
) assistants
|
306
|
-
from #{table_name} a
|
307
|
-
left outer join missing_assistants_removed b
|
308
|
-
using (email)
|
309
|
-
group by
|
310
|
-
a.email, a.name
|
311
|
-
)
|
312
|
-
update #{table_name}
|
313
|
-
set assistants = only_valid_assistants.assistants
|
314
|
-
from only_valid_assistants
|
315
|
-
where #{table_name}.email = only_valid_assistants.email
|
316
|
-
SQL
|
317
|
-
end
|
318
|
-
|
319
|
-
# Remove dupe email rows, keeping the last one
|
320
|
-
def self.latest_unique_sql(table_name)
|
321
|
-
<<-SQL
|
322
|
-
SELECT
|
323
|
-
DISTINCT ON (lower(email)) name,
|
324
|
-
lower(email) email,
|
325
|
-
coalesce(phone_number, '') phone_number,
|
326
|
-
array_remove(
|
327
|
-
regexp_split_to_array(
|
328
|
-
coalesce(assistants, ''),
|
329
|
-
'\s*,\s*'
|
330
|
-
)::varchar[],
|
331
|
-
''
|
332
|
-
) assistants,
|
333
|
-
extra,
|
334
|
-
ROW_NUMBER () OVER ()
|
335
|
-
FROM
|
336
|
-
#{table_name}
|
337
|
-
ORDER BY
|
338
|
-
lower(email),
|
339
|
-
row_number desc
|
340
|
-
SQL
|
341
|
-
end
|
342
|
-
|
343
|
-
def self.current_directory_projection
|
344
|
-
<<-SQL
|
345
|
-
name,
|
346
|
-
lower(email) email,
|
347
|
-
coalesce(phone_number, '') phone_number,
|
348
|
-
array_remove(
|
349
|
-
regexp_split_to_array(
|
350
|
-
coalesce(assistants, ''),
|
351
|
-
'\s*,\s*'
|
352
|
-
)::varchar[],
|
353
|
-
''
|
354
|
-
) assistants
|
355
|
-
SQL
|
356
|
-
end
|
357
|
-
|
358
|
-
def self.insert_into_operations(table_name, sql)
|
359
|
-
<<-SQL
|
360
|
-
insert into #{table_name}(
|
361
|
-
operation,
|
362
|
-
row_number,
|
363
|
-
name,
|
364
|
-
email,
|
365
|
-
phone_number,
|
366
|
-
assistants,
|
367
|
-
extra
|
368
|
-
) #{sql}
|
369
|
-
SQL
|
370
|
-
end
|
371
|
-
|
372
|
-
def self.insert_into_temp_csv_table(table_name, values)
|
373
|
-
<<-SQL
|
374
|
-
insert into #{table_name}(
|
375
|
-
name,
|
376
|
-
email,
|
377
|
-
phone_number,
|
378
|
-
assistants,
|
379
|
-
extra
|
380
|
-
) values #{values.join(", ")}
|
381
|
-
SQL
|
382
|
-
end
|
383
|
-
end
|
384
|
-
end
|
385
|
-
end
|