coopy 0.6.4.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +7 -0
- data/LICENSE.md +22 -0
- data/README.md +59 -0
- data/Rakefile +4 -6
- data/coopy.gemspec +26 -0
- data/lib/coopy.rb +32 -175
- data/lib/coopy/alignment.rb +260 -0
- data/lib/coopy/bag.rb +17 -0
- data/lib/coopy/cell_info.rb +24 -0
- data/lib/coopy/change_type.rb +10 -0
- data/lib/coopy/compare_flags.rb +62 -0
- data/lib/coopy/compare_table.rb +327 -0
- data/lib/coopy/coopy.rb +22 -0
- data/lib/coopy/cross_match.rb +10 -0
- data/lib/coopy/csv_table.rb +51 -0
- data/lib/coopy/diff_render.rb +307 -0
- data/lib/coopy/index.rb +73 -0
- data/lib/coopy/index_item.rb +17 -0
- data/lib/coopy/index_pair.rb +72 -0
- data/lib/coopy/mover.rb +123 -0
- data/lib/coopy/ordering.rb +27 -0
- data/lib/coopy/row.rb +9 -0
- data/lib/coopy/simple_cell.rb +15 -0
- data/lib/coopy/simple_table.rb +144 -0
- data/lib/coopy/simple_view.rb +36 -0
- data/lib/coopy/table.rb +44 -0
- data/lib/coopy/table_comparison_state.rb +33 -0
- data/lib/coopy/table_diff.rb +634 -0
- data/lib/coopy/table_text.rb +14 -0
- data/lib/coopy/table_view.rb +31 -0
- data/lib/coopy/unit.rb +53 -0
- data/lib/coopy/version.rb +3 -0
- data/lib/coopy/view.rb +34 -0
- data/spec/fixtures/bridges.html +10 -0
- data/spec/fixtures/bridges_diff.csv +8 -0
- data/spec/fixtures/bridges_new.csv +9 -0
- data/spec/fixtures/bridges_old.csv +9 -0
- data/spec/fixtures/planetary_bodies.html +22 -0
- data/spec/fixtures/planetary_bodies_diff.csv +19 -0
- data/spec/fixtures/planetary_bodies_new.csv +20 -0
- data/spec/fixtures/planetary_bodies_old.csv +19 -0
- data/spec/fixtures/quote_me.csv +10 -0
- data/spec/fixtures/quote_me2.csv +11 -0
- data/spec/integration/table_diff_spec.rb +57 -0
- data/spec/libs/compare_flags_spec.rb +40 -0
- data/spec/libs/coopy_spec.rb +14 -0
- data/spec/libs/ordering_spec.rb +28 -0
- data/spec/libs/unit_spec.rb +31 -0
- data/spec/spec_helper.rb +29 -0
- metadata +153 -46
- data/bin/sqlite_diff +0 -4
- data/bin/sqlite_patch +0 -4
- data/bin/sqlite_rediff +0 -4
- data/lib/coopy/dbi_sql_wrapper.rb +0 -89
- data/lib/coopy/diff_apply_sql.rb +0 -35
- data/lib/coopy/diff_columns.rb +0 -33
- data/lib/coopy/diff_output.rb +0 -21
- data/lib/coopy/diff_output_action.rb +0 -34
- data/lib/coopy/diff_output_group.rb +0 -40
- data/lib/coopy/diff_output_raw.rb +0 -17
- data/lib/coopy/diff_output_stats.rb +0 -45
- data/lib/coopy/diff_output_table.rb +0 -49
- data/lib/coopy/diff_output_tdiff.rb +0 -48
- data/lib/coopy/diff_parser.rb +0 -92
- data/lib/coopy/diff_render_csv.rb +0 -29
- data/lib/coopy/diff_render_html.rb +0 -74
- data/lib/coopy/diff_render_log.rb +0 -52
- data/lib/coopy/row_change.rb +0 -25
- data/lib/coopy/scraperwiki_sql_wrapper.rb +0 -8
- data/lib/coopy/scraperwiki_utils.rb +0 -23
- data/lib/coopy/sequel_sql_wrapper.rb +0 -73
- data/lib/coopy/sql_compare.rb +0 -222
- data/lib/coopy/sql_wrapper.rb +0 -34
- data/lib/coopy/sqlite_sql_wrapper.rb +0 -143
- data/test/test_coopy.rb +0 -126
data/lib/coopy/cell_info.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Coopy
  # Plain value holder describing a single cell, with a string rendering
  # that depends on whether the cell is flagged as updated and/or conflicted.
  class CellInfo

    attr_accessor :value
    attr_accessor :pretty_value
    attr_accessor :category
    attr_accessor :category_given_tr

    # relevant to updates, conflicts
    attr_accessor :separator
    attr_accessor :updated
    attr_accessor :conflicted
    # NOTE(review): presumably the parent / local / remote versions of the
    # cell - confirm against the code that populates them.
    attr_accessor :pvalue
    attr_accessor :lvalue
    attr_accessor :rvalue

    # Renders the cell as text.
    #
    # * not updated           -> the plain value
    # * updated, no conflict  -> "lvalue::rvalue"
    # * updated and conflicted -> "pvalue||lvalue::rvalue"
    #
    # Interpolation is used instead of String#+ so that nil or non-String
    # cell values are rendered rather than raising, and so that the result
    # is always a String.
    #
    # @return [String]
    def to_s
      return value.to_s unless updated
      return "#{lvalue}::#{rvalue}" unless conflicted

      "#{pvalue}||#{lvalue}::#{rvalue}"
    end

  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Coopy
  # Options controlling how a comparison is carried out and rendered.
  class CompareFlags

    # Treat the data as ordered? (boolean)
    attr_accessor :ordered

    # Show unchanged rows in diffs? (boolean)
    attr_accessor :show_unchanged

    # Minimum number of rows to show around a changed row. (integer)
    attr_accessor :unchanged_context

    # Always decorate the diff with numerical indexes showing order? (boolean)
    attr_accessor :always_show_order

    # Never decorate the diff with numerical indexes? (boolean)
    attr_accessor :never_show_order

    # Show unchanged columns in diffs? (boolean)
    # Index/key columns needed to identify rows are shown even when this
    # flag is off.
    attr_accessor :show_unchanged_columns

    # Minimum number of columns to show around a changed column. (integer)
    attr_accessor :unchanged_column_context

    # Always give a table header in diffs? (boolean)
    attr_accessor :always_show_header

    # Optional action filter (Hash<String, Bool>). Add any of the keys
    # "update", "insert", "delete" to accept just those actions.
    # Only the presence of a key is checked, not its value.
    attr_accessor :acts

    # Sets every flag to its default; no action filter is installed.
    def initialize
      @ordered = true
      @show_unchanged = false
      @unchanged_context = 1
      @always_show_order = false
      @never_show_order = true
      @show_unchanged_columns = false
      @unchanged_column_context = 1
      @always_show_header = true
      @acts = nil
    end

    # True when updates pass the action filter.
    def allow_update
      action_allowed?("update")
    end

    # True when inserts pass the action filter.
    def allow_insert
      action_allowed?("insert")
    end

    # True when deletes pass the action filter.
    def allow_delete
      action_allowed?("delete")
    end

    private

    # Every action is allowed while no filter is set; otherwise the action
    # name must be present as a key of the filter.
    def action_allowed?(name)
      return true if acts.nil?

      acts.has_key?(name)
    end

  end
end
|
62
|
+
|
@@ -0,0 +1,327 @@
|
|
1
|
+
module Coopy
|
2
|
+
class CompareTable
|
3
|
+
|
4
|
+
# Stores +comp+ (a TableComparisonState) and runs comparison steps on it.
# One step is always taken; further steps are taken only while the previous
# step reported more work and the state asks to run to completion.
#
# @return [Boolean] true when the last step reported no remaining work
def attach(comp)
  @comp = comp # TableComparisonState
  unfinished = compare_core
  unfinished = compare_core while unfinished && @comp.run_to_completion
  !unfinished
end
|
12
|
+
|
13
|
+
# Builds a fresh Coopy::Alignment, fills it in via align_core and returns it.
def align
  Coopy::Alignment.new.tap { |result| align_core(result) }
end
|
18
|
+
|
19
|
+
# Returns the comparison state most recently stored by #attach
# (nil if nothing has been attached yet).
def get_comparison_state
  @comp
end
|
22
|
+
|
23
|
+
# Fills +align+ for the attached comparison state.
#
# Without a parent table (@comp.p is nil), tables a and b are aligned
# directly. With a parent, +align+ receives the parent-vs-b alignment, a new
# alignment stored in align.reference receives parent-vs-a, and the two
# column alignments (meta) are chained together.
def align_core(align)
  parent = @comp.p
  if parent.nil?
    align_core_2(align, @comp.a, @comp.b)
    return
  end

  reference = Coopy::Alignment.new
  align.reference = reference
  align_core_2(align, parent, @comp.b)
  align_core_2(reference, parent, @comp.a)
  align.meta.reference = reference.meta
end
|
33
|
+
|
34
|
+
|
35
|
+
# Aligns the rows of table +a+ with the rows of table +b+, recording the
# result in +align+.
#
# Columns are aligned first (into align.meta, created if missing). Columns
# present on both sides are then tried, in every non-empty combination, as
# row keys: an IndexPair is built per combination, and every still-unmatched
# row of +a+ whose key occurs exactly once in each table is linked to its
# counterpart in +b+. Row 0 of both tables is always linked at the end.
#
# Fixes relative to the previous version:
# * the candidate-column ranking was computed with a non-destructive #sort
#   whose result was discarded, so the ranking never took effect;
# * the "top frequency" ratio used integer division, so poor indexes were
#   effectively never rejected.
#
# @param align [Coopy::Alignment] alignment to fill in
# @param a [Table] left-hand table
# @param b [Table] right-hand table
def align_core_2(align, a, b)
  align.meta = Coopy::Alignment.new if align.meta.nil?
  align_columns(align.meta, a, b)

  # Only columns found in both tables can contribute to a row key.
  common_units = align.meta.to_order.get_list.select do |unit|
    unit.l >= 0 && unit.r >= 0 && unit.p != -1
  end

  ha = a.height
  hb = b.height
  align.range(ha, hb)
  align.tables(a, b)
  align.set_rowlike(true)

  # If we have more columns than we have time to process their
  # combinations, we need to haul out some heuristics.
  columns = select_index_columns(common_units, a, b, 5)

  # Rows of +a+ that still need a partner.
  pending = {}
  (0...ha).each { |row| pending[row] = row }

  tallest = [ha, hb, 1].max

  # Each bit of +mask+ switches one candidate column on or off.
  (1...(2**columns.length)).each do |mask|
    break if pending.empty?

    index = IndexPair.new
    columns.each_with_index do |column, bit|
      next if mask[bit].zero?

      unit = common_units[column]
      index.add_columns(unit.l, unit.r)
      align.add_index_columns(unit)
    end
    index.index_tables(a, b)

    # "20" allows for low-data. Float division is essential here.
    ratio = index.get_top_freq.to_f / (tallest + 20)
    next if ratio >= 0.1 # lousy no-good index, move on

    @indexes << index if @indexes

    # Iterate over a snapshot of the keys so rows can be removed as we go.
    pending.keys.each do |row|
      cross = index.query_local(row)
      next unless cross.spot_a == 1 && cross.spot_b == 1

      align.link(row, cross.item_b.lst[0])
      pending.delete(row)
    end
  end

  # we expect headers on row 0 - link them even if quite different.
  align.link(0, 0)
end

# Chooses which of +common_units+ (by position) to try as index columns.
# All of them are used when there are at most +limit+; otherwise the +limit+
# columns with the most distinct values (counted separately in +a+ and +b+
# and summed) are kept, ties broken by original position.
def select_index_columns(common_units, a, b, limit)
  return (0...common_units.length).to_a if common_units.length <= limit

  # Cells of both tables are rendered with a's cell view, as before.
  view = a.get_cell_view
  ha = a.height
  hb = b.height
  scored = common_units.each_with_index.map do |unit, position|
    seen_a = {}
    (0...ha).each { |row| seen_a[view.to_s(a.get_cell(unit.l, row))] = true }
    seen_b = {}
    (0...hb).each { |row| seen_b[view.to_s(b.get_cell(unit.r, row))] = true }
    [position, seen_a.size + seen_b.size]
  end
  scored.sort_by { |position, distinct| [-distinct, position] }.first(limit).map(&:first)
end
private :select_index_columns
|
155
|
+
|
156
|
+
# Aligns the columns of table +a+ with the columns of table +b+, recording
# the result in +align+.
#
# The first few rows of each table are treated as candidate header rows.
# For every pairing of a candidate row from +a+ with one from +b+, the cell
# texts that occur exactly once in each row and appear in both are counted;
# the pairing with the most such matches (first one wins on ties) decides
# which columns are linked. Independently, the candidate row with the most
# unique cell texts in each table is reported through align.headers.
#
# Returns nil without linking anything when either table has no rows.
#
# Fixes relative to the previous version: a cell text present only in +a+
# no longer raises NoMethodError in the final linking loop, and per-row key
# maps are built once instead of once per row pairing.
#
# @param align [Coopy::Alignment] column alignment to fill in
# @param a [Table] left-hand table
# @param b [Table] right-hand table
def align_columns(align, a, b)
  align.range(a.width, b.width)
  align.tables(a, b)
  align.set_rowlike(false)

  slop = 5 # how many leading rows are considered as possible headers

  rows_a = header_row_maps(a, slop)
  rows_b = header_row_maps(b, slop)
  return if rows_a.empty? || rows_b.empty?

  ra_header = richest_header_row(rows_a)
  rb_header = richest_header_row(rows_b)

  ct_best = -1
  ma_best = nil
  mb_best = nil
  rows_a.each do |ma, _|
    rows_b.each do |mb, _|
      ct = ma.count do |key, i0|
        i1 = mb[key]
        i0 >= 0 && i1 && i1 >= 0
      end
      next unless ct > ct_best

      ct_best = ct
      ma_best = ma
      mb_best = mb
    end
  end

  ma_best.each do |key, i0|
    i1 = mb_best[key]
    # i1 is nil when the text does not occur in b's row at all.
    align.link(i0, i1) if i0 >= 0 && i1 && i1 >= 0
  end
  align.headers(ra_header, rb_header)
end

# For each of the first +limit+ rows of +table+, builds a pair
# [map, uniques]: +map+ sends each cell text to its column, or to -1 when
# the text is repeated within the row; +uniques+ gains one for each new
# text and loses one for each repeat.
def header_row_maps(table, limit)
  view = table.get_cell_view
  (0...[limit, table.height].min).map do |row|
    columns = {}
    uniques = 0
    (0...table.width).each do |col|
      key = view.to_s(table.get_cell(col, row))
      if columns.has_key?(key)
        columns[key] = -1
        uniques -= 1
      else
        columns[key] = col
        uniques += 1
      end
    end
    [columns, uniques]
  end
end
private :header_row_maps

# Index of the first row with the highest positive uniqueness score;
# 0 when no row scores above zero.
def richest_header_row(rows)
  best_row = 0
  best_uniques = 0
  rows.each_with_index do |(_, uniques), row|
    next unless uniques > best_uniques

    best_row = row
    best_uniques = uniques
  end
  best_row
end
private :richest_header_row
|
240
|
+
|
241
|
+
# Works out whether the compared tables share the same columns and records
# the answer on the comparison state. Tables a and b are checked first; the
# parent (when there is one) is checked against a only if that succeeded.
#
# @return [true] always, signalling that a comparison step was performed
def test_has_same_columns
  parent = @comp.p
  local = @comp.a
  remote = @comp.b

  same = has_same_columns_2(local, remote)
  same = has_same_columns_2(parent, local) if same && parent

  @comp.has_same_columns = same
  @comp.has_same_columns_known = true
  true
end
|
253
|
+
|
254
|
+
# Decides whether tables +a+ and +b+ have the same columns.
#
# Differing widths mean no; equal widths with an empty table on either side
# mean yes. Otherwise row 0 is treated as a blatant header (appropriate
# only for meta-data free tables that may have embedded headers): every
# header cell of +a+ must differ from all later header cells of +a+ and
# must equal the cell of +b+ in the same column.
#
# @return [Boolean]
def has_same_columns_2(a, b)
  return false unless a.width == b.width
  return true if a.height == 0 || b.height == 0

  view = a.get_cell_view
  (0...a.width).each do |col|
    repeated = ((col + 1)...a.width).any? do |later|
      view.equals(a.get_cell(col, 0), a.get_cell(later, 0))
    end
    return false if repeated
    return false unless view.equals(a.get_cell(col, 0), b.get_cell(col, 0))
  end

  true
end
|
278
|
+
|
279
|
+
# Works out whether the compared tables are equal and records the answer on
# the comparison state. Tables a and b are compared first; the parent (when
# there is one) is compared against a only if that succeeded.
#
# @return [true] always, signalling that a comparison step was performed
def test_is_equal
  parent = @comp.p
  local = @comp.a
  remote = @comp.b

  equal = is_equal_2(local, remote)
  equal = is_equal_2(parent, local) if equal && parent

  @comp.is_equal = equal
  @comp.is_equal_known = true
  true
end
|
291
|
+
|
292
|
+
# Cell-by-cell equality of tables +a+ and +b+.
# The tables must have identical dimensions, and every pair of cells at the
# same position must be equal according to a's cell view.
#
# @return [Boolean]
def is_equal_2(a, b)
  return false unless a.width == b.width && a.height == b.height

  view = a.get_cell_view
  (0...a.height).all? do |row|
    (0...a.width).all? do |col|
      view.equals(a.get_cell(col, row), b.get_cell(col, row))
    end
  end
end
|
306
|
+
|
307
|
+
# Performs at most one pending comparison step on the attached state:
# the equality test first, then the same-columns test. When neither is
# outstanding the state is marked completed.
#
# @return [Boolean] the step's result when a step ran (the steps return
#   true); false when the state was already completed or has just been
#   marked completed
def compare_core
  if @comp.completed
    false
  elsif !@comp.is_equal_known
    test_is_equal
  elsif !@comp.has_same_columns_known
    test_has_same_columns
  else
    @comp.completed = true
    false
  end
end
|
318
|
+
|
319
|
+
# Starts (or restarts) index collection by resetting the stored list of
# indexes to a new empty array, which is also the return value.
def store_indexes
  @indexes = []
end
|
322
|
+
|
323
|
+
# Returns the list of collected indexes, or nil when #store_indexes has
# never been called.
def get_indexes
  @indexes
end
|
326
|
+
end
|
327
|
+
end
|
data/lib/coopy/coopy.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
module Coopy

  # Sets up a two-way comparison of +local+ against +remote+.
  # Returns the CompareTable after the comparison state has been attached.
  def self.compare_tables(local, remote)
    Coopy::CompareTable.new.tap do |comparer|
      state = Coopy::TableComparisonState.new
      state.a = local
      state.b = remote
      comparer.attach(state)
    end
  end

  # Sets up a three-way comparison of +local+ and +remote+ with +parent+
  # stored as the state's p table.
  # Returns the CompareTable after the comparison state has been attached.
  def self.compare_tables_3(parent, local, remote)
    Coopy::CompareTable.new.tap do |comparer|
      state = Coopy::TableComparisonState.new
      state.p = parent
      state.a = local
      state.b = remote
      comparer.attach(state)
    end
  end

end
|