coopy 0.6.4.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +7 -0
- data/LICENSE.md +22 -0
- data/README.md +59 -0
- data/Rakefile +4 -6
- data/coopy.gemspec +26 -0
- data/lib/coopy.rb +32 -175
- data/lib/coopy/alignment.rb +260 -0
- data/lib/coopy/bag.rb +17 -0
- data/lib/coopy/cell_info.rb +24 -0
- data/lib/coopy/change_type.rb +10 -0
- data/lib/coopy/compare_flags.rb +62 -0
- data/lib/coopy/compare_table.rb +327 -0
- data/lib/coopy/coopy.rb +22 -0
- data/lib/coopy/cross_match.rb +10 -0
- data/lib/coopy/csv_table.rb +51 -0
- data/lib/coopy/diff_render.rb +307 -0
- data/lib/coopy/index.rb +73 -0
- data/lib/coopy/index_item.rb +17 -0
- data/lib/coopy/index_pair.rb +72 -0
- data/lib/coopy/mover.rb +123 -0
- data/lib/coopy/ordering.rb +27 -0
- data/lib/coopy/row.rb +9 -0
- data/lib/coopy/simple_cell.rb +15 -0
- data/lib/coopy/simple_table.rb +144 -0
- data/lib/coopy/simple_view.rb +36 -0
- data/lib/coopy/table.rb +44 -0
- data/lib/coopy/table_comparison_state.rb +33 -0
- data/lib/coopy/table_diff.rb +634 -0
- data/lib/coopy/table_text.rb +14 -0
- data/lib/coopy/table_view.rb +31 -0
- data/lib/coopy/unit.rb +53 -0
- data/lib/coopy/version.rb +3 -0
- data/lib/coopy/view.rb +34 -0
- data/spec/fixtures/bridges.html +10 -0
- data/spec/fixtures/bridges_diff.csv +8 -0
- data/spec/fixtures/bridges_new.csv +9 -0
- data/spec/fixtures/bridges_old.csv +9 -0
- data/spec/fixtures/planetary_bodies.html +22 -0
- data/spec/fixtures/planetary_bodies_diff.csv +19 -0
- data/spec/fixtures/planetary_bodies_new.csv +20 -0
- data/spec/fixtures/planetary_bodies_old.csv +19 -0
- data/spec/fixtures/quote_me.csv +10 -0
- data/spec/fixtures/quote_me2.csv +11 -0
- data/spec/integration/table_diff_spec.rb +57 -0
- data/spec/libs/compare_flags_spec.rb +40 -0
- data/spec/libs/coopy_spec.rb +14 -0
- data/spec/libs/ordering_spec.rb +28 -0
- data/spec/libs/unit_spec.rb +31 -0
- data/spec/spec_helper.rb +29 -0
- metadata +153 -46
- data/bin/sqlite_diff +0 -4
- data/bin/sqlite_patch +0 -4
- data/bin/sqlite_rediff +0 -4
- data/lib/coopy/dbi_sql_wrapper.rb +0 -89
- data/lib/coopy/diff_apply_sql.rb +0 -35
- data/lib/coopy/diff_columns.rb +0 -33
- data/lib/coopy/diff_output.rb +0 -21
- data/lib/coopy/diff_output_action.rb +0 -34
- data/lib/coopy/diff_output_group.rb +0 -40
- data/lib/coopy/diff_output_raw.rb +0 -17
- data/lib/coopy/diff_output_stats.rb +0 -45
- data/lib/coopy/diff_output_table.rb +0 -49
- data/lib/coopy/diff_output_tdiff.rb +0 -48
- data/lib/coopy/diff_parser.rb +0 -92
- data/lib/coopy/diff_render_csv.rb +0 -29
- data/lib/coopy/diff_render_html.rb +0 -74
- data/lib/coopy/diff_render_log.rb +0 -52
- data/lib/coopy/row_change.rb +0 -25
- data/lib/coopy/scraperwiki_sql_wrapper.rb +0 -8
- data/lib/coopy/scraperwiki_utils.rb +0 -23
- data/lib/coopy/sequel_sql_wrapper.rb +0 -73
- data/lib/coopy/sql_compare.rb +0 -222
- data/lib/coopy/sql_wrapper.rb +0 -34
- data/lib/coopy/sqlite_sql_wrapper.rb +0 -143
- data/test/test_coopy.rb +0 -126
data/lib/coopy/cell_info.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Coopy
  # Holds what is known about a single cell of a diff: the value to display,
  # its category, and - for updated or conflicting cells - the individual
  # versions of the value that make up the change.
  class CellInfo

    # Value of the cell; this is what #to_s yields for a cell that is not
    # flagged as updated.
    attr_accessor :value
    # NOTE(review): presumably a display-ready form of +value+ - confirm with
    # the renderer that fills it in.
    attr_accessor :pretty_value
    # Category assigned to the cell (set by the caller; not interpreted here).
    attr_accessor :category
    # Category assigned by the caller; NOTE(review): the "_given_tr" suffix
    # suggests it depends on the enclosing row - confirm.
    attr_accessor :category_given_tr

    # relevant to updates, conflicts
    attr_accessor :separator
    # Truthy when the cell carries an update (see #to_s).
    attr_accessor :updated
    # Truthy when the update is also a conflict (see #to_s).
    attr_accessor :conflicted
    # Versions of the value used by #to_s. NOTE(review): presumably the
    # parent, local and remote versions respectively - confirm.
    attr_accessor :pvalue
    attr_accessor :lvalue
    attr_accessor :rvalue

    # Renders the cell.
    #
    # * not updated:             +value+, returned as-is
    # * updated, not conflicted: "lvalue::rvalue"
    # * updated and conflicted:  "pvalue||lvalue::rvalue"
    #
    # Interpolation is used rather than String#+ so that nil or non-String
    # cell values (numbers, for instance) are rendered instead of raising.
    def to_s
      return value unless updated
      return "#{lvalue}::#{rvalue}" unless conflicted
      "#{pvalue}||#{lvalue}::#{rvalue}"
    end

  end
end
|
data/lib/coopy/compare_flags.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module Coopy
  # Options controlling how tables are compared and how the resulting diff is
  # presented. Every option has a default assigned in #initialize.
  class CompareFlags

    # Should we treat the data as ordered?
    attr_accessor :ordered # boolean

    # Should we show unchanged rows in diffs?
    attr_accessor :show_unchanged # boolean

    # What is the minimum number of rows around a changed row we should show?
    attr_accessor :unchanged_context # integer

    # Should we always decorate the diff with numerical indexes showing order?
    attr_accessor :always_show_order # boolean

    # Should we never decorate the diff with numerical indexes?
    attr_accessor :never_show_order # boolean

    # Should we show unchanged columns in diffs?
    # (note that index/key columns needed to identify rows will be shown
    # even if we turn this flag off)
    attr_accessor :show_unchanged_columns # boolean

    # What is the minimum number of columns around a changed
    # column that we should show?
    attr_accessor :unchanged_column_context # integer

    # Should we always give a table header in diffs?
    attr_accessor :always_show_header # boolean

    # Optional filter for actions. Leave as nil to accept every action, or
    # assign a hash containing any of the keys
    #   "update", "insert", "delete"
    # to accept just those actions. Only the presence of a key is checked:
    # a key mapped to false still enables its action.
    attr_accessor :acts # Hash<String, Bool>

    # Sets every flag to its default value; no action filter is installed.
    def initialize
      @ordered = true
      @show_unchanged = false
      @unchanged_context = 1
      @always_show_order = false
      @never_show_order = true
      @show_unchanged_columns = false
      @unchanged_column_context = 1
      @always_show_header = true
      @acts = nil
    end

    # True when updates pass the action filter (always true without a filter).
    def allow_update
      acts.nil? || acts.key?("update")
    end

    # True when inserts pass the action filter (always true without a filter).
    def allow_insert
      acts.nil? || acts.key?("insert")
    end

    # True when deletes pass the action filter (always true without a filter).
    def allow_delete
      acts.nil? || acts.key?("delete")
    end

  end
end
|
62
|
+
|
data/lib/coopy/compare_table.rb
ADDED
@@ -0,0 +1,327 @@
|
|
1
|
+
module Coopy
  # Compares two tables, optionally against a shared parent table, and works
  # out how their rows and columns line up.
  #
  # The tables and the progress flags of the comparison live in the state
  # object handed to #attach. Tables are only required to respond to +width+,
  # +height+, +get_cell(column, row)+ and +get_cell_view+.
  class CompareTable

    # At most this many common columns are combined into candidate row keys.
    INDEX_COLUMN_LIMIT = 5
    # Number of leading rows examined when looking for the header row.
    HEADER_SCAN_ROWS = 5
    # An index whose most frequent key covers at least this share of the
    # (padded) row count is too ambiguous to be useful.
    WEAK_INDEX_RATIO = 0.1
    # Added to the row count before computing the share above; "allows for
    # low-data" tables, per the original comment.
    LOW_DATA_PADDING = 20

    # Attaches the comparison state and runs the first comparison step; keeps
    # stepping while steps remain and +comp.run_to_completion+ is truthy.
    #
    # comp:: the comparison state (a TableComparisonState)
    # Returns true when the last step reported that no work remains.
    def attach(comp)
      @comp = comp # TableComparisonState
      more = compare_core
      while more && @comp.run_to_completion
        more = compare_core
      end
      !more
    end

    # Builds and returns a fresh Alignment for the attached tables.
    def align
      alignment = Coopy::Alignment.new
      align_core(alignment)
      alignment
    end

    # Returns the comparison state given to #attach.
    def get_comparison_state
      @comp
    end

    # Fills +align+ for the attached tables. Without a parent table, a is
    # aligned directly with b. With a parent, +align+ receives parent-vs-b,
    # +align.reference+ receives parent-vs-a, and the two column alignments
    # are chained through +meta.reference+.
    def align_core(align)
      if @comp.p.nil?
        align_core_2(align, @comp.a, @comp.b)
        return
      end
      align.reference = Coopy::Alignment.new
      align_core_2(align, @comp.p, @comp.b)
      align_core_2(align.reference, @comp.p, @comp.a)
      align.meta.reference = align.reference.meta
    end

    # Aligns the rows of +a+ and +b+ into +align+; the column alignment is
    # stored in +align.meta+ (created when missing).
    #
    # Every non-empty combination of the candidate key columns is tried as an
    # index. A row of +a+ is linked as soon as some index maps it to exactly
    # one row on each side; rows that never get such a match stay unlinked.
    def align_core_2(align, a, b)
      align.meta = Coopy::Alignment.new if align.meta.nil?
      align_columns(align.meta, a, b)

      # Only columns present on both sides can serve as key columns.
      common_units = []
      align.meta.to_order.get_list.each do |unit|
        common_units << unit if unit.l >= 0 && unit.r >= 0 && unit.p != -1
      end

      align.range(a.height, b.height)
      align.tables(a, b)
      align.set_rowlike(true)

      # If we have more columns than we have time to process their
      # combinations, we need to haul out some heuristics.
      columns = index_candidate_columns(common_units, a, b)

      tallest = [a.height, b.height, 1].max
      pending = (0...a.height).to_a

      # Each bit of +mask+ switches one candidate column on or off.
      (1...(1 << columns.length)).each do |mask|
        break if pending.empty?

        active_columns = columns.each_with_index
                                .select { |_column, bit| mask[bit] == 1 }
                                .map(&:first)

        index = IndexPair.new
        active_columns.each do |column|
          unit = common_units[column]
          index.add_columns(unit.l, unit.r)
          align.add_index_columns(unit)
        end
        index.index_tables(a, b)

        # Float division on purpose: Integer division would truncate the
        # ratio to 0 and the weak-index test could never fire.
        ratio = index.get_top_freq.to_f / (tallest + LOW_DATA_PADDING)
        next if ratio >= WEAK_INDEX_RATIO # lousy no-good index, move on

        @indexes << index if @indexes

        fixed = []
        pending.each do |row|
          cross = index.query_local(row)
          next unless cross.spot_a == 1 && cross.spot_b == 1
          fixed << row
          align.link(row, cross.item_b.lst[0])
        end
        pending -= fixed
      end

      # we expect headers on row 0 - link them even if quite different.
      align.link(0, 0)
    end

    # Aligns the columns of +a+ and +b+ into +align+.
    #
    # The first HEADER_SCAN_ROWS rows of each table are considered as header
    # rows; the pair of rows sharing the most unambiguous cell texts decides
    # which columns are linked. Independently, the row of each table with the
    # highest uniqueness score is reported through +align.headers+.
    # Returns without linking anything when either table has no rows.
    def align_columns(align, a, b)
      align.range(a.width, b.width)
      align.tables(a, b)
      align.set_rowlike(false)

      va = a.get_cell_view
      vb = b.get_cell_view
      rows_a = (0...[HEADER_SCAN_ROWS, a.height].min).map { |ra| header_row_map(a, va, ra) }
      rows_b = (0...[HEADER_SCAN_ROWS, b.height].min).map { |rb| header_row_map(b, vb, rb) }

      ct_best = -1
      ma_best = nil
      mb_best = nil
      rows_a.each do |ma, _score|
        rows_b.each do |mb, _score_b|
          # Texts that appear exactly once in both rows.
          ct = ma.count { |key, ia| ia >= 0 && mb.fetch(key, -1) >= 0 }
          next unless ct > ct_best
          ct_best = ct
          ma_best = ma
          mb_best = mb
        end
      end

      return if ma_best.nil?

      ma_best.each do |key, i0|
        i1 = mb_best[key]
        # i1 is nil when the text does not occur in b's row at all.
        align.link(i0, i1) if i0 >= 0 && i1 && i1 >= 0
      end
      align.headers(most_unique_row(rows_a), most_unique_row(rows_b))
    end

    # Records on the comparison state whether a and b (and, when a parent is
    # attached, parent and a) have the same columns. Always returns true.
    def test_has_same_columns
      p = @comp.p
      a = @comp.a
      b = @comp.b
      eq = has_same_columns_2(a, b)
      eq = has_same_columns_2(p, a) if eq && p
      @comp.has_same_columns = eq
      @comp.has_same_columns_known = true
      true
    end

    # True when +a+ and +b+ have equal widths and, unless one of them has no
    # rows, row 0 of +a+ contains no repeated cell and matches row 0 of +b+
    # cell by cell.
    def has_same_columns_2(a, b)
      return false if a.width != b.width
      return true if a.height == 0 || b.height == 0

      # check for a blatant header - should only do this
      # for meta-data free tables, that may have embedded headers
      av = a.get_cell_view
      (0...a.width).each do |i|
        header = a.get_cell(i, 0)
        ((i + 1)...a.width).each do |j|
          return false if av.equals(header, a.get_cell(j, 0))
        end
        return false unless av.equals(header, b.get_cell(i, 0))
      end

      true
    end

    # Records on the comparison state whether a and b (and, when a parent is
    # attached, parent and a) are cell-for-cell equal. Always returns true.
    def test_is_equal
      p = @comp.p
      a = @comp.a
      b = @comp.b
      eq = is_equal_2(a, b)
      eq = is_equal_2(p, a) if eq && p
      @comp.is_equal = eq
      @comp.is_equal_known = true
      true
    end

    # True when +a+ and +b+ have the same dimensions and every pair of
    # corresponding cells is equal according to +a+'s cell view.
    def is_equal_2(a, b)
      return false if a.width != b.width || a.height != b.height
      av = a.get_cell_view
      (0...a.height).each do |row|
        (0...a.width).each do |column|
          return false unless av.equals(a.get_cell(column, row), b.get_cell(column, row))
        end
      end
      true
    end

    # Performs one step of the comparison: the equality test first, then the
    # same-columns test, then marks the state completed.
    # Returns true while further steps remain, false once completed.
    def compare_core
      return false if @comp.completed
      return test_is_equal unless @comp.is_equal_known
      return test_has_same_columns unless @comp.has_same_columns_known
      @comp.completed = true
      false
    end

    # Starts collecting the indexes accepted by later #align_core_2 calls.
    def store_indexes
      @indexes = []
    end

    # Returns the collected indexes, or nil if #store_indexes was never called.
    def get_indexes
      @indexes
    end

    private

    # Picks the positions (into +common_units+) of the columns to combine
    # into candidate keys. All of them are used when there are at most
    # INDEX_COLUMN_LIMIT; otherwise each column is scored by the number of
    # distinct cell texts it holds in +a+ plus in +b+ and the highest-scoring
    # INDEX_COLUMN_LIMIT are kept, ties going to the leftmost column.
    #
    # NOTE(review): highest-first is inferred from the weak-index rejection
    # in #align_core_2, which favours columns with few repeated values -
    # confirm against the reference implementation.
    def index_candidate_columns(common_units, a, b)
      positions = (0...common_units.length).to_a
      return positions if positions.length <= INDEX_COLUMN_LIMIT

      view = a.get_cell_view
      scored = positions.map do |position|
        unit = common_units[position]
        distinct = distinct_cell_count(a, view, unit.l) + distinct_cell_count(b, view, unit.r)
        [position, distinct]
      end
      scored.sort_by { |position, distinct| [-distinct, position] }
            .first(INDEX_COLUMN_LIMIT)
            .map(&:first)
    end

    # Number of distinct cell texts (as rendered by +view+) in one column.
    def distinct_cell_count(table, view, column)
      (0...table.height).map { |row| view.to_s(table.get_cell(column, row)) }.uniq.length
    end

    # Describes one row as a header candidate. Returns [positions, score]:
    # +positions+ maps each cell text to its column, or to -1 when the text
    # occurs more than once; +score+ gains one for every new text and loses
    # one for every repeat.
    def header_row_map(table, view, row)
      positions = {}
      score = 0
      (0...table.width).each do |column|
        key = view.to_s(table.get_cell(column, row))
        if positions.key?(key)
          positions[key] = -1
          score -= 1
        else
          positions[key] = column
          score += 1
        end
      end
      [positions, score]
    end

    # Index of the first row with the highest positive score, or 0 when no
    # row has a positive score.
    def most_unique_row(rows)
      best_row = 0
      best_score = 0
      rows.each_with_index do |(_positions, score), row|
        next unless score > best_score
        best_row = row
        best_score = score
      end
      best_row
    end
  end
end
|
data/lib/coopy/coopy.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
module Coopy

  # Runs a two-way comparison of +local+ against +remote+.
  # Returns the CompareTable that the comparison state was attached to.
  def self.compare_tables(local, remote)
    comparer = Coopy::CompareTable.new
    state = Coopy::TableComparisonState.new.tap do |s|
      s.a = local
      s.b = remote
    end
    comparer.attach(state)
    comparer
  end

  # Runs a three-way comparison of +local+ and +remote+ against +parent+.
  # Returns the CompareTable that the comparison state was attached to.
  def self.compare_tables_3(parent, local, remote)
    comparer = Coopy::CompareTable.new
    state = Coopy::TableComparisonState.new.tap do |s|
      s.p = parent
      s.a = local
      s.b = remote
    end
    comparer.attach(state)
    comparer
  end

end
|