coopy 0.6.4.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +7 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.md +22 -0
  7. data/README.md +59 -0
  8. data/Rakefile +4 -6
  9. data/coopy.gemspec +26 -0
  10. data/lib/coopy.rb +32 -175
  11. data/lib/coopy/alignment.rb +260 -0
  12. data/lib/coopy/bag.rb +17 -0
  13. data/lib/coopy/cell_info.rb +24 -0
  14. data/lib/coopy/change_type.rb +10 -0
  15. data/lib/coopy/compare_flags.rb +62 -0
  16. data/lib/coopy/compare_table.rb +327 -0
  17. data/lib/coopy/coopy.rb +22 -0
  18. data/lib/coopy/cross_match.rb +10 -0
  19. data/lib/coopy/csv_table.rb +51 -0
  20. data/lib/coopy/diff_render.rb +307 -0
  21. data/lib/coopy/index.rb +73 -0
  22. data/lib/coopy/index_item.rb +17 -0
  23. data/lib/coopy/index_pair.rb +72 -0
  24. data/lib/coopy/mover.rb +123 -0
  25. data/lib/coopy/ordering.rb +27 -0
  26. data/lib/coopy/row.rb +9 -0
  27. data/lib/coopy/simple_cell.rb +15 -0
  28. data/lib/coopy/simple_table.rb +144 -0
  29. data/lib/coopy/simple_view.rb +36 -0
  30. data/lib/coopy/table.rb +44 -0
  31. data/lib/coopy/table_comparison_state.rb +33 -0
  32. data/lib/coopy/table_diff.rb +634 -0
  33. data/lib/coopy/table_text.rb +14 -0
  34. data/lib/coopy/table_view.rb +31 -0
  35. data/lib/coopy/unit.rb +53 -0
  36. data/lib/coopy/version.rb +3 -0
  37. data/lib/coopy/view.rb +34 -0
  38. data/spec/fixtures/bridges.html +10 -0
  39. data/spec/fixtures/bridges_diff.csv +8 -0
  40. data/spec/fixtures/bridges_new.csv +9 -0
  41. data/spec/fixtures/bridges_old.csv +9 -0
  42. data/spec/fixtures/planetary_bodies.html +22 -0
  43. data/spec/fixtures/planetary_bodies_diff.csv +19 -0
  44. data/spec/fixtures/planetary_bodies_new.csv +20 -0
  45. data/spec/fixtures/planetary_bodies_old.csv +19 -0
  46. data/spec/fixtures/quote_me.csv +10 -0
  47. data/spec/fixtures/quote_me2.csv +11 -0
  48. data/spec/integration/table_diff_spec.rb +57 -0
  49. data/spec/libs/compare_flags_spec.rb +40 -0
  50. data/spec/libs/coopy_spec.rb +14 -0
  51. data/spec/libs/ordering_spec.rb +28 -0
  52. data/spec/libs/unit_spec.rb +31 -0
  53. data/spec/spec_helper.rb +29 -0
  54. metadata +153 -46
  55. data/bin/sqlite_diff +0 -4
  56. data/bin/sqlite_patch +0 -4
  57. data/bin/sqlite_rediff +0 -4
  58. data/lib/coopy/dbi_sql_wrapper.rb +0 -89
  59. data/lib/coopy/diff_apply_sql.rb +0 -35
  60. data/lib/coopy/diff_columns.rb +0 -33
  61. data/lib/coopy/diff_output.rb +0 -21
  62. data/lib/coopy/diff_output_action.rb +0 -34
  63. data/lib/coopy/diff_output_group.rb +0 -40
  64. data/lib/coopy/diff_output_raw.rb +0 -17
  65. data/lib/coopy/diff_output_stats.rb +0 -45
  66. data/lib/coopy/diff_output_table.rb +0 -49
  67. data/lib/coopy/diff_output_tdiff.rb +0 -48
  68. data/lib/coopy/diff_parser.rb +0 -92
  69. data/lib/coopy/diff_render_csv.rb +0 -29
  70. data/lib/coopy/diff_render_html.rb +0 -74
  71. data/lib/coopy/diff_render_log.rb +0 -52
  72. data/lib/coopy/row_change.rb +0 -25
  73. data/lib/coopy/scraperwiki_sql_wrapper.rb +0 -8
  74. data/lib/coopy/scraperwiki_utils.rb +0 -23
  75. data/lib/coopy/sequel_sql_wrapper.rb +0 -73
  76. data/lib/coopy/sql_compare.rb +0 -222
  77. data/lib/coopy/sql_wrapper.rb +0 -34
  78. data/lib/coopy/sqlite_sql_wrapper.rb +0 -143
  79. data/test/test_coopy.rb +0 -126
@@ -0,0 +1,17 @@
1
module Coopy
  # Mixin that declares the interface of a "bag" of items.
  #
  # Both item methods are placeholders: they raise NotImplementedError
  # until the including class overrides them.
  module Bag
    # Reader for the @size instance variable (documented upstream as an
    # integer). Nothing in this module assigns it, so it reads as nil until
    # the including class sets @size.
    attr_reader :size

    # Placeholder for fetching a single item.
    #
    # x - selector for the item; its meaning is defined by the including
    #     class (presumably an index — confirm against implementers).
    #
    # Raises NotImplementedError unless overridden.
    def get_item(x)
      raise(NotImplementedError)
    end

    # Placeholder for obtaining the view used to interpret items.
    #
    # Raises NotImplementedError unless overridden.
    def get_item_view
      raise(NotImplementedError)
    end
  end
end
17
+
@@ -0,0 +1,24 @@
1
module Coopy
  # Plain data holder describing one cell of a rendered diff.
  class CellInfo
    # General cell description.
    attr_accessor :value, :pretty_value, :category, :category_given_tr

    # Fields relevant to updates and conflicts.
    # NOTE(review): pvalue/lvalue/rvalue look like the parent, local and
    # remote versions of the cell — confirm against the code that fills them.
    attr_accessor :separator, :updated, :conflicted, :pvalue, :lvalue, :rvalue

    # Text form of the cell.
    #
    # - not updated:             the plain value
    # - updated, not conflicted: "lvalue::rvalue"
    # - updated and conflicted:  "pvalue||lvalue::rvalue"
    #
    # Interpolation is used instead of String#+ so that nil or non-String
    # parts (e.g. numeric cells) are rendered rather than raising, and so the
    # method always honours the to_s contract of returning a String.
    #
    # Returns a String.
    def to_s
      return value.to_s unless updated
      return "#{lvalue}::#{rvalue}" unless conflicted

      "#{pvalue}||#{lvalue}::#{rvalue}"
    end
  end
end
@@ -0,0 +1,10 @@
1
module Coopy
  # Symbolic names for the kinds of change a comparison can report.
  # Each constant is simply the lower-case symbol of its own name.
  module ChangeType
    NO_CHANGE     = :no_change
    REMOTE_CHANGE = :remote_change
    LOCAL_CHANGE  = :local_change
    BOTH_CHANGE   = :both_change
    SAME_CHANGE   = :same_change
    NOTE_CHANGE   = :note_change
  end
end
@@ -0,0 +1,62 @@
1
module Coopy
  # Options controlling how tables are compared and how the resulting diff
  # is presented.
  class CompareFlags
    # Should we treat the data as ordered? (boolean)
    attr_accessor :ordered

    # Should we show unchanged rows in diffs? (boolean)
    attr_accessor :show_unchanged

    # Minimum number of rows to show around a changed row. (integer)
    attr_accessor :unchanged_context

    # Should we always decorate the diff with numerical indexes showing
    # order? (boolean)
    attr_accessor :always_show_order

    # Should we never decorate the diff with numerical indexes? (boolean)
    attr_accessor :never_show_order

    # Should we show unchanged columns in diffs? Index/key columns needed to
    # identify rows are shown even when this flag is off. (boolean)
    attr_accessor :show_unchanged_columns

    # Minimum number of columns to show around a changed column. (integer)
    attr_accessor :unchanged_column_context

    # Should we always give a table header in diffs? (boolean)
    attr_accessor :always_show_header

    # Optional action filter (Hash<String, Bool>). Add any of "update",
    # "insert", "delete" to accept just those actions. nil accepts all.
    attr_accessor :acts

    # Sets every flag to its default; no action filter is installed.
    def initialize
      @ordered                  = true
      @show_unchanged           = false
      @unchanged_context        = 1
      @always_show_order        = false
      @never_show_order         = true
      @show_unchanged_columns   = false
      @unchanged_column_context = 1
      @always_show_header       = true
      @acts                     = nil
    end

    # Returns true when "update" actions are accepted.
    def allow_update
      action_allowed?("update")
    end

    # Returns true when "insert" actions are accepted.
    def allow_insert
      action_allowed?("insert")
    end

    # Returns true when "delete" actions are accepted.
    def allow_delete
      action_allowed?("delete")
    end

    private

    # An action is accepted when no filter is set, or when the filter has
    # an entry for it. Only the presence of the key is checked; the value
    # stored under it is not consulted.
    def action_allowed?(name)
      return true if acts.nil?

      acts.has_key?(name)
    end
  end
end
62
+
@@ -0,0 +1,327 @@
1
module Coopy
  # Compares two tables (a and b), optionally against a common parent (p),
  # and builds alignments between their rows and columns.
  #
  # The tables and all intermediate results live in a comparison-state
  # object handed to #attach. Tables are only accessed through width,
  # height, get_cell(column, row) and get_cell_view.
  class CompareTable
    # Attaches a comparison state and runs the comparison steps.
    #
    # comp - the TableComparisonState to work on.
    #
    # One step is always run; further steps are run only while there is more
    # to do and comp.run_to_completion is truthy.
    #
    # Returns true when no work remains, false when work was left pending.
    def attach(comp)
      @comp = comp # TableComparisonState
      more = compare_core
      while more && @comp.run_to_completion
        more = compare_core
      end
      !more
    end

    # Builds and returns a fresh Coopy::Alignment for the attached tables.
    def align
      alignment = Coopy::Alignment.new
      align_core(alignment)
      alignment
    end

    # Returns the comparison state given to #attach.
    def get_comparison_state
      @comp
    end

    # Fills in +align+ for the attached tables.
    #
    # Without a parent, a is aligned directly against b. With a parent, the
    # main alignment is parent-vs-b, and a second alignment (parent-vs-a) is
    # stored as align.reference, with the two column alignments chained the
    # same way through align.meta.reference.
    def align_core(align)
      if @comp.p.nil?
        align_core_2(align, @comp.a, @comp.b)
        return
      end
      align.reference = Coopy::Alignment.new
      align_core_2(align, @comp.p, @comp.b)
      align_core_2(align.reference, @comp.p, @comp.a)
      align.meta.reference = align.reference.meta
    end

    # Aligns the rows of table +a+ with the rows of table +b+.
    #
    # Columns are aligned first (into align.meta). Columns present on both
    # sides are then combined, in every non-empty subset, into candidate
    # keys; each usable key links the still-unmatched rows of +a+ that it
    # identifies unambiguously on both sides.
    def align_core_2(align, a, b)
      align.meta = Coopy::Alignment.new if align.meta.nil?
      align_columns(align.meta, a, b)
      column_order = align.meta.to_order
      common_units = column_order.get_list.select do |unit|
        unit.l >= 0 && unit.r >= 0 && unit.p != -1
      end

      align.range(a.height, b.height)
      align.tables(a, b)
      align.set_rowlike(true)

      ha = a.height

      # If we have more columns than we have time to process their
      # combinations, we need to haul out some heuristics.
      n = 5
      columns = pick_index_columns(common_units, a, b, n)

      # One bit per candidate column: every value of the mask below selects
      # one subset of the columns.
      top = 2**columns.length

      pending = {}
      (0...ha).each { |row| pending[row] = row }
      pending_ct = ha

      # Mask 0 is the empty subset and is skipped.
      (1...top).each do |mask|
        break if pending_ct == 0

        active_columns = []
        bits = mask
        at = 0
        while bits > 0
          active_columns << columns[at] if bits.odd?
          bits >>= 1
          at += 1
        end

        index = IndexPair.new
        active_columns.each do |column|
          unit = common_units[column]
          index.add_columns(unit.l, unit.r)
          align.add_index_columns(unit)
        end
        index.index_tables(a, b)

        h = [a.height, b.height, 1].max
        # Float division is essential here: with Integer operands the ratio
        # would truncate to 0 and the quality check could never trigger.
        ratio = index.get_top_freq.to_f / (h + 20) # "20" allows for low-data
        next if ratio >= 0.1 # lousy no-good index, move on

        @indexes << index if @indexes

        fixed = []
        pending.keys.each do |row|
          cross = index.query_local(row)
          spot_a = cross.spot_a
          spot_b = cross.spot_b
          next if spot_a != 1 || spot_b != 1

          fixed << row
          align.link(row, cross.item_b.lst[0])
        end
        fixed.each do |row|
          pending.delete(row)
          pending_ct -= 1
        end
      end
      # we expect headers on row 0 - link them even if quite different.
      align.link(0, 0)
    end

    # Aligns the columns of table +a+ with the columns of table +b+.
    #
    # Each of the first few rows of +a+ is tried against each of the first
    # few rows of +b+ as a pair of header rows; the pair sharing the most
    # unambiguous cell values wins, and columns carrying the same value in
    # that pair are linked. Nothing is linked when either table has no rows.
    def align_columns(align, a, b)
      align.range(a.width, b.width)
      align.tables(a, b)
      align.set_rowlike(false)

      slop = 5 # how many leading rows are tried as headers on each side

      va = a.get_cell_view
      vb = b.get_cell_view
      ct_best = -1
      ma_best = nil
      mb_best = nil
      ra_header = 0
      rb_header = 0
      ra_uniques = 0
      rb_uniques = 0
      (0...slop).each do |ra|
        break if ra >= a.height

        (0...slop).each do |rb|
          break if rb >= b.height

          ma, uniques = index_row_cells(a, va, ra)
          if uniques > ra_uniques
            ra_header = ra
            ra_uniques = uniques
          end

          mb, uniques = index_row_cells(b, vb, rb)
          if uniques > rb_uniques
            rb_header = rb
            rb_uniques = uniques
          end

          # Count values that sit in exactly one column on both sides.
          ct = ma.count do |key, i0|
            i1 = mb[key]
            i1 && i1 >= 0 && i0 >= 0
          end

          if ct > ct_best
            ct_best = ct
            ma_best = ma
            mb_best = mb
          end
        end
      end

      return if ma_best.nil?

      ma_best.each do |key, i0|
        i1 = mb_best[key]
        # i1 is nil for a header present in a but missing from b.
        align.link(i0, i1) if i1 && i1 >= 0 && i0 >= 0
      end
      align.headers(ra_header, rb_header)
    end

    # Records on the comparison state whether the attached tables have the
    # same columns (a vs b, and additionally parent vs a when a parent is
    # set), and marks that fact as known.
    #
    # Always returns true.
    def test_has_same_columns
      p = @comp.p
      a = @comp.a
      b = @comp.b
      eq = has_same_columns_2(a, b)
      eq = has_same_columns_2(p, a) if eq && p
      @comp.has_same_columns = eq
      @comp.has_same_columns_known = true
      true
    end

    # Returns true when +a+ and +b+ appear to share the same columns.
    #
    # Widths must match. Tables with no rows on either side are accepted.
    # Otherwise row 0 is read as a header: it must contain no repeated cell
    # in +a+, and must match +b+'s row 0 cell for cell.
    def has_same_columns_2(a, b)
      return false if a.width != b.width
      return true if a.height == 0 || b.height == 0

      # check for a blatant header - should only do this
      # for meta-data free tables, that may have embedded headers
      av = a.get_cell_view
      (0...a.width).each do |i|
        ((i + 1)...a.width).each do |j|
          return false if av.equals(a.get_cell(i, 0), a.get_cell(j, 0))
        end
        return false unless av.equals(a.get_cell(i, 0), b.get_cell(i, 0))
      end

      true
    end

    # Records on the comparison state whether the attached tables are equal
    # (a vs b, and additionally parent vs a when a parent is set), and marks
    # that fact as known.
    #
    # Always returns true.
    def test_is_equal
      p = @comp.p
      a = @comp.a
      b = @comp.b
      eq = is_equal_2(a, b)
      eq = is_equal_2(p, a) if eq && p
      @comp.is_equal = eq
      @comp.is_equal_known = true
      true
    end

    # Returns true when +a+ and +b+ have the same dimensions and every pair
    # of corresponding cells is equal according to +a+'s cell view.
    def is_equal_2(a, b)
      return false if a.width != b.width || a.height != b.height

      av = a.get_cell_view
      (0...a.height).each do |row|
        (0...a.width).each do |col|
          return false unless av.equals(a.get_cell(col, row), b.get_cell(col, row))
        end
      end
      true
    end

    # Runs the next outstanding comparison step, if any: first the equality
    # test, then the same-columns test; once both are known the state is
    # marked completed.
    #
    # Returns true when a step was run (more may remain), false otherwise.
    def compare_core
      return false if @comp.completed
      return test_is_equal unless @comp.is_equal_known
      return test_has_same_columns unless @comp.has_same_columns_known

      @comp.completed = true
      false
    end

    # Starts collecting the indexes that align_core_2 decides to use.
    def store_indexes
      @indexes = []
    end

    # Returns the collected indexes, or nil if store_indexes was never called.
    def get_indexes
      @indexes
    end

    private

    # Chooses which common columns may take part in candidate keys.
    #
    # common_units - units describing columns present on both sides.
    # limit        - maximum number of columns to return.
    #
    # With at most +limit+ columns all of them are used. Otherwise each
    # column is scored by its number of distinct values in +a+ plus its
    # number of distinct values in +b+, and the highest-scoring columns are
    # kept: a column with many distinct values separates rows better than a
    # near-constant one. Ties keep the original column order so the result
    # is deterministic.
    #
    # Returns an Array of positions into +common_units+.
    def pick_index_columns(common_units, a, b, limit)
      return (0...common_units.length).to_a if common_units.length <= limit

      view = a.get_cell_view
      scored = common_units.each_with_index.map do |unit, position|
        seen_a = {}
        (0...a.height).each { |row| seen_a[view.to_s(a.get_cell(unit.l, row))] = true }
        seen_b = {}
        (0...b.height).each { |row| seen_b[view.to_s(b.get_cell(unit.r, row))] = true }
        [position, seen_a.size + seen_b.size]
      end
      scored.sort_by { |position, distinct| [-distinct, position] }
            .first(limit)
            .map(&:first)
    end

    # Maps the text of every cell in one row to the column holding it.
    #
    # A text seen in more than one column is mapped to -1. The second value
    # returned is a tally that gains one for each first sighting of a text
    # and loses one for each repeat sighting.
    #
    # Returns [positions, tally].
    def index_row_cells(table, view, row)
      positions = {}
      uniques = 0
      (0...table.width).each do |col|
        key = view.to_s(table.get_cell(col, row))
        if positions.has_key?(key)
          positions[key] = -1
          uniques -= 1
        else
          positions[key] = col
          uniques += 1
        end
      end
      [positions, uniques]
    end
  end
end
@@ -0,0 +1,22 @@
1
module Coopy
  class << self
    # Sets up a two-way comparison.
    #
    # local  - table stored as the comparison state's +a+.
    # remote - table stored as the comparison state's +b+.
    #
    # Returns the Coopy::CompareTable, already attached to the new state.
    def compare_tables(local, remote)
      comparer = Coopy::CompareTable.new
      state = Coopy::TableComparisonState.new
      state.a = local
      state.b = remote
      comparer.attach(state)
      comparer
    end

    # Sets up a three-way comparison against a common parent.
    #
    # parent - table stored as the comparison state's +p+.
    # local  - table stored as the comparison state's +a+.
    # remote - table stored as the comparison state's +b+.
    #
    # Returns the Coopy::CompareTable, already attached to the new state.
    def compare_tables_3(parent, local, remote)
      comparer = Coopy::CompareTable.new
      state = Coopy::TableComparisonState.new
      state.p = parent
      state.a = local
      state.b = remote
      comparer.attach(state)
      comparer
    end
  end
end