coopy 0.6.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +7 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.md +22 -0
  7. data/README.md +59 -0
  8. data/Rakefile +4 -6
  9. data/coopy.gemspec +26 -0
  10. data/lib/coopy.rb +32 -175
  11. data/lib/coopy/alignment.rb +260 -0
  12. data/lib/coopy/bag.rb +17 -0
  13. data/lib/coopy/cell_info.rb +24 -0
  14. data/lib/coopy/change_type.rb +10 -0
  15. data/lib/coopy/compare_flags.rb +62 -0
  16. data/lib/coopy/compare_table.rb +327 -0
  17. data/lib/coopy/coopy.rb +22 -0
  18. data/lib/coopy/cross_match.rb +10 -0
  19. data/lib/coopy/csv_table.rb +51 -0
  20. data/lib/coopy/diff_render.rb +307 -0
  21. data/lib/coopy/index.rb +73 -0
  22. data/lib/coopy/index_item.rb +17 -0
  23. data/lib/coopy/index_pair.rb +72 -0
  24. data/lib/coopy/mover.rb +123 -0
  25. data/lib/coopy/ordering.rb +27 -0
  26. data/lib/coopy/row.rb +9 -0
  27. data/lib/coopy/simple_cell.rb +15 -0
  28. data/lib/coopy/simple_table.rb +144 -0
  29. data/lib/coopy/simple_view.rb +36 -0
  30. data/lib/coopy/table.rb +44 -0
  31. data/lib/coopy/table_comparison_state.rb +33 -0
  32. data/lib/coopy/table_diff.rb +634 -0
  33. data/lib/coopy/table_text.rb +14 -0
  34. data/lib/coopy/table_view.rb +31 -0
  35. data/lib/coopy/unit.rb +53 -0
  36. data/lib/coopy/version.rb +3 -0
  37. data/lib/coopy/view.rb +34 -0
  38. data/spec/fixtures/bridges.html +10 -0
  39. data/spec/fixtures/bridges_diff.csv +8 -0
  40. data/spec/fixtures/bridges_new.csv +9 -0
  41. data/spec/fixtures/bridges_old.csv +9 -0
  42. data/spec/fixtures/planetary_bodies.html +22 -0
  43. data/spec/fixtures/planetary_bodies_diff.csv +19 -0
  44. data/spec/fixtures/planetary_bodies_new.csv +20 -0
  45. data/spec/fixtures/planetary_bodies_old.csv +19 -0
  46. data/spec/fixtures/quote_me.csv +10 -0
  47. data/spec/fixtures/quote_me2.csv +11 -0
  48. data/spec/integration/table_diff_spec.rb +57 -0
  49. data/spec/libs/compare_flags_spec.rb +40 -0
  50. data/spec/libs/coopy_spec.rb +14 -0
  51. data/spec/libs/ordering_spec.rb +28 -0
  52. data/spec/libs/unit_spec.rb +31 -0
  53. data/spec/spec_helper.rb +29 -0
  54. metadata +153 -46
  55. data/bin/sqlite_diff +0 -4
  56. data/bin/sqlite_patch +0 -4
  57. data/bin/sqlite_rediff +0 -4
  58. data/lib/coopy/dbi_sql_wrapper.rb +0 -89
  59. data/lib/coopy/diff_apply_sql.rb +0 -35
  60. data/lib/coopy/diff_columns.rb +0 -33
  61. data/lib/coopy/diff_output.rb +0 -21
  62. data/lib/coopy/diff_output_action.rb +0 -34
  63. data/lib/coopy/diff_output_group.rb +0 -40
  64. data/lib/coopy/diff_output_raw.rb +0 -17
  65. data/lib/coopy/diff_output_stats.rb +0 -45
  66. data/lib/coopy/diff_output_table.rb +0 -49
  67. data/lib/coopy/diff_output_tdiff.rb +0 -48
  68. data/lib/coopy/diff_parser.rb +0 -92
  69. data/lib/coopy/diff_render_csv.rb +0 -29
  70. data/lib/coopy/diff_render_html.rb +0 -74
  71. data/lib/coopy/diff_render_log.rb +0 -52
  72. data/lib/coopy/row_change.rb +0 -25
  73. data/lib/coopy/scraperwiki_sql_wrapper.rb +0 -8
  74. data/lib/coopy/scraperwiki_utils.rb +0 -23
  75. data/lib/coopy/sequel_sql_wrapper.rb +0 -73
  76. data/lib/coopy/sql_compare.rb +0 -222
  77. data/lib/coopy/sql_wrapper.rb +0 -34
  78. data/lib/coopy/sqlite_sql_wrapper.rb +0 -143
  79. data/test/test_coopy.rb +0 -126
@@ -0,0 +1,17 @@
module Coopy

  # Mixin describing a container of items.
  #
  # The two lookup methods below are placeholders: they raise
  # NotImplementedError until the including class overrides them.
  module Bag

    # integer; read from the including object's @size
    attr_reader :size

    # Look up the item identified by +x+. Must be overridden by includers.
    def get_item(x)
      fail NotImplementedError
    end

    # Must be overridden by includers (presumably returns the view used to
    # interpret items -- confirm against implementers).
    def get_item_view
      fail NotImplementedError
    end

  end
end
@@ -0,0 +1,24 @@
module Coopy
  # Plain data holder describing one cell, plus the extra fields needed when
  # the cell represents an update or a conflict.
  class CellInfo

    attr_accessor :value
    attr_accessor :pretty_value
    attr_accessor :category
    attr_accessor :category_given_tr

    # relevant to updates, conflicts
    attr_accessor :separator
    attr_accessor :updated
    attr_accessor :conflicted
    attr_accessor :pvalue
    attr_accessor :lvalue
    attr_accessor :rvalue

    # Text form of the cell.
    #
    # * not updated            -> value
    # * updated, no conflict   -> "lvalue::rvalue"
    # * updated and conflicted -> "pvalue||lvalue::rvalue"
    #
    # Always returns a String: nil or non-String parts are converted with
    # #to_s (nil becomes "") instead of leaking a non-String return value or
    # raising from String#+.
    def to_s
      return value.to_s unless updated
      return "#{lvalue}::#{rvalue}" unless conflicted
      "#{pvalue}||#{lvalue}::#{rvalue}"
    end

  end
end
@@ -0,0 +1,10 @@
module Coopy
  # Symbolic tags naming the kinds of change. Each constant is simply the
  # lower-case symbol of its own name.
  module ChangeType
    NO_CHANGE     = :no_change
    REMOTE_CHANGE = :remote_change
    LOCAL_CHANGE  = :local_change
    BOTH_CHANGE   = :both_change
    SAME_CHANGE   = :same_change
    NOTE_CHANGE   = :note_change
  end
end
@@ -0,0 +1,62 @@
module Coopy
  # Options controlling how a comparison is carried out and how much of the
  # result is shown.
  class CompareFlags

    # boolean -- should we treat the data as ordered?
    attr_accessor :ordered

    # boolean -- should we show unchanged rows in diffs?
    attr_accessor :show_unchanged

    # integer -- minimum number of rows around a changed row we should show
    attr_accessor :unchanged_context

    # boolean -- should we always decorate the diff with numerical indexes
    # showing order?
    attr_accessor :always_show_order

    # boolean -- should we never decorate the diff with numerical indexes?
    attr_accessor :never_show_order

    # boolean -- should we show unchanged columns in diffs?
    # (index/key columns needed to identify rows will be shown even if this
    # flag is turned off)
    attr_accessor :show_unchanged_columns

    # integer -- minimum number of columns around a changed column that we
    # should show
    attr_accessor :unchanged_column_context

    # boolean -- should we always give a table header in diffs?
    attr_accessor :always_show_header

    # Hash<String, Bool> -- optional filters for actions. Set any of
    # "update", "insert", "delete" to true to accept just those actions.
    # Note: the allow_* methods below only test whether the key is present;
    # the stored value itself is not inspected.
    attr_accessor :acts

    # Populate every flag with its default; no action filter is installed.
    def initialize
      @ordered                  = true
      @show_unchanged           = false
      @unchanged_context        = 1
      @always_show_order        = false
      @never_show_order         = true
      @show_unchanged_columns   = false
      @unchanged_column_context = 1
      @always_show_header       = true
      @acts                     = nil
    end

    # True when no filter is set or the filter has an "update" key.
    def allow_update
      action_allowed?("update")
    end

    # True when no filter is set or the filter has an "insert" key.
    def allow_insert
      action_allowed?("insert")
    end

    # True when no filter is set or the filter has a "delete" key.
    def allow_delete
      action_allowed?("delete")
    end

    private

    # Shared test behind the allow_* methods.
    def action_allowed?(action)
      return true if acts.nil?
      acts.has_key?(action)
    end

  end
end
@@ -0,0 +1,327 @@
module Coopy
  # Compares the tables held by a TableComparisonState (+a+ and +b+, plus an
  # optional parent +p+) and builds row/column alignments between them.
  #
  # Tables are only required to respond to +width+, +height+,
  # +get_cell(col, row)+ and +get_cell_view+; the view must respond to
  # +to_s(cell)+ and +equals(cell, cell)+.
  class CompareTable

    # At most this many common columns are combined when searching for a
    # usable row key (every non-empty subset is tried, so this bounds the
    # search at 2**5 - 1 candidate indexes).
    INDEX_COLUMN_LIMIT = 5

    # Only the first few rows of each table are considered as header rows.
    HEADER_SEARCH_ROWS = 5

    # Attach a comparison state and run the comparison steps.
    #
    # One step always runs; further steps run only while the state asks for
    # +run_to_completion+. Returns true when no work remains.
    def attach(comp)
      @comp = comp # TableComparisonState
      more = compare_core
      more = compare_core while more && @comp.run_to_completion
      !more
    end

    # Build and return a fresh Alignment for the attached tables.
    def align
      alignment = Coopy::Alignment.new
      align_core(alignment)
      alignment
    end

    # The state passed to #attach.
    def get_comparison_state
      @comp
    end

    # Fill +align+. Without a parent this aligns a against b. With a parent,
    # +align+ receives parent-vs-b and +align.reference+ receives parent-vs-a.
    def align_core(align)
      if @comp.p.nil?
        align_core_2(align, @comp.a, @comp.b)
        return
      end
      align.reference = Coopy::Alignment.new
      align_core_2(align, @comp.p, @comp.b)
      align_core_2(align.reference, @comp.p, @comp.a)
      align.meta.reference = align.reference.meta
    end

    # Align the rows of +a+ and +b+ into +align+.
    #
    # Columns are aligned first (into +align.meta+). Then every non-empty
    # combination of the chosen common columns is tried as a key: rows that a
    # key matches one-to-one are linked and dropped from the pending set.
    def align_core_2(align, a, b)
      align.meta = Coopy::Alignment.new if align.meta.nil?
      align_columns(align.meta, a, b)

      common_units = []
      align.meta.to_order.get_list.each do |unit|
        common_units << unit if unit.l >= 0 && unit.r >= 0 && unit.p != -1
      end

      align.range(a.height, b.height)
      align.tables(a, b)
      align.set_rowlike(true)

      columns = choose_index_columns(common_units, a, b)

      pending = {}
      (0...a.height).each { |row| pending[row] = row }

      # Height of the taller table, never less than 1.
      tallest = [a.height, b.height, 1].max

      # Each bit of +mask+ switches one entry of +columns+ on or off.
      (1...(1 << columns.length)).each do |mask|
        break if pending.empty?

        index = IndexPair.new
        columns.each_with_index do |col, bit|
          next unless mask[bit] == 1
          unit = common_units[col]
          index.add_columns(unit.l, unit.r)
          align.add_index_columns(unit)
        end
        index.index_tables(a, b)

        # Share of rows taken by the most common key. This must be a float
        # division: with integers the quotient truncates to 0 and the check
        # below would never reject anything.
        ratio = index.get_top_freq.to_f / (tallest + 20) # "20" allows for low-data
        next if ratio >= 0.1 # lousy no-good index, move on

        @indexes << index if @indexes

        fixed = []
        pending.keys.each do |row|
          cross = index.query_local(row)
          next if cross.spot_a != 1 || cross.spot_b != 1
          fixed << row
          align.link(row, cross.item_b.lst[0])
        end
        fixed.each { |row| pending.delete(row) }
      end

      # we expect headers on row 0 - link them even if quite different.
      align.link(0, 0)
    end

    # Align the columns of +a+ and +b+ into +align+.
    #
    # The first HEADER_SEARCH_ROWS rows of each table are candidate header
    # rows. The pair of rows sharing the most unambiguous cell texts decides
    # which columns are linked. The row with the most unique texts in each
    # table is reported through +align.headers+. Nothing is linked or
    # reported when either table has no rows.
    def align_columns(align, a, b)
      align.range(a.width, b.width)
      align.tables(a, b)
      align.set_rowlike(false)

      a_rows = header_candidates(a.get_cell_view, a)
      b_rows = header_candidates(b.get_cell_view, b)

      best_count = -1
      best_a = nil
      best_b = nil
      a_rows.each do |a_map, _|
        b_rows.each do |b_map, _|
          shared = 0
          a_map.each do |key, a_col|
            b_col = b_map[key]
            shared += 1 if b_col && b_col >= 0 && a_col >= 0
          end
          if shared > best_count
            best_count = shared
            best_a = a_map
            best_b = b_map
          end
        end
      end

      return if best_a.nil?

      best_a.each do |key, a_col|
        b_col = best_b[key]
        # b_col is nil when the text exists only in a; -1 marks a text that
        # occurs more than once in its row.
        align.link(a_col, b_col) if b_col && b_col >= 0 && a_col >= 0
      end
      align.headers(best_header_row(a_rows), best_header_row(b_rows))
    end

    # Record on the state whether all tables share the same columns.
    # Always returns true.
    def test_has_same_columns
      parent = @comp.p
      local = @comp.a
      remote = @comp.b
      same = has_same_columns_2(local, remote)
      same = has_same_columns_2(parent, local) if same && parent
      @comp.has_same_columns = same
      @comp.has_same_columns_known = true
      true
    end

    # True when +a+ and +b+ have equal width and, if both have rows, row 0 of
    # +a+ contains no repeated cell and matches row 0 of +b+ cell for cell.
    def has_same_columns_2(a, b)
      return false if a.width != b.width
      return true if a.height == 0 || b.height == 0

      # check for a blatant header - should only do this
      # for meta-data free tables, that may have embedded headers
      av = a.get_cell_view
      (0...a.width).each do |i|
        ((i + 1)...a.width).each do |j|
          return false if av.equals(a.get_cell(i, 0), a.get_cell(j, 0))
        end
        return false unless av.equals(a.get_cell(i, 0), b.get_cell(i, 0))
      end
      true
    end

    # Record on the state whether all tables are cell-for-cell equal.
    # Always returns true.
    def test_is_equal
      parent = @comp.p
      local = @comp.a
      remote = @comp.b
      same = is_equal_2(local, remote)
      same = is_equal_2(parent, local) if same && parent
      @comp.is_equal = same
      @comp.is_equal_known = true
      true
    end

    # True when +a+ and +b+ have the same dimensions and every cell compares
    # equal under a's cell view.
    def is_equal_2(a, b)
      return false if a.width != b.width || a.height != b.height
      av = a.get_cell_view
      (0...a.height).each do |row|
        (0...a.width).each do |col|
          return false unless av.equals(a.get_cell(col, row), b.get_cell(col, row))
        end
      end
      true
    end

    # Run one pending comparison step.
    #
    # Returns true after performing a step. Returns false once the state is
    # completed; the call that finds nothing left to do marks it completed.
    def compare_core
      return false if @comp.completed
      return test_is_equal unless @comp.is_equal_known
      return test_has_same_columns unless @comp.has_same_columns_known
      @comp.completed = true
      false
    end

    # Start collecting the indexes accepted by later #align_core_2 calls.
    def store_indexes
      @indexes = []
    end

    # Indexes collected since #store_indexes, or nil if it was never called.
    def get_indexes
      @indexes
    end

    private

    # Positions (into +common_units+) of the columns to build row keys from.
    #
    # All of them when there are few enough. Otherwise, since we cannot
    # afford every combination, keep the INDEX_COLUMN_LIMIT columns holding
    # the most distinct values; ties keep their original order.
    def choose_index_columns(common_units, a, b)
      return (0...common_units.length).to_a if common_units.length <= INDEX_COLUMN_LIMIT

      view = a.get_cell_view
      scored = common_units.each_with_index.map do |unit, i|
        [i, distinct_values(view, a, unit.l) + distinct_values(view, b, unit.r)]
      end
      scored.sort_by { |i, count| [-count, i] }.first(INDEX_COLUMN_LIMIT).map(&:first)
    end

    # Number of distinct cell texts found in column +col+ of +table+.
    def distinct_values(view, table, col)
      seen = {}
      (0...table.height).each { |row| seen[view.to_s(table.get_cell(col, row))] = true }
      seen.size
    end

    # For each candidate header row of +table+, a pair [positions, uniques].
    # +positions+ maps cell text to its column, or to -1 if the text repeats.
    # +uniques+ gains one for each new text and loses one for each repeat.
    def header_candidates(view, table)
      limit = [HEADER_SEARCH_ROWS, table.height].min
      (0...limit).map do |row|
        positions = {}
        uniques = 0
        (0...table.width).each do |col|
          key = view.to_s(table.get_cell(col, row))
          if positions.has_key?(key)
            positions[key] = -1
            uniques -= 1
          else
            positions[key] = col
            uniques += 1
          end
        end
        [positions, uniques]
      end
    end

    # First candidate row with the highest positive +uniques+ score, or 0.
    def best_header_row(candidates)
      best_row = 0
      best_uniques = 0
      candidates.each_with_index do |(_, uniques), row|
        if uniques > best_uniques
          best_row = row
          best_uniques = uniques
        end
      end
      best_row
    end
  end
end
@@ -0,0 +1,22 @@
module Coopy

  # Two-way comparison: wraps +local+ (a) and +remote+ (b) in a fresh
  # TableComparisonState, attaches it to a new CompareTable and returns
  # that CompareTable.
  def self.compare_tables(local, remote)
    comparer = Coopy::CompareTable.new
    state = Coopy::TableComparisonState.new.tap do |s|
      s.a = local
      s.b = remote
    end
    comparer.attach(state)
    comparer
  end

  # Three-way comparison: same as compare_tables, but the state also carries
  # +parent+ (p).
  def self.compare_tables_3(parent, local, remote)
    comparer = Coopy::CompareTable.new
    state = Coopy::TableComparisonState.new.tap do |s|
      s.p = parent
      s.a = local
      s.b = remote
    end
    comparer.attach(state)
    comparer
  end

end