coopy 0.6.4.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +7 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.md +22 -0
  7. data/README.md +59 -0
  8. data/Rakefile +4 -6
  9. data/coopy.gemspec +26 -0
  10. data/lib/coopy.rb +32 -175
  11. data/lib/coopy/alignment.rb +260 -0
  12. data/lib/coopy/bag.rb +17 -0
  13. data/lib/coopy/cell_info.rb +24 -0
  14. data/lib/coopy/change_type.rb +10 -0
  15. data/lib/coopy/compare_flags.rb +62 -0
  16. data/lib/coopy/compare_table.rb +327 -0
  17. data/lib/coopy/coopy.rb +22 -0
  18. data/lib/coopy/cross_match.rb +10 -0
  19. data/lib/coopy/csv_table.rb +51 -0
  20. data/lib/coopy/diff_render.rb +307 -0
  21. data/lib/coopy/index.rb +73 -0
  22. data/lib/coopy/index_item.rb +17 -0
  23. data/lib/coopy/index_pair.rb +72 -0
  24. data/lib/coopy/mover.rb +123 -0
  25. data/lib/coopy/ordering.rb +27 -0
  26. data/lib/coopy/row.rb +9 -0
  27. data/lib/coopy/simple_cell.rb +15 -0
  28. data/lib/coopy/simple_table.rb +144 -0
  29. data/lib/coopy/simple_view.rb +36 -0
  30. data/lib/coopy/table.rb +44 -0
  31. data/lib/coopy/table_comparison_state.rb +33 -0
  32. data/lib/coopy/table_diff.rb +634 -0
  33. data/lib/coopy/table_text.rb +14 -0
  34. data/lib/coopy/table_view.rb +31 -0
  35. data/lib/coopy/unit.rb +53 -0
  36. data/lib/coopy/version.rb +3 -0
  37. data/lib/coopy/view.rb +34 -0
  38. data/spec/fixtures/bridges.html +10 -0
  39. data/spec/fixtures/bridges_diff.csv +8 -0
  40. data/spec/fixtures/bridges_new.csv +9 -0
  41. data/spec/fixtures/bridges_old.csv +9 -0
  42. data/spec/fixtures/planetary_bodies.html +22 -0
  43. data/spec/fixtures/planetary_bodies_diff.csv +19 -0
  44. data/spec/fixtures/planetary_bodies_new.csv +20 -0
  45. data/spec/fixtures/planetary_bodies_old.csv +19 -0
  46. data/spec/fixtures/quote_me.csv +10 -0
  47. data/spec/fixtures/quote_me2.csv +11 -0
  48. data/spec/integration/table_diff_spec.rb +57 -0
  49. data/spec/libs/compare_flags_spec.rb +40 -0
  50. data/spec/libs/coopy_spec.rb +14 -0
  51. data/spec/libs/ordering_spec.rb +28 -0
  52. data/spec/libs/unit_spec.rb +31 -0
  53. data/spec/spec_helper.rb +29 -0
  54. metadata +153 -46
  55. data/bin/sqlite_diff +0 -4
  56. data/bin/sqlite_patch +0 -4
  57. data/bin/sqlite_rediff +0 -4
  58. data/lib/coopy/dbi_sql_wrapper.rb +0 -89
  59. data/lib/coopy/diff_apply_sql.rb +0 -35
  60. data/lib/coopy/diff_columns.rb +0 -33
  61. data/lib/coopy/diff_output.rb +0 -21
  62. data/lib/coopy/diff_output_action.rb +0 -34
  63. data/lib/coopy/diff_output_group.rb +0 -40
  64. data/lib/coopy/diff_output_raw.rb +0 -17
  65. data/lib/coopy/diff_output_stats.rb +0 -45
  66. data/lib/coopy/diff_output_table.rb +0 -49
  67. data/lib/coopy/diff_output_tdiff.rb +0 -48
  68. data/lib/coopy/diff_parser.rb +0 -92
  69. data/lib/coopy/diff_render_csv.rb +0 -29
  70. data/lib/coopy/diff_render_html.rb +0 -74
  71. data/lib/coopy/diff_render_log.rb +0 -52
  72. data/lib/coopy/row_change.rb +0 -25
  73. data/lib/coopy/scraperwiki_sql_wrapper.rb +0 -8
  74. data/lib/coopy/scraperwiki_utils.rb +0 -23
  75. data/lib/coopy/sequel_sql_wrapper.rb +0 -73
  76. data/lib/coopy/sql_compare.rb +0 -222
  77. data/lib/coopy/sql_wrapper.rb +0 -34
  78. data/lib/coopy/sqlite_sql_wrapper.rb +0 -143
  79. data/test/test_coopy.rb +0 -126
@@ -1,25 +0,0 @@
1
- class RowChange
2
- attr_accessor :row_mode
3
- attr_accessor :cells
4
- attr_accessor :columns
5
- attr_accessor :key
6
-
7
- def initialize(row_mode,cells)
8
- @row_mode = row_mode
9
- @cells = cells
10
- @key = nil
11
- end
12
-
13
- def active_columns
14
- return [] if @columns.nil?
15
- @columns.column_by_offset
16
- end
17
-
18
- def value_at(column)
19
- @cells[column[:in_offset]][:value]
20
- end
21
-
22
- def new_value_at(column)
23
- @cells[column[:in_offset]][:new_value]
24
- end
25
- end
@@ -1,8 +0,0 @@
1
- require 'sqlite_sql_wrapper'
2
-
3
- # Tweak sqlite wrapper slightly to match ScraperWiki's API
4
- class ScraperwikiSqlWrapper < SqliteSqlWrapper
5
- def sqlite_execute(template,vals)
6
- @db.sqliteexecute(template,vals)["data"]
7
- end
8
- end
@@ -1,23 +0,0 @@
1
- def link_tables(watch_scraper,watch_tables)
2
- sql = ScraperwikiSqlWrapper.new(ScraperWiki)
3
- watch_tables.each { |tbl| sql.copy_table_structure(watch_scraper,tbl) }
4
- end
5
-
6
- class CoopyResult
7
- attr_accessor :html
8
- end
9
-
10
- def sync_table(watch_scraper,tbl,keys)
11
- sql = ScraperwikiSqlWrapper.new(ScraperWiki)
12
- sql.set_primary_key(keys) unless keys.nil?
13
- cmp = SqlCompare.new(sql,"main.#{tbl}","#{watch_scraper}.#{tbl}")
14
- sinks = DiffOutputGroup.new
15
- render = DiffRenderHtml.new
16
- sinks << render
17
- sinks << DiffApplySql.new(sql,"main.#{tbl}")
18
- cmp.set_output(sinks)
19
- cmp.apply
20
- result = CoopyResult.new
21
- result.html = render.to_string
22
- result
23
- end
@@ -1,73 +0,0 @@
1
- require 'sql_wrapper'
2
- require 'sequel'
3
-
4
- class SequelSqlBare < SqlWrapper
5
- def initialize(db)
6
- @db = db
7
- @tname = nil
8
- @t = nil
9
- end
10
-
11
- def sync_table(tbl)
12
- tbl = @tname if tbl.nil?
13
- tbl = @db.tables[0] if tbl.nil?
14
- return @t if tbl==@tname
15
- @tname = tbl
16
- @t = @db[tbl]
17
- end
18
-
19
- def enhash(cols,vals)
20
- Hash[*cols.map{|c| c.to_sym}.zip(vals).flatten]
21
- end
22
-
23
- def insert(tbl,cols,vals)
24
- sync_table(tbl)
25
- @t.insert(enhash(cols,vals))
26
- end
27
-
28
- def delete(tbl,cols,vals)
29
- sync_table(tbl)
30
- @t.filter(enhash(cols,vals)).delete
31
- end
32
-
33
- def update(tbl,set_cols,set_vals,cond_cols,cond_vals)
34
- sync_table(tbl)
35
- @t.filter(enhash(cond_cols,cond_vals)).update(enhash(set_cols,set_vals))
36
- end
37
-
38
- def transaction(&block)
39
- @db.transaction(&block)
40
- end
41
-
42
- def columns(tbl)
43
- sync_table(tbl)
44
- @db.schema(@tname)
45
- end
46
-
47
- def column_names(tbl)
48
- columns(tbl).map{|x| x[0]}
49
- end
50
-
51
- def primary_key(tbl)
52
- cols = columns(tbl)
53
- cols.select{|x| x[1][:primary_key]}.map{|x| x[0]}
54
- end
55
-
56
- def index(tbl)
57
- key = primary_key(tbl)
58
- @t.select(*key)
59
- end
60
-
61
- def fetch(sql,names)
62
- @db.fetch(sql) do |row|
63
- yield names.map{|n| row[n]}
64
- end
65
- end
66
- end
67
-
68
-
69
- class SequelSqlWrapper < SequelSqlBare
70
- def initialize(*params)
71
- super(Sequel.connect(*params))
72
- end
73
- end
@@ -1,222 +0,0 @@
1
- require 'coopy/diff_columns'
2
- require 'coopy/row_change'
3
-
4
- class SqlCompare
5
- def initialize(db1,db2)
6
- @db1 = db
7
- @db2 = db2
8
- @table1 = nil
9
- @table2 = nil
10
- @single_db = false
11
- raise "not implemented yet"
12
- end
13
-
14
- def initialize(db,table1,table2)
15
- @db1 = db
16
- @db2 = db.clone
17
- @table1 = table1
18
- @table2 = table2
19
- @single_db = true
20
- end
21
-
22
- def set_output(patch)
23
- @patch = patch
24
- end
25
-
26
- def apply
27
- apply_single
28
- end
29
-
30
- # We are not implementing full comparison, just an adequate subset
31
- # for easy cases (a table with a trustworthy primary key, and constant
32
- # columns). Make sure we are not trying to do something we're not ready
33
- # for.
34
- def validate_schema
35
- all_cols1 = @db1.column_names(@table1)
36
- all_cols2 = @db2.column_names(@table2)
37
- if all_cols1 != all_cols2
38
- raise "Columns do not match, please use full coopy toolbox"
39
- end
40
-
41
- key_cols1 = @db1.primary_key(@table1)
42
- key_cols2 = @db2.primary_key(@table2)
43
- if key_cols1 != key_cols2
44
- raise "Primary keys do not match, please use full coopy toolbox"
45
- end
46
- end
47
-
48
- def keyify(lst)
49
- lst.map{|x| x.to_s}.join("___")
50
- end
51
-
52
- # When working within a single database, we can delegate more work to SQL.
53
- # So we specialize this case.
54
- def apply_single
55
- validate_schema
56
-
57
- # Prepare some lists of columns.
58
- key_cols = @db1.primary_key(@table1)
59
- data_cols = @db1.except_primary_key(@table1)
60
- all_cols = @db1.column_names(@table1)
61
-
62
- # Let our public know we are beginning.
63
- @patch.begin_diff
64
-
65
- # Advertise column names.
66
- @rc_columns = DiffColumns.new
67
- @rc_columns.title_row = all_cols
68
- @rc_columns.update(0)
69
- cells = all_cols.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
70
- rc = RowChange.new("@@",cells)
71
- @patch.apply_row(rc)
72
-
73
- # If requested, we will be providing context rows around changed rows.
74
- # This is not a natural thing to do with SQL, so we do it only on request.
75
- # When requested, we need to buffer row changes.
76
- @pending_rcs = []
77
-
78
- # Prepare some useful SQL fragments to assemble later.
79
- sql_table1 = @db1.quote_table(@table1)
80
- sql_table2 = @db1.quote_table(@table2)
81
- sql_key_cols = key_cols.map{|c| @db1.quote_column(c)}.join(",")
82
- sql_all_cols = all_cols.map{|c| @db1.quote_column(c)}.join(",")
83
- sql_key_match = key_cols.map{|c| @db1.quote_column(c)}.map{|c| "#{sql_table1}.#{c} IS #{sql_table2}.#{c}"}.join(" AND ")
84
- sql_data_mismatch = data_cols.map{|c| @db1.quote_column(c)}.map{|c| "#{sql_table1}.#{c} IS NOT #{sql_table2}.#{c}"}.join(" OR ")
85
-
86
- # For one query we will need to interleave columns from two tables. For
87
- # portability we need to give these columns distinct names.
88
- weave = all_cols.map{|c| [[sql_table1,@db1.quote_column(c)],
89
- [sql_table2,@db2.quote_column(c)]]}.flatten(1)
90
- dbl_cols = weave.map{|c| "#{c[0]}.#{c[1]}"}
91
- sql_dbl_cols = weave.map{|c| "#{c[0]}.#{c[1]} AS #{c[0].gsub(/[^a-zA-Z0-9]/,'_')}_#{c[1].gsub(/[^a-zA-Z0-9]/,'_')}"}.join(",")
92
-
93
- # Prepare a map of primary key offsets.
94
- keys_in_all_cols = key_cols.each.map{|c| all_cols.index(c)}
95
- keys_in_dbl_cols = keys_in_all_cols.map{|x| 2*x}
96
-
97
- # Find rows in table2 that are not in table1.
98
- sql = "SELECT #{sql_all_cols} FROM #{sql_table2} WHERE NOT EXISTS (SELECT 1 FROM #{sql_table1} WHERE #{sql_key_match})"
99
- apply_inserts(sql,all_cols,keys_in_all_cols)
100
-
101
- # Find rows in table1 and table2 that differ while having the same primary
102
- # key.
103
- sql = "SELECT #{sql_dbl_cols} FROM #{sql_table1} INNER JOIN #{sql_table2} ON #{sql_key_match} WHERE #{sql_data_mismatch}"
104
- apply_updates(sql,dbl_cols,keys_in_dbl_cols)
105
-
106
- # Find rows that are in table1 but not table2
107
- sql = "SELECT #{sql_all_cols} FROM #{sql_table1} WHERE NOT EXISTS (SELECT 1 FROM #{sql_table2} WHERE #{sql_key_match})"
108
- apply_deletes(sql,all_cols,keys_in_all_cols)
109
-
110
- # If we are supposed to provide context, we need to deal with row order.
111
- if @patch.want_context
112
- sql = "SELECT #{sql_all_cols}, 0 AS __coopy_tag__ FROM #{sql_table1} UNION SELECT #{sql_all_cols}, 1 AS __coopy_tag__ FROM #{sql_table2} ORDER BY #{sql_key_cols}, __coopy_tag__"
113
- apply_with_context(sql,all_cols,keys_in_all_cols)
114
- end
115
-
116
- # Done!
117
- @patch.end_diff
118
- end
119
-
120
-
121
- def apply_inserts(sql,all_cols,keys_in_all_cols)
122
- @db1.fetch(sql,all_cols) do |row|
123
- cells = row.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
124
- rc = RowChange.new("+++",cells)
125
- apply_rc(rc,row,keys_in_all_cols)
126
- end
127
- end
128
-
129
-
130
- def apply_updates(sql,dbl_cols,keys_in_dbl_cols)
131
- @db1.fetch(sql,dbl_cols) do |row|
132
- pairs = row.enum_for(:each_slice,2).to_a
133
- cells = pairs.map do |v|
134
- if v[0]==v[1]
135
- { :txt => v[0], :value => v[0], :cell_mode => "" }
136
- else
137
- { :txt => v[0], :value => v[0], :new_value => v[1], :cell_mode => "->" }
138
- end
139
- end
140
- rc = RowChange.new("->",cells)
141
- apply_rc(rc,row,keys_in_dbl_cols)
142
- end
143
- end
144
-
145
-
146
- def apply_deletes(sql,all_cols,keys_in_all_cols)
147
- @db1.fetch(sql,all_cols) do |row|
148
- cells = row.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
149
- rc = RowChange.new("---",cells)
150
- apply_rc(rc,row,keys_in_all_cols)
151
- end
152
- end
153
-
154
- def apply_rc(rc,row,keys_in_cols)
155
- rc.columns = @rc_columns
156
- if @patch.want_context
157
- rc.key = keyify(row.values_at(*keys_in_cols))
158
- @pending_rcs << rc
159
- else
160
- @patch.apply_row(rc)
161
- end
162
- end
163
-
164
- def emit_skip(row)
165
- cells = row.map{|v| { :txt => "...", :value => "...", :cell_mode => "" }}
166
- rc = RowChange.new("...",cells)
167
- rc.columns = @rc_columns
168
- @patch.apply_row(rc)
169
- end
170
-
171
- # Do the context dance.
172
- def apply_with_context(sql,all_cols,keys_in_all_cols)
173
- hits = {}
174
- @pending_rcs.each do |rc|
175
- hits[rc.key] = rc
176
- end
177
- hist = []
178
- n = 2
179
- pending = 0
180
- skipped = false
181
- noted = false
182
- last_row = nil
183
- @db1.fetch(sql,all_cols + ["__coopy_tag__"]) do |row|
184
- tag = row.pop.to_i
185
- k = keyify(row.values_at(*keys_in_all_cols))
186
- if hits[k]
187
- emit_skip(row) if skipped
188
- hist.each do |row0|
189
- cells = row0.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
190
- rc = RowChange.new("",cells)
191
- rc.columns = @rc_columns
192
- @patch.apply_row(rc)
193
- end
194
- hist.clear
195
- pending = n
196
- @patch.apply_row(hits[k])
197
- hits.delete(k)
198
- skipped = false
199
- noted = true
200
- elsif tag == 1
201
- # ignore redundant row
202
- elsif pending>0
203
- emit_skip(row) if skipped
204
- cells = row.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
205
- rc = RowChange.new("",cells)
206
- rc.columns = @rc_columns
207
- @patch.apply_row(rc)
208
- pending = pending-1
209
- skipped = false
210
- else
211
- hist << row
212
- if hist.length>n
213
- skipped = true
214
- last_row = row
215
- hist.shift
216
- end
217
- end
218
- end
219
- emit_skip(last_row) if skipped and noted
220
- end
221
- end
222
-
@@ -1,34 +0,0 @@
1
- class SqlWrapper
2
- def insert(tbl,cols,vals)
3
- end
4
-
5
- def delete(tbl,cols,vals)
6
- end
7
-
8
- def update(tbl,set_cols,set_vals,cond_cols,cond_vals)
9
- end
10
-
11
- def column_names(tbl)
12
- []
13
- end
14
-
15
- def primary_key(tbl)
16
- []
17
- end
18
-
19
- def except_primary_key(tbl)
20
- column_names(tbl)-primary_key(tbl)
21
- end
22
-
23
- def fetch(sql)
24
- []
25
- end
26
-
27
- def quote_column(c)
28
- c.to_s
29
- end
30
-
31
- def quote_table(t)
32
- t.to_s
33
- end
34
- end
@@ -1,143 +0,0 @@
1
- require 'coopy/sql_wrapper'
2
-
3
- class SqliteSqlWrapper < SqlWrapper
4
- def initialize(db)
5
- @db = db
6
- @t = nil
7
- @qt = nil
8
- @pk = nil
9
- @info = {}
10
- end
11
-
12
- def set_primary_key(lst)
13
- @pk = lst
14
- end
15
-
16
- def sqlite_execute(template,vals)
17
- return @db.execute(template,*vals)
18
- end
19
-
20
- def get_table_names
21
- sqlite_execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name",[]).flatten
22
- end
23
-
24
- def complete_table(tbl)
25
- @t = tbl unless tbl.nil?
26
- @t = get_table_names[0] if @t.nil?
27
- @t
28
- end
29
-
30
- def quote_with_dots(x)
31
- return x if x.match(/^[a-zA-Z0-9_]+$/)
32
- x.split('.').map{|p| "`#{p}`"}.join('.')
33
- end
34
-
35
- def quote_table(tbl)
36
- complete_table(tbl)
37
- return @t if @t.match(/^[a-zA-Z0-9_]+$/)
38
- quote_with_dots(@t)
39
- end
40
-
41
- def quote_column(col)
42
- return col if col.match(/^[a-zA-Z0-9_]+$/)
43
- quote_with_dots(col)
44
- end
45
-
46
- def insert(tbl,cols,vals)
47
- tbl = quote_table(tbl)
48
- template = cols.map{|x| '?'}.join(",")
49
- template = "INSERT INTO #{tbl} VALUES(#{template})"
50
- sqlite_execute(template,vals)
51
- end
52
-
53
- def delete(tbl,cols,vals)
54
- tbl = quote_table(tbl)
55
- template = cols.map{|c| quote_column(c) + ' IS ?'}.join(" AND ")
56
- template = "DELETE FROM #{tbl} WHERE #{template}"
57
- sqlite_execute(template,vals)
58
- end
59
-
60
- def update(tbl,set_cols,set_vals,cond_cols,cond_vals)
61
- tbl = quote_table(tbl)
62
- conds = cond_cols.map{|c| quote_column(c) + ' IS ?'}.join(" AND ")
63
- sets = set_cols.map{|c| quote_column(c) + ' = ?'}.join(", ")
64
- template = "UPDATE #{tbl} SET #{sets} WHERE #{conds}"
65
- v = set_vals + cond_vals
66
- sqlite_execute(template,v)
67
- end
68
-
69
- def transaction(&block)
70
- # not yet mapped, not yet used
71
- block.call
72
- end
73
-
74
- def pragma(tbl,info)
75
- if tbl.include? '.'
76
- dbname, tbname, *ignore = tbl.split('.')
77
- dbname = quote_with_dots(dbname)
78
- tbname = quote_with_dots(tbname)
79
- query = "PRAGMA #{dbname}.#{info}(#{tbname})"
80
- else
81
- tbl = quote_with_dots(tbl)
82
- query = "PRAGMA #{info}(#{tbl})"
83
- end
84
- result = sqlite_execute(query,[])
85
- result
86
- end
87
-
88
- def part(row,n,name)
89
- row[n]
90
- end
91
-
92
- def columns(tbl)
93
- tbl = complete_table(tbl)
94
- @info[tbl] = pragma(tbl,"table_info") unless @info.has_key? tbl
95
- @info[tbl]
96
- end
97
-
98
- def column_names(tbl)
99
- columns(tbl).map{|c| part(c,1,"name")}
100
- end
101
-
102
- def fetch(sql,names)
103
- sqlite_execute(sql,[]).each do |row|
104
- yield row
105
- end
106
- end
107
-
108
- def primary_key(tbl)
109
- return @pk unless @pk.nil?
110
- cols = columns(tbl)
111
- cols = cols.select{|c| part(c,5,"pk").to_s=="1"}.map{|c| part(c,1,"name")}
112
- if cols.length == 0
113
- cols = pk_from_unique_index(tbl)
114
- end
115
- @pk = cols if cols.length>0
116
- cols
117
- end
118
-
119
- def pk_from_unique_index(tbl)
120
- pragma(tbl,"index_list").each do |row|
121
- if part(row,2,"unique").to_s == "1"
122
- idx = part(row,1,"name")
123
- return pragma(idx,"index_info").map{|r| part(r,2,"name")}
124
- end
125
- end
126
- nil
127
- end
128
-
129
- # copy the structure of an attached table, along with any indexes
130
- def copy_table_structure(rdb,tbl)
131
- template = "SELECT sql, type from X.sqlite_master WHERE tbl_name = ? ORDER BY type DESC"
132
- lsql = template.gsub('X',"main")
133
- rsql = template.gsub('X',quote_with_dots(rdb))
134
- args = [quote_with_dots(tbl)]
135
- lschema = sqlite_execute(lsql,args)
136
- rschema = sqlite_execute(rsql,args)
137
- if lschema.length>0
138
- return false
139
- end
140
- rschema.each{ |row| sqlite_execute(row[0],[]) }
141
- true
142
- end
143
- end