coopy 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,23 @@
1
# Ask the ScraperWiki SQL wrapper to copy the structure of each listed table
# from the watched scraper's database.
#
# watch_scraper - name of the watched scraper's database; handed unchanged to
#                 ScraperwikiSqlWrapper#copy_table_structure.
# watch_tables  - collection of table names; each one is copied in turn.
#
# Returns whatever watch_tables.each returns (the collection itself for an
# Array).
def link_tables(watch_scraper, watch_tables)
  sql = ScraperwikiSqlWrapper.new(ScraperWiki)
  watch_tables.each do |tbl|
    sql.copy_table_structure(watch_scraper, tbl)
  end
end
5
+
6
# Simple holder for the outcome of a table synchronisation.
class CoopyResult
  # Rendered HTML of the diff; nil until a value is assigned.
  attr_accessor :html
end
9
+
10
# Compare the local copy of a table ("main.<tbl>") with the same table in the
# watched scraper's database ("<watch_scraper>.<tbl>"), apply the differences
# to the local copy, and return an HTML rendering of them.
#
# watch_scraper - name of the database holding the watched table.
# tbl           - unqualified table name, present on both sides.
# keys          - list of primary-key column names to force, or nil to let
#                 the SQL wrapper work the key out for itself.
#
# Returns a CoopyResult whose #html holds the rendered diff.
def sync_table(watch_scraper, tbl, keys)
  sql = ScraperwikiSqlWrapper.new(ScraperWiki)
  sql.set_primary_key(keys) unless keys.nil?

  local_name = "main.#{tbl}"
  cmp = SqlCompare.new(sql, local_name, "#{watch_scraper}.#{tbl}")

  # Every row change is sent both to the HTML renderer and to a sink that
  # applies it to the local table.
  sinks = DiffOutputGroup.new
  render = DiffRenderHtml.new
  sinks << render
  sinks << DiffApplySql.new(sql, local_name)
  cmp.set_output(sinks)
  cmp.apply

  result = CoopyResult.new
  result.html = render.to_string
  result
end
@@ -0,0 +1,73 @@
1
+ require 'sql_wrapper'
2
+ require 'sequel'
3
+
4
# SqlWrapper implementation that talks to a database through an existing
# Sequel database object.
class SequelSqlBare < SqlWrapper
  # db - the database object; this class calls #tables, #[], #schema,
  #      #transaction and #fetch on it.
  def initialize(db)
    @db = db
    @tname = nil
    @t = nil
  end

  # Select the current table and return its dataset.
  #
  # tbl - table identifier, or nil to reuse the current table (falling back
  #       to the first table the database reports).
  #
  # NOTE(review): the identifier is handed to @db[] as-is. Sequel treats a
  # String argument to Database#[] as raw SQL rather than a table name, so
  # callers presumably pass Symbols - confirm against callers.
  def sync_table(tbl)
    tbl = @tname if tbl.nil?
    tbl = @db.tables[0] if tbl.nil?
    return @t if tbl == @tname
    @tname = tbl
    @t = @db[tbl]
  end

  # Build a { column_symbol => value } hash from parallel lists.
  #
  # The hash is built from [key, value] pairs rather than from a flattened
  # argument list, so a value that is itself an Array stays intact instead of
  # being spliced into the key/value sequence.
  def enhash(cols, vals)
    Hash[cols.map { |c| c.to_sym }.zip(vals)]
  end

  # Insert one row; cols and vals are parallel lists.
  def insert(tbl, cols, vals)
    sync_table(tbl)
    @t.insert(enhash(cols, vals))
  end

  # Delete the rows whose columns `cols` equal `vals`.
  def delete(tbl, cols, vals)
    sync_table(tbl)
    @t.filter(enhash(cols, vals)).delete
  end

  # Set set_cols to set_vals on the rows whose cond_cols equal cond_vals.
  def update(tbl, set_cols, set_vals, cond_cols, cond_vals)
    sync_table(tbl)
    @t.filter(enhash(cond_cols, cond_vals)).update(enhash(set_cols, set_vals))
  end

  # Run the block inside a database transaction.
  def transaction(&block)
    @db.transaction(&block)
  end

  # Schema of the table as returned by @db.schema; the methods below treat it
  # as a list of [column_name, info_hash] pairs.
  def columns(tbl)
    sync_table(tbl)
    @db.schema(@tname)
  end

  # Names of all columns, in schema order.
  def column_names(tbl)
    columns(tbl).map { |name, _info| name }
  end

  # Names of the columns whose schema info has a truthy :primary_key entry.
  def primary_key(tbl)
    columns(tbl).select { |_name, info| info[:primary_key] }.map { |name, _info| name }
  end

  # Dataset restricted to the primary-key columns of the table.
  def index(tbl)
    key = primary_key(tbl)
    @t.select(*key)
  end

  # Run raw SQL and yield, for each result row, the values stored under
  # `names` in that order. A block is required.
  def fetch(sql, names)
    @db.fetch(sql) do |row|
      yield names.map { |n| row[n] }
    end
  end
end
67
+
68
+
69
# SequelSqlBare variant that opens its own connection: the constructor
# arguments are forwarded unchanged to Sequel.connect and the resulting
# database object is handed to SequelSqlBare.
class SequelSqlWrapper < SequelSqlBare
  # params - whatever Sequel.connect accepts.
  def initialize(*params)
    connection = Sequel.connect(*params)
    super(connection)
  end
end
@@ -0,0 +1,222 @@
1
+ require 'coopy/diff_columns'
2
+ require 'coopy/row_change'
3
+
4
# Compares two tables reachable through one SQL wrapper and reports the
# differences, row by row, to an output sink.
#
# The sink (see #set_output) must respond to begin_diff, end_diff,
# want_context and apply_row. Row differences are described with RowChange
# objects whose mode is "@@" (header), "+++" (row only in table2),
# "---" (row only in table1), "->" (row changed), "" (unchanged context row)
# or "..." (rows skipped).
class SqlCompare
  # db     - SQL wrapper used for every query; a shallow clone of it is used
  #          when inspecting table2.
  # table1 - name of the table treated as the current state.
  # table2 - name of the table treated as the desired state.
  #
  # Comparing tables held in two separate databases is not implemented, so
  # this three-argument form is the only constructor.
  def initialize(db, table1, table2)
    @db1 = db
    @db2 = db.clone
    @table1 = table1
    @table2 = table2
    @single_db = true
  end

  # Set the sink that receives the diff.
  def set_output(patch)
    @patch = patch
  end

  # Run the comparison and feed the result to the sink.
  def apply
    apply_single
  end

  # We are not implementing full comparison, just an adequate subset
  # for easy cases (a table with a trustworthy primary key, and constant
  # columns). Make sure we are not trying to do something we're not ready
  # for.
  #
  # Raises RuntimeError when the column lists or primary keys differ, or when
  # no primary key is available (every generated query matches rows on it).
  def validate_schema
    all_cols1 = @db1.column_names(@table1)
    all_cols2 = @db2.column_names(@table2)
    if all_cols1 != all_cols2
      raise "Columns do not match, please use full coopy toolbox"
    end

    key_cols1 = @db1.primary_key(@table1)
    key_cols2 = @db2.primary_key(@table2)
    if key_cols1 != key_cols2
      raise "Primary keys do not match, please use full coopy toolbox"
    end
    if key_cols1.nil? || key_cols1.empty?
      raise "No primary key found, please use full coopy toolbox"
    end
  end

  # Collapse a list of key values into a single string usable as a hash key.
  def keyify(lst)
    lst.map { |x| x.to_s }.join("___")
  end

  # When working within a single database, we can delegate more work to SQL.
  # So we specialize this case.
  def apply_single
    validate_schema

    # Prepare some lists of columns.
    key_cols = @db1.primary_key(@table1)
    data_cols = @db1.except_primary_key(@table1)
    all_cols = @db1.column_names(@table1)

    # Let our public know we are beginning.
    @patch.begin_diff

    # Advertise column names.
    @rc_columns = DiffColumns.new
    @rc_columns.title_row = all_cols
    @rc_columns.update(0)
    @patch.apply_row(RowChange.new("@@", plain_cells(all_cols)))

    # If requested, we will be providing context rows around changed rows.
    # This is not a natural thing to do with SQL, so we do it only on request.
    # When requested, we need to buffer row changes.
    @pending_rcs = []

    # Prepare some useful SQL fragments to assemble later.
    sql_table1 = @db1.quote_table(@table1)
    sql_table2 = @db1.quote_table(@table2)
    quoted_keys = key_cols.map { |c| @db1.quote_column(c) }
    quoted_data = data_cols.map { |c| @db1.quote_column(c) }
    sql_key_cols = quoted_keys.join(",")
    sql_all_cols = all_cols.map { |c| @db1.quote_column(c) }.join(",")
    sql_key_match = quoted_keys.map { |c| "#{sql_table1}.#{c} IS #{sql_table2}.#{c}" }.join(" AND ")
    sql_data_mismatch = quoted_data.map { |c| "#{sql_table1}.#{c} IS NOT #{sql_table2}.#{c}" }.join(" OR ")

    # For one query we will need to interleave columns from two tables. For
    # portability we need to give these columns distinct names.
    weave = all_cols.map do |c|
      [[sql_table1, @db1.quote_column(c)], [sql_table2, @db2.quote_column(c)]]
    end.flatten(1)
    dbl_cols = weave.map { |tbl, col| "#{tbl}.#{col}" }
    sql_dbl_cols = weave.map do |tbl, col|
      "#{tbl}.#{col} AS #{tbl.gsub(/[^a-zA-Z0-9]/, '_')}_#{col.gsub(/[^a-zA-Z0-9]/, '_')}"
    end.join(",")

    # Prepare a map of primary key offsets.
    keys_in_all_cols = key_cols.map { |c| all_cols.index(c) }
    keys_in_dbl_cols = keys_in_all_cols.map { |x| 2 * x }

    # Find rows in table2 that are not in table1.
    sql = "SELECT #{sql_all_cols} FROM #{sql_table2} WHERE NOT EXISTS (SELECT 1 FROM #{sql_table1} WHERE #{sql_key_match})"
    apply_inserts(sql, all_cols, keys_in_all_cols)

    # Find rows in table1 and table2 that differ while having the same primary
    # key. When every column belongs to the key there is nothing that could
    # differ, and the mismatch clause would be empty (invalid SQL), so the
    # query is skipped.
    unless data_cols.empty?
      sql = "SELECT #{sql_dbl_cols} FROM #{sql_table1} INNER JOIN #{sql_table2} ON #{sql_key_match} WHERE #{sql_data_mismatch}"
      apply_updates(sql, dbl_cols, keys_in_dbl_cols)
    end

    # Find rows that are in table1 but not table2
    sql = "SELECT #{sql_all_cols} FROM #{sql_table1} WHERE NOT EXISTS (SELECT 1 FROM #{sql_table2} WHERE #{sql_key_match})"
    apply_deletes(sql, all_cols, keys_in_all_cols)

    # If we are supposed to provide context, we need to deal with row order.
    if @patch.want_context
      sql = "SELECT #{sql_all_cols}, 0 AS __coopy_tag__ FROM #{sql_table1} UNION SELECT #{sql_all_cols}, 1 AS __coopy_tag__ FROM #{sql_table2} ORDER BY #{sql_key_cols}, __coopy_tag__"
      apply_with_context(sql, all_cols, keys_in_all_cols)
    end

    # Done!
    @patch.end_diff
  end

  # Report every row returned by `sql` as an inserted ("+++") row.
  def apply_inserts(sql, all_cols, keys_in_all_cols)
    @db1.fetch(sql, all_cols) do |row|
      rc = RowChange.new("+++", plain_cells(row))
      apply_rc(rc, row, keys_in_all_cols)
    end
  end

  # Report every row returned by `sql` as an updated ("->") row. Each result
  # row interleaves the two tables: [t1.c1, t2.c1, t1.c2, t2.c2, ...].
  def apply_updates(sql, dbl_cols, keys_in_dbl_cols)
    @db1.fetch(sql, dbl_cols) do |row|
      cells = row.each_slice(2).map do |old_value, new_value|
        if old_value == new_value
          { :txt => old_value, :value => old_value, :cell_mode => "" }
        else
          { :txt => old_value, :value => old_value, :new_value => new_value, :cell_mode => "->" }
        end
      end
      rc = RowChange.new("->", cells)
      apply_rc(rc, row, keys_in_dbl_cols)
    end
  end

  # Report every row returned by `sql` as a deleted ("---") row.
  def apply_deletes(sql, all_cols, keys_in_all_cols)
    @db1.fetch(sql, all_cols) do |row|
      rc = RowChange.new("---", plain_cells(row))
      apply_rc(rc, row, keys_in_all_cols)
    end
  end

  # Deliver a row change: straight to the sink, or - when the sink wants
  # context - into the pending buffer, tagged with its key so that
  # apply_with_context can emit it in row order later.
  def apply_rc(rc, row, keys_in_cols)
    rc.columns = @rc_columns
    if @patch.want_context
      rc.key = keyify(row.values_at(*keys_in_cols))
      @pending_rcs << rc
    else
      @patch.apply_row(rc)
    end
  end

  # Emit a "..." row (one "..." cell per value in `row`) marking skipped rows.
  def emit_skip(row)
    cells = row.map { |_v| { :txt => "...", :value => "...", :cell_mode => "" } }
    rc = RowChange.new("...", cells)
    rc.columns = @rc_columns
    @patch.apply_row(rc)
  end

  # Do the context dance.
  #
  # `sql` returns the rows of both tables in key order, each carrying a
  # trailing tag (0 = table1, 1 = table2). Buffered changes are emitted at
  # their position, surrounded by up to `n` unchanged rows on either side;
  # longer unchanged runs before a change are replaced by a "..." row.
  def apply_with_context(sql, all_cols, keys_in_all_cols)
    hits = {}
    @pending_rcs.each { |rc| hits[rc.key] = rc }

    hist = []        # most recent unchanged rows not yet emitted (at most n)
    n = 2            # number of context rows on each side of a change
    pending = 0      # trailing context rows still owed after the last change
    skipped = false  # rows have been dropped since the last emitted row
    noted = false    # at least one change has been emitted
    last_row = nil

    @db1.fetch(sql, all_cols + ["__coopy_tag__"]) do |row|
      tag = row.pop.to_i
      k = keyify(row.values_at(*keys_in_all_cols))
      if hits[k]
        emit_skip(row) if skipped
        hist.each { |row0| @patch.apply_row(context_rc(row0)) }
        hist.clear
        pending = n
        @patch.apply_row(hits[k])
        hits.delete(k)
        skipped = false
        noted = true
      elsif tag == 1
        # ignore redundant row
      elsif pending > 0
        emit_skip(row) if skipped
        @patch.apply_row(context_rc(row))
        pending -= 1
        skipped = false
      else
        hist << row
        if hist.length > n
          skipped = true
          last_row = row
          hist.shift
        end
      end
    end
    emit_skip(last_row) if skipped && noted
  end

  private

  # Cells for a row in which nothing changed: one plain cell per value.
  def plain_cells(values)
    values.map { |v| { :txt => v, :value => v, :cell_mode => "" } }
  end

  # RowChange for an unchanged row shown as context.
  def context_rc(row)
    rc = RowChange.new("", plain_cells(row))
    rc.columns = @rc_columns
    rc
  end
end
222
+
@@ -0,0 +1,34 @@
1
# Base class describing the database operations the diff code relies on.
# Every method here is a harmless default (no-op, empty list or plain
# to_s); concrete wrappers override them.
class SqlWrapper
  # Insert one row; cols and vals are parallel lists. No-op here.
  def insert(tbl, cols, vals)
  end

  # Delete rows whose columns `cols` match `vals`. No-op here.
  def delete(tbl, cols, vals)
  end

  # Set set_cols to set_vals on rows whose cond_cols match cond_vals.
  # No-op here.
  def update(tbl, set_cols, set_vals, cond_cols, cond_vals)
  end

  # Names of all columns of the table. Empty here.
  def column_names(tbl)
    []
  end

  # Names of the primary-key columns of the table. Empty here.
  def primary_key(tbl)
    []
  end

  # Names of the columns that are not part of the primary key.
  def except_primary_key(tbl)
    column_names(tbl) - primary_key(tbl)
  end

  # Run a raw SQL query. The default yields nothing and returns an empty
  # list.
  #
  # names - optional list of result column names. Subclasses and callers use
  #         the two-argument form, so it is accepted here as well; the
  #         one-argument form keeps working.
  def fetch(sql, names = nil)
    []
  end

  # Column identifier as it should appear in SQL text.
  def quote_column(c)
    c.to_s
  end

  # Table identifier as it should appear in SQL text.
  def quote_table(t)
    t.to_s
  end
end
@@ -0,0 +1,143 @@
1
+ require 'coopy/sql_wrapper'
2
+
3
# SqlWrapper implementation for SQLite. All SQL goes through
# #sqlite_execute, which calls @db.execute(template, *bind_values).
class SqliteSqlWrapper < SqlWrapper
  # Identifiers made only of these characters are used unquoted. Anchored
  # with \A/\z so that a name containing a newline cannot slip through.
  PLAIN_IDENTIFIER = /\A[a-zA-Z0-9_]+\z/

  # db - database handle; must respond to execute(sql, *bind_values).
  def initialize(db)
    @db = db
    @t = nil
    @qt = nil
    @pk = nil
    @info = {}
  end

  # Force the primary key reported for every table to `lst`.
  def set_primary_key(lst)
    @pk = lst
  end

  # Execute one statement with the given bind values and return the result
  # of @db.execute.
  def sqlite_execute(template, vals)
    @db.execute(template, *vals)
  end

  # Names of all tables listed in sqlite_master, sorted by name.
  def get_table_names
    sqlite_execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name", []).flatten
  end

  # Remember `tbl` as the current table (when given) and return the current
  # table, defaulting to the first table in the database.
  def complete_table(tbl)
    @t = tbl unless tbl.nil?
    @t = get_table_names[0] if @t.nil?
    @t
  end

  # Quote an identifier, treating dots as separators: `a.b` becomes
  # `a`.`b` (each part in back-quotes). Plain identifiers are returned
  # unchanged.
  def quote_with_dots(x)
    return x if x =~ PLAIN_IDENTIFIER
    x.split('.').map { |p| "`#{p}`" }.join('.')
  end

  # Quoted form of `tbl` (or of the current table when tbl is nil). Also
  # makes that table the current one.
  def quote_table(tbl)
    quote_with_dots(complete_table(tbl))
  end

  # Quoted form of a column name, always as a String.
  def quote_column(col)
    quote_with_dots(col.to_s)
  end

  # Insert one row. The column list is spelled out in the statement so that
  # the values land in the named columns whatever order `cols` is in.
  def insert(tbl, cols, vals)
    tbl = quote_table(tbl)
    names = cols.map { |c| quote_column(c) }.join(",")
    marks = cols.map { |_c| '?' }.join(",")
    template = "INSERT INTO #{tbl} (#{names}) VALUES(#{marks})"
    sqlite_execute(template, vals)
  end

  # Delete the rows whose columns `cols` match `vals`. IS is used rather
  # than = so that NULL matches NULL.
  def delete(tbl, cols, vals)
    tbl = quote_table(tbl)
    conds = cols.map { |c| "#{quote_column(c)} IS ?" }.join(" AND ")
    template = "DELETE FROM #{tbl} WHERE #{conds}"
    sqlite_execute(template, vals)
  end

  # Set set_cols to set_vals on the rows whose cond_cols match cond_vals.
  def update(tbl, set_cols, set_vals, cond_cols, cond_vals)
    tbl = quote_table(tbl)
    conds = cond_cols.map { |c| "#{quote_column(c)} IS ?" }.join(" AND ")
    sets = set_cols.map { |c| "#{quote_column(c)} = ?" }.join(", ")
    template = "UPDATE #{tbl} SET #{sets} WHERE #{conds}"
    sqlite_execute(template, set_vals + cond_vals)
  end

  def transaction(&block)
    # not yet mapped, not yet used
    block.call
  end

  # Run "PRAGMA <info>(<table>)". For a "db.table" name the database part
  # goes in front of the pragma name: "PRAGMA db.<info>(table)".
  def pragma(tbl, info)
    if tbl.include? '.'
      dbname, tbname, *_ignore = tbl.split('.')
      dbname = quote_with_dots(dbname)
      tbname = quote_with_dots(tbname)
      query = "PRAGMA #{dbname}.#{info}(#{tbname})"
    else
      tbl = quote_with_dots(tbl)
      query = "PRAGMA #{info}(#{tbl})"
    end
    sqlite_execute(query, [])
  end

  # Field `n` of a result row. `name` is unused here.
  # NOTE(review): presumably kept so that a subclass whose rows are keyed by
  # name can override this - confirm.
  def part(row, n, name)
    row[n]
  end

  # table_info rows for the table, cached per table name.
  def columns(tbl)
    tbl = complete_table(tbl)
    @info[tbl] = pragma(tbl, "table_info") unless @info.has_key? tbl
    @info[tbl]
  end

  # Names of all columns of the table.
  def column_names(tbl)
    columns(tbl).map { |c| part(c, 1, "name") }
  end

  # Run raw SQL and yield each result row. `names` is ignored.
  def fetch(sql, names)
    sqlite_execute(sql, []).each do |row|
      yield row
    end
  end

  # Primary-key column names of the table.
  #
  # A key forced with #set_primary_key wins for every table. Otherwise the
  # key is read from table_info (field 5 is the column's 1-based position in
  # the primary key, 0 when it is not part of it), falling back to the first
  # unique index. Detected keys are cached per table, so asking about a
  # second table does not return the first table's key.
  #
  # Returns a (possibly empty) list.
  def primary_key(tbl)
    return @pk unless @pk.nil?
    name = complete_table(tbl)
    # Created lazily so subclasses with their own initialize keep working.
    @pk_by_table ||= {}
    return @pk_by_table[name] if @pk_by_table.has_key?(name)

    cols = columns(name).select { |c| part(c, 5, "pk").to_i > 0 }.map { |c| part(c, 1, "name") }
    cols = pk_from_unique_index(name) if cols.empty?
    @pk_by_table[name] = cols unless cols.empty?
    cols
  end

  # Column names of the first unique index on the table, or an empty list
  # when the table has none.
  def pk_from_unique_index(tbl)
    pragma(tbl, "index_list").each do |row|
      if part(row, 2, "unique").to_s == "1"
        idx = part(row, 1, "name")
        return pragma(idx, "index_info").map { |r| part(r, 2, "name") }
      end
    end
    []
  end

  # copy the structure of an attached table, along with any indexes
  #
  # rdb - name of the attached database to copy from.
  # tbl - unqualified table name.
  #
  # Returns false (and changes nothing) when "main" already has schema
  # entries for `tbl`, true after replaying the remote CREATE statements.
  def copy_table_structure(rdb, tbl)
    template = "SELECT sql, type from X.sqlite_master WHERE tbl_name = ? ORDER BY type DESC"
    lsql = template.gsub('X', "main")
    rsql = template.gsub('X', quote_with_dots(rdb))
    # The name is a bound value compared against tbl_name, so it must be the
    # raw name, not an SQL-quoted identifier.
    args = [tbl]
    lschema = sqlite_execute(lsql, args)
    rschema = sqlite_execute(rsql, args)
    return false if lschema.length > 0
    rschema.each do |row|
      # Automatically created indexes have no SQL text; nothing to replay.
      next if row[0].nil?
      sqlite_execute(row[0], [])
    end
    true
  end
end
+ end