coopy 0.6.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rspec +2 -0
  4. data/CHANGELOG.md +7 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.md +22 -0
  7. data/README.md +59 -0
  8. data/Rakefile +4 -6
  9. data/coopy.gemspec +26 -0
  10. data/lib/coopy.rb +32 -175
  11. data/lib/coopy/alignment.rb +260 -0
  12. data/lib/coopy/bag.rb +17 -0
  13. data/lib/coopy/cell_info.rb +24 -0
  14. data/lib/coopy/change_type.rb +10 -0
  15. data/lib/coopy/compare_flags.rb +62 -0
  16. data/lib/coopy/compare_table.rb +327 -0
  17. data/lib/coopy/coopy.rb +22 -0
  18. data/lib/coopy/cross_match.rb +10 -0
  19. data/lib/coopy/csv_table.rb +51 -0
  20. data/lib/coopy/diff_render.rb +307 -0
  21. data/lib/coopy/index.rb +73 -0
  22. data/lib/coopy/index_item.rb +17 -0
  23. data/lib/coopy/index_pair.rb +72 -0
  24. data/lib/coopy/mover.rb +123 -0
  25. data/lib/coopy/ordering.rb +27 -0
  26. data/lib/coopy/row.rb +9 -0
  27. data/lib/coopy/simple_cell.rb +15 -0
  28. data/lib/coopy/simple_table.rb +144 -0
  29. data/lib/coopy/simple_view.rb +36 -0
  30. data/lib/coopy/table.rb +44 -0
  31. data/lib/coopy/table_comparison_state.rb +33 -0
  32. data/lib/coopy/table_diff.rb +634 -0
  33. data/lib/coopy/table_text.rb +14 -0
  34. data/lib/coopy/table_view.rb +31 -0
  35. data/lib/coopy/unit.rb +53 -0
  36. data/lib/coopy/version.rb +3 -0
  37. data/lib/coopy/view.rb +34 -0
  38. data/spec/fixtures/bridges.html +10 -0
  39. data/spec/fixtures/bridges_diff.csv +8 -0
  40. data/spec/fixtures/bridges_new.csv +9 -0
  41. data/spec/fixtures/bridges_old.csv +9 -0
  42. data/spec/fixtures/planetary_bodies.html +22 -0
  43. data/spec/fixtures/planetary_bodies_diff.csv +19 -0
  44. data/spec/fixtures/planetary_bodies_new.csv +20 -0
  45. data/spec/fixtures/planetary_bodies_old.csv +19 -0
  46. data/spec/fixtures/quote_me.csv +10 -0
  47. data/spec/fixtures/quote_me2.csv +11 -0
  48. data/spec/integration/table_diff_spec.rb +57 -0
  49. data/spec/libs/compare_flags_spec.rb +40 -0
  50. data/spec/libs/coopy_spec.rb +14 -0
  51. data/spec/libs/ordering_spec.rb +28 -0
  52. data/spec/libs/unit_spec.rb +31 -0
  53. data/spec/spec_helper.rb +29 -0
  54. metadata +153 -46
  55. data/bin/sqlite_diff +0 -4
  56. data/bin/sqlite_patch +0 -4
  57. data/bin/sqlite_rediff +0 -4
  58. data/lib/coopy/dbi_sql_wrapper.rb +0 -89
  59. data/lib/coopy/diff_apply_sql.rb +0 -35
  60. data/lib/coopy/diff_columns.rb +0 -33
  61. data/lib/coopy/diff_output.rb +0 -21
  62. data/lib/coopy/diff_output_action.rb +0 -34
  63. data/lib/coopy/diff_output_group.rb +0 -40
  64. data/lib/coopy/diff_output_raw.rb +0 -17
  65. data/lib/coopy/diff_output_stats.rb +0 -45
  66. data/lib/coopy/diff_output_table.rb +0 -49
  67. data/lib/coopy/diff_output_tdiff.rb +0 -48
  68. data/lib/coopy/diff_parser.rb +0 -92
  69. data/lib/coopy/diff_render_csv.rb +0 -29
  70. data/lib/coopy/diff_render_html.rb +0 -74
  71. data/lib/coopy/diff_render_log.rb +0 -52
  72. data/lib/coopy/row_change.rb +0 -25
  73. data/lib/coopy/scraperwiki_sql_wrapper.rb +0 -8
  74. data/lib/coopy/scraperwiki_utils.rb +0 -23
  75. data/lib/coopy/sequel_sql_wrapper.rb +0 -73
  76. data/lib/coopy/sql_compare.rb +0 -222
  77. data/lib/coopy/sql_wrapper.rb +0 -34
  78. data/lib/coopy/sqlite_sql_wrapper.rb +0 -143
  79. data/test/test_coopy.rb +0 -126
@@ -1,25 +0,0 @@
1
- class RowChange
2
- attr_accessor :row_mode
3
- attr_accessor :cells
4
- attr_accessor :columns
5
- attr_accessor :key
6
-
7
- def initialize(row_mode,cells)
8
- @row_mode = row_mode
9
- @cells = cells
10
- @key = nil
11
- end
12
-
13
- def active_columns
14
- return [] if @columns.nil?
15
- @columns.column_by_offset
16
- end
17
-
18
- def value_at(column)
19
- @cells[column[:in_offset]][:value]
20
- end
21
-
22
- def new_value_at(column)
23
- @cells[column[:in_offset]][:new_value]
24
- end
25
- end
@@ -1,8 +0,0 @@
1
- require 'sqlite_sql_wrapper'
2
-
3
- # Tweak sqlite wrapper slightly to match ScraperWiki's API
4
- class ScraperwikiSqlWrapper < SqliteSqlWrapper
5
- def sqlite_execute(template,vals)
6
- @db.sqliteexecute(template,vals)["data"]
7
- end
8
- end
@@ -1,23 +0,0 @@
1
- def link_tables(watch_scraper,watch_tables)
2
- sql = ScraperwikiSqlWrapper.new(ScraperWiki)
3
- watch_tables.each { |tbl| sql.copy_table_structure(watch_scraper,tbl) }
4
- end
5
-
6
- class CoopyResult
7
- attr_accessor :html
8
- end
9
-
10
- def sync_table(watch_scraper,tbl,keys)
11
- sql = ScraperwikiSqlWrapper.new(ScraperWiki)
12
- sql.set_primary_key(keys) unless keys.nil?
13
- cmp = SqlCompare.new(sql,"main.#{tbl}","#{watch_scraper}.#{tbl}")
14
- sinks = DiffOutputGroup.new
15
- render = DiffRenderHtml.new
16
- sinks << render
17
- sinks << DiffApplySql.new(sql,"main.#{tbl}")
18
- cmp.set_output(sinks)
19
- cmp.apply
20
- result = CoopyResult.new
21
- result.html = render.to_string
22
- result
23
- end
@@ -1,73 +0,0 @@
1
- require 'sql_wrapper'
2
- require 'sequel'
3
-
4
- class SequelSqlBare < SqlWrapper
5
- def initialize(db)
6
- @db = db
7
- @tname = nil
8
- @t = nil
9
- end
10
-
11
- def sync_table(tbl)
12
- tbl = @tname if tbl.nil?
13
- tbl = @db.tables[0] if tbl.nil?
14
- return @t if tbl==@tname
15
- @tname = tbl
16
- @t = @db[tbl]
17
- end
18
-
19
- def enhash(cols,vals)
20
- Hash[*cols.map{|c| c.to_sym}.zip(vals).flatten]
21
- end
22
-
23
- def insert(tbl,cols,vals)
24
- sync_table(tbl)
25
- @t.insert(enhash(cols,vals))
26
- end
27
-
28
- def delete(tbl,cols,vals)
29
- sync_table(tbl)
30
- @t.filter(enhash(cols,vals)).delete
31
- end
32
-
33
- def update(tbl,set_cols,set_vals,cond_cols,cond_vals)
34
- sync_table(tbl)
35
- @t.filter(enhash(cond_cols,cond_vals)).update(enhash(set_cols,set_vals))
36
- end
37
-
38
- def transaction(&block)
39
- @db.transaction(&block)
40
- end
41
-
42
- def columns(tbl)
43
- sync_table(tbl)
44
- @db.schema(@tname)
45
- end
46
-
47
- def column_names(tbl)
48
- columns(tbl).map{|x| x[0]}
49
- end
50
-
51
- def primary_key(tbl)
52
- cols = columns(tbl)
53
- cols.select{|x| x[1][:primary_key]}.map{|x| x[0]}
54
- end
55
-
56
- def index(tbl)
57
- key = primary_key(tbl)
58
- @t.select(*key)
59
- end
60
-
61
- def fetch(sql,names)
62
- @db.fetch(sql) do |row|
63
- yield names.map{|n| row[n]}
64
- end
65
- end
66
- end
67
-
68
-
69
- class SequelSqlWrapper < SequelSqlBare
70
- def initialize(*params)
71
- super(Sequel.connect(*params))
72
- end
73
- end
@@ -1,222 +0,0 @@
1
- require 'coopy/diff_columns'
2
- require 'coopy/row_change'
3
-
4
- class SqlCompare
5
- def initialize(db1,db2)
6
- @db1 = db
7
- @db2 = db2
8
- @table1 = nil
9
- @table2 = nil
10
- @single_db = false
11
- raise "not implemented yet"
12
- end
13
-
14
- def initialize(db,table1,table2)
15
- @db1 = db
16
- @db2 = db.clone
17
- @table1 = table1
18
- @table2 = table2
19
- @single_db = true
20
- end
21
-
22
- def set_output(patch)
23
- @patch = patch
24
- end
25
-
26
- def apply
27
- apply_single
28
- end
29
-
30
- # We are not implementing full comparison, just an adequate subset
31
- # for easy cases (a table with a trustworthy primary key, and constant
32
- # columns). Make sure we are not trying to do something we're not ready
33
- # for.
34
- def validate_schema
35
- all_cols1 = @db1.column_names(@table1)
36
- all_cols2 = @db2.column_names(@table2)
37
- if all_cols1 != all_cols2
38
- raise "Columns do not match, please use full coopy toolbox"
39
- end
40
-
41
- key_cols1 = @db1.primary_key(@table1)
42
- key_cols2 = @db2.primary_key(@table2)
43
- if key_cols1 != key_cols2
44
- raise "Primary keys do not match, please use full coopy toolbox"
45
- end
46
- end
47
-
48
- def keyify(lst)
49
- lst.map{|x| x.to_s}.join("___")
50
- end
51
-
52
- # When working within a single database, we can delegate more work to SQL.
53
- # So we specialize this case.
54
- def apply_single
55
- validate_schema
56
-
57
- # Prepare some lists of columns.
58
- key_cols = @db1.primary_key(@table1)
59
- data_cols = @db1.except_primary_key(@table1)
60
- all_cols = @db1.column_names(@table1)
61
-
62
- # Let our public know we are beginning.
63
- @patch.begin_diff
64
-
65
- # Advertise column names.
66
- @rc_columns = DiffColumns.new
67
- @rc_columns.title_row = all_cols
68
- @rc_columns.update(0)
69
- cells = all_cols.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
70
- rc = RowChange.new("@@",cells)
71
- @patch.apply_row(rc)
72
-
73
- # If requested, we will be providing context rows around changed rows.
74
- # This is not a natural thing to do with SQL, so we do it only on request.
75
- # When requested, we need to buffer row changes.
76
- @pending_rcs = []
77
-
78
- # Prepare some useful SQL fragments to assemble later.
79
- sql_table1 = @db1.quote_table(@table1)
80
- sql_table2 = @db1.quote_table(@table2)
81
- sql_key_cols = key_cols.map{|c| @db1.quote_column(c)}.join(",")
82
- sql_all_cols = all_cols.map{|c| @db1.quote_column(c)}.join(",")
83
- sql_key_match = key_cols.map{|c| @db1.quote_column(c)}.map{|c| "#{sql_table1}.#{c} IS #{sql_table2}.#{c}"}.join(" AND ")
84
- sql_data_mismatch = data_cols.map{|c| @db1.quote_column(c)}.map{|c| "#{sql_table1}.#{c} IS NOT #{sql_table2}.#{c}"}.join(" OR ")
85
-
86
- # For one query we will need to interleave columns from two tables. For
87
- # portability we need to give these columns distinct names.
88
- weave = all_cols.map{|c| [[sql_table1,@db1.quote_column(c)],
89
- [sql_table2,@db2.quote_column(c)]]}.flatten(1)
90
- dbl_cols = weave.map{|c| "#{c[0]}.#{c[1]}"}
91
- sql_dbl_cols = weave.map{|c| "#{c[0]}.#{c[1]} AS #{c[0].gsub(/[^a-zA-Z0-9]/,'_')}_#{c[1].gsub(/[^a-zA-Z0-9]/,'_')}"}.join(",")
92
-
93
- # Prepare a map of primary key offsets.
94
- keys_in_all_cols = key_cols.each.map{|c| all_cols.index(c)}
95
- keys_in_dbl_cols = keys_in_all_cols.map{|x| 2*x}
96
-
97
- # Find rows in table2 that are not in table1.
98
- sql = "SELECT #{sql_all_cols} FROM #{sql_table2} WHERE NOT EXISTS (SELECT 1 FROM #{sql_table1} WHERE #{sql_key_match})"
99
- apply_inserts(sql,all_cols,keys_in_all_cols)
100
-
101
- # Find rows in table1 and table2 that differ while having the same primary
102
- # key.
103
- sql = "SELECT #{sql_dbl_cols} FROM #{sql_table1} INNER JOIN #{sql_table2} ON #{sql_key_match} WHERE #{sql_data_mismatch}"
104
- apply_updates(sql,dbl_cols,keys_in_dbl_cols)
105
-
106
- # Find rows that are in table1 but not table2
107
- sql = "SELECT #{sql_all_cols} FROM #{sql_table1} WHERE NOT EXISTS (SELECT 1 FROM #{sql_table2} WHERE #{sql_key_match})"
108
- apply_deletes(sql,all_cols,keys_in_all_cols)
109
-
110
- # If we are supposed to provide context, we need to deal with row order.
111
- if @patch.want_context
112
- sql = "SELECT #{sql_all_cols}, 0 AS __coopy_tag__ FROM #{sql_table1} UNION SELECT #{sql_all_cols}, 1 AS __coopy_tag__ FROM #{sql_table2} ORDER BY #{sql_key_cols}, __coopy_tag__"
113
- apply_with_context(sql,all_cols,keys_in_all_cols)
114
- end
115
-
116
- # Done!
117
- @patch.end_diff
118
- end
119
-
120
-
121
- def apply_inserts(sql,all_cols,keys_in_all_cols)
122
- @db1.fetch(sql,all_cols) do |row|
123
- cells = row.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
124
- rc = RowChange.new("+++",cells)
125
- apply_rc(rc,row,keys_in_all_cols)
126
- end
127
- end
128
-
129
-
130
- def apply_updates(sql,dbl_cols,keys_in_dbl_cols)
131
- @db1.fetch(sql,dbl_cols) do |row|
132
- pairs = row.enum_for(:each_slice,2).to_a
133
- cells = pairs.map do |v|
134
- if v[0]==v[1]
135
- { :txt => v[0], :value => v[0], :cell_mode => "" }
136
- else
137
- { :txt => v[0], :value => v[0], :new_value => v[1], :cell_mode => "->" }
138
- end
139
- end
140
- rc = RowChange.new("->",cells)
141
- apply_rc(rc,row,keys_in_dbl_cols)
142
- end
143
- end
144
-
145
-
146
- def apply_deletes(sql,all_cols,keys_in_all_cols)
147
- @db1.fetch(sql,all_cols) do |row|
148
- cells = row.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
149
- rc = RowChange.new("---",cells)
150
- apply_rc(rc,row,keys_in_all_cols)
151
- end
152
- end
153
-
154
- def apply_rc(rc,row,keys_in_cols)
155
- rc.columns = @rc_columns
156
- if @patch.want_context
157
- rc.key = keyify(row.values_at(*keys_in_cols))
158
- @pending_rcs << rc
159
- else
160
- @patch.apply_row(rc)
161
- end
162
- end
163
-
164
- def emit_skip(row)
165
- cells = row.map{|v| { :txt => "...", :value => "...", :cell_mode => "" }}
166
- rc = RowChange.new("...",cells)
167
- rc.columns = @rc_columns
168
- @patch.apply_row(rc)
169
- end
170
-
171
- # Do the context dance.
172
- def apply_with_context(sql,all_cols,keys_in_all_cols)
173
- hits = {}
174
- @pending_rcs.each do |rc|
175
- hits[rc.key] = rc
176
- end
177
- hist = []
178
- n = 2
179
- pending = 0
180
- skipped = false
181
- noted = false
182
- last_row = nil
183
- @db1.fetch(sql,all_cols + ["__coopy_tag__"]) do |row|
184
- tag = row.pop.to_i
185
- k = keyify(row.values_at(*keys_in_all_cols))
186
- if hits[k]
187
- emit_skip(row) if skipped
188
- hist.each do |row0|
189
- cells = row0.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
190
- rc = RowChange.new("",cells)
191
- rc.columns = @rc_columns
192
- @patch.apply_row(rc)
193
- end
194
- hist.clear
195
- pending = n
196
- @patch.apply_row(hits[k])
197
- hits.delete(k)
198
- skipped = false
199
- noted = true
200
- elsif tag == 1
201
- # ignore redundant row
202
- elsif pending>0
203
- emit_skip(row) if skipped
204
- cells = row.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
205
- rc = RowChange.new("",cells)
206
- rc.columns = @rc_columns
207
- @patch.apply_row(rc)
208
- pending = pending-1
209
- skipped = false
210
- else
211
- hist << row
212
- if hist.length>n
213
- skipped = true
214
- last_row = row
215
- hist.shift
216
- end
217
- end
218
- end
219
- emit_skip(last_row) if skipped and noted
220
- end
221
- end
222
-
@@ -1,34 +0,0 @@
1
- class SqlWrapper
2
- def insert(tbl,cols,vals)
3
- end
4
-
5
- def delete(tbl,cols,vals)
6
- end
7
-
8
- def update(tbl,set_cols,set_vals,cond_cols,cond_vals)
9
- end
10
-
11
- def column_names(tbl)
12
- []
13
- end
14
-
15
- def primary_key(tbl)
16
- []
17
- end
18
-
19
- def except_primary_key(tbl)
20
- column_names(tbl)-primary_key(tbl)
21
- end
22
-
23
- def fetch(sql)
24
- []
25
- end
26
-
27
- def quote_column(c)
28
- c.to_s
29
- end
30
-
31
- def quote_table(t)
32
- t.to_s
33
- end
34
- end
@@ -1,143 +0,0 @@
1
- require 'coopy/sql_wrapper'
2
-
3
- class SqliteSqlWrapper < SqlWrapper
4
- def initialize(db)
5
- @db = db
6
- @t = nil
7
- @qt = nil
8
- @pk = nil
9
- @info = {}
10
- end
11
-
12
- def set_primary_key(lst)
13
- @pk = lst
14
- end
15
-
16
- def sqlite_execute(template,vals)
17
- return @db.execute(template,*vals)
18
- end
19
-
20
- def get_table_names
21
- sqlite_execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name",[]).flatten
22
- end
23
-
24
- def complete_table(tbl)
25
- @t = tbl unless tbl.nil?
26
- @t = get_table_names[0] if @t.nil?
27
- @t
28
- end
29
-
30
- def quote_with_dots(x)
31
- return x if x.match(/^[a-zA-Z0-9_]+$/)
32
- x.split('.').map{|p| "`#{p}`"}.join('.')
33
- end
34
-
35
- def quote_table(tbl)
36
- complete_table(tbl)
37
- return @t if @t.match(/^[a-zA-Z0-9_]+$/)
38
- quote_with_dots(@t)
39
- end
40
-
41
- def quote_column(col)
42
- return col if col.match(/^[a-zA-Z0-9_]+$/)
43
- quote_with_dots(col)
44
- end
45
-
46
- def insert(tbl,cols,vals)
47
- tbl = quote_table(tbl)
48
- template = cols.map{|x| '?'}.join(",")
49
- template = "INSERT INTO #{tbl} VALUES(#{template})"
50
- sqlite_execute(template,vals)
51
- end
52
-
53
- def delete(tbl,cols,vals)
54
- tbl = quote_table(tbl)
55
- template = cols.map{|c| quote_column(c) + ' IS ?'}.join(" AND ")
56
- template = "DELETE FROM #{tbl} WHERE #{template}"
57
- sqlite_execute(template,vals)
58
- end
59
-
60
- def update(tbl,set_cols,set_vals,cond_cols,cond_vals)
61
- tbl = quote_table(tbl)
62
- conds = cond_cols.map{|c| quote_column(c) + ' IS ?'}.join(" AND ")
63
- sets = set_cols.map{|c| quote_column(c) + ' = ?'}.join(", ")
64
- template = "UPDATE #{tbl} SET #{sets} WHERE #{conds}"
65
- v = set_vals + cond_vals
66
- sqlite_execute(template,v)
67
- end
68
-
69
- def transaction(&block)
70
- # not yet mapped, not yet used
71
- block.call
72
- end
73
-
74
- def pragma(tbl,info)
75
- if tbl.include? '.'
76
- dbname, tbname, *ignore = tbl.split('.')
77
- dbname = quote_with_dots(dbname)
78
- tbname = quote_with_dots(tbname)
79
- query = "PRAGMA #{dbname}.#{info}(#{tbname})"
80
- else
81
- tbl = quote_with_dots(tbl)
82
- query = "PRAGMA #{info}(#{tbl})"
83
- end
84
- result = sqlite_execute(query,[])
85
- result
86
- end
87
-
88
- def part(row,n,name)
89
- row[n]
90
- end
91
-
92
- def columns(tbl)
93
- tbl = complete_table(tbl)
94
- @info[tbl] = pragma(tbl,"table_info") unless @info.has_key? tbl
95
- @info[tbl]
96
- end
97
-
98
- def column_names(tbl)
99
- columns(tbl).map{|c| part(c,1,"name")}
100
- end
101
-
102
- def fetch(sql,names)
103
- sqlite_execute(sql,[]).each do |row|
104
- yield row
105
- end
106
- end
107
-
108
- def primary_key(tbl)
109
- return @pk unless @pk.nil?
110
- cols = columns(tbl)
111
- cols = cols.select{|c| part(c,5,"pk").to_s=="1"}.map{|c| part(c,1,"name")}
112
- if cols.length == 0
113
- cols = pk_from_unique_index(tbl)
114
- end
115
- @pk = cols if cols.length>0
116
- cols
117
- end
118
-
119
- def pk_from_unique_index(tbl)
120
- pragma(tbl,"index_list").each do |row|
121
- if part(row,2,"unique").to_s == "1"
122
- idx = part(row,1,"name")
123
- return pragma(idx,"index_info").map{|r| part(r,2,"name")}
124
- end
125
- end
126
- nil
127
- end
128
-
129
- # copy the structure of an attached table, along with any indexes
130
- def copy_table_structure(rdb,tbl)
131
- template = "SELECT sql, type from X.sqlite_master WHERE tbl_name = ? ORDER BY type DESC"
132
- lsql = template.gsub('X',"main")
133
- rsql = template.gsub('X',quote_with_dots(rdb))
134
- args = [quote_with_dots(tbl)]
135
- lschema = sqlite_execute(lsql,args)
136
- rschema = sqlite_execute(rsql,args)
137
- if lschema.length>0
138
- return false
139
- end
140
- rschema.each{ |row| sqlite_execute(row[0],[]) }
141
- true
142
- end
143
- end