jcangas-datagateway 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :major: 1
3
+ :minor: 1
4
+ :patch: 0
data/bin/datagw ADDED
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env ruby -KU
2
+
3
+
4
+ # == Sinopsis
5
+ # DataGateway actua como una 'pasarela de datos'. Permite transferir datos de
6
+ # un origen a un destino, incluso en maquinas diferentes usando el protocolo
7
+ # sftp.
8
+ #
9
+ #
10
+ # == Ejemplos
11
+ #
12
+ #
13
+ # == Usage
14
+ # Ejecute datagw.rb -g proyecto
15
+ #
16
+ # Para ayuda use: datagw -h
17
+ #
18
+ # == Options
19
+ # -h, --help Displays help message
20
+ # -e, --environment Select the execution environment
21
+ # -q, --quiet Output as little as possible, overrides verbose
22
+ # -v, --version Print version and quit
23
+ # -d, --debug Debug mode
24
+ #
25
+ # == Author
26
+ # Jorge L. Cangas
27
+ #
28
+ # == Copyright
29
+ # Copyright (c) 2008 Jorge L. Cangas
30
+
31
+ require 'rubygems'
32
+ require 'datagateway'
33
+
34
+ AppConfig.run
35
+
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "faster_csv"
4
+
5
# Conduit that turns record collections into CSV text and CSV text back into
# attribute hashes.
class CSVConduit
  # Serializes +data+ as CSV: one header line followed by one line per record
  # with every field quoted.
  #
  # data - Array of records. The first record must respond to
  #        +attribute_names+; every record must respond to each of those
  #        names.
  #
  # Returns the CSV text, or an empty String when +data+ is nil or empty
  # (there is no first record to take the header from).
  def self.pack(data)
    return '' if data.nil? || data.empty?

    headers = data[0].attribute_names
    lines = data.map do |rec|
      headers.map { |header| rec.send(header) }.to_csv(:force_quotes => true)
    end
    headers.to_csv + lines.join
  end

  # Parses CSV text whose first row holds the attribute names.
  #
  # data - String with the CSV content.
  #
  # Returns an Array of Hashes keyed by the header names (as Symbols), one per
  # data row. Returns an empty Array when the text has no header row.
  def self.unpack(data)
    rows = FasterCSV.parse(data)
    header_row = rows.shift
    return [] if header_row.nil?

    attribs = header_row.map { |atr| atr.to_sym }
    rows.map do |row|
      record = {}
      # Missing trailing cells become nil; cells beyond the header are dropped.
      attribs.each_with_index { |attrib, index| record[attrib] = row[index] }
      record
    end
  end
end
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+
3
# YAML is referenced below; require it here so the conduit does not depend on
# another library having loaded it first.
require 'yaml'

# Conduit that serializes records as a YAML list of attribute hashes.
class YAMLConduit
  # Serializes +data+ as YAML.
  #
  # data - Enumerable of records responding to +attributes+.
  #
  # Returns the YAML text for the list of attribute hashes.
  def self.pack(data)
    YAML.dump(data.map { |rec| rec.attributes })
  end

  # Parses YAML text holding a list of attribute hashes.
  # The result has the same shape CSVConduit.unpack produces.
  #
  # NOTE(review): YAML.load on files received from another machine can
  # instantiate arbitrary objects on older Ruby versions; consider
  # YAML.safe_load where the runtime provides it.
  #
  # data - String with the YAML content.
  #
  # Returns an Array of Hashes with Symbol keys; empty when the document is
  # empty.
  def self.unpack(data)
    rows = YAML.load(data) || []
    rows.map do |row|
      record = {}
      row.each_pair { |key, value| record[key.to_sym] = value }
      record
    end
  end
end
@@ -0,0 +1,582 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'benchmark'
5
+ require 'fileutils'
6
+ require 'zip/zip'
7
+ require 'folder_writer'
8
+ require 'iconv'
9
+ require 'ssh_transfer'
10
+ require 'jcode'
11
+
12
+ module DataGateway
13
+ CONDUIT_DIR = File.join(File.dirname(__FILE__), 'conduits')
14
+ @@resource_path = ''
15
+ @@use_resources = false
16
class << self
  attr_accessor :logger, :inbox, :outbox, :donebox

  # Reads the @@use_resources flag.
  def use_resources
    @@use_resources
  end

  # Sets the @@use_resources flag.
  def use_resources=(value)
    @@use_resources = value
  end

  # Resolves the conduit class for a format id or file name.
  #
  # The format is the last dot-separated segment of the base name once a
  # trailing '.zip' is removed (:csv and 'x.csv.zip' both give 'csv'). The
  # matching '<format>_conduit.rb' file is required from CONDUIT_DIR when the
  # '<FORMAT>Conduit' constant is not defined yet.
  #
  # Returns the conduit class.
  def conduit_for(formatid)
    format = File.basename(formatid.to_s, '.zip').split('.').last
    class_name = format.upcase + 'Conduit'
    return Object.const_get(class_name) if Object.const_defined?(class_name)

    require File.join(CONDUIT_DIR, format.downcase + '_' + 'conduit.rb')
    Object.const_get(class_name)
  end

  # Returns the top-level constant named +class_name+, defining it as an
  # empty ActiveRecord::Base subclass when it does not exist yet.
  # Returns nil when +class_name+ is nil.
  def model_for(class_name)
    return nil if class_name.nil?

    model_name = class_name.to_sym
    if Object.const_defined?(model_name)
      Object.const_get(model_name)
    else
      Object.const_set(model_name, Class.new(ActiveRecord::Base))
    end
  end
end
49
+
50
# Encoder that performs no conversion: data passes through untouched in both
# directions.
class NullEncoder
  # Returns +data+ unchanged.
  def pack(data)
    data
  end

  # Unpacking is the same identity operation as packing.
  alias_method :unpack, :pack
end
59
+
60
# Character-set encoder built on a pair of Iconv converters.
class Encoder
  # options - Hash with the encoding names under :from and :to.
  def initialize(options)
    source, target = options.values_at(:from, :to)
    @pack = Iconv.new(target, source)     # converts :from -> :to
    @unpack = Iconv.new(source, target)   # converts :to -> :from
  end

  # Converts +data+ with the :from -> :to converter.
  def pack(data)
    @pack.iconv(data)
  end

  # Converts +data+ with the :to -> :from converter.
  def unpack(data)
    @unpack.iconv(data)
  end
end
74
+
75
# Maps one incoming column value onto a target record according to the
# strategy named in options[:method] (:none, :write_to or :lookup).
class ColumnMapper
  attr_reader :options

  # table_mapper - owner that receives register_fixup calls for unresolved
  #                lookups.
  # options      - Hash; keys read here are :method, :write_to, :table, :key
  #                and :value.
  def initialize(table_mapper, options = {})
    @table_mapper = table_mapper
    @options = options
  end

  # Applies the configured strategy to +record+ by dispatching to the method
  # named in options[:method].
  def apply_to(record, value)
    DataGateway.logger.debug "apply_to(#{record.inspect}, #{value}) options #{@options.inspect}"
    send(@options[:method], record, value)
  end

  # value mappers

  # Strategy that discards the value.
  def none(record, value)
    # do nothing
  end

  # Strategy that stores +value+ under options[:write_to].
  def write_to(record, value)
    record[@options[:write_to]] = value
  end

  # Strategy that translates +value+ through the lookup table and stores the
  # result. When nothing is found, the value is registered as a fixup on the
  # table mapper so it can be retried later.
  def lookup(record, value)
    result_status = {}
    new_value = lookup_map(value, result_status)
    if result_status[:found]
      write_to(record, new_value)
    else
      @table_mapper.register_fixup(value, self)
    end
  end

  # Retries a lookup that was registered as a fixup. This is public because
  # TableMapper#solve_fixups calls it with an explicit receiver, which Ruby
  # forbids for private methods.
  def solve_fixup(record, value)
    DataGateway.logger.debug "mapper #{@options[:method]} opt. #{@options.inspect}"
    result_status = {}
    DataGateway.logger.debug "lookup #{record.inspect} -> <#{value}>"
    value = lookup_map(value, result_status)
    if result_status[:found]
      DataGateway.logger.debug "FOUND: #{value}"
      write_to(record, value)
    else
      DataGateway.logger.debug "NOT FOUND!"
    end
  end

  private

  # Looks +value+ up in the model for options[:table] through a dynamic
  # find_or_create_by_<key> finder.
  #
  # result_status - Hash that receives :found => true/false.
  #
  # Returns the options[:value] attribute of the record as a String, or nil
  # when the finder returned nil.
  def lookup_map(value, result_status)
    DataGateway.logger.debug "lookup_map(#{value}, #{result_status}) options #{@options.inspect}"
    lookup_model = DataGateway.model_for(@options[:table])
    # options[:key] may be an Array (TableMapper#identity); build the finder
    # name from its elements instead of interpolating the Array itself.
    finder = "find_or_create_by_#{Array(@options[:key]).join('_and_')}"
    lookup_rec = lookup_model.send(finder, value)
    result_status[:found] = !lookup_rec.nil?
    return nil unless result_status[:found]

    lookup_rec.send(@options[:value]).to_s # mapped value
  end
end
135
+
136
# Describes how the rows of one source table are imported: target table,
# per-column mappers, identity columns and pending lookup fixups.
class TableMapper
  attr_reader :source
  attr_reader :identity

  # job   - owning job; used to reach the mappers of other tables through
  #         get_mapper.
  # table - source table name; also the initial target.
  def initialize(job, table)
    @job = job
    @source = table
    @target = table
    @column_map = {}
    @identity = [:id]
    @ar_fixups = []
    @force_case = false
    clear_fixups
  end

  # Marks the table as ignored by clearing its target.
  def ignore!
    @target = nil
  end

  # True when the table has no target.
  def ignore?
    @target.nil?
  end

  # Reads the target table name; sets it first when +table_name+ is given.
  def target(table_name = nil)
    @target = table_name if table_name
    @target
  end

  # Makes column_mapper_defaults downcase column names.
  def force_downcase
    @force_case = :downcase
  end

  # Makes column_mapper_defaults upcase column names.
  def force_upcase
    @force_case = :upcase
  end

  # Model class for the current target (nil when ignored).
  def target_model
    DataGateway.model_for(@target)
  end

  # DSL syntax sugar to declare column mappers.
  #
  # column_map - Hash of source column => mapper. A Symbol or String value is
  #              shorthand for write_to(value); the passed Hash is updated in
  #              place with the expanded mappers.
  #
  # Returns the complete column map, with every mapper's options completed
  # from column_mapper_defaults.
  def columns(column_map)
    column_map.keys.each do |key|
      value = column_map[key]
      case value
      # 'then' instead of the 1.8-only 'when ...:' form, which is a syntax
      # error on Ruby 1.9 and later.
      when Symbol, String then column_map[key] = write_to(value.to_sym)
      end
    end
    @column_map.merge!(column_map)

    @column_map.each_pair do |key, mapper|
      defaults = column_mapper_defaults(key)
      mapper.options.replace(defaults.merge!(mapper.options))
    end
    @column_map
  end

  # Builds a mapper that discards the column.
  def none
    create_column_mapper({:method => :none})
  end

  # Builds a mapper that writes the value to +column+.
  def write_to(column)
    create_column_mapper({:method => :write_to, :write_to => column})
  end

  # Builds a lookup mapper from +options+ (the Hash gets :method set).
  def lookup(options)
    options[:method] = :lookup
    create_column_mapper(options)
  end

  # Declares the columns that identify a row in the target table.
  def identity_by(*cols)
    @identity = cols
  end

  # utilities

  # Returns the mapper for +column_name+, creating a default one on first use.
  def column_mapper_for(column_name)
    @column_map[column_name] ||= create_column_mapper(column_mapper_defaults(column_name))
  end

  # Default mapper options for a column: names ending in '_id' become a
  # lookup against the table named by the prefix, keyed by that table's
  # identity; anything else is written straight through.
  def column_mapper_defaults(column_name)
    column_name = column_name.to_s.send(@force_case).to_sym if @force_case
    if column_name.to_s =~ /(.*)_id$/i
      table = $1.downcase.camelize.to_sym
      { :method => :lookup,
        :write_to => column_name,
        :table => table,
        :key => @job.get_mapper(table).identity,
        :value => :id }
    else
      { :method => :write_to,
        :write_to => column_name }
    end
  end

  # Maps and saves every record; records with unresolved lookups are parked
  # as fixups instead of being saved.
  def update_all(records)
    DataGateway.logger.debug "importing #{records.size} recods into table #{source}"
    klass = target_model
    DataGateway.logger.debug "target class #{klass} (PK: #{klass.primary_key})"
    records.each do |rec|
      rec = map(rec)
      update_rec(rec) unless current_fixups(rec)
    end
  end

  # Drops every parked fixup.
  def clear_fixups
    @fixups = {}
  end

  # Called by column mappers while a record is being mapped to note a value
  # that could not be resolved.
  def register_fixup(value, col_mapper)
    @ar_fixups << [value, col_mapper]
  end

  # Moves the fixups collected while mapping +rec+ under that record.
  #
  # Returns true when the record has pending fixups, false otherwise.
  def current_fixups(rec)
    DataGateway.logger.debug "current fixups for: #{rec.inspect}"
    DataGateway.logger.debug @ar_fixups.inspect
    return false if @ar_fixups.empty?

    @fixups[rec] ||= []
    @fixups[rec].concat @ar_fixups
    @ar_fixups = []
    true
  end

  # Retries every parked fixup and saves the affected records. The parked
  # fixups are cleared even when an error is raised.
  def solve_fixups
    DataGateway.logger.debug "start solve_fixups #{@fixups.size}"
    @fixups.each_pair do |record, pending|
      pending.each do |value, col_mapper|
        # send: works whether or not the mapper exposes solve_fixup publicly.
        col_mapper.send(:solve_fixup, record, value)
      end
      update_rec(record)
    end
    DataGateway.logger.debug "end solve_fixups"
  ensure
    clear_fixups
  end

  # Finds or creates the target row identified by the identity columns and
  # updates it with the remaining attributes. The identity keys are removed
  # from +rec+.
  #
  # Returns the row's :id attribute.
  def update_rec(rec)
    DataGateway.logger.debug "update rec class #{target_model} '->' #{rec.inspect}"
    conditions = identity.map { |key| key.to_s }
    values = conditions.map { |key| rec.delete(key.to_sym) }
    # Dynamic finders join several attributes with '_and_'.
    finder = 'find_or_create_by_' + conditions.join('_and_')
    DataGateway.logger.debug "finder: #{finder}(#{values.join(',')})"

    ar = target_model.send(finder, *values)
    DataGateway.logger.debug "get: #{ar.inspect}"
    DataGateway.logger.debug "upd atrt: #{rec.inspect}"
    ar.update_attributes(rec)
    ar[:id]
  end

  # Runs every column of +ar+ through its mapper.
  #
  # Returns the new attribute Hash.
  def map(ar)
    new_ar = {}
    ar.each_pair do |key, value|
      column_mapper_for(key).apply_to(new_ar, value)
    end
    new_ar
  end

  private

  def create_column_mapper(options)
    ColumnMapper.new(self, options)
  end
end
300
+
301
# Base class for transfer jobs. Subclasses are expected to provide do_run;
# the private helpers below are reached from the configuration block, which
# is instance_eval'ed in the job.
class Job
  attr_accessor :format_id
  attr_reader :transfer, :name

  # name  - identifier used in log messages.
  # block - optional configuration block, evaluated in the job instance.
  def initialize(name, &block)
    @name = name
    DataGateway.logger.info "DataGateway.use_resources #{DataGateway.use_resources}"
    @use_resources = DataGateway.use_resources
    if @use_resources
      DataGateway.logger.info "resources will be imported!"
    end
    @encoder = NullEncoder.new
    @transfer = nil
    instance_eval(&block) if block_given?
  end

  # Creates the SSHTransfer for this job. The :logger entry is added to the
  # passed +options+ Hash itself.
  def transfer_for(options)
    options.merge!(:logger => DataGateway.logger)
    @transfer = SSHTransfer.new(options)
  end

  # Runs do_run, logging the start and the end of the job.
  def run
    AppConfig.logger.info "running job #{name}"
    do_run
    AppConfig.logger.info "job #{name} finished"
  end

  private

  # only for debug purposes
  def use_resources(bool)
    @use_resources = bool
  end

  # Replaces the default NullEncoder with an Encoder built from +options+.
  def encode(options)
    @encoder = Encoder.new(options)
  end

  # Serializes +data+ with the conduit for the current format.
  def pack(data)
    conduit = DataGateway.conduit_for(self.format_id)
    conduit.pack(data)
  end

  # Parses +data+ with the conduit for the current format.
  def unpack(data)
    conduit = DataGateway.conduit_for(format_id)
    conduit.unpack(data)
  end

  # Lazily builds the change point object matching the ActiveRecord
  # connection.
  def dbchange_point
    @dbchange_point ||= DBChangePoint.class_for(ActiveRecord::Base.connection).new
  end
end
349
+
350
# Job that imports every zip archive found in the inbox into the database,
# one archive entry per table, plus optional attachments under 'attach/'.
class Import < Job
  def initialize(name, &block)
    @mappers = {} # import mappers; must exist before super runs the block
    super
  end

  # Downloads pending archives when a transfer is configured, then imports
  # each '*.zip' in the inbox in name order and moves it to the donebox.
  def do_run
    transfer.download if transfer

    Dir.glob(File.join(DataGateway::inbox, '*.zip')).sort.each do |filename|
      DataGateway.logger.info "Importing #{filename}"
      import_file(filename)
      FileUtils.mv(filename, DataGateway::donebox)
      DataGateway.logger.info "Import done for #{filename}"
    end
  end

  # Returns the TableMapper for +table+, creating it on first use.
  def get_mapper(table)
    @mappers[table] ||= TableMapper.new(self, table)
  end

  private # DSL

  # Configures the transfer used to download archives into the inbox.
  def download_from(options)
    options.merge! 'local_files' => DataGateway.inbox
    transfer_for(options)
  end

  # Configures the mapper of +table+ by evaluating the block in it.
  def importing(table, &block)
    get_mapper(table).instance_eval(&block) if block_given?
  end

  private

  # Imports one archive. Entries under 'attach/' are attachments; every other
  # entry holds the rows of the class named after the entry's base name.
  # Pending fixups are solved even when an entry fails.
  def import_file(file_name)
    self.format_id = file_name
    clear_fixups
    begin
      Zip::ZipInputStream::open(file_name) do |io|
        while (entry = io.get_next_entry)
          content = io.read
          fname = entry.name
          if fname =~ /^attach\//
            import_attach(fname.gsub(/^attach\//, ''), content)
          else
            class_name = File.basename(fname, '.*').downcase.camelize.to_sym
            import_class(class_name, content)
          end
        end
      end
    ensure
      solve_fixups
    end
  end

  def clear_fixups
    @mappers.each_value { |mapper| mapper.clear_fixups }
  end

  def solve_fixups
    @mappers.each_value { |mapper| mapper.solve_fixups }
  end

  # Imports the rows in +data+ through the mapper of +class_name+, unless
  # that mapper is marked as ignored.
  def import_class(class_name, data)
    DataGateway.logger.info "Importing data for #{class_name}"
    mapper = get_mapper(class_name)
    if mapper.ignore?
      DataGateway.logger.info "Ignored"
      return
    end
    mapper.update_all unpack(data)
    #p dbchange_point.capture_for(klass)
    DataGateway.logger.info "done for #{class_name}"
  end

  # Writes an attachment below the resource path. Does nothing unless
  # resources are enabled for this job.
  #
  # The entry name comes from the archive, so a name such as '../../x' could
  # otherwise point outside the resource directory; such entries are skipped.
  def import_attach(fname, content)
    return unless @use_resources

    DataGateway.logger.info "importing attach #{fname}"
    root = File.expand_path(DataGateway.resource_path)
    target_file = File.expand_path(fname, root)
    unless inside_directory?(target_file, root)
      DataGateway.logger.info "skipping attach #{fname}: it resolves outside #{root}"
      return
    end
    FileUtils.mkdir_p(File.dirname(target_file))
    File.open(target_file, "wb") { |f| f.write content }
  end

  # True when the absolute +path+ lies strictly below the absolute +dir+.
  def inside_directory?(path, dir)
    prefix = dir[-1, 1] == File::SEPARATOR ? dir : dir + File::SEPARATOR
    path[0, prefix.length] == prefix
  end
end
438
+
439
# Job that exports the declared tables into a time-stamped folder (or zip) in
# the outbox and optionally uploads the result.
class Export < Job
  def initialize(name, &block)
    @exports = {}
    @exports_order = []
    export_to :csv # default format; the configuration block may override it
    super
  end

  # Yields each declared table name with its packed records, in declaration
  # order.
  def each
    @exports_order.each do |cname|
      yield cname, export_class(cname)
    end
  end

  # Lazily creates the folder writer the export is written to.
  def export_folder
    @export_folder ||= create_folder
  end

  # Closes the folder writer when one was created.
  def close_export_folder
    @export_folder.close if @export_folder
  end

  # Writes one '<table>.<format>' file per declared table, always closes the
  # export folder, then uploads when a transfer is configured.
  def do_run
    DataGateway.logger.debug 'resource_path ' + DataGateway.resource_path
    begin
      each do |cname, records|
        file_name = "#{cname}.#{self.format_id}"
        DataGateway.logger.debug "Exporting archive #{file_name}"
        write_file(file_name, records)
      end
    ensure
      close_export_folder
    end

    transfer.upload if transfer
  end

  private # DSL

  # Configures the transfer used to upload the zip files in the outbox.
  def upload_to(options)
    options.merge! 'local_files' => File.join(DataGateway.outbox, "*.zip")
    transfer_for(options)
  end

  # Declares a table to export; +options+ are passed to the model's find.
  def exporting(table_name, options = {})
    @exports_order << table_name
    @exports[table_name] = options
  end

  # Selects the export format.
  def export_to(format)
    self.format_id = format
  end

  private

  # Creates '<outbox>/<timestamp>.<format>' as a plain folder in development
  # and as a zip folder otherwise.
  def create_folder(outbox = DataGateway::outbox)
    FileUtils.mkpath(outbox)
    stamp = Time.now.strftime("%Y%m%d%H%M%S")
    folder_name = File.join(outbox, "#{stamp}.#{self.format_id}")
    if APP_ENV == 'development'
      FolderWriter.new(folder_name)
    else
      ZipFolderWriter.new(folder_name)
    end
  end

  # Loads every record of +cname+, exports its attachments when resources are
  # enabled, and packs the records.
  #
  # Returns the packed data, or an empty Array when there are no records.
  def export_class(cname)
    result = []
    bm = Benchmark.measure do
      klass = DataGateway.model_for(cname.to_s.downcase.camelize)
      DataGateway.logger.info "Exporting #{klass} (#{self.format_id})"
      records = klass.find(:all, @exports[cname])
      records.each { |r| export_attachments(r) } if @use_resources
      result = pack(records) unless records.empty?
    end
    DataGateway.logger.info "data exported in #{bm}"
    result
  end

  # Copies the files named by the record's 'img*' attributes into the
  # 'attach' folder of the export.
  def export_attachments(record)
    record.attributes.keys.select { |key| key =~ /^img/i }.each do |key|
      file = record.attributes[key]
      # TODO: if the attachment path is absolute, we may need to be careful
      # when unpacking the zip
      next if file.blank?
      path = File.expand_path(file, DataGateway.resource_path)
      DataGateway.logger.info "exporting attach #{path}"
      # TODO: if the file was already exported in this run, do not repeat it
      next unless File.exist?(path)
      attach_data = File.open(path, 'rb') { |f| f.read }
      write_file(File.join('attach', file), attach_data, 'b')
    end
  end

  # Writes +data+ to +file_name+ inside the export folder.
  #
  # options - '' for text, which is run through the job encoder, or 'b' for
  #           binary data.
  #
  # Binary data is written verbatim: going through puts would append a
  # newline byte to any payload that does not already end in one.
  # NOTE(review): assumes the object yielded by export_folder.open responds
  # to #write like an IO; confirm for FolderWriter and ZipFolderWriter.
  def write_file(file_name, data, options = '')
    if options == 'b'
      export_folder.open(file_name, "w" + options) { |f| f.write data }
    else
      op = StringIO.new("", "w")
      op.puts data
      export_folder.open(file_name, "w" + options) { |f| f.puts @encoder.pack(op.string) }
    end
  end
end
542
+
543
+ # DataGateway DSL
544
# Combined reader/writer for @@resource_path.
# With +path+, stores and returns its absolute form; without it, returns the
# stored value ('' when it is nil or false).
def resource_path(path = nil)
  return @@resource_path = File.expand_path(path) if path

  @@resource_path || ''
end
551
+
552
# Establishes the ActiveRecord connection described by +conn+. When +conn+
# has a 'nax_empresa' entry and NAX has no current connection yet, the NAX
# connection is established with the same settings.
def db_connection(conn)
  ActiveRecord::Base.establish_connection(conn)
  return unless conn['nax_empresa']

  NAX::ActiveRecord::Base.establish_connection(conn) if NAX::ActiveRecord::Base.current_connection.nil?
end
558
+
559
# Establishes the NAX connection described by +connection+.
def nax_connection(connection)
  NAX::ActiveRecord::Base.establish_connection(connection)
end
562
+
563
# Declares an import job: builds an Import configured by the block, stores it
# in the @<name> instance variable and defines a reader named +name+ on the
# receiver's class.
#
# Returns the new Import job.
def import(name, &block)
  job_name = name.to_sym
  self.class.class_eval { attr_reader job_name }
  job = Import.new(job_name, &block)
  instance_variable_set("@#{job_name}", job)
  job
end
572
+
573
# Declares an export job: builds an Export configured by the block, stores it
# in the @<name> instance variable and defines a reader named +name+ on the
# receiver's class.
#
# Returns the new Export job.
def export(name, &block)
  job_name = name.to_sym
  self.class.class_eval { attr_reader job_name }
  job = Export.new(job_name, &block)
  instance_variable_set("@#{job_name}", job)
  job
end
582
+ end
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Entry point for selecting the change point (memento) class that matches a
# database connection.
class DBChangePoint
  # Derives the class name from the connection's class: the last '::' segment
  # with a trailing 'Adapter' removed and 'ChangePoint' appended (for example
  # '...::SQLServerAdapter' gives 'SQLServerChangePoint').
  #
  # Returns the top-level constant with that name.
  def self.class_for(connection)
    adapter_name = connection.class.name.split('::').last
    Object.const_get("#{adapter_name.chomp('Adapter')}ChangePoint")
  end
end
10
+
11
# Memento of the database change point per model class, keyed by class name.
class SQLServerChangePoint
  def initialize
    @change_point = {}
  end

  # Stores the result of 'select @@dbts' on the model's connection as the
  # change point for +arclass+.
  #
  # Returns the captured value.
  def capture_for(arclass)
    key = arclass.name
    @change_point[key] = arclass.connection.select_value("select @@dbts")
  end

  # Returns the stored change point for +arclass+, initialising it to
  # [0,0,0,0] when none was captured.
  def changed_point_for(arclass)
    key = arclass.name
    @change_point[key] ||= [0,0,0,0] #binary(8) in MSSQLServer
  end

  # Returns the SQL condition fragment selecting rows newer than the stored
  # change point.
  # NOTE(review): this calls to_int on the stored value, which fails for the
  # Array default set by changed_point_for and for a missing entry - confirm
  # the intended conversion.
  def changed_condition_for(arclass)
    stored = @change_point[arclass.name]
    ['DBREV > ?', stored.to_int]
  end
end