GFunk911-dataload 0.3.4 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
- :patch: 4
2
+ :patch: 0
3
3
  :major: 0
4
- :minor: 3
4
+ :minor: 8
@@ -0,0 +1,18 @@
1
# Builds and executes a single multi-row INSERT statement for a batch of rows.
#
# +rows+ must be a collection whose elements respond to +sorted_column_names+
# and +insert_values_sql+; +table_name+ is interpolated
# into the statement as-is. Attributes are presumably populated through the
# hash-style constructor supplied by FromHash -- confirm against mharris_ext.
class BatchInsert
  include FromHash
  attr_accessor :rows, :table_name

  # Column list for the statement, taken from the first row of the batch.
  fattr(:column_names) { rows.first.sorted_column_names }

  # "VALUES (...), (...)" fragment with one tuple per row.
  fattr(:values_sql) do
    tuples = rows.map { |row| row.insert_values_sql }
    "VALUES " + tuples.join(", ")
  end

  # Parenthesised, comma-separated column list.
  fattr(:columns_sql) do
    "(#{column_names.join(', ')})"
  end

  # The complete INSERT statement, terminated with a semicolon.
  fattr(:insert_sql) do
    "INSERT into %s %s %s;" % [table_name, columns_sql, values_sql]
  end

  # Runs the statement on the current ActiveRecord connection.
  def insert!
    connection = ActiveRecord::Base.connection
    connection.execute(insert_sql)
  end
end
18
+
@@ -0,0 +1,7 @@
1
# One target column: the name it is stored under and the block that computes
# its value from a source row.
class Column
  include FromHash
  attr_accessor :target_name, :blk

  # Evaluates the column's block with +row+ as +self+ (so the block can call
  # the row's methods without a receiver) and returns the block's result.
  def target_value(row)
    value_block = blk
    row.instance_eval(&value_block)
  end
end
@@ -0,0 +1,28 @@
1
# DSL evaluated by +master_dataload+. Every DSL call simply stores a setting
# on the MasterLoader singleton; +run!+ then starts the load.
class MasterLoaderDSL
  fattr(:master) { MasterLoader.instance }

  # Keeps the configuration block and immediately evaluates it against this
  # DSL object.
  def initialize(&b)
    @blk = b
    instance_eval(&@blk)
  end

  # Connection options, later handed to ActiveRecord by the master loader.
  def database(ops)
    master.db_ops = ops
  end

  # Table names in the order they should be loaded (nested arrays allowed).
  def load_order(*tables)
    ordered = tables.flatten
    master.raw_table_load_order = ordered
  end

  # Table names in the order their rows should be deleted.
  def delete_order(*tables)
    ordered = tables.flatten
    master.raw_table_delete_order = ordered
  end

  # Number of source rows per insert batch.
  def block_size(n)
    master.block_size = n
  end

  # Delegates to MasterLoader#run!.
  def run!
    master.run!
  end
end
23
+
24
# Entry point for the master DSL: evaluates the block with MasterLoaderDSL and
# runs the configured load, all inside +handle_errors+.
def master_dataload(&b)
  handle_errors { MasterLoaderDSL.new(&b).run! }
end
@@ -0,0 +1,52 @@
1
# Fan-out proxy: any message sent to a Both instance is forwarded, with the
# same arguments and block, to every object in +objs+.
class Both
  include FromHash
  fattr(:objs) { [] }

  # Forwards the call to each wrapped object in order. Returns whatever
  # +objs.each+ returns, not the individual results.
  def method_missing(sym, *args, &b)
    objs.each do |target|
      target.send(sym, *args, &b)
    end
  end
end
8
+
9
# DSL evaluated by +table_dataload+. It configures a TableLoader (source file,
# delimiter, columns) and a TableManager for one table, then registers itself
# with the MasterLoader singleton.
class TableLoaderDSL
  # Column-type words accepted as DSL methods, e.g. string(:name) { ... }.
  COLUMN_TYPES = [:string, :text, :integer, :float, :decimal, :datetime,
                  :timestamp, :time, :date, :binary, :boolean].freeze

  fattr(:table_name) { loader.table_name }
  fattr(:loader) { TableLoader.new }
  fattr(:manager) { TableManager.new }
  # Proxy used to apply one setting to both the loader and the manager.
  fattr(:both) { Both.new(:objs => [loader,manager]) }

  def master
    MasterLoader.instance
  end

  # Evaluates the DSL block against this object, then registers the fully
  # configured table with the master loader.
  def initialize(&b)
    @blk = b
    instance_eval(&@blk)
    master.add(self)
  end

  # Adds a target column. Without a block the column passes through the source
  # field of the same name.
  # NOTE: +type+ is accepted but not stored or used by this method.
  def column(name,type,&blk)
    blk ||= lambda { |x| x.send(name) }
    loader.columns << Column.new(:target_name => name, :blk => blk)
  end

  # Turns the column-type words into +column+ calls; anything else goes to the
  # default method_missing.
  def method_missing(sym,*args,&b)
    if COLUMN_TYPES.include?(sym)
      column(args.first,sym,&b)
    else
      super(sym,*args,&b)
    end
  end

  # Keeps respond_to? consistent with the dynamic methods handled above.
  def respond_to_missing?(sym, include_private = false)
    COLUMN_TYPES.include?(sym) || super
  end

  # Path of the delimited source file.
  def source(file)
    loader.source_filename = file
  end

  # Target table name, applied to both the loader and the manager.
  def table(name)
    both.table_name = name
  end

  # Loads this table on its own.
  # NOTE(review): @delete_existing_rows is never assigned in this class, so the
  # delete step does not run unless something else sets it -- confirm intent.
  def run!
    manager.delete_rows! if @delete_existing_rows
    loader.load!
  end

  # Field delimiter of the source file.
  def delimiter(x)
    loader.delimiter = x
  end
end
47
+
48
# Entry point for the per-table DSL: builds a TableLoaderDSL from the block
# inside +handle_errors+. Constructing the DSL object is the whole effect;
# nothing is loaded here.
def table_dataload(&b)
  handle_errors { TableLoaderDSL.new(&b) }
end
@@ -0,0 +1,9 @@
1
# Helpers that let a Hash of column => value act as one row of a SQL INSERT.
class Hash
  # Keys ordered by their string form, so every row hash with the same keys
  # lists its columns in the same order.
  def sorted_column_names
    keys.sort_by { |x| x.to_s }
  end

  # Returns the row as a parenthesised tuple of single-quoted literals, in
  # +sorted_column_names+ order, e.g. "('24','Bob')".
  #
  # Embedded single quotes are doubled (the standard SQL escape) so a value
  # such as "O'Brien" no longer ends the literal early and breaks -- or
  # injects into -- the statement. nil still renders as '' as before.
  # NOTE(review): backslashes are left untouched; adapters that treat
  # backslash as an escape character may need adapter-level quoting.
  def insert_values_sql
    res = sorted_column_names.map do |x|
      "'" + self[x].to_s.gsub("'", "''") + "'"
    end.join(",")
    "(#{res})"
  end
end
@@ -0,0 +1,3 @@
1
# Builds an enumerator: enum(obj, :meth, *meth_args) enumerates by calling
# obj.meth(*meth_args); the method defaults to :each.
#
# Uses Object#to_enum rather than Enumerable::Enumerator.new, because the
# Enumerable::Enumerator constant only exists on Ruby 1.8 while to_enum works
# on old and current Rubies alike. With no arguments and a block, the block
# is used as a generator via Enumerator.new.
def enum(*args,&b)
  return Enumerator.new(&b) if args.empty?
  target, *call_args = args
  target.to_enum(*call_args)
end
@@ -0,0 +1,25 @@
1
# Lets column blocks read CSV fields as methods on the row, e.g. +name+ for
# the "name" header. Values go through +safe_to_num+ before being returned.
class FasterCSV::Row
  # Dispatches on whether the header exists rather than on whether the value
  # is truthy, so a blank cell under a valid header returns its (nil) value
  # instead of raising NoMethodError.
  def method_missing(sym,*args,&b)
    name = sym.to_s
    if header?(name)
      self[name].safe_to_num
    else
      super(sym,*args,&b)
    end
  end

  # Keeps respond_to? consistent with the header readers handled above.
  def respond_to_missing?(sym, include_private = false)
    header?(sym.to_s) || super
  end
end
10
+
11
# Adds a class-level +each+ that is an alias-by-delegation for +foreach+.
class FasterCSV
  class << self
    # Passes all arguments and the block straight to FasterCSV.foreach.
    def each(*args, &b)
      foreach(*args, &b)
    end
  end
end
16
+
17
class Object
  # Returns an Integer when the receiver is a String made up entirely of
  # decimal digits; otherwise returns the receiver unchanged.
  #
  # The pattern is anchored with \A and \z (whole string) instead of ^ and $
  # (per line), so multi-line text such as "12\nabc" is no longer truncated
  # to 12. Non-strings are returned untouched without calling =~ on them,
  # since not every object responds to =~.
  def safe_to_num
    return self unless is_a?(String)
    self =~ /\A\d+\z/ ? to_i : self
  end
end
@@ -0,0 +1,35 @@
1
# Singleton that coordinates a whole run: it connects to the database, deletes
# rows from the registered tables in delete order, then loads them in load
# order.
class MasterLoader
  include Singleton
  attr_accessor_nn :raw_table_load_order, :db_ops, :block_size

  # Defaults to the reverse of the load order when no delete order is given.
  fattr(:raw_table_delete_order) { raw_table_load_order.reverse }

  # Registered table objects, looked up by name, in load order.
  fattr(:tables_in_load_order) do
    raw_table_load_order.map { |name| table_hash[name.to_s] }
  end

  # Registered table objects, looked up by name, in delete order.
  fattr(:tables_in_delete_order) do
    raw_table_delete_order.map { |name| table_hash[name.to_s] }
  end

  # Registry of table objects keyed by table name as a string.
  fattr(:table_hash) { {} }

  # Registers a table object under its table name.
  def add(tl)
    key = tl.table_name.to_s
    self.table_hash[key] = tl
  end

  def delete_rows!
    tables_in_delete_order.each do |table|
      table.manager.delete_rows!
    end
  end

  def load_rows!
    tables_in_load_order.each do |table|
      table.loader.load!
    end
  end

  # Pushes the configured block size onto every loader, then connects,
  # deletes and loads inside a +tm+ block (presumably a timing helper from
  # mharris_ext -- confirm).
  def run!
    tables_in_load_order.each { |table| table.loader.block_size = block_size }
    tm("MasterLoader run") do
      connect!
      delete_rows!
      load_rows!
    end
  end

  # Opens the ActiveRecord connection using the configured options.
  def connect!
    ActiveRecord::Base.establish_connection(db_ops)
    Dataload.log "Established Connection"
  end
end
@@ -0,0 +1,18 @@
1
# Base class for migrations generated at runtime, one anonymous subclass per
# table. The class-level accessors hold the column list, the table name and
# the block that performs the schema change.
class DataloadMigration < ActiveRecord::Migration
  class << self
    attr_accessor :cols, :table_name, :b
    include FromHash
  end

  # Returns a new anonymous subclass configured from +ops+ (applied through
  # from_hash) whose +up+ evaluates the given block in the subclass's own
  # context and then logs the table creation.
  def self.new_migration(ops,&b)
    migration_class = Class.new(DataloadMigration)
    migration_class.from_hash(ops)
    migration_class.b = b
    class << migration_class
      def up
        instance_eval(&b)
        Dataload.log "Created table #{table_name}"
      end
    end
    migration_class
  end
end
@@ -1,24 +1,56 @@
1
1
  require 'rubygems'
2
- require 'dataload'
2
+ require File.dirname(__FILE__) + '/../dataload'
3
3
 
4
4
  #setup the sample source file
5
- source_filename = File.dirname(__FILE__) + "/sample_source.csv"
5
+ source_filename = File.dirname(__FILE__) + "/../../tmp/sample_source.csv"
6
+ db_path = File.dirname(__FILE__) + "/../../tmp/sample.sqlite3"
7
+
6
8
  source_text = <<EOF
7
- name,age,city,state
8
9
  Bob Smith,24,Atlanta,GA
9
10
  Jane Doe,35,Buffalo,NY
10
11
  Evan Stein,31,Princeton,NJ
11
12
  EOF
13
+ source_text = "name,age,city,state\n" + (1..10000).map { source_text }.join
12
14
  File.create(source_filename,source_text)
13
15
 
14
16
  #load into a database, creating the table if needed
15
- dataload do
17
+ table_dataload do
18
+ # csv file the data is being sourced from
16
19
  source source_filename
17
- database :adapter => 'sqlite3', :database => "db.sqlite3", :timeout => 5000
20
+
21
+ # database/table the data should be loaded into.
22
+ # the table will be created if it does not already exist
23
+ #database :adapter => 'sqlite3', :database => db_path, :timeout => 5000
24
+ #database :adapter => 'sqlserver', :host => '192.168.1.49', :username => 'pci-tae', :password => 'fgfgf', :database => 'fgfgfgf'
18
25
  table 'people'
26
+
27
+ #field delimiter in source file
28
+ delimiter ","
29
+
30
+ # columns in the new table
31
+ # available types are string, text, integer, float, decimal, datetime, timestamp, time, date, binary, boolean
32
+ #
33
+ # The first argument is the name of the new column
34
+ # The block describes the value to be populated
35
+ #
36
+ # Example: string(:full_name) { name }
37
+ # This creates a field 'full_name' of type string in the new table, and populates it with the name field from the csv
38
+ #
39
+ # Example: boolean(:is_tall) { height_in_inches.to_i > 74 }
40
+ # Creates a field 'is_tall' and populates with true if the height_in_inches field in the csv is greater than 74
41
+ #
42
+ # A column without a block just passes through the same field in the csv
43
+ # integer(:age) creates an integer field 'age' in the new table, populated with the age field in the csv
19
44
  string(:full_name) { name }
20
45
  string(:first_name) { name.split[0] }
21
46
  string(:last_name) { name.split[1] }
22
47
  integer(:age)
23
48
  string(:city_state) { "#{city}, #{state}" }
49
+ end
50
+
51
+ master_dataload do
52
+ #database :adapter => 'sqlite3', :database => db_path, :timeout => 5000
53
+ database :adapter => 'mysql', :database => 'dataload_test', :username => 'root'
54
+ load_order :people
55
+ block_size 1000
24
56
  end
@@ -0,0 +1,72 @@
1
+ require 'rubygems'
2
begin
  # Prefer a local development checkout of mharris_ext when one exists.
  require "/Code/mharris_ext/lib/mharris_ext"
rescue LoadError
  # A failed require raises LoadError, which is not a StandardError, so a bare
  # `rescue` never reached this fallback; the class must be named explicitly.
  require 'mharris_ext'
end
7
+ require 'fastercsv'
8
+ require 'activerecord'
9
+ require 'facets/enumerable'
10
+
11
+ %w(migration column table_module batch_insert).each { |x| require File.dirname(__FILE__) + "/#{x}" }
12
+ Dir[File.dirname(__FILE__) + "/ext/*.rb"].each { |x| require x }
13
+
14
class Object
  # Declares an fattr called +name+ whose value block is evaluated against the
  # instance inside a +tm(name)+ call (presumably a timing wrapper from
  # mharris_ext -- confirm).
  def fattr_tm(name, &b)
    fattr(name) { tm(name) { instance_eval(&b) } }
  end
end
23
+
24
# Reads a delimited source file in batches, maps every source row to a hash of
# target column values, and bulk-inserts each batch into the target table.
class TableLoader
  include TableModule
  attr_accessor_nn :source_filename
  fattr(:delimiter) { "," }
  fattr(:block_size) { 1000 }
  fattr(:columns) { [] }

  # Enumerator over groups of source rows. The inner enumerator reads the file
  # with headers enabled; the outer one groups rows via +each_by+ (presumably
  # the facets/enumerable extension -- confirm).
  fattr(:source_row_groups) do
    e = enum(FasterCSV,:foreach,source_filename,:headers => true, :col_sep => delimiter)
    enum(e,:each_by,block_size)
  end

  # Maps one source row to { target_name => value } using every column.
  def target_hash_for_row(row)
    columns.inject({}) do |h,col|
      h[col.target_name] = col.target_value(row)
      h
    end
  end

  # Maps a group of source rows to target hashes.
  def target_hashes(rows)
    rows.map { |x| target_hash_for_row(x) }
  end

  # Yields each group's target hashes together with the running row count,
  # computed as (group index * block_size) + rows in this group.
  def target_hash_groups
    source_row_groups.each_with_index do |rows,i|
      yield(target_hashes(rows),i*block_size+rows.size)
    end
  end

  # Creates the table if needed, then inserts the source one batch at a time,
  # logging progress after every batch.
  def load!
    migrate!
    Dataload.log "Starting load of table '#{table_name}'"
    total = 0
    target_hash_groups do |hs,num_inserted|
      BatchInsert.new(:rows => hs, :table_name => table_name).insert!
      # Report the real batch size: the final batch is usually smaller than
      # block_size, so logging block_size overstated the rows inserted.
      Dataload.log "Inserted #{hs.size} rows into table '#{table_name}'. Total of #{num_inserted} rows inserted."
      total = num_inserted
    end
    Dataload.log "Finished load of table '#{table_name}'. Loaded #{total} rows."
  end
end
57
+
58
# Mixin that creates the target table on demand through a generated
# migration. Mixed into TableLoader at the bottom of this block.
module TableCreation
  # Generated migration for this table. The block runs in the migration
  # class's context, where +cols+ and +table_name+ are its class accessors.
  # NOTE: every column is created with type :string here.
  fattr(:migration) do
    DataloadMigration.new_migration(:cols => columns, :table_name => table_name) do
      create_table(table_name) do |t|
        cols.each { |col| t.column(col.target_name, :string) }
      end
    end
  end

  # Runs the migration only when the table does not exist yet.
  def migrate!
    return if ar_cls.table_exists?
    migration.migrate(:up)
  end
end
TableLoader.send(:include, TableCreation)
@@ -0,0 +1,9 @@
1
# Handles clearing a table before it is reloaded.
class TableManager
  include TableModule

  # Deletes every row from the table, logging the row count beforehand and a
  # confirmation afterwards. Does nothing when the table does not exist.
  def delete_rows!
    if ar_cls.table_exists?
      Dataload.log "Deleting #{ar_cls.count} rows from table '#{table_name}'"
      ar_cls.connection.execute("DELETE from #{table_name}")
      Dataload.log "Deleted rows from table '#{table_name}'"
    end
  end
end
@@ -0,0 +1,7 @@
1
# Shared by TableLoader and TableManager: holds the table name and an
# anonymous ActiveRecord class bound to that table.
module TableModule
  attr_accessor_nn :table_name

  # Anonymous ActiveRecord::Base subclass whose table name is +table_name+.
  fattr(:ar_cls) do
    model = Class.new(ActiveRecord::Base)
    model.set_table_name(table_name)
    model
  end
end
7
+
data/lib/dataload.rb CHANGED
@@ -1 +1,34 @@
1
- require File.dirname(__FILE__) + "/dataload/loader"
1
# Runs the given block and returns its value. When the block raises a
# StandardError, the message and backtrace are written to the Dataload log,
# the message is printed, and a generic RuntimeError is raised in its place.
def handle_errors
  begin
    yield
  rescue => exp
    details = "#{exp.message}\n#{exp.backtrace.join("\n")}"
    Dataload.log details
    puts exp.message
    raise "Error occured and logged. Exiting."
  end
end
9
+
10
+ require File.dirname(__FILE__) + "/dataload/table_loader"
11
+ require File.dirname(__FILE__) + "/dataload/table_manager"
12
+ require File.dirname(__FILE__) + "/dataload/master_loader"
13
+ Dir[File.dirname(__FILE__) + "/dataload/dsl/*.rb"].each { |x| require x }
14
+
15
# Namespace for the class-level logging facade used throughout the library.
class Dataload
  class << self
    # Logger instance, created on first use.
    fattr(:logger) { DataloadLogger.new }

    # Writes +str+ through the shared logger.
    def log(str)
      sink = logger
      sink.log(str)
    end
  end
end
23
+
24
# File logger that writes to a timestamp-named log file.
class DataloadLogger
  # Absolute log path of the form dataload_<YYYYmmddHHMMSS>.log, resolved
  # against the current working directory. Announces the path on stdout when
  # it is computed.
  fattr(:filename) do
    stamp = Time.now.strftime("%Y%m%d%H%M%S")
    path = File.expand_path("dataload_#{stamp}.log")
    puts "Logging to #{path}"
    path
  end

  # Appends one timestamp-prefixed line to the log file. File.append and
  # Time#short_dt are not core Ruby -- presumably mharris_ext extensions.
  def log(str)
    target = filename
    File.append(target, "#{Time.now.short_dt} #{str}\n")
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: GFunk911-dataload
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Harris
@@ -52,8 +52,19 @@ extra_rdoc_files:
52
52
  - README.rdoc
53
53
  - LICENSE
54
54
  files:
55
- - lib/dataload/loader.rb
55
+ - lib/dataload/batch_insert.rb
56
+ - lib/dataload/column.rb
57
+ - lib/dataload/dsl/master_loader_dsl.rb
58
+ - lib/dataload/dsl/table_loader_dsl.rb
59
+ - lib/dataload/ext/active_record.rb
60
+ - lib/dataload/ext/enumerator.rb
61
+ - lib/dataload/ext/faster_csv.rb
62
+ - lib/dataload/master_loader.rb
63
+ - lib/dataload/migration.rb
56
64
  - lib/dataload/sample.rb
65
+ - lib/dataload/table_loader.rb
66
+ - lib/dataload/table_manager.rb
67
+ - lib/dataload/table_module.rb
57
68
  - lib/dataload.rb
58
69
  - spec/dataload_spec.rb
59
70
  - spec/spec_helper.rb
@@ -1,126 +0,0 @@
1
- require 'rubygems'
2
- require 'mharris_ext'
3
- require 'fastercsv'
4
- require 'activerecord'
5
-
6
- class FasterCSV::Row
7
- def method_missing(sym,*args,&b)
8
- if self[sym.to_s]
9
- self[sym.to_s]
10
- else
11
- super(sym,*args,&b)
12
- end
13
- end
14
- end
15
-
16
- class Loader
17
- fattr(:columns) { [] }
18
- attr_accessor :source_filename, :db_ops, :table_name
19
- fattr(:source_rows) do
20
- res = []
21
- FasterCSV.foreach(source_filename, :headers => true) do |row|
22
- res << row
23
- end
24
- res
25
- end
26
- def target_hash_for_row(row)
27
- h = {}
28
- columns.each do |col|
29
- h[col.target_name] = col.target_value(row)
30
- end
31
- h
32
- end
33
- def target_hashes
34
- source_rows.map { |x| target_hash_for_row(x) }
35
- end
36
- def target_column_names
37
- columns.map { |x| x.target_name }
38
- end
39
- def new_struct
40
- Struct.new(*target_column_names)
41
- end
42
- fattr(:migration) do
43
- raise "must define table" unless table_name
44
- cls = Class.new(ActiveRecord::Migration)
45
- class << cls
46
- attr_accessor :cols, :table_name
47
- end
48
- cls.cols = columns
49
- cls.table_name = table_name
50
- puts "Table: #{table_name}"
51
- cls.class_eval do
52
- def self.up
53
- create_table table_name do |t|
54
- cols.each do |col|
55
- t.column col.target_name, :string
56
- end
57
- end
58
- end
59
- end
60
- cls
61
- end
62
- fattr(:ar) do
63
- cls = Class.new(ActiveRecord::Base)
64
- cls.send(:set_table_name, table_name)
65
- cls
66
- end
67
- def migrate!
68
- ar.find(:first)
69
- rescue => exp
70
- puts "find failed"
71
- puts exp.inspect
72
- migration.migrate(:up)
73
- end
74
- fattr(:ar_objects) do
75
- target_hashes.map { |h| ar.new(h) }
76
- end
77
- def load!
78
- ActiveRecord::Base.establish_connection(db_ops)
79
- migrate!
80
- ar_objects.each { |x| x.save! }
81
- end
82
- end
83
-
84
- class Column
85
- include FromHash
86
- attr_accessor :target_name, :blk
87
- def target_value(row)
88
- if blk.arity == 1
89
- blk.call(row)
90
- else
91
- row.instance_eval(&blk)
92
- end
93
- end
94
- end
95
-
96
- class LoaderDSL
97
- fattr(:loader) { Loader.new }
98
- def column(name,type,&blk)
99
- blk ||= lambda { |x| x.send(name) }
100
- loader.columns << Column.new(:target_name => name, :blk => blk)
101
- end
102
- def method_missing(sym,*args,&b)
103
- if [:string, :text, :integer, :float, :decimal, :datetime, :timestamp, :time, :date, :binary, :boolean].include?(sym)
104
- column(args.first,sym,&b)
105
- else
106
- super(sym,*args,&b)
107
- end
108
- end
109
- def source(file)
110
- loader.source_filename = file
111
- end
112
- def database(ops)
113
- loader.db_ops = ops
114
- end
115
- def table(name)
116
- loader.table_name = name
117
- end
118
- end
119
-
120
- def dataload(&b)
121
- dsl = LoaderDSL.new
122
- dsl.instance_eval(&b)
123
- dsl.loader.load!
124
- puts "Row Count: " + dsl.loader.ar.find(:all).size.to_s
125
- end
126
-