GFunk911-dataload 0.3.4 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +2 -2
- data/lib/dataload/batch_insert.rb +18 -0
- data/lib/dataload/column.rb +7 -0
- data/lib/dataload/dsl/master_loader_dsl.rb +28 -0
- data/lib/dataload/dsl/table_loader_dsl.rb +52 -0
- data/lib/dataload/ext/active_record.rb +9 -0
- data/lib/dataload/ext/enumerator.rb +3 -0
- data/lib/dataload/ext/faster_csv.rb +25 -0
- data/lib/dataload/master_loader.rb +35 -0
- data/lib/dataload/migration.rb +18 -0
- data/lib/dataload/sample.rb +37 -5
- data/lib/dataload/table_loader.rb +72 -0
- data/lib/dataload/table_manager.rb +9 -0
- data/lib/dataload/table_module.rb +7 -0
- data/lib/dataload.rb +34 -1
- metadata +13 -2
- data/lib/dataload/loader.rb +0 -126
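
The headline change from 0.3.4 is that the single Loader/LoaderDSL in lib/dataload/loader.rb (deleted below) is split into TableLoader, TableManager, MasterLoader, and two DSL entry points, with per-row ActiveRecord saves replaced by batched multi-row INSERTs. A minimal usage sketch, condensed from the sample.rb shipped in this version; the CSV path and MySQL connection options are illustrative values taken from that sample, not requirements:

require 'dataload'

# Describe one target table; the DSL registers it with MasterLoader.instance.
table_dataload do
  source "people.csv"          # CSV file to load from (illustrative path)
  table 'people'               # table to create (if missing) and load
  delimiter ","
  string(:full_name) { name }  # new column; the block computes its value per CSV row
  integer(:age)                # no block: copies the same-named CSV field
end

# Connect, then delete and load the registered tables in the given order.
master_dataload do
  database :adapter => 'mysql', :database => 'dataload_test', :username => 'root'
  load_order :people
  block_size 1000              # rows per batched INSERT
end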
data/VERSION.yml
CHANGED

data/lib/dataload/batch_insert.rb
ADDED
@@ -0,0 +1,18 @@
+class BatchInsert
+  include FromHash
+  attr_accessor :rows, :table_name
+  fattr(:column_names) { rows.first.sorted_column_names }
+  fattr(:values_sql) do
+    "VALUES " + rows.map { |x| x.insert_values_sql }.join(", ")
+  end
+  fattr(:columns_sql) do
+    "(" + column_names.join(", ") + ")"
+  end
+  fattr(:insert_sql) do
+    "INSERT into #{table_name} #{columns_sql} #{values_sql};"
+  end
+  def insert!
+    ActiveRecord::Base.connection.execute(insert_sql)
+  end
+end
+

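BatchInsert turns one block of row hashes into a single multi-row INSERT statement. A rough illustration of the output; the exact column ordering and value quoting come from the sorted_column_names and insert_values_sql helpers defined elsewhere in the gem, which this extract does not show:

# Hypothetical two-row block destined for the 'people' table:
BatchInsert.new(:rows => rows, :table_name => "people").insert_sql
# => something like:
# "INSERT into people (age, full_name) VALUES ('24', 'Bob Smith'), ('35', 'Jane Doe');"
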
data/lib/dataload/dsl/master_loader_dsl.rb
ADDED
@@ -0,0 +1,28 @@
+class MasterLoaderDSL
+  fattr(:master) { MasterLoader.instance }
+  def initialize(&b)
+    @blk = b
+    instance_eval(&b)
+  end
+  def database(ops)
+    master.db_ops = ops
+  end
+  def load_order(*tables)
+    master.raw_table_load_order = tables.flatten
+  end
+  def delete_order(*tables)
+    master.raw_table_delete_order = tables.flatten
+  end
+  def block_size(n)
+    master.block_size = n
+  end
+  def run!
+    master.run!
+  end
+end
+
+def master_dataload(&b)
+  handle_errors do
+    MasterLoaderDSL.new(&b).run!
+  end
+end

data/lib/dataload/dsl/table_loader_dsl.rb
ADDED
@@ -0,0 +1,52 @@
+class Both
+  include FromHash
+  fattr(:objs) { [] }
+  def method_missing(sym,*args,&b)
+    objs.each { |x| x.send(sym,*args,&b) }
+  end
+end
+
+class TableLoaderDSL
+  fattr(:table_name) { loader.table_name }
+  fattr(:loader) { TableLoader.new }
+  fattr(:manager) { TableManager.new }
+  fattr(:both) { Both.new(:objs => [loader,manager]) }
+  def master
+    MasterLoader.instance
+  end
+  def initialize(&b)
+    @blk = b
+    instance_eval(&@blk)
+    master.add(self)
+  end
+  def column(name,type,&blk)
+    blk ||= lambda { |x| x.send(name) }
+    loader.columns << Column.new(:target_name => name, :blk => blk)
+  end
+  def method_missing(sym,*args,&b)
+    if [:string, :text, :integer, :float, :decimal, :datetime, :timestamp, :time, :date, :binary, :boolean].include?(sym)
+      column(args.first,sym,&b)
+    else
+      super(sym,*args,&b)
+    end
+  end
+  def source(file)
+    loader.source_filename = file
+  end
+  def table(name)
+    both.table_name = name
+  end
+  def run!
+    manager.delete_rows! if @delete_existing_rows
+    loader.load!
+  end
+  def delimiter(x)
+    loader.delimiter = x
+  end
+end
+
+def table_dataload(&b)
+  handle_errors do
+    TableLoaderDSL.new(&b)
+  end
+end

data/lib/dataload/ext/faster_csv.rb
ADDED
@@ -0,0 +1,25 @@
+class FasterCSV::Row
+  def method_missing(sym,*args,&b)
+    if self[sym.to_s]
+      self[sym.to_s].safe_to_num
+    else
+      super(sym,*args,&b)
+    end
+  end
+end
+
+class FasterCSV
+  def self.each(*args,&b)
+    foreach(*args,&b)
+  end
+end
+
+class Object
+  def safe_to_num
+    if self =~ /^\d+$/
+      to_i
+    else
+      self
+    end
+  end
+end

data/lib/dataload/master_loader.rb
ADDED
@@ -0,0 +1,35 @@
+class MasterLoader
+  include Singleton
+  attr_accessor_nn :raw_table_load_order, :db_ops, :block_size
+  fattr(:raw_table_delete_order) { raw_table_load_order.reverse }
+  fattr(:tables_in_load_order) do
+    raw_table_load_order.map { |x| table_hash[x.to_s] }
+  end
+  fattr(:tables_in_delete_order) do
+    raw_table_delete_order.map { |x| table_hash[x.to_s] }
+  end
+  fattr(:table_hash) { {} }
+  def add(tl)
+    self.table_hash[tl.table_name.to_s] = tl
+  end
+  def delete_rows!
+    tables_in_delete_order.each { |t| t.manager.delete_rows! }
+  end
+  def load_rows!
+    tables_in_load_order.each { |t| t.loader.load! }
+  end
+  def run!
+    tables_in_load_order.each do |t|
+      t.loader.block_size = block_size
+    end
+    tm("MasterLoader run") do
+      connect!
+      delete_rows!
+      load_rows!
+    end
+  end
+  def connect!
+    ActiveRecord::Base.establish_connection(db_ops)
+    Dataload.log "Established Connection"
+  end
+end

data/lib/dataload/migration.rb
ADDED
@@ -0,0 +1,18 @@
+class DataloadMigration < ActiveRecord::Migration
+  class << self
+    attr_accessor :cols, :table_name, :b
+    include FromHash
+  end
+  def self.new_migration(ops,&b)
+    cls = Class.new(DataloadMigration)
+    cls.from_hash(ops)
+    cls.b = b
+    cls.class_eval do
+      def self.up
+        instance_eval(&b)
+        Dataload.log "Created table #{table_name}"
+      end
+    end
+    cls
+  end
+end

data/lib/dataload/sample.rb
CHANGED
@@ -1,24 +1,56 @@
 require 'rubygems'
-require 'dataload'
+require File.dirname(__FILE__) + '/../dataload'
 
 #setup the sample source file
-source_filename = File.dirname(__FILE__) + "/sample_source.csv"
+source_filename = File.dirname(__FILE__) + "/../../tmp/sample_source.csv"
+db_path = File.dirname(__FILE__) + "/../../tmp/sample.sqlite3"
+
 source_text = <<EOF
-name,age,city,state
 Bob Smith,24,Atlanta,GA
 Jane Doe,35,Buffalo,NY
 Evan Stein,31,Princeton,NJ
 EOF
+source_text = "name,age,city,state\n" + (1..10000).map { source_text }.join
 File.create(source_filename,source_text)
 
 #load into a database, creating the table if needed
-
+table_dataload do
+  # csv file the data is being sourced from
   source source_filename
-
+
+  # database/table the data should be loaded into.
+  # the table will be created if it does not already exist
+  #database :adapter => 'sqlite3', :database => db_path, :timeout => 5000
+  #database :adapter => 'sqlserver', :host => '192.168.1.49', :username => 'pci-tae', :password => 'fgfgf', :database => 'fgfgfgf'
   table 'people'
+
+  #field delimiter in source file
+  delimiter ","
+
+  # columns in the new table
+  # available types are string, text, integer, float, decimal, datetime, timestamp, time, date, binary, boolean
+  #
+  # The first argument is the name of the new column
+  # The block describes the value to be populated
+  #
+  # Example: string(:full_name) { name }
+  # This creates a field 'full_name' of type string in the new table, and populates it with the name field from the csv
+  #
+  # Example: boolean(:is_tall) { height_in_inches.to_i > 74 }
+  # Creates a field 'is_tall' and populates with true if the height_in_inches field in the csv is greater than 74
+  #
+  # A column without a block just passes through the same field in the csv
+  # integer(:age) creates an integer field 'age' in the new table, populated with the age field in the csv
   string(:full_name) { name }
   string(:first_name) { name.split[0] }
   string(:last_name) { name.split[1] }
   integer(:age)
   string(:city_state) { "#{city}, #{state}" }
+end
+
+master_dataload do
+  #database :adapter => 'sqlite3', :database => db_path, :timeout => 5000
+  database :adapter => 'mysql', :database => 'dataload_test', :username => 'root'
+  load_order :people
+  block_size 1000
 end

data/lib/dataload/table_loader.rb
ADDED
@@ -0,0 +1,72 @@
+require 'rubygems'
+begin
+  require "/Code/mharris_ext/lib/mharris_ext"
+rescue
+  require 'mharris_ext'
+end
+require 'fastercsv'
+require 'activerecord'
+require 'facets/enumerable'
+
+%w(migration column table_module batch_insert).each { |x| require File.dirname(__FILE__) + "/#{x}" }
+Dir[File.dirname(__FILE__) + "/ext/*.rb"].each { |x| require x }
+
+class Object
+  def fattr_tm(name,&b)
+    fattr(name) do
+      tm(name) do
+        instance_eval(&b)
+      end
+    end
+  end
+end
+
+class TableLoader
+  include TableModule
+  attr_accessor_nn :source_filename
+  fattr(:delimiter) { "," }
+  fattr(:block_size) { 1000 }
+  fattr(:columns) { [] }
+  fattr(:source_row_groups) do
+    e = enum(FasterCSV,:foreach,source_filename,:headers => true, :col_sep => delimiter)
+    enum(e,:each_by,block_size)
+  end
+  def target_hash_for_row(row)
+    columns.inject({}) { |h,col| h.merge(col.target_name => col.target_value(row)) }
+  end
+  def target_hashes(rows)
+    rows.map { |x| target_hash_for_row(x) }
+  end
+  def target_hash_groups
+    source_row_groups.each_with_index do |rows,i|
+      yield(target_hashes(rows),i*block_size+rows.size)
+    end
+  end
+  def load!
+    migrate!
+    Dataload.log "Starting load of table '#{table_name}'"
+    total = 0
+    target_hash_groups do |hs,num_inserted|
+      BatchInsert.new(:rows => hs, :table_name => table_name).insert!
+      Dataload.log "Inserted #{block_size} rows into table '#{table_name}'. Total of #{num_inserted} rows inserted."
+      total = num_inserted
+    end
+    Dataload.log "Finished load of table '#{table_name}'. Loaded #{total} rows."
+  end
+end
+
+module TableCreation
+  fattr(:migration) do
+    DataloadMigration.new_migration(:cols => columns, :table_name => table_name) do
+      create_table table_name do |t|
+        cols.each do |col|
+          t.column col.target_name, :string
+        end
+      end
+    end
+  end
+  def migrate!
+    migration.migrate(:up) unless ar_cls.table_exists?
+  end
+end
+TableLoader.send(:include,TableCreation)

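Unlike the deleted Loader, which read the entire CSV into memory and saved each row through ActiveRecord, TableLoader streams the file in block_size groups: target_hash_groups yields each group of column hashes along with a running row count, and load! hands each group to BatchInsert. A hypothetical trace, assuming block_size 2 and a 3-row CSV (the enum and each_by helpers are provided by dependencies not shown in this extract):

loader = TableLoader.new
loader.source_filename = "people.csv"   # illustrative path
loader.block_size = 2
loader.target_hash_groups do |hashes, count|
  p [hashes.size, count]
end
# => [2, 2]   first block of two rows, two rows seen so far
# => [1, 3]   final partial block, three rows in total
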
data/lib/dataload/table_manager.rb
ADDED
@@ -0,0 +1,9 @@
+class TableManager
+  include TableModule
+  def delete_rows!
+    return unless ar_cls.table_exists?
+    Dataload.log "Deleting #{ar_cls.count} rows from table '#{table_name}'"
+    ar_cls.connection.execute("DELETE from #{table_name}")
+    Dataload.log "Deleted rows from table '#{table_name}'"
+  end
+end

data/lib/dataload.rb
CHANGED
@@ -1 +1,34 @@
-
+def handle_errors
+  yield
+rescue => exp
+  msg = [exp.message,exp.backtrace.join("\n")].join("\n")
+  Dataload.log msg
+  puts exp.message
+  raise "Error occured and logged. Exiting."
+end
+
+require File.dirname(__FILE__) + "/dataload/table_loader"
+require File.dirname(__FILE__) + "/dataload/table_manager"
+require File.dirname(__FILE__) + "/dataload/master_loader"
+Dir[File.dirname(__FILE__) + "/dataload/dsl/*.rb"].each { |x| require x }
+
+class Dataload
+  class << self
+    fattr(:logger) { DataloadLogger.new }
+    def log(str)
+      logger.log(str)
+    end
+  end
+end
+
+class DataloadLogger
+  def log(str)
+    File.append(filename,"#{Time.now.short_dt} #{str}\n")
+  end
+  fattr(:filename) do
+    t = Time.now.strftime("%Y%m%d%H%M%S")
+    res = File.expand_path("dataload_#{t}.log")
+    puts "Logging to #{res}"
+    res
+  end
+end

metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: GFunk911-dataload
 version: !ruby/object:Gem::Version
-  version: 0.3.4
+  version: 0.8.0
 platform: ruby
 authors:
 - Mike Harris

@@ -52,8 +52,19 @@ extra_rdoc_files:
 - README.rdoc
 - LICENSE
 files:
-- lib/dataload/loader.rb
+- lib/dataload/batch_insert.rb
+- lib/dataload/column.rb
+- lib/dataload/dsl/master_loader_dsl.rb
+- lib/dataload/dsl/table_loader_dsl.rb
+- lib/dataload/ext/active_record.rb
+- lib/dataload/ext/enumerator.rb
+- lib/dataload/ext/faster_csv.rb
+- lib/dataload/master_loader.rb
+- lib/dataload/migration.rb
 - lib/dataload/sample.rb
+- lib/dataload/table_loader.rb
+- lib/dataload/table_manager.rb
+- lib/dataload/table_module.rb
 - lib/dataload.rb
 - spec/dataload_spec.rb
 - spec/spec_helper.rb

data/lib/dataload/loader.rb
DELETED
@@ -1,126 +0,0 @@
-require 'rubygems'
-require 'mharris_ext'
-require 'fastercsv'
-require 'activerecord'
-
-class FasterCSV::Row
-  def method_missing(sym,*args,&b)
-    if self[sym.to_s]
-      self[sym.to_s]
-    else
-      super(sym,*args,&b)
-    end
-  end
-end
-
-class Loader
-  fattr(:columns) { [] }
-  attr_accessor :source_filename, :db_ops, :table_name
-  fattr(:source_rows) do
-    res = []
-    FasterCSV.foreach(source_filename, :headers => true) do |row|
-      res << row
-    end
-    res
-  end
-  def target_hash_for_row(row)
-    h = {}
-    columns.each do |col|
-      h[col.target_name] = col.target_value(row)
-    end
-    h
-  end
-  def target_hashes
-    source_rows.map { |x| target_hash_for_row(x) }
-  end
-  def target_column_names
-    columns.map { |x| x.target_name }
-  end
-  def new_struct
-    Struct.new(*target_column_names)
-  end
-  fattr(:migration) do
-    raise "must define table" unless table_name
-    cls = Class.new(ActiveRecord::Migration)
-    class << cls
-      attr_accessor :cols, :table_name
-    end
-    cls.cols = columns
-    cls.table_name = table_name
-    puts "Table: #{table_name}"
-    cls.class_eval do
-      def self.up
-        create_table table_name do |t|
-          cols.each do |col|
-            t.column col.target_name, :string
-          end
-        end
-      end
-    end
-    cls
-  end
-  fattr(:ar) do
-    cls = Class.new(ActiveRecord::Base)
-    cls.send(:set_table_name, table_name)
-    cls
-  end
-  def migrate!
-    ar.find(:first)
-  rescue => exp
-    puts "find failed"
-    puts exp.inspect
-    migration.migrate(:up)
-  end
-  fattr(:ar_objects) do
-    target_hashes.map { |h| ar.new(h) }
-  end
-  def load!
-    ActiveRecord::Base.establish_connection(db_ops)
-    migrate!
-    ar_objects.each { |x| x.save! }
-  end
-end
-
-class Column
-  include FromHash
-  attr_accessor :target_name, :blk
-  def target_value(row)
-    if blk.arity == 1
-      blk.call(row)
-    else
-      row.instance_eval(&blk)
-    end
-  end
-end
-
-class LoaderDSL
-  fattr(:loader) { Loader.new }
-  def column(name,type,&blk)
-    blk ||= lambda { |x| x.send(name) }
-    loader.columns << Column.new(:target_name => name, :blk => blk)
-  end
-  def method_missing(sym,*args,&b)
-    if [:string, :text, :integer, :float, :decimal, :datetime, :timestamp, :time, :date, :binary, :boolean].include?(sym)
-      column(args.first,sym,&b)
-    else
-      super(sym,*args,&b)
-    end
-  end
-  def source(file)
-    loader.source_filename = file
-  end
-  def database(ops)
-    loader.db_ops = ops
-  end
-  def table(name)
-    loader.table_name = name
-  end
-end
-
-def dataload(&b)
-  dsl = LoaderDSL.new
-  dsl.instance_eval(&b)
-  dsl.loader.load!
-  puts "Row Count: " + dsl.loader.ar.find(:all).size.to_s
-end
-