data_transport 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/data_transport/data_store/active_record.rb +210 -0
- data/lib/data_transport/data_store/csv_file.rb +25 -0
- data/lib/data_transport/data_store/file.rb +186 -0
- data/lib/data_transport/data_store.rb +27 -0
- data/lib/data_transport/record/destination.rb +28 -0
- data/lib/data_transport/record/source.rb +24 -0
- data/lib/data_transport.rb +52 -0
- metadata +68 -0
@@ -0,0 +1,210 @@
|
|
1
|
+
module DataTransport
  class DataStore
    # Data store that reads and writes records in a database via ActiveRecord.
    # This class is specifically optimized for reading and writing large
    # numbers of records, providing a significant advantage over using
    # ActiveRecord directly.
    class ActiveRecord < DataStore
      # There are two ways to initialize this data store. The first is by
      # specifying one of your ActiveRecord models:
      #
      #   DataTransport::DataStore::ActiveRecord.new :class => MyModel
      #
      # The second is by providing an ActiveRecord database specification (as
      # read from database.yml, for example) and a table name:
      #
      #   db_spec = ActiveRecord::Base.configurations["other_app_#{RAILS_ENV}"]
      #   DataTransport::DataStore::ActiveRecord.new(
      #     :connection => db_spec,
      #     :table_name => "sprockets"
      #   )
      #
      # The second form is useful for importing or exporting data in non-Rails
      # applications.
      #
      # In addition, the following options are accepted:
      #
      # conditions::     Conditions describing which records to read. This can
      #                  be anything that ActiveRecord will recognize, such as
      #                  a hash table, an array with substitutions, or raw SQL.
      #                  Default is nil (no conditions, read all records).
      # truncate::       If true, the table will be truncated before any
      #                  records are written. On databases that support it,
      #                  this is performed by executing a TRUNCATE TABLE query;
      #                  all other databases use ActiveRecord's delete_all
      #                  method.
      # ignore_errors::  If true, errors that occur during record insertion
      #                  will be ignored. This is useful if your table has a
      #                  unique index and you want to silently drop records
      #                  with duplicate keys. Currently this only works on
      #                  MySQL. Default is false.
      # max_sql_length:: Maximum permissible length of an SQL query, in bytes.
      #                  Rows to be inserted are buffered until the largest
      #                  possible INSERT statement has been generated, at which
      #                  point the statement is executed and a new INSERT
      #                  statement begins. The default value varies depending
      #                  on what type of database you're connected to. With
      #                  SQLite, the default is 1,000,000. With MySQL, the
      #                  default is the value of the +max_allowed_packet+
      #                  variable minus 512. With all other databases, the
      #                  default is 16,777,216.
      def initialize(options = {})
        super()
        # Extract options.
        @class          = options.delete(:class)
        @connection     = options.delete(:connection)
        @table_name     = options.delete(:table_name)
        @conditions     = options.delete(:conditions)
        @truncate       = options.delete(:truncate)
        @ignore_errors  = options.delete(:ignore_errors)
        @max_sql_length = options.delete(:max_sql_length)
        # Make sure a class or connection and table name was provided.
        if @class.nil? && (@connection.nil? || @table_name.nil?)
          raise(ArgumentError, "missing required option `class', or `connection' and `table_name'")
        end
        raise(TypeError, "class must be a class") if @class && !@class.is_a?(Class)
        # If connection specs were provided instead of a class, make an
        # anonymous ActiveRecord subclass.
        unless @class
          @class = Class.new(::ActiveRecord::Base)
          @class.set_table_name @table_name
          @class.establish_connection @connection
        end
        # Make sure the class descends from ActiveRecord::Base.
        klass = @class.superclass
        is_active_record = false
        while klass
          if klass == ::ActiveRecord::Base
            is_active_record = true
            break
          end
          klass = klass.superclass
        end
        raise(TypeError, "class must descend from ActiveRecord::Base") unless is_active_record
        # If ignore_errors is true, make sure we're connected to a MySQL
        # database. We don't use is_a? because if the MySQL adapter isn't
        # loaded, referencing its class throws a NameError.
        if @ignore_errors
          unless @class.connection.class.to_s ==
                 "ActiveRecord::ConnectionAdapters::MysqlAdapter"
            raise ArgumentError, "ignore_errors can only be used with a MySQL database"
          end
        end
        # Check for unknown options. (options is a Hash; Hash has no #join,
        # so list its keys explicitly.)
        unless options.empty?
          raise(ArgumentError, "unrecognized options: `#{options.keys.join("', `")}'")
        end
        # Figure out how much data the database can handle in one query. See
        # the note above in the ignore_errors compatibility check about using
        # stringified class names.
        if @max_sql_length
          @max_sql_length = @max_sql_length.to_i
        else
          case @class.connection.class.to_s
          when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
            rows = @class.connection.select_all("SHOW VARIABLES LIKE 'max_allowed_packet'")
            @max_sql_length = rows.first["Value"].to_i - 512
          when /\AActiveRecord::ConnectionAdapters::SQLite3?Adapter\Z/
            @max_sql_length = 1_000_000
          else
            @max_sql_length = 16_777_216
          end
        end
        # Fetch column information, keyed by column name.
        @columns = {}
        @class.columns.each {|c| @columns[c.name.to_sym] = c}
      end

      # The ActiveRecord model class backing this data store.
      def klass # :nodoc:
        @class
      end

      # Returns the number of records in the table that match the data store's
      # conditions.
      def count
        @class.count(:conditions => @conditions)
      end

      def each_record(batch_size = nil) # :nodoc:
        conn = @class.connection
        column_names = conn.columns(@class.table_name).collect {|c| c.name}

        offset = 0
        record = {}
        base_query = "SELECT * FROM #{conn.quote_table_name(@class.table_name)}"
        @class.send(:add_conditions!, base_query, @conditions) unless @conditions.nil?
        loop do
          sql = base_query.dup
          # Only paginate when a batch size was given. (The original
          # unconditionally did `offset += batch_size`, which raised when
          # batch_size was nil -- the documented default.)
          if batch_size
            conn.add_limit_offset!(sql, :limit => batch_size, :offset => offset)
            offset += batch_size
          end
          rows = conn.select_rows(sql)
          break if rows.empty?
          rows.each do |row|
            # Reuse one hash to avoid allocating a record per row.
            record.clear
            column_names.each_with_index do |column_name, i|
              column_name = column_name.to_sym
              record[column_name] = @columns[column_name].type_cast(row[i])
            end
            yield record
          end
          # An unpaginated query already returned every row.
          break unless batch_size
        end
      end

      def write_record(record) # :nodoc:
        conn = @class.connection
        # If no SQL has been produced yet, start an INSERT statement.
        @sql_buffer ||= start_insert_sql(record)
        # Convert the record into a string of quoted values.
        values = []
        record.each {|k, v| values << conn.quote(v, @columns[k])}
        values = "(#{values.join ","}),"
        # Write the record.
        if @max_sql_length.nil?
          # We have no information on the database's maximum allowed packet
          # size, so it's safest to write the record immediately. Discard the
          # buffer afterwards so the executed SQL is not appended to again.
          @sql_buffer << values
          finalize
          @sql_buffer = nil
        elsif @sql_buffer.length + values.length > @max_sql_length
          # Appending this record to the SQL buffer would exceed the maximum
          # allowed packet size. Send the buffer to the database and start a
          # new statement with this record. (Compare against the size of the
          # generated SQL fragment, not record.length, which is merely the
          # number of fields.)
          finalize
          @sql_buffer = start_insert_sql(record)
          @sql_buffer << values
        else
          # This record will not cause the SQL buffer to exceed the maximum
          # allowed packet size. Append it to the SQL buffer.
          @sql_buffer << values
        end
      end

      # Flushes any buffered INSERT statement. Also performs the deferred
      # truncation the first time it runs, so the table is emptied just
      # before the first batch of rows is written.
      def finalize # :nodoc:
        if @truncate
          conn = @class.connection
          begin
            conn.execute("TRUNCATE TABLE #{conn.quote_table_name(@class.table_name)}")
          rescue
            # Not all databases support TRUNCATE TABLE; fall back to DELETE.
            @class.delete_all
          end
          @truncate = false
        end
        # Only execute if there is a pending statement (trailing comma means
        # at least one row was buffered and not yet sent).
        if @sql_buffer && @sql_buffer[-1,1] == ","
          @sql_buffer.chop!
          @class.connection.execute(@sql_buffer)
        end
      end

      def reset # :nodoc:
        @sql_buffer = nil
      end

      private

      # Builds the head of an INSERT statement for the given record's columns.
      # Column names are quoted so reserved words don't break the query.
      def start_insert_sql(record)
        conn = @class.connection
        column_list = record.keys.collect {|k| conn.quote_column_name(k)}.join(",")
        "INSERT #{@ignore_errors ? "IGNORE " : ""}INTO " +
          "#{conn.quote_table_name(@class.table_name)} " +
          "(#{column_list}) VALUES "
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module DataTransport
  class DataStore
    # Identical to the File data store, except that it is preconfigured to read
    # and write CSV files.
    class CSVFile < File

      # Accepts the same options as the File data store, except that the
      # following options have different defaults:
      #
      # delimiter:: ","
      # enclosure:: "\""
      # escape::    :double
      #
      # These defaults describe the CSV format. Any of them may still be
      # overridden by the caller's options.
      def initialize(options = {})
        defaults = {
          :delimiter => ",",
          :enclosure => '"',
          :escape    => :double
        }
        super(defaults.merge(options))
      end

    end
  end
end
|
@@ -0,0 +1,186 @@
|
|
1
|
+
module DataTransport
  class DataStore
    # Data store that reads and writes records in a flat text file.
    #
    # Although this class can read and write CSV files, you should use the
    # CSVFile data store for that instead of this one.
    class File < DataStore
      attr_reader :mode # :nodoc:

      # Accepts the following options:
      #
      # header::    If true, the file has a header row that contains the names
      #             of each field. Default is false.
      # delimiter:: String that separates individual fields in a row. Default
      #             is "\t".
      # enclosure:: String that encloses individual fields. For example, if
      #             this is set to "\"", fields will be enclosed in double
      #             quotes. Default is nil (no enclosure).
      # escape::    Escape sequence for occurrences of the enclosure string in
      #             field values. Set this to the special value :double if
      #             enclosure characters are escaped by doubling them (like in
      #             CSV and SQL). Default is nil.
      # path::      Path to the file.
      # null::      String that represents fields whose value is nil (but not
      #             blank). Default is "".
      # keys::      Array of field names. Not necessary for files with a header
      #             row. Default for files without a header row is fieldXX,
      #             where XX is numbered sequentially starting from 00.
      def initialize(options = {})
        super()
        # Extract options.
        @header    = options.delete(:header)
        @delimiter = options.delete(:delimiter) || "\t"
        @enclosure = options.delete(:enclosure)
        @escape    = options.delete(:escape)
        @path      = options.delete(:path)
        @null      = options.delete(:null) || ""
        @keys      = options.delete(:keys)
        # Validate options.
        raise(ArgumentError, "missing required option `path'") if @path.nil?
        if @escape && @enclosure.nil?
          raise(ArgumentError, "`escape' cannot be used without `enclosure'")
        end
        # options is a Hash; Hash has no #join, so list its keys explicitly.
        unless options.empty?
          raise(ArgumentError, "unrecognized options: `#{options.keys.join("', `")}'")
        end
        # Handle the special :double escape sequence.
        @escape = @enclosure if @escape == :double
        # Create an enclosure placeholder, which is used to avoid clobbering
        # escaped enclosure characters during parsing.
        if @escape
          safe_ch = (@enclosure == 0.chr) ? 1.chr : 0.chr
          @placeholder = "#{safe_ch}__ENCLOSURE_PLACEHOLDER__#{safe_ch}"
        end
      end

      # Returns the number of lines in the file (not counting the header, if
      # there is one). The result is memoized.
      def count
        return @count if @count
        self.mode = :input
        line_count = 0
        rewind_and_restore do
          # Guard against an empty file: readline would raise EOFError.
          io.readline if @header && !io.eof?
          until io.eof?
            io.gets
            line_count += 1
          end
        end
        @count = line_count
      end

      def each_record(batch_size = nil) # :nodoc:
        self.mode = :input
        io.rewind
        io.readline if @header
        until io.eof?
          line = io.gets || break
          line.chomp!
          values = values_from_s(line)
          if keys.length != values.length
            raise RuntimeError, "wrong number of fields (#{values.length} for #{keys.length})"
          end
          record = {}
          keys.length.times {|i| record[keys[i]] = values[i]}
          yield record
        end
      end

      def write_record(record) # :nodoc:
        self.mode = :output
        # If no key order was ever specified, make one up.
        @keys ||= record.keys.sort {|a,b| a.to_s <=> b.to_s}
        # Write the header if this is the first record.
        if @header && io.pos == 0
          io.puts(values_to_s(keys))
        end
        # Write the values in a predictable order, substituting the null
        # marker for nil values.
        values = keys.collect do |k|
          record[k].nil?? @null : record[k]
        end
        io.puts(values_to_s(values))
      end

      private

      # Serializes an array of field values into one line of the file,
      # applying enclosure and escaping as configured.
      def values_to_s(values)
        if @escape
          enc_re = Regexp.new(Regexp.escape(@enclosure))
          values = values.collect do |v|
            # Use the block form of gsub so backslashes in the escape string
            # are not treated as backreferences in the replacement.
            @enclosure + v.to_s.gsub(enc_re) { @escape + @enclosure } + @enclosure
          end
        elsif @enclosure
          values = values.collect {|v| @enclosure + v.to_s + @enclosure}
        end
        values.join(@delimiter)
      end

      # Parses one line of the file into an array of field value strings.
      # All user-supplied strings are Regexp.escape'd so metacharacters in
      # the delimiter, enclosure, or escape (e.g. "|", ".") parse literally.
      def values_from_s(str)
        return [] if str.empty?
        if @escape
          esc = Regexp.escape(@escape)
          enc = Regexp.escape(@enclosure)
          del = Regexp.escape(@delimiter)
          # Hide escaped enclosures behind a placeholder so they survive
          # splitting and enclosure-stripping.
          str = str.gsub(/#{esc}#{enc}/, @placeholder)
          values = str.split(/#{enc}#{del}#{enc}/)
          values.first.sub!(/\A#{enc}/, "")
          values.last.sub!(/#{enc}\z/, "")
          ph = Regexp.new(Regexp.escape(@placeholder))
          values.each do |v|
            v.gsub!(ph) { @enclosure }
          end
        elsif @enclosure
          enc = Regexp.escape(@enclosure)
          del = Regexp.escape(@delimiter)
          values = str.split(/#{enc}#{del}#{enc}/)
          values.first.sub!(/\A#{enc}/, "")
          values.last.sub!(/#{enc}\z/, "")
        else
          values = str.split(/#{Regexp.escape(@delimiter)}/)
        end
        values
      end

      # A data store is one-directional: once reading or writing begins, the
      # mode is locked in.
      def mode=(new_mode)
        if !@mode.nil? && @mode != new_mode
          raise RuntimeError, "can't switch mode from #{@mode} to #{new_mode}"
        end
        unless [:input, :output].include?(new_mode)
          raise ArgumentError, "unknown mode `#{new_mode}'"
        end
        @mode = new_mode
      end

      # Lazily opens the underlying file in the direction implied by mode.
      # (A file freshly opened for writing is already at position 0, so no
      # explicit rewind is needed.)
      def io
        @io ||= ::File.open(@path, mode == :output ? "w" : "r")
      end

      # Returns the field names, deriving them from the header row (if any)
      # or generating field00, field01, ... from the first row's arity.
      def keys
        return @keys if @keys
        return [] if mode == :output
        line = rewind_and_restore { io.readline }
        line.chomp!
        fields = values_from_s(line)
        if @header
          @keys = fields.collect! {|hdr| hdr.downcase.to_sym}
        else
          @keys = (0..(fields.length - 1)).to_a.collect! do |i|
            sprintf("field%02d", i).to_sym
          end
        end
      end

      # Runs the block with the file rewound, then restores the previous
      # file position and returns the block's result.
      def rewind_and_restore
        pos = io.pos
        io.rewind
        result = yield
        io.seek(pos)
        result
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require "data_transport/data_store/file"
|
2
|
+
require "data_transport/data_store/csv_file"
|
3
|
+
require "data_transport/data_store/active_record"
|
4
|
+
|
5
|
+
module DataTransport
  # Abstract superclass for all data stores.
  class DataStore # :nodoc:
    # Lifecycle hooks. Subclasses may override these; the defaults are
    # deliberate no-ops so stores without flush or reset semantics need
    # not define them.
    def finalize() end

    def reset() end

    # Core operations. Every concrete data store must override each of
    # these; the stubs below simply raise until it does.

    # Number of records available in the store.
    def count
      raise NotImplementedError
    end

    # Yields each record, optionally reading in batches of batch_size.
    def each_record(batch_size = nil)
      raise NotImplementedError
    end

    # Persists a single record.
    def write_record(record)
      raise NotImplementedError
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataTransport
  module Record # :nodoc:
    # Mutable record wrapper handed to transport blocks as the destination.
    # Attribute writers are synthesized on the fly: `dst.foo = 1` stores
    # {:foo => 1} in the underlying hash.
    class Destination # :nodoc:
      # The accumulated record, as a hash of attribute name => value.
      attr_reader :record

      def initialize
        @record = {}
      end

      # Clears all attributes so this object can be reused for the next
      # record without reallocating.
      def reset!
        @record.clear
      end

      # Treats any call ending in "=" as an attribute assignment; everything
      # else falls through to the normal NoMethodError.
      def method_missing(name, *args)
        name_s = name.to_s
        if name_s[-1,1] == "="
          unless args.length == 1
            raise ArgumentError, "wrong number of arguments (#{args.length} for 1)"
          end
          name_s.chop!
          @record[name_s.to_sym] = args.first
        else
          super
        end
      end

      # Advertise the synthesized writers to respond_to?, keeping it
      # consistent with method_missing above.
      def respond_to_missing?(name, include_private = false)
        name.to_s[-1,1] == "=" || super
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module DataTransport
  module Record # :nodoc:
    # Read-only record wrapper handed to transport blocks as the source.
    # Attribute readers are synthesized from the keys of the current record
    # hash via method_missing.
    class Source # :nodoc:
      # Installs the hash backing the synthesized attribute readers.
      def record=(record)
        @record = record
      end

      # Object defines #id on old Rubies (and #object_id always exists), so
      # an :id attribute must be routed through method_missing explicitly.
      def id
        method_missing :id
      end

      # Any message matching a key of the current record returns that key's
      # value; everything else falls through to the normal NoMethodError.
      def method_missing(name, *args)
        if @record.has_key?(name)
          unless args.empty?
            raise ArgumentError, "wrong number of arguments (#{args.length} for 0)"
          end
          @record[name]
        else
          super
        end
      end

      # Advertise the synthesized readers to respond_to?, keeping it
      # consistent with method_missing above.
      def respond_to_missing?(name, include_private = false)
        (@record && @record.has_key?(name)) || super
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require "data_transport/data_store"
|
2
|
+
require "data_transport/record/source"
|
3
|
+
require "data_transport/record/destination"
|
4
|
+
|
5
|
+
module DataTransport
  # Number of records read per batch when the caller does not specify one.
  DEFAULT_BATCH_SIZE = 100_000 # :nodoc:

  # Reads records from an input data source, processes them with the supplied
  # block, and writes them to an output data source. Accepts the following
  # options:
  #
  # batch_size:: Records are read from the input in batches. This option sets
  #              the number of records in a single batch. Default is 100,000
  #              (DEFAULT_BATCH_SIZE).
  #
  # The block is passed two objects that represent the source and destination
  # record. These objects have methods that reflect the attributes of the
  # records. The following example reads the +name+ and +price+ attributes from
  # input records, downcases the name, multiplies the price by 100, and writes
  # them to the output:
  #
  #   # input = DataTransport::DataSource:: ...
  #   # output = DataTransport::DataSource:: ...
  #
  #   DataTransport.map(input, output) do |src, dst|
  #     dst.name  = src.name.downcase
  #     dst.price = (src.price * 100).to_i
  #   end
  #
  # The destination doesn't necessarily have to have the same attributes as the
  # source (or even the same number of attributes). The transformations that
  # can be accomplished are limited only by what you can do in a block of Ruby.
  #
  # Raises TypeError/RangeError on an invalid batch size and ArgumentError on
  # unrecognized options.
  def self.map(input, output, options = {})
    # Extract and validate options.
    batch_size = options.delete(:batch_size) || DEFAULT_BATCH_SIZE
    raise(TypeError, "batch size must be an integer") unless batch_size.is_a?(Integer)
    raise(RangeError, "batch size must be greater than zero") if batch_size < 1
    unless options.empty?
      raise(ArgumentError, "unrecognized options: `#{options.keys.join("', `")}'")
    end
    # Run the transport. The source and destination wrappers are reused
    # across records to avoid per-record allocation.
    output.reset
    source = DataTransport::Record::Source.new
    destination = DataTransport::Record::Destination.new
    input.each_record(batch_size) do |record|
      source.record = record
      destination.reset!
      yield source, destination
      output.write_record(destination.record)
    end
    output.finalize
  end
end
|
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: data_transport
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 3
|
8
|
+
- 3
|
9
|
+
version: 0.3.3
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Dana Contreras
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-03-27 00:00:00 -04:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description:
|
22
|
+
email:
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files: []
|
28
|
+
|
29
|
+
files:
|
30
|
+
- lib/data_transport.rb
|
31
|
+
- lib/data_transport/data_store.rb
|
32
|
+
- lib/data_transport/data_store/active_record.rb
|
33
|
+
- lib/data_transport/data_store/csv_file.rb
|
34
|
+
- lib/data_transport/data_store/file.rb
|
35
|
+
- lib/data_transport/record/destination.rb
|
36
|
+
- lib/data_transport/record/source.rb
|
37
|
+
has_rdoc: true
|
38
|
+
homepage: http://github.com/DanaDanger/data_transport
|
39
|
+
licenses: []
|
40
|
+
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
segments:
|
51
|
+
- 0
|
52
|
+
version: "0"
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
60
|
+
requirements: []
|
61
|
+
|
62
|
+
rubyforge_project:
|
63
|
+
rubygems_version: 1.3.6
|
64
|
+
signing_key:
|
65
|
+
specification_version: 3
|
66
|
+
summary: A gem for importing and exporting large quantities of data.
|
67
|
+
test_files: []
|
68
|
+
|