data_transport 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/data_transport/data_store/active_record.rb +210 -0
- data/lib/data_transport/data_store/csv_file.rb +25 -0
- data/lib/data_transport/data_store/file.rb +186 -0
- data/lib/data_transport/data_store.rb +27 -0
- data/lib/data_transport/record/destination.rb +28 -0
- data/lib/data_transport/record/source.rb +24 -0
- data/lib/data_transport.rb +52 -0
- metadata +68 -0
@@ -0,0 +1,210 @@
|
|
1
|
+
module DataTransport
  class DataStore
    # Data store that reads and writes records in a database via ActiveRecord.
    # This class is specifically optimized for reading and writing large
    # numbers of records, providing a significant advantage over using
    # ActiveRecord directly.
    class ActiveRecord < DataStore
      # There are two ways to initialize this data store. The first is by
      # specifying one of your ActiveRecord models:
      #
      #   DataTransport::DataStore::ActiveRecord.new :class => MyModel
      #
      # The second is by providing an ActiveRecord database specification (as
      # read from database.yml, for example) and a table name:
      #
      #   db_spec = ActiveRecord::Base.configurations["other_app_#{RAILS_ENV}"]
      #   DataTransport::DataStore::ActiveRecord.new(
      #     :connection => db_spec,
      #     :table_name => "sprockets"
      #   )
      #
      # The second form is useful for importing or exporting data in non-Rails
      # applications.
      #
      # In addition, the following options are accepted:
      #
      # conditions::     Conditions describing which records to read. This can
      #                  be anything that ActiveRecord will recognize, such as
      #                  a hash table, an array with substitutions, or raw SQL.
      #                  Default is nil (no conditions, read all records).
      # truncate::       If true, the table will be truncated before any
      #                  records are written. On databases that support it,
      #                  this is performed by executing a TRUNCATE TABLE query;
      #                  all other databases use ActiveRecord's delete_all
      #                  method.
      # ignore_errors::  If true, errors that occur during record insertion
      #                  will be ignored. This is useful if your table has a
      #                  unique index and you want to silently drop records
      #                  with duplicate keys. Currently this only works on
      #                  MySQL. Default is false.
      # max_sql_length:: Maximum permissible length of an SQL query, in bytes.
      #                  Rows to be inserted are buffered until the largest
      #                  possible INSERT statement has been generated, at which
      #                  point the statement is executed and a new INSERT
      #                  statement begins. The default value varies depending
      #                  on what type of database you're connected to. With
      #                  SQLite, the default is 1,000,000. With MySQL, the
      #                  default is the value of the +max_allowed_packet+
      #                  variable minus 512. With all other databases, the
      #                  default is 16,777,216.
      def initialize(options = {})
        super()
        # Extract options.
        @class          = options.delete(:class)
        @connection     = options.delete(:connection)
        @table_name     = options.delete(:table_name)
        @conditions     = options.delete(:conditions)
        @truncate       = options.delete(:truncate)
        @ignore_errors  = options.delete(:ignore_errors)
        @max_sql_length = options.delete(:max_sql_length)
        # Make sure a class or connection and table name was provided.
        if @class.nil? && (@connection.nil? || @table_name.nil?)
          raise(ArgumentError, "missing required option `class', or `connection' and `table_name'")
        end
        raise(TypeError, "class must be a class") if @class && !@class.is_a?(Class)
        # If connection specs were provided instead of a class, make an
        # anonymous ActiveRecord subclass.
        unless @class
          @class = Class.new(::ActiveRecord::Base)
          @class.set_table_name @table_name
          @class.establish_connection @connection
        end
        # Make sure the class descends from ActiveRecord::Base.
        klass = @class.superclass
        is_active_record = false
        while klass
          if klass == ::ActiveRecord::Base
            is_active_record = true
            break
          end
          klass = klass.superclass
        end
        raise(TypeError, "class must descend from ActiveRecord::Base") unless is_active_record
        # If ignore_errors is true, make sure we're connected to a MySQL
        # database. We don't use is_a? because if the MySQL adapter isn't
        # loaded, referencing its class throws a NameError.
        if @ignore_errors
          unless @class.connection.class.to_s ==
                 "ActiveRecord::ConnectionAdapters::MysqlAdapter"
            raise ArgumentError, "ignore_errors can only be used with a MySQL database"
          end
        end
        # Check for unknown options.
        unless options.empty?
          # BUG FIX: Hash does not respond to #join; report the leftover keys.
          raise(ArgumentError, "unrecognized options: `#{options.keys.join("', `")}'")
        end
        # Figure out how much data the database can handle in one query. See
        # the note above in the ignore_errors compatibility check about using
        # stringified class names.
        if @max_sql_length
          @max_sql_length = @max_sql_length.to_i
        else
          case @class.connection.class.to_s
          when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
            rows = @class.connection.select_all("SHOW VARIABLES LIKE 'max_allowed_packet'")
            @max_sql_length = rows.first["Value"].to_i - 512
          when /\AActiveRecord::ConnectionAdapters::SQLite3?Adapter\Z/
            @max_sql_length = 1_000_000
          else
            @max_sql_length = 16_777_216
          end
        end
        # Fetch column information, keyed by column name symbol.
        @columns = {}
        @class.columns.each {|c| @columns[c.name.to_sym] = c}
      end

      def klass # :nodoc:
        @class
      end

      # Returns the number of records in the table that match the data store's
      # conditions.
      def count
        @class.count(:conditions => @conditions)
      end

      def each_record(batch_size = nil) # :nodoc:
        conn = @class.connection
        column_names = conn.columns(@class.table_name).collect {|c| c.name}

        offset = 0
        record = {}
        base_query = "SELECT * FROM #{conn.quote_table_name(@class.table_name)}"
        @class.send(:add_conditions!, base_query, @conditions) unless @conditions.nil?
        while true
          sql = base_query.dup
          if batch_size
            conn.add_limit_offset!(sql, :limit => batch_size, :offset => offset)
            # BUG FIX: only advance the offset when batching; `offset += nil`
            # raised a TypeError when no batch size was given.
            offset += batch_size
          end
          rows = conn.select_rows(sql)
          break if rows.empty?
          rows.each do |row|
            record.clear
            # Type-cast each raw value using the column metadata gathered at
            # initialization.
            column_names.each_with_index do |column_name, i|
              column_name = column_name.to_sym
              record[column_name] = @columns[column_name].type_cast(row[i])
            end
            yield record
          end
          # Without a batch size the unlimited query already returned every
          # row; looping again would yield the same rows forever.
          break unless batch_size
        end
      end

      def write_record(record) # :nodoc:
        conn = @class.connection
        # If no SQL has been produced yet, start an INSERT statement.
        @sql_buffer ||= start_insert_sql(record)
        # Convert the record into a string of quoted values.
        values = []
        record.each {|k, v| values << conn.quote(v, @columns[k])}
        values = "(#{values.join ","}),"
        # Write the record.
        if @max_sql_length.nil?
          # We have no information on the database's maximum allowed packet
          # size, so it's safest to write the record immediately.
          @sql_buffer << values
          finalize
        elsif @sql_buffer.length + values.length > @max_sql_length
          # Appending this record to the SQL buffer will exceed the maximum
          # allowed packet size. Send the buffer to the database and start a
          # new statement with this record.
          # BUG FIX: compare against the SQL fragment's byte length; the
          # original used record.length (the number of hash keys).
          finalize
          # BUG FIX: start_insert_sql requires the record so it can emit the
          # column list; it was previously called with no argument.
          @sql_buffer = start_insert_sql(record)
          @sql_buffer << values
        else
          # This record will not cause the SQL buffer to exceed the maximum
          # allowed packet size. Append it to the SQL buffer.
          @sql_buffer << values
        end
      end

      def finalize # :nodoc:
        # Truncation is deferred until the first flush so that a transport
        # that writes nothing leaves the table untouched.
        if @truncate
          conn = @class.connection
          begin
            conn.execute("TRUNCATE TABLE #{conn.quote_table_name(@class.table_name)}")
          rescue
            # Database doesn't support TRUNCATE TABLE; fall back to delete_all.
            @class.delete_all
          end
          @truncate = false
        end
        # Only execute if the buffer holds at least one pending row (it always
        # ends with "," in that case); chop the trailing comma first.
        if @sql_buffer && @sql_buffer[-1,1] == ","
          @sql_buffer.chop!
          @class.connection.execute(@sql_buffer)
        end
      end

      def reset # :nodoc:
        @sql_buffer = nil
      end

      private

      # Builds the head of a multi-row INSERT statement for +record+'s columns.
      def start_insert_sql(record)
        "INSERT #{@ignore_errors ? "IGNORE " : " "}INTO " +
        "#{@class.connection.quote_table_name(@class.table_name)} " +
        "(#{record.keys.join ","}) VALUES "
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module DataTransport
  class DataStore
    # Identical to the File data store, except that it is preconfigured to read
    # and write CSV files.
    class CSVFile < File

      # Accepts the same options as the File data store, except that the
      # following options have different defaults:
      #
      # delimiter:: ","
      # enclosure:: "\""
      # escape::    :double
      #
      # These defaults describe the CSV format.
      def initialize(options = {})
        csv_defaults = {
          :delimiter => ",",
          :enclosure => '"',
          :escape    => :double
        }
        super(csv_defaults.merge(options))
      end

    end
  end
end
|
@@ -0,0 +1,186 @@
|
|
1
|
+
module DataTransport
  class DataStore
    # Data store that reads and writes records in a flat text file.
    #
    # Although this class can read and write CSV files, you should use the
    # CSVFile data store for that instead of this one.
    class File < DataStore
      attr_reader :mode # :nodoc:

      # Accepts the following options:
      #
      # header::    If true, the file has a header row that contains the names
      #             of each field. Default is false.
      # delimiter:: String that separates individual fields in a row. Default
      #             is "\t".
      # enclosure:: String that encloses individual fields. For example, if
      #             this is set to "\"", fields will be enclosed in double
      #             quotes. Default is nil (no enclosure).
      # escape::    Escape sequence for occurrences of the enclosure string in
      #             field values. Set this to the special value :double if
      #             enclosure characters are escaped by doubling them (like in
      #             CSV and SQL). Default is nil.
      # path::      Path to the file.
      # null::      String that represents fields whose value is nil (but not
      #             blank). Default is "".
      # keys::      Array of field names. Not necessary for files with a header
      #             row. Default for files without a header row is fieldXX,
      #             where XX is numbered sequentially starting from 00.
      def initialize(options = {})
        super()
        # Extract options.
        @header    = options.delete(:header)
        @delimiter = options.delete(:delimiter) || "\t"
        @enclosure = options.delete(:enclosure)
        @escape    = options.delete(:escape)
        @path      = options.delete(:path)
        @null      = options.delete(:null) || ""
        @keys      = options.delete(:keys)
        # Validate options.
        raise(ArgumentError, "missing required option `path'") if @path.nil?
        if @escape && @enclosure.nil?
          raise(ArgumentError, "`escape' cannot be used without `enclosure'")
        end
        unless options.empty?
          # BUG FIX: Hash does not respond to #join; report the leftover keys.
          raise(ArgumentError, "unrecognized options: `#{options.keys.join("', `")}'")
        end
        # Handle the special :double escape sequence.
        @escape = @enclosure if @escape == :double
        # Create an enclosure placeholder, which is used to avoid clobbering
        # escaped enclosure characters during parsing. The placeholder is
        # wrapped in a control character that can't collide with the enclosure.
        if @escape
          if @enclosure == 0.chr
            safe_ch = 1.chr
          else
            safe_ch = 0.chr
          end
          @placeholder = "#{safe_ch}__ENCLOSURE_PLACEHOLDER__#{safe_ch}"
        end
      end

      # Returns the number of lines in the file (not counting the header, if
      # there is one).
      def count
        return @count if @count
        self.mode = :input
        line_count = 0
        rewind_and_restore do
          io.readline if @header
          until io.eof?
            io.gets
            line_count += 1
          end
        end
        @count = line_count
      end

      def each_record(batch_size = nil) # :nodoc:
        self.mode = :input
        io.rewind
        io.readline if @header
        until io.eof?
          line = io.gets || break
          line.chomp!
          values = values_from_s(line)
          if keys.length != values.length
            raise RuntimeError, "wrong number of fields (#{values.length} for #{keys.length})"
          end
          record = {}
          keys.length.times {|i| record[keys[i]] = values[i]}
          yield record
        end
      end

      def write_record(record) # :nodoc:
        self.mode = :output
        # If no key order was ever specified, make one up.
        @keys ||= record.keys.sort {|a,b| a.to_s <=> b.to_s}
        # Write the header if this is the first record.
        if @header && io.pos == 0
          io.puts(values_to_s(keys))
        end
        # Write the values in a predictable order, substituting the null
        # marker for nil fields.
        values = keys.collect do |k|
          record[k].nil?? @null : record[k]
        end
        io.puts(values_to_s(values))
      end

      private

      # Joins +values+ into a single line, applying enclosure and escaping.
      def values_to_s(values)
        if @escape
          # BUG FIX: escape the enclosure for the regex; enclosure strings
          # containing regex metacharacters (e.g. "|") previously misbehaved.
          enc_re = Regexp.escape(@enclosure)
          values = values.collect do |v|
            @enclosure + v.to_s.gsub(/#{enc_re}/, @escape + @enclosure) + @enclosure
          end
        elsif @enclosure
          values = values.collect {|v| @enclosure + v.to_s + @enclosure}
        end
        values.join(@delimiter)
      end

      # Splits a line into an array of raw field strings, undoing enclosure
      # and escaping.
      def values_from_s(str)
        # BUG FIX: all user-supplied strings are escaped before regex
        # interpolation, and split is given a -1 limit so trailing empty
        # fields are preserved instead of being dropped (which previously
        # caused spurious "wrong number of fields" errors).
        enc_re = @enclosure && Regexp.escape(@enclosure)
        del_re = Regexp.escape(@delimiter)
        if @escape
          # Hide escaped enclosures behind the placeholder while splitting.
          str = str.gsub(/#{Regexp.escape(@escape)}#{enc_re}/, @placeholder)
          values = str.split(/#{enc_re}#{del_re}#{enc_re}/, -1)
          unless values.empty?
            values.first.sub!(/\A#{enc_re}/, "")
            values.last.sub!(/#{enc_re}\z/, "")
          end
          values.each do |v|
            v.gsub!(/#{Regexp.escape(@placeholder)}/, @enclosure)
          end
        elsif @enclosure
          values = str.split(/#{enc_re}#{del_re}#{enc_re}/, -1)
          unless values.empty?
            values.first.sub!(/\A#{enc_re}/, "")
            values.last.sub!(/#{enc_re}\z/, "")
          end
        else
          values = str.split(/#{del_re}/, -1)
        end
        values
      end

      # A data store may be read from or written to, but never both; the
      # first operation fixes the mode for the object's lifetime.
      def mode=(new_mode)
        if !@mode.nil? && @mode != new_mode
          raise RuntimeError, "can't switch mode from #{@mode} to #{new_mode}"
        end
        unless [:input, :output].include?(new_mode)
          raise ArgumentError, "unknown mode `#{new_mode}'"
        end
        @mode = new_mode
      end

      # Lazily opens the underlying file in the direction dictated by #mode.
      def io
        return @io if @io
        if mode == :output
          @io = ::File.open(@path, "w")
          @io.rewind
          @io
        else
          @io = ::File.open(@path, "r")
        end
      end

      # Field names, read from the header row, generated as field00, field01,
      # ..., or taken from the :keys option.
      def keys
        return @keys if @keys
        return [] if mode == :output
        line = rewind_and_restore { io.readline }
        line.chomp!
        fields = values_from_s(line)
        if @header
          @keys = fields.collect! {|hdr| hdr.downcase.to_sym}
        else
          @keys = (0..(fields.length - 1)).to_a.collect! do |i|
            sprintf("field%02d", i).to_sym
          end
        end
      end

      # Runs the block at the start of the file, then restores the previous
      # file position.
      def rewind_and_restore
        pos = io.pos
        io.rewind
        result = yield
        io.seek(pos)
        result
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require "data_transport/data_store/file"
|
2
|
+
require "data_transport/data_store/csv_file"
|
3
|
+
require "data_transport/data_store/active_record"
|
4
|
+
|
5
|
+
module DataTransport
  # Abstract base class for data stores. Concrete stores must implement
  # #count, #each_record, and #write_record; #finalize and #reset are
  # optional lifecycle hooks that default to no-ops.
  class DataStore # :nodoc:
    # Number of records available in the store.
    def count
      raise NotImplementedError
    end

    # Yields each record (as a hash) to the block, reading in batches of
    # +batch_size+ where the backend supports it.
    def each_record(batch_size = nil)
      raise NotImplementedError
    end

    # Appends a single record (a hash) to the store.
    def write_record(record)
      raise NotImplementedError
    end

    # Hook invoked once after the last record is written. No-op by default.
    def finalize
    end

    # Hook invoked before a transport begins writing. No-op by default.
    def reset
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataTransport
  module Record # :nodoc:
    # Write-only record wrapper handed to the user's map block. Setter-style
    # calls (e.g. +dst.name = "x"+) are captured into the +record+ hash,
    # keyed by symbol.
    class Destination # :nodoc:
      attr_reader :record

      def initialize
        @record = {}
      end

      # Clears the captured record so the same object can be reused for the
      # next input record.
      def reset!
        @record.clear
      end

      # Treats any call ending in "=" as a field assignment; everything else
      # falls through to NoMethodError.
      def method_missing(name, *args)
        name_s = name.to_s
        if name_s[-1,1] == "="
          unless args.length == 1
            raise ArgumentError, "wrong number of arguments (#{args.length} for 1)"
          end
          name_s.chop!
          @record[name_s.to_sym] = args.first
        else
          super
        end
      end

      # Advertise the dynamic setters handled by method_missing so that
      # respond_to? agrees with actual behavior.
      def respond_to_missing?(name, include_private = false)
        name.to_s[-1,1] == "=" || super
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module DataTransport
  module Record # :nodoc:
    # Read-only record wrapper handed to the user's map block. Fields of the
    # current record hash are exposed as reader methods via method_missing.
    class Source # :nodoc:
      # Installs the hash for the record currently being processed.
      def record=(record)
        @record = record
      end

      # Explicit override so a field named +id+ is served from the record
      # rather than by Object#id (defined on older Rubies), which would
      # bypass method_missing.
      def id
        method_missing :id
      end

      # Serves any field present in the current record; unknown names fall
      # through to NoMethodError.
      def method_missing(name, *args)
        if @record.has_key?(name)
          unless args.empty?
            raise ArgumentError, "wrong number of arguments (#{args.length} for 0)"
          end
          @record[name]
        else
          super
        end
      end

      # Advertise the dynamic readers handled by method_missing so that
      # respond_to? agrees with actual behavior.
      def respond_to_missing?(name, include_private = false)
        (@record && @record.has_key?(name)) || super
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require "data_transport/data_store"
|
2
|
+
require "data_transport/record/source"
|
3
|
+
require "data_transport/record/destination"
|
4
|
+
|
5
|
+
module DataTransport
  # Number of records read from the input per batch unless overridden.
  DEFAULT_BATCH_SIZE = 100_000 # :nodoc:

  # Reads records from an input data source, processes them with the supplied
  # block, and writes them to an output data source. Accepts the following
  # options:
  #
  # batch_size:: Records are read from the input in batches. This option sets
  #              the number of records in a single batch. Default is 100,000
  #              (DEFAULT_BATCH_SIZE).
  #
  # The block is passed two objects that represent the source and destination
  # record. These objects have methods that reflect the attributes of the
  # records. The following example reads the +name+ and +price+ attributes from
  # input records, downcases the name, multiplies the price by 100, and writes
  # them to the output:
  #
  #   # input = DataTransport::DataSource:: ...
  #   # output = DataTransport::DataSource:: ...
  #
  #   DataTransport.map(input, output) do |src, dst|
  #     dst.name = src.name.downcase
  #     dst.price = (src.price * 100).to_i
  #   end
  #
  # The destination doesn't necessarily have to have the same attributes as the
  # source (or even the same number of attributes). The transformations that
  # can be accomplished are limited only by what you can do in a block of Ruby.
  def self.map(input, output, options = {}, &block)
    # Extract and validate options.
    batch_size = options.delete(:batch_size) || DEFAULT_BATCH_SIZE
    raise(TypeError, "batch size must be an integer") unless batch_size.is_a?(Integer)
    raise(RangeError, "batch size must be greater than zero") if batch_size < 1
    unless options.empty?
      raise(ArgumentError, "unrecognized options: `#{options.keys.join("', `")}'")
    end
    # Run the transport: one reusable source/destination wrapper pair is
    # shared across all records to avoid per-record allocation.
    output.reset
    source = DataTransport::Record::Source.new
    destination = DataTransport::Record::Destination.new
    input.each_record(batch_size) do |record|
      source.record = record
      destination.reset!
      yield source, destination
      output.write_record(destination.record)
    end
    output.finalize
  end
end
|
metadata
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: data_transport
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 3
|
8
|
+
- 3
|
9
|
+
version: 0.3.3
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Dana Contreras
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-03-27 00:00:00 -04:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description:
|
22
|
+
email:
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files: []
|
28
|
+
|
29
|
+
files:
|
30
|
+
- lib/data_transport.rb
|
31
|
+
- lib/data_transport/data_store.rb
|
32
|
+
- lib/data_transport/data_store/active_record.rb
|
33
|
+
- lib/data_transport/data_store/csv_file.rb
|
34
|
+
- lib/data_transport/data_store/file.rb
|
35
|
+
- lib/data_transport/record/destination.rb
|
36
|
+
- lib/data_transport/record/source.rb
|
37
|
+
has_rdoc: true
|
38
|
+
homepage: http://github.com/DanaDanger/data_transport
|
39
|
+
licenses: []
|
40
|
+
|
41
|
+
post_install_message:
|
42
|
+
rdoc_options: []
|
43
|
+
|
44
|
+
require_paths:
|
45
|
+
- lib
|
46
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
segments:
|
51
|
+
- 0
|
52
|
+
version: "0"
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
60
|
+
requirements: []
|
61
|
+
|
62
|
+
rubyforge_project:
|
63
|
+
rubygems_version: 1.3.6
|
64
|
+
signing_key:
|
65
|
+
specification_version: 3
|
66
|
+
summary: A gem for importing and exporting large quantities of data.
|
67
|
+
test_files: []
|
68
|
+
|