sql2avro 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/Makefile CHANGED
@@ -1,4 +1,4 @@
1
- sql2avro-0.1.1.gem: sql2avro.gemspec
1
+ sql2avro-*.gem: sql2avro.gemspec
2
2
  bundle exec gem build $<
3
3
 
4
4
  vendor/avro-tools-1.7.4.jar:
@@ -143,7 +143,7 @@ class MySql < DbInterface
143
143
  cmd = %{
144
144
  mysql \\
145
145
  --batch \\
146
- --execute="#{sql}" \\
146
+ --execute="SET NAMES 'utf8'; #{sql}" \\
147
147
  --host #{db_host} \\
148
148
  --user #{username} \\
149
149
  --password=#{password} \\
data/lib/sql2avro.rb CHANGED
@@ -27,7 +27,7 @@ module Sql2Avro
27
27
  # table is the table to pull from.
28
28
  #
29
29
  # min_id specifies the value of the id column from which to start.
30
- def Sql2Avro.avroize(database_config, table, min_id)
30
+ def Sql2Avro.avroize(database_config, table, min_id, max_rows_per_batch=nil)
31
31
  raise "Database interface not specified." if !database_config.has_key? 'adapter'
32
32
  raise "Database interface not supported: #{database_config['adapter']}" if database_config['adapter'] != 'mysql'
33
33
 
@@ -35,19 +35,24 @@ module Sql2Avro
35
35
 
36
36
  schema = Yajl::Encoder.encode(interface.schema(table))
37
37
  max_id = interface.max_id(table)
38
+ max_id_this_batch = if max_rows_per_batch.nil?
39
+ max_id
40
+ else
41
+ [max_id, min_id + max_rows_per_batch].min
42
+ end
38
43
 
39
44
  date, time, zone = Time.now.utc.to_s.split
40
- filename = "#{table}.#{date}T#{time}Z.#{min_id}.#{max_id}.avro"
45
+ filename = "#{table}.#{date}T#{time}Z.#{min_id}.#{max_id_this_batch}.avro"
41
46
 
42
47
  retval = {
43
- max_id: max_id,
48
+ max_id: max_id_this_batch,
44
49
  path: filename
45
50
  }
46
51
 
47
52
  begin
48
53
  json_file = "#{filename}.json"
49
54
  File.open(json_file, 'w') do |f|
50
- interface.data(table, min_id, max_id).each do |datum|
55
+ interface.data(table, min_id, max_id_this_batch).each do |datum|
51
56
  Yajl::Encoder.encode(datum, f)
52
57
  f.write "\n"
53
58
  end
@@ -57,8 +62,8 @@ module Sql2Avro
57
62
  `#{cmd}`
58
63
 
59
64
  `rm #{json_file}`
60
- rescue
61
- retval[:error] = $!.to_s
65
+ rescue Exception => e
66
+ retval[:error] = "#{e}\n\n#{e.backtrace}"
62
67
  end
63
68
 
64
69
  retval
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sql2avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-06-21 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yajl-ruby
16
- requirement: &70268699282980 !ruby/object:Gem::Requirement
16
+ requirement: &70241121702160 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70268699282980
24
+ version_requirements: *70241121702160
25
25
  description: sql2avro extracts data from a specified SQL database table and transforms
26
26
  it into an Avro file with a schema based on the database table's schema. The intended
27
27
  use case is to incrementally load data out of an SQL database and into HDFS for
@@ -52,7 +52,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
52
52
  version: '0'
53
53
  segments:
54
54
  - 0
55
- hash: -933336779340994961
55
+ hash: -4019534732048256908
56
56
  required_rubygems_version: !ruby/object:Gem::Requirement
57
57
  none: false
58
58
  requirements:
@@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
61
  version: '0'
62
62
  segments:
63
63
  - 0
64
- hash: -933336779340994961
64
+ hash: -4019534732048256908
65
65
  requirements: []
66
66
  rubyforge_project:
67
67
  rubygems_version: 1.8.10