sql2avro 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Makefile CHANGED
@@ -1,4 +1,4 @@
1
- sql2avro-0.1.1.gem: sql2avro.gemspec
1
+ sql2avro-*.gem: sql2avro.gemspec
2
2
  bundle exec gem build $<
3
3
 
4
4
  vendor/avro-tools-1.7.4.jar:
@@ -143,7 +143,7 @@ class MySql < DbInterface
143
143
  cmd = %{
144
144
  mysql \\
145
145
  --batch \\
146
- --execute="#{sql}" \\
146
+ --execute="SET NAMES 'utf8'; #{sql}" \\
147
147
  --host #{db_host} \\
148
148
  --user #{username} \\
149
149
  --password=#{password} \\
data/lib/sql2avro.rb CHANGED
@@ -27,7 +27,7 @@ module Sql2Avro
27
27
  # table is the table to pull from.
28
28
  #
29
29
  # min_id specifies the value of the id column from which to start.
30
- def Sql2Avro.avroize(database_config, table, min_id)
30
+ def Sql2Avro.avroize(database_config, table, min_id, max_rows_per_batch=nil)
31
31
  raise "Database interface not specified." if !database_config.has_key? 'adapter'
32
32
  raise "Database interface not supported: #{database_config['adapter']}" if database_config['adapter'] != 'mysql'
33
33
 
@@ -35,19 +35,24 @@ module Sql2Avro
35
35
 
36
36
  schema = Yajl::Encoder.encode(interface.schema(table))
37
37
  max_id = interface.max_id(table)
38
+ max_id_this_batch = if max_rows_per_batch.nil?
39
+ max_id
40
+ else
41
+ [max_id, min_id + max_rows_per_batch].min
42
+ end
38
43
 
39
44
  date, time, zone = Time.now.utc.to_s.split
40
- filename = "#{table}.#{date}T#{time}Z.#{min_id}.#{max_id}.avro"
45
+ filename = "#{table}.#{date}T#{time}Z.#{min_id}.#{max_id_this_batch}.avro"
41
46
 
42
47
  retval = {
43
- max_id: max_id,
48
+ max_id: max_id_this_batch,
44
49
  path: filename
45
50
  }
46
51
 
47
52
  begin
48
53
  json_file = "#{filename}.json"
49
54
  File.open(json_file, 'w') do |f|
50
- interface.data(table, min_id, max_id).each do |datum|
55
+ interface.data(table, min_id, max_id_this_batch).each do |datum|
51
56
  Yajl::Encoder.encode(datum, f)
52
57
  f.write "\n"
53
58
  end
@@ -57,8 +62,8 @@ module Sql2Avro
57
62
  `#{cmd}`
58
63
 
59
64
  `rm #{json_file}`
60
- rescue
61
- retval[:error] = $!.to_s
65
+ rescue Exception => e
66
+ retval[:error] = "#{e}\n\n#{e.backtrace}"
62
67
  end
63
68
 
64
69
  retval
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sql2avro
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-06-21 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yajl-ruby
16
- requirement: &70268699282980 !ruby/object:Gem::Requirement
16
+ requirement: &70241121702160 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70268699282980
24
+ version_requirements: *70241121702160
25
25
  description: sql2avro extracts data from a specified SQL database table and transforms
26
26
  it into an Avro file with a schema based on the database table's schema. The intended
27
27
  use case is to incrementally load data out of an SQL database and into HDFS for
@@ -52,7 +52,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
52
52
  version: '0'
53
53
  segments:
54
54
  - 0
55
- hash: -933336779340994961
55
+ hash: -4019534732048256908
56
56
  required_rubygems_version: !ruby/object:Gem::Requirement
57
57
  none: false
58
58
  requirements:
@@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
61
  version: '0'
62
62
  segments:
63
63
  - 0
64
- hash: -933336779340994961
64
+ hash: -4019534732048256908
65
65
  requirements: []
66
66
  rubyforge_project:
67
67
  rubygems_version: 1.8.10