avro_utils 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 138c585168f97b5d9786dfcfda80f7b5f25caa61
4
- data.tar.gz: 1011ffd72b61545233da0a573608a2692d8d194c
3
+ metadata.gz: af0e71b643b4c4299c0f2da4540c2877047fc5e2
4
+ data.tar.gz: 78f6e118f3795b92d8e6079d176308d34aa2e8e5
5
5
  SHA512:
6
- metadata.gz: ccd802a2f71f5a48a723dd457ed210662e6dae1a80a44d1500593cb909e5481981f2b40a6da95707f528fbadb24c4e668e80e4d0e753a5e2292da7424db30989
7
- data.tar.gz: a5c4af7e7f56a3694fce9f78b0cdbd8780b726ce81f837ad0d6c3b256c6bd6a5d355f8c36c48786fab532ad0819b82850a84cf17a39ec1eadf3c5410571c59a3
6
+ metadata.gz: 36536604fa62db13e89d6df4ffd08876556718431fb22334b4ac1f9eeebec21b79dffb0d06b88ecae4b6f3a6619fc9d05cdc4da44b26f2959a1709c38c14309d
7
+ data.tar.gz: b94fb042627c71eb4dbee94ea8297775de0f082e9c328253f694f4f1efa85214a879fe63548c254610ad545b1f282e881c82496f0e85e61ec75cbf25aed85440
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'avro_utils'
3
- s.version = '0.1.0'
3
+ s.version = '0.2.0'
4
4
  s.summary = "A gem to convert different data format to avro"
5
5
  s.description = "A gem to convert different data format to avro"
6
6
  s.authors = ['Linh Chau']
@@ -2,53 +2,85 @@ class AvroUtils
2
2
 
3
3
  class << self
4
4
  def avro_schema_hash(hash_data, record_type_name)
5
- result = {
6
- type: 'record',
7
- name: record_type_name,
8
- fields: []
9
- }
10
-
11
- hash_data.each do |key, value|
12
- if value.is_a? Hash
13
- result[:fields] << { name: key, type: avro_schema_hash(value, key.to_s.singularize.camelize) }
14
- elsif value.is_a? Array
15
- result[:fields] << { name: key, type: { type: 'array', items: RUBY_AVRO_TYPE_MAPPING[value[0].class.name.to_sym] } }
16
- else
17
- result[:fields] << { name: key, type: RUBY_AVRO_TYPE_MAPPING[value.class.name.to_sym] }
5
+ process_data do
6
+ result = {
7
+ type: 'record',
8
+ name: record_type_name,
9
+ fields: []
10
+ }
11
+
12
+ hash_data.each do |key, value|
13
+ if value.is_a? Hash
14
+ result[:fields] << { name: key, type: avro_schema_hash(value, key.to_s.singularize.camelize) }
15
+ elsif value.is_a? Array
16
+ result[:fields] << { name: key, type: { type: 'array', items: RUBY_AVRO_TYPE_MAPPING[value[0].class.name.to_sym] } }
17
+ else
18
+ result[:fields] << { name: key, type: RUBY_AVRO_TYPE_MAPPING[value.class.name.to_sym] }
19
+ end
18
20
  end
21
+
22
+ result
19
23
  end
20
-
21
- result
22
- rescue StandardError => error
23
- puts("Error: #{error.message}\n#{error.backtrace.join("\n")}")
24
- raise InvalidDataException.new(error.message)
25
24
  end
26
25
 
27
26
  def avro_schema(hash_data, record_type_name)
28
27
  Avro::Schema.parse(avro_schema_hash(hash_data, record_type_name).to_json)
29
28
  end
30
29
 
31
- def json_to_avro(json_data, record_type_name)
32
- buffer = StringIO.new
30
+ def json_to_avro(json_data, record_type_name, filename = nil)
31
+ process_data do
32
+ buffer = (filename.nil?)? StringIO.new : File.new(filename, 'wb')
33
+ hash_data = hash_with_string_keys(json_data)
34
+
35
+ schema = avro_schema(hash_data, record_type_name)
33
36
 
34
- # Avro assumes that all the hashes use strings for keys. It does not accept Indifferent Hash.
35
- # So the conversion below ensures that the hash data uses strings for keys.
36
- json_data = json_data.to_json if json_data.is_a? Hash
37
- hash_data = JSON.parse(json_data)
37
+ file_writer = Avro::DataFile::Writer.new(buffer, Avro::IO::DatumWriter.new(schema), schema)
38
+ file_writer << hash_data
39
+ file_writer.flush
38
40
 
39
- schema = avro_schema(hash_data, record_type_name)
41
+ buffer.rewind
42
+ buffer.close if buffer.is_a? File
43
+ buffer
44
+ end
45
+ end
46
+
47
+ def bulk_json_to_avro(collection, record_type_name, filename = nil)
48
+ process_data do
49
+ buffer = (filename.nil?)? StringIO.new : File.new(filename, 'wb')
50
+
51
+ schema = avro_schema(hash_with_string_keys(collection.first), record_type_name)
52
+ file_writer = Avro::DataFile::Writer.new(buffer, Avro::IO::DatumWriter.new(schema), schema)
53
+
54
+ collection.each do |json_data|
55
+ hash_data = hash_with_string_keys(json_data)
56
+ file_writer << hash_data
57
+ end
40
58
 
41
- datum_writer = Avro::IO::DatumWriter.new(schema)
42
- file_writer = Avro::DataFile::Writer.new(buffer, datum_writer, schema)
43
- file_writer << hash_data
44
- file_writer.flush
59
+ file_writer.flush
45
60
 
46
- buffer.rewind
47
- buffer
61
+ buffer.rewind
62
+ buffer.close if buffer.is_a? File
63
+ buffer
64
+ end
65
+ end
66
+
67
+ # Avro assumes that all the hashes use strings for keys. It does not accept Indifferent Hash.
68
+ # So the conversion below ensures that the hash data uses strings for keys.
69
+ def hash_with_string_keys(json_data)
70
+ process_data do
71
+ json_data = json_data.to_json if json_data.is_a? Hash
72
+ hash_data = JSON.parse(json_data)
73
+ end
74
+ end
75
+
76
+ private
77
+
78
+ def process_data
79
+ yield
48
80
  rescue StandardError => error
49
81
  puts("Error: #{error.message}\n#{error.backtrace.join("\n")}")
50
82
  raise InvalidDataException.new(error.message)
51
83
  end
52
-
84
+
53
85
  end
54
86
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Linh Chau