avro_utils 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 138c585168f97b5d9786dfcfda80f7b5f25caa61
4
- data.tar.gz: 1011ffd72b61545233da0a573608a2692d8d194c
3
+ metadata.gz: af0e71b643b4c4299c0f2da4540c2877047fc5e2
4
+ data.tar.gz: 78f6e118f3795b92d8e6079d176308d34aa2e8e5
5
5
  SHA512:
6
- metadata.gz: ccd802a2f71f5a48a723dd457ed210662e6dae1a80a44d1500593cb909e5481981f2b40a6da95707f528fbadb24c4e668e80e4d0e753a5e2292da7424db30989
7
- data.tar.gz: a5c4af7e7f56a3694fce9f78b0cdbd8780b726ce81f837ad0d6c3b256c6bd6a5d355f8c36c48786fab532ad0819b82850a84cf17a39ec1eadf3c5410571c59a3
6
+ metadata.gz: 36536604fa62db13e89d6df4ffd08876556718431fb22334b4ac1f9eeebec21b79dffb0d06b88ecae4b6f3a6619fc9d05cdc4da44b26f2959a1709c38c14309d
7
+ data.tar.gz: b94fb042627c71eb4dbee94ea8297775de0f082e9c328253f694f4f1efa85214a879fe63548c254610ad545b1f282e881c82496f0e85e61ec75cbf25aed85440
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'avro_utils'
3
- s.version = '0.1.0'
3
+ s.version = '0.2.0'
4
4
  s.summary = "A gem to convert different data format to avro"
5
5
  s.description = "A gem to convert different data format to avro"
6
6
  s.authors = ['Linh Chau']
@@ -2,53 +2,85 @@ class AvroUtils
2
2
 
3
3
  class << self
4
4
  def avro_schema_hash(hash_data, record_type_name)
5
- result = {
6
- type: 'record',
7
- name: record_type_name,
8
- fields: []
9
- }
10
-
11
- hash_data.each do |key, value|
12
- if value.is_a? Hash
13
- result[:fields] << { name: key, type: avro_schema_hash(value, key.to_s.singularize.camelize) }
14
- elsif value.is_a? Array
15
- result[:fields] << { name: key, type: { type: 'array', items: RUBY_AVRO_TYPE_MAPPING[value[0].class.name.to_sym] } }
16
- else
17
- result[:fields] << { name: key, type: RUBY_AVRO_TYPE_MAPPING[value.class.name.to_sym] }
5
+ process_data do
6
+ result = {
7
+ type: 'record',
8
+ name: record_type_name,
9
+ fields: []
10
+ }
11
+
12
+ hash_data.each do |key, value|
13
+ if value.is_a? Hash
14
+ result[:fields] << { name: key, type: avro_schema_hash(value, key.to_s.singularize.camelize) }
15
+ elsif value.is_a? Array
16
+ result[:fields] << { name: key, type: { type: 'array', items: RUBY_AVRO_TYPE_MAPPING[value[0].class.name.to_sym] } }
17
+ else
18
+ result[:fields] << { name: key, type: RUBY_AVRO_TYPE_MAPPING[value.class.name.to_sym] }
19
+ end
18
20
  end
21
+
22
+ result
19
23
  end
20
-
21
- result
22
- rescue StandardError => error
23
- puts("Error: #{error.message}\n#{error.backtrace.join("\n")}")
24
- raise InvalidDataException.new(error.message)
25
24
  end
26
25
 
27
26
  def avro_schema(hash_data, record_type_name)
28
27
  Avro::Schema.parse(avro_schema_hash(hash_data, record_type_name).to_json)
29
28
  end
30
29
 
31
- def json_to_avro(json_data, record_type_name)
32
- buffer = StringIO.new
30
+ def json_to_avro(json_data, record_type_name, filename = nil)
31
+ process_data do
32
+ buffer = (filename.nil?)? StringIO.new : File.new(filename, 'wb')
33
+ hash_data = hash_with_string_keys(json_data)
34
+
35
+ schema = avro_schema(hash_data, record_type_name)
33
36
 
34
- # Avro assumes that all the hashes use strings for keys. It does not accept Indifferent Hash.
35
- # So the conversion below ensures that the hash data uses strings for keys.
36
- json_data = json_data.to_json if json_data.is_a? Hash
37
- hash_data = JSON.parse(json_data)
37
+ file_writer = Avro::DataFile::Writer.new(buffer, Avro::IO::DatumWriter.new(schema), schema)
38
+ file_writer << hash_data
39
+ file_writer.flush
38
40
 
39
- schema = avro_schema(hash_data, record_type_name)
41
+ buffer.rewind
42
+ buffer.close if buffer.is_a? File
43
+ buffer
44
+ end
45
+ end
46
+
47
+ def bulk_json_to_avro(collection, record_type_name, filename = nil)
48
+ process_data do
49
+ buffer = (filename.nil?)? StringIO.new : File.new(filename, 'wb')
50
+
51
+ schema = avro_schema(hash_with_string_keys(collection.first), record_type_name)
52
+ file_writer = Avro::DataFile::Writer.new(buffer, Avro::IO::DatumWriter.new(schema), schema)
53
+
54
+ collection.each do |json_data|
55
+ hash_data = hash_with_string_keys(json_data)
56
+ file_writer << hash_data
57
+ end
40
58
 
41
- datum_writer = Avro::IO::DatumWriter.new(schema)
42
- file_writer = Avro::DataFile::Writer.new(buffer, datum_writer, schema)
43
- file_writer << hash_data
44
- file_writer.flush
59
+ file_writer.flush
45
60
 
46
- buffer.rewind
47
- buffer
61
+ buffer.rewind
62
+ buffer.close if buffer.is_a? File
63
+ buffer
64
+ end
65
+ end
66
+
67
+ # Avro assumes that all the hashes use strings for keys. It does not accept Indifferent Hash.
68
+ # So the conversion below ensures that the hash data uses strings for keys.
69
+ def hash_with_string_keys(json_data)
70
+ process_data do
71
+ json_data = json_data.to_json if json_data.is_a? Hash
72
+ hash_data = JSON.parse(json_data)
73
+ end
74
+ end
75
+
76
+ private
77
+
78
+ def process_data
79
+ yield
48
80
  rescue StandardError => error
49
81
  puts("Error: #{error.message}\n#{error.backtrace.join("\n")}")
50
82
  raise InvalidDataException.new(error.message)
51
83
  end
52
-
84
+
53
85
  end
54
86
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: avro_utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Linh Chau