avro_utils 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/avro_utils.gemspec +1 -1
- data/lib/utils/avro_utils.rb +64 -32
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af0e71b643b4c4299c0f2da4540c2877047fc5e2
|
4
|
+
data.tar.gz: 78f6e118f3795b92d8e6079d176308d34aa2e8e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36536604fa62db13e89d6df4ffd08876556718431fb22334b4ac1f9eeebec21b79dffb0d06b88ecae4b6f3a6619fc9d05cdc4da44b26f2959a1709c38c14309d
|
7
|
+
data.tar.gz: b94fb042627c71eb4dbee94ea8297775de0f082e9c328253f694f4f1efa85214a879fe63548c254610ad545b1f282e881c82496f0e85e61ec75cbf25aed85440
|
data/avro_utils.gemspec
CHANGED
data/lib/utils/avro_utils.rb
CHANGED
@@ -2,53 +2,85 @@ class AvroUtils
|
|
2
2
|
|
3
3
|
class << self
|
4
4
|
def avro_schema_hash(hash_data, record_type_name)
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
5
|
+
process_data do
|
6
|
+
result = {
|
7
|
+
type: 'record',
|
8
|
+
name: record_type_name,
|
9
|
+
fields: []
|
10
|
+
}
|
11
|
+
|
12
|
+
hash_data.each do |key, value|
|
13
|
+
if value.is_a? Hash
|
14
|
+
result[:fields] << { name: key, type: avro_schema_hash(value, key.to_s.singularize.camelize) }
|
15
|
+
elsif value.is_a? Array
|
16
|
+
result[:fields] << { name: key, type: { type: 'array', items: RUBY_AVRO_TYPE_MAPPING[value[0].class.name.to_sym] } }
|
17
|
+
else
|
18
|
+
result[:fields] << { name: key, type: RUBY_AVRO_TYPE_MAPPING[value.class.name.to_sym] }
|
19
|
+
end
|
18
20
|
end
|
21
|
+
|
22
|
+
result
|
19
23
|
end
|
20
|
-
|
21
|
-
result
|
22
|
-
rescue StandardError => error
|
23
|
-
puts("Error: #{error.message}\n#{error.backtrace.join("\n")}")
|
24
|
-
raise InvalidDataException.new(error.message)
|
25
24
|
end
|
26
25
|
|
27
26
|
def avro_schema(hash_data, record_type_name)
|
28
27
|
Avro::Schema.parse(avro_schema_hash(hash_data, record_type_name).to_json)
|
29
28
|
end
|
30
29
|
|
31
|
-
def json_to_avro(json_data, record_type_name)
|
32
|
-
|
30
|
+
def json_to_avro(json_data, record_type_name, filename = nil)
|
31
|
+
process_data do
|
32
|
+
buffer = (filename.nil?)? StringIO.new : File.new(filename, 'wb')
|
33
|
+
hash_data = hash_with_string_keys(json_data)
|
34
|
+
|
35
|
+
schema = avro_schema(hash_data, record_type_name)
|
33
36
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
hash_data = JSON.parse(json_data)
|
37
|
+
file_writer = Avro::DataFile::Writer.new(buffer, Avro::IO::DatumWriter.new(schema), schema)
|
38
|
+
file_writer << hash_data
|
39
|
+
file_writer.flush
|
38
40
|
|
39
|
-
|
41
|
+
buffer.rewind
|
42
|
+
buffer.close if buffer.is_a? File
|
43
|
+
buffer
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def bulk_json_to_avro(collection, record_type_name, filename = nil)
|
48
|
+
process_data do
|
49
|
+
buffer = (filename.nil?)? StringIO.new : File.new(filename, 'wb')
|
50
|
+
|
51
|
+
schema = avro_schema(hash_with_string_keys(collection.first), record_type_name)
|
52
|
+
file_writer = Avro::DataFile::Writer.new(buffer, Avro::IO::DatumWriter.new(schema), schema)
|
53
|
+
|
54
|
+
collection.each do |json_data|
|
55
|
+
hash_data = hash_with_string_keys(json_data)
|
56
|
+
file_writer << hash_data
|
57
|
+
end
|
40
58
|
|
41
|
-
|
42
|
-
file_writer = Avro::DataFile::Writer.new(buffer, datum_writer, schema)
|
43
|
-
file_writer << hash_data
|
44
|
-
file_writer.flush
|
59
|
+
file_writer.flush
|
45
60
|
|
46
|
-
|
47
|
-
|
61
|
+
buffer.rewind
|
62
|
+
buffer.close if buffer.is_a? File
|
63
|
+
buffer
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# Avro assumes that all hashes use strings for keys. It does not accept an indifferent-access hash.
|
68
|
+
# So the conversion below ensures that the hash data uses strings for keys.
|
69
|
+
def hash_with_string_keys(json_data)
|
70
|
+
process_data do
|
71
|
+
json_data = json_data.to_json if json_data.is_a? Hash
|
72
|
+
hash_data = JSON.parse(json_data)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
def process_data
|
79
|
+
yield
|
48
80
|
rescue StandardError => error
|
49
81
|
puts("Error: #{error.message}\n#{error.backtrace.join("\n")}")
|
50
82
|
raise InvalidDataException.new(error.message)
|
51
83
|
end
|
52
|
-
|
84
|
+
|
53
85
|
end
|
54
86
|
end
|