avro_utils 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 138c585168f97b5d9786dfcfda80f7b5f25caa61
4
+ data.tar.gz: 1011ffd72b61545233da0a573608a2692d8d194c
5
+ SHA512:
6
+ metadata.gz: ccd802a2f71f5a48a723dd457ed210662e6dae1a80a44d1500593cb909e5481981f2b40a6da95707f528fbadb24c4e668e80e4d0e753a5e2292da7424db30989
7
+ data.tar.gz: a5c4af7e7f56a3694fce9f78b0cdbd8780b726ce81f837ad0d6c3b256c6bd6a5d355f8c36c48786fab532ad0819b82850a84cf17a39ec1eadf3c5410571c59a3
@@ -0,0 +1,20 @@
1
# Gem specification for avro_utils: package identity, packaged files and
# dependencies.
Gem::Specification.new do |spec|
  spec.name        = 'avro_utils'
  spec.version     = '0.1.0'
  spec.summary     = 'A gem to convert different data format to avro'
  # Kept distinct from the summary; RubyGems warns at build time when the two
  # are identical.
  spec.description = 'Infers an Avro schema from a Ruby hash or JSON document ' \
                     'and serializes the data into an Avro data file buffer.'
  spec.authors     = ['Linh Chau']
  spec.email       = 'chauhonglinh@gmail.com'
  spec.homepage    = 'https://github.com/linhchauatl/avro_utils'
  spec.license     = 'MIT'

  # Paths are relative to the gem root; no leading "./" so every entry uses the
  # same form.
  spec.files = %w[
    avro_utils.gemspec
    lib/avro_utils.rb
    lib/exceptions/invalid_data_exception.rb
    lib/utils/avro_utils.rb
  ]

  # NOTE(review): 'logging' is declared but no packaged file visibly requires
  # it -- confirm whether it is still needed before removing it.
  %w[json json-schema avro logging activesupport].each do |dependency|
    spec.add_runtime_dependency dependency
  end
  spec.add_development_dependency 'rspec', '~> 3.3'
end
data/lib/avro_utils.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'active_support/all'
2
+ require 'json'
3
+ require 'json-schema'
4
+ require 'avro'
5
+
6
+ # avro types
7
+ # string: unicode character sequence
8
+ # bytes: sequence of 8-bit unsigned bytes
9
+ # int: 32-bit signed integer
10
+ # long: 64-bit signed integer
11
+ # float: single precision (32-bit) IEEE 754 floating-point number
12
+ # double: double precision (64-bit) IEEE 754 floating-point number
13
+ # boolean: a binary value
14
+ # null: no value
15
+
16
# Maps a Ruby class name (as a Symbol) to the Avro primitive type used for
# values of that class when a schema is inferred.
#
# Fixnum/Bignum cover Ruby < 2.4; from Ruby 2.4 on every integer reports its
# class as Integer, which is mapped to :long because Ruby integers are not
# limited to 32 bits.
# NOTE(review): Fixnum is kept as :int for backward compatibility even though a
# Fixnum can exceed the 32-bit range on 64-bit platforms -- confirm whether
# that should become :long too.
RUBY_AVRO_TYPE_MAPPING = {
  String: :string,
  Symbol: :string,
  Fixnum: :int,
  Bignum: :long,
  Integer: :long,
  Float: :double,
  TrueClass: :boolean,
  FalseClass: :boolean,
  NilClass: :null
}.freeze
27
+
28
# Load the gem's exception classes first, then its utilities. Paths are
# resolved against this file's directory instead of the process working
# directory, so the files are also found when the gem is installed and
# required from another project.
%w[exceptions utils].each do |subdir|
  Dir.glob(File.join(__dir__, subdir, '*.rb')).sort.each { |file| require file }
end
@@ -0,0 +1,2 @@
1
# Error raised by AvroUtils when the supplied data cannot be turned into an
# Avro schema or serialized. It adds no behavior of its own beyond RuntimeError.
class InvalidDataException < RuntimeError; end
@@ -0,0 +1,54 @@
1
# Helpers that infer an Avro record schema from a Ruby hash and serialize
# hash/JSON data with that schema.
class AvroUtils
  class << self
    # Builds an Avro record schema, as a plain Ruby hash, describing hash_data.
    #
    # Nested hashes become nested records named after their key (singularized
    # and camelized). Arrays become Avro arrays whose item type is inferred from
    # the first element; an empty array gets the item type mapped for nil.
    #
    # @param hash_data [Hash] data to describe
    # @param record_type_name [String] name of the top-level record type
    # @return [Hash] schema hash with :type, :name and :fields keys
    # @raise [InvalidDataException] if hash_data cannot be traversed or holds a
    #   value with no Avro type mapping
    def avro_schema_hash(hash_data, record_type_name)
      fields = hash_data.map do |key, value|
        { name: key, type: avro_type_for(key, value) }
      end

      { type: 'record', name: record_type_name, fields: fields }
    rescue InvalidDataException
      # Already reported by the nested call that detected the problem.
      raise
    rescue StandardError => error
      log_error(error)
      raise InvalidDataException, error.message
    end

    # Builds the schema hash for hash_data and parses it with Avro::Schema.
    #
    # @param hash_data [Hash] data to describe
    # @param record_type_name [String] name of the top-level record type
    # @return the object returned by Avro::Schema.parse
    # @raise [InvalidDataException] if the schema hash cannot be built
    def avro_schema(hash_data, record_type_name)
      Avro::Schema.parse(avro_schema_hash(hash_data, record_type_name).to_json)
    end

    # Serializes a JSON document (or a Hash) with a schema inferred from the
    # data itself, using Avro::DataFile::Writer.
    #
    # @param json_data [String, Hash] JSON text, or a hash that is converted to
    #   JSON first
    # @param record_type_name [String] name of the top-level record type
    # @return [StringIO] buffer holding the written data, rewound to the start
    # @raise [InvalidDataException] if parsing, schema inference or writing fails
    def json_to_avro(json_data, record_type_name)
      # Avro assumes that all hashes use strings for keys and does not accept an
      # indifferent-access hash. Round-tripping through JSON below guarantees
      # that the data uses string keys.
      json_data = json_data.to_json if json_data.is_a?(Hash)
      hash_data = JSON.parse(json_data)

      schema = avro_schema(hash_data, record_type_name)

      buffer = StringIO.new
      file_writer = Avro::DataFile::Writer.new(buffer, Avro::IO::DatumWriter.new(schema), schema)
      file_writer << hash_data
      file_writer.flush

      buffer.rewind
      buffer
    rescue InvalidDataException
      # Already reported where it was first detected.
      raise
    rescue StandardError => error
      log_error(error)
      raise InvalidDataException, error.message
    end

    private

    # Returns the Avro type for one value; key is used to name nested records.
    def avro_type_for(key, value)
      case value
      when Hash
        avro_schema_hash(value, key.to_s.singularize.camelize)
      when Array
        # Recursing on the first element lets arrays of hashes and nested arrays
        # produce a usable item type instead of nil.
        { type: 'array', items: avro_type_for(key, value.first) }
      else
        primitive_avro_type(value)
      end
    end

    # Looks up the Avro primitive type for a scalar value.
    def primitive_avro_type(value)
      mapped = RUBY_AVRO_TYPE_MAPPING[value.class.name.to_sym]
      return mapped if mapped
      # From Ruby 2.4 on integers report their class as Integer; fall back to
      # :long when the mapping has no entry for it.
      return :long if value.is_a?(Integer)

      raise InvalidDataException, "No Avro type mapping for #{value.class} value #{value.inspect}"
    end

    # Writes the error message and backtrace to standard error.
    def log_error(error)
      warn("Error: #{error.message}\n#{Array(error.backtrace).join("\n")}")
    end
  end
end
metadata ADDED
@@ -0,0 +1,131 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: avro_utils
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Linh Chau
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: json-schema
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: avro
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: logging
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: activesupport
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.3'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.3'
97
+ description: A gem to convert different data format to avro
98
+ email: chauhonglinh@gmail.com
99
+ executables: []
100
+ extensions: []
101
+ extra_rdoc_files: []
102
+ files:
103
+ - "./avro_utils.gemspec"
104
+ - lib/avro_utils.rb
105
+ - lib/exceptions/invalid_data_exception.rb
106
+ - lib/utils/avro_utils.rb
107
+ homepage: https://github.com/linhchauatl/avro_utils
108
+ licenses:
109
+ - MIT
110
+ metadata: {}
111
+ post_install_message:
112
+ rdoc_options: []
113
+ require_paths:
114
+ - lib
115
+ required_ruby_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ required_rubygems_version: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ requirements: []
126
+ rubyforge_project:
127
+ rubygems_version: 2.4.8
128
+ signing_key:
129
+ specification_version: 4
130
+ summary: A gem to convert different data format to avro
131
+ test_files: []