dataMetaAvro 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 499130a7de4920319df3b2432a8ec85a6cf46a42
4
+ data.tar.gz: c01912259141df8d2aba49eb97e8da67b73e8b58
5
+ SHA512:
6
+ metadata.gz: f103e5ef77050cc11c5cd7ae6037197cfa412589a80b4bd1ef84c688c61367624906cdc89e79750e5c71ab74362bebf829fd68d335466f575c6e72f0ded69794
7
+ data.tar.gz: adc11fd55bbba2761ac8b6f860f89da4726d6273e468252fdd2572373c583606cea1710ff8532bee08c0de630a39326c2435feb77f4ccfb9e9e2f4215d1ac93d
data/.yardopts ADDED
@@ -0,0 +1 @@
1
+ --title "DataMeta support for Avro" -r README.md --charset UTF-8 lib/**/* - README.md
data/History.md ADDED
@@ -0,0 +1,5 @@
1
+ # `dataMetaAvro` Release history:
2
+
3
+ ## `1.0.0` released `2017-02-14 Tue`
4
+ * 1 major enhancement:
5
+ * Initial release
data/PostInstall.txt ADDED
@@ -0,0 +1,2 @@
1
+ No special steps
2
+
data/README.md ADDED
@@ -0,0 +1,46 @@
1
+ # DataMetaAvro
2
+
3
+ DataMeta [Avro](http://avro.apache.org/docs/1.7.4) utilities, such as DataMetaDOM source to
4
+ [Avro Schema](http://avro.apache.org/docs/1.7.4/spec.html) converter.
5
+
6
+ References for this gem:
7
+
8
+ * [Source](https://github.com/eBayDataMeta/DataMeta-gems)
9
+
10
+
11
+ ## DESCRIPTION:
12
+
13
+ See the [DataMeta Project](https://github.com/eBayDataMeta/DataMeta).
14
+
15
+ ## FEATURES/PROBLEMS:
16
+
17
+ Avro support:
18
+
19
+ * ver `1.8.1` or newer
20
+
21
+ Since Avro supports only a limited subset of the DataMetaDOM features, any DataMeta feature that Avro does not support
22
+ causes an error during export.
23
+
24
+ ## SYNOPSIS:
25
+
26
+ ### Avro schema generator
27
+
28
+ * Runnables:
29
+ * <tt>dataMetaAvroSchemaGen.rb</tt> - generate [Avro](http://avro.apache.org/docs/current) [Schemas](http://avro.apache.org/docs/current/spec.html),
30
+ one file per class
31
+
32
+ Usage:
33
+
34
+ dataMetaAvroSchemaGen.rb <DataMetaDOM source> <Avro Schemas target dir>
35
+
36
+ ## REQUIREMENTS:
37
+
38
+ * No special requirements
39
+
40
+ ## INSTALL:
41
+
42
+ gem install dataMetaAvro
43
+
44
+ ## LICENSE:
45
+
46
+ [Apache v 2.0](https://github.com/eBayDataMeta/DataMeta/blob/master/LICENSE.md)
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
require 'yard'
require 'rake/testtask'

# Unit tests task; puts the 'test' directory on the load path.
Rake::TestTask.new { |t| t.libs << 'test' }

# The description below is attached to the default task, which simply depends on :docs.
desc 'Regen RDocs'
task default: :docs

# YARD documentation task named 'docs'; the stats step also lists undocumented objects.
YARD::Rake::YardocTask.new('docs') do |r|
  r.stats_options = ['--list-undoc']
end
@@ -0,0 +1,22 @@
1
#!/usr/bin/env ruby

# Command line runner: parses a DataMetaDOM source file and generates Avro schemas,
# one file per record, into the given target directory.
#
# Usage:
#   dataMetaAvroSchemaGen.rb <DataMetaDOM source> <Avro Schemas target dir>

%w( dataMetaDom dataMetaAvro ).each(&method(:require))

# sample arguments from the gem's root
# dataMetaAvroSchemaGen.rb ../../../test/dmDom/showCase.dmDom ../../../../src/test/avsc

@source, @target = ARGV
# NOTE(review): helpAvroSchemaGen delegates to DataMetaDom::help, which presumably prints the usage
# and terminates the process; the checks below rely on that -- confirm against dataMetaDom.
DataMetaAvro::helpAvroSchemaGen __FILE__ unless @source && @target
DataMetaAvro::helpAvroSchemaGen(__FILE__, "DataMetaDom source #{@source} is not a file") unless File.file?(@source)
DataMetaAvro::helpAvroSchemaGen(__FILE__, "Schema destination directory #{@target} is not a dir") unless File.directory?(@target)

puts "Generating #{@source} into #{@target}"

@parser = DataMetaDom::Model.new
begin
  @parser.parse(@source)
  DataMetaAvro::genSchema(@parser, @target)
rescue StandardError => e
  # StandardError rather than Exception, so that Ctrl-C (Interrupt) and exit requests (SystemExit)
  # are not swallowed and misreported as generation errors.
  $stderr.puts "ERROR #{e.message}; #{@parser.diagn}"
  $stderr.puts e.backtrace.inspect
  # Non-zero status lets calling scripts and build tools detect the failure.
  exit 1
end
@@ -0,0 +1,147 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'erb'
4
+ require 'fileutils'
5
+ require 'dataMetaDom'
6
+ require 'dataMetaDom/help'
7
+ require 'dataMetaDom/pojo'
8
+ require 'dataMetaDom/record'
9
+ require 'dataMetaDom/util'
10
+ require 'ostruct'
11
+
12
+ =begin rdoc
13
+ DataMetaDOM and {Avro}[http://avro.apache.org/docs/current] {Schemas}[http://avro.apache.org/docs/current/spec.html].
14
+
15
+ For command line details, see the helpAvroSchemaGen method's source or the Usage section of the README.
16
+ =end
17
+
18
module DataMetaAvro
  # Current version
  VERSION = '1.0.0'

  # The root of the gem.
  GEM_ROOT = File.realpath(File.dirname(__FILE__) + '/../')

  # Location of templates.
  TMPL_ROOT = File.join(GEM_ROOT, 'tmpl')

  # Mapping from a DataMeta DOM type to a matching renderer of Avro schema JSON.
  # Each lambda expects the whole data type object (it reads +length+ from it where the size matters)
  # and must return the whole specification that you would put under the <tt>"type":</tt> JSON tag,
  # such as:
  #     "int"
  # or, for a type with a size:
  #     { "type": "fixed", "name": "theFieldName", "size": 16}
  #
  # Note that wrapping this type into an optional specification, i.e. a union with <tt>"null"</tt>,
  # is done separately by the wrapReqOptional method.
  AVRO_TYPES = {
    DataMetaDom::BOOL => lambda { |_dt| %q<"boolean"> },
    DataMetaDom::INT => lambda { |dt|
      len = dt.length
      case
      when len <= 4; %q<"int">
      when len <= 8; %q<"long">
      else; raise "Invalid integer length #{len}"
      end
    },
    DataMetaDom::FLOAT => lambda { |dt|
      len = dt.length
      case
      when len <= 4; %q<"float">
      when len <= 8; %q<"double">
      else; raise "Invalid float length #{len}"
      end
    },
    DataMetaDom::RAW => lambda { |_dt| %q<"bytes"> },
    DataMetaDom::STRING => lambda { |_dt| %q<"string"> },
    # Unlike DataMeta DOM, Avro does not support temporal types such as date, time and datetime,
    # they have a ticket filed for it but no idea when it is going to be implemented.
    # They use {integral types}[http://avro.apache.org/docs/current/spec.html#Time+%28millisecond+precision%29]
    # for everything temporal.
    DataMetaDom::DATETIME => lambda { |_dt| %q<"long"> },
    # No support for these in this release:
    #NUMERIC => lambda{|t| "BigDecimal"}
  }

  # Converts DataMeta DOM type to Avro schema type.
  #
  # * Params:
  #   * dataMetaType - the DataMeta DOM data type; its +type+ is the key looked up in AVRO_TYPES
  # * Returns the Avro type specification as a JSON fragment string.
  # * Raises RuntimeError if there is no renderer for the type.
  def avroType(dataMetaType)
    renderer = AVRO_TYPES[dataMetaType.type]
    raise "Unsupported type #{dataMetaType}" unless renderer
    renderer.call(dataMetaType)
  end

  # Wraps required/optional in proper enclosure: a required field keeps the base type as is,
  # an optional one becomes a union of the base type and <tt>"null"</tt>.
  def wrapReqOptional(field, baseType)
    field.isRequired ? baseType : %Q^[#{baseType}, "null"]^
  end

  # Generates an {Avro Schema}[http://avro.apache.org/docs/current/spec.html] for the given model's record.
  #
  # It may look like some parameters are not used, but they are: the ERB template reads them
  # through this method's binding.
  #
  # The parameters nameSpace and base can be derived from rec, but since they are evaluated previously
  # by calling assertNamespace, they are just reused here.
  #
  # * Params:
  #   * model - DataMetaDom::Model
  #   * outFile - output file name
  #   * rec - DataMetaDom::Record
  #   * nameSpace - the namespace for the record
  #   * base - base name of the record
  def genRecordJson(model, outFile, rec, nameSpace, base)
    vars = OpenStruct.new # for template's local variables. ERB does not make them visible to the binding
    tmplSrc = IO.read("#{TMPL_ROOT}/dataClass.avsc.erb")
    # ERB of Ruby 2.6+ takes trim_mode as a keyword and deprecates the positional form;
    # older Rubies (the gem supports 2.1+) know only the positional form.
    erb = if ERB.instance_method(:initialize).parameters.assoc(:key)
            ERB.new(tmplSrc, trim_mode: '-')
          else
            ERB.new(tmplSrc, 0, '-')
          end
    # The mode must be passed as trailing keywords: a positional Hash is taken for the offset
    # argument on Ruby 3+.
    IO.write(outFile, erb.result(binding), mode: 'wb')
  end

  # Splits the full name of a class into the namespace and the base, returns an array of
  # the namespace (empty string if there is no valid namespace on the name) and the base name.
  #
  # Examples:
  # * <tt>'BaseNameAlone'</tt> -> <tt>['', 'BaseNameAlone']</tt>
  # * <tt>'one.package.another.pack.FinallyTheName'</tt> -> <tt>['one.package.another.pack', 'FinallyTheName']</tt>
  def assertNamespace(fullName)
    ns, base = DataMetaDom::splitNameSpace(fullName)
    [DataMetaDom.validNs?(ns, base) ? ns : '', base]
  end

  # Generates the {Avro Schema}[http://avro.apache.org/docs/current/spec.html], one +avsc+ file per a record.
  #
  # * Params:
  #   * model - DataMetaDom::Model with the records to export
  #   * outRoot - target directory, created if missing; all the files are written into this one
  #     directory, named by the record's full name, e.g. one.package.another.package.ClassName.avsc
  # * Raises RuntimeError if the model's records contain anything but a DataMetaDom::Record.
  def genSchema(model, outRoot)
    FileUtils.mkdir_p outRoot # same directory for every record, no need to repeat it per record
    model.records.values.each { |rec| # loop through all the records in the model
      nameSpace, base = assertNamespace(rec.name)
      outFile = File.join(outRoot, "#{rec.name}.avsc")
      # since we are cycling through records, should never fail here
      raise "Unsupported Entity: #{rec.inspect}" unless rec.kind_of?(DataMetaDom::Record)
      genRecordJson model, outFile, rec, nameSpace, base
    }
  end

  # Shortcut to help for the Avro Schema generator.
  #
  # * Params:
  #   * file - the runnable's file name, normally <tt>__FILE__</tt> of the caller
  #   * errorText - optional error message passed along with the usage to DataMetaDom::help
  def helpAvroSchemaGen(file, errorText=nil)
    DataMetaDom::help(file, "DataMeta DOM Avro Schema Generation ver #{VERSION}",
                      '<DataMeta DOM source> <Avro Schemas target dir>', errorText)
  end

  # Verifies that the given map key type is a string, the only map key type Avro supports.
  #
  # * Params:
  #   * fld - the field that owns the map, used for the error message
  #   * type - the DataMeta DOM type of the map key
  # * Raises ArgumentError if the key type is anything but DataMetaDom::STRING.
  def assertMapKeyType(fld, type)
    raise ArgumentError, %<Field "#{fld.name}": Avro supports only strings as map keys, "#{
        type}" is not supported as a map key by Avro> unless type == DataMetaDom::STRING
  end

  module_function :helpAvroSchemaGen, :genSchema, :assertNamespace, :genRecordJson, :avroType, :assertMapKeyType,
                  :wrapReqOptional
end
data/test/sample.dmDom ADDED
@@ -0,0 +1,61 @@
1
+ # DataMetaDOM with all field types accounted for testing.
2
+ namespace org.ebay.datameta.examples.conv.avro
3
+
4
+ ver 1.0.0
5
+
6
+ enum BaseColor
7
+ Red, Green, Blue
8
+ end
9
+
10
+ # Avro supports only strings as map keys
11
+ mapping Timings string[3] datetime
12
+ "now" => DateTime.now,
13
+ 'then' => DateTime.parse("2012-09-29T23:45:59Z")
14
+ end
15
+
16
+ mapping Depths string[16] float[3]
17
+ "shallow" => 0.1,
18
+ "medium" => 353.232,
19
+ "deep" => 787.0
20
+ end
21
+
22
+ record AllTypes
23
+ +int[4] id
24
+ +int[8] count
25
+ +bool isIt
26
+ # this is a very special case in Avro, "fixed", see http://avro.apache.org/docs/current/spec.html#Fixed
27
+ +char[5] code
28
+ -float[4] width
29
+ +float[8] height
30
+ +string anyLength
31
+ +string[16] name
32
+ +set{string} aliases
33
+ -deque{datetime} accesses
34
+ +list{int[4]} quants
35
+ # Avro supports only strings as map keys
36
+ +map{string[32], int[4]} strToInt
37
+ +Depths depths
38
+ +Timings lengths
39
+ +BaseColor color
40
+ identity id
41
+ end
42
+
43
+ # Check how optional fields are rendered
44
+ record Optionals
45
+ +int[4] id
46
+ -int[8] count
47
+ -bool isIt
48
+ -char[5] code
49
+ -float[4] width
50
+ +float[8] height
51
+ -string anyLength
52
+ +string[16] name
53
+ +set{string} aliases
54
+ -deque{datetime} accesses
55
+ -list{int[4]} quants
56
+ -map{string[32], int[4]} strToInt
57
+ -Depths depths
58
+ -Timings lengths
59
+ -BaseColor color
60
+ identity id
61
+ end
@@ -0,0 +1,34 @@
1
+ # keep this underscore naming in the test subdir, it's easier to append files names to test
2
+ require './test/test_helper.rb'
3
+ require 'avro'
4
+
5
+ # Unit test cases for the DataMetaAvro
6
+ # See for instance:
7
+ # - test_parsing
8
# Test case for the Avro schema generation: exports the sample model and validates each
# generated schema with the Avro library's own parser.
class TestNewGem < Test::Unit::TestCase

  # Test log, written to the current directory.
  L = Logger.new('dataMetaAvroTests.log', 0, 10_000_000)
  L.level = Logger::DEBUG
  L.datetime_format = '%Y-%m-%d %H:%M:%S'

  # Directory for the generated schemas, wiped and recreated by the test.
  GEN_TARGET = '.tmp'

  # an empty stub for now
  def setup; end

  # Smell-check the parsing: generate schemas for sample.dmDom and make sure that
  # there is one schema per record and that Avro accepts each of them.
  def test_parsing
    model = DataMetaDom::Model.new
    model.parse(File.join(File.dirname(__FILE__), 'sample.dmDom'), {autoNsVer: true})
    L.info(%<Model: #{model}>)
    FileUtils.rmtree(GEN_TARGET) if File.exist?(GEN_TARGET)
    FileUtils.mkpath GEN_TARGET
    DataMetaAvro.genSchema(model, GEN_TARGET)
    schemaFiles = Dir.entries(GEN_TARGET).select { |e| e.end_with?('.avsc') }
    # without these checks the test would pass vacuously when nothing is generated
    assert(!schemaFiles.empty?, "No Avro schemas generated in #{GEN_TARGET}")
    assert_equal(model.records.size, schemaFiles.size, 'Expected one schema file per record')
    schemaFiles.each { |e|
      L.info("Verifying schema #{e}")
      schema = IO.read(File.join(GEN_TARGET, e))
      projection = Avro::Schema.parse(schema) # if schema is invalid, this will cause an error
      assert_not_nil(projection, "Schema #{e} parsed to nil")
      L.info(projection.inspect)
    }
  end
end
@@ -0,0 +1,8 @@
1
+ ## keep this underscore naming in the test subdir, it's easier to append files names to test
2
+
3
+ require 'test/unit'
4
+ require 'dataMetaDom'
5
+ require 'fileutils'
6
+
7
+ # this is expected to run from the project root, normally by the rake file
8
+ require './lib/dataMetaAvro'
@@ -0,0 +1,43 @@
1
+ <%#
2
+ Template for the Avro schema of one record: renders a JSON "record" type with one "fields" entry per field of rec. Enum fields are rendered as "enum", Mappings and map fields as "map" (string keys only), char fields as "fixed", other aggregates as "array", and the rest via avroType.
3
+ It is evaluated with the binding of DataMetaAvro.genRecordJson, so model, rec, nameSpace, base and vars come from that method.
4
+ -%>
5
+ {
6
+ "type": "record", "namespace":"<%= nameSpace %>", "name" : "<%= base %>",
7
+ "fields": [
8
+ <%
9
+
10
+ vars.flds = rec.fields
11
+ delim = nil; rec.fields.each_key { |fldId| vars.fld=rec.fields[fldId]
12
+ vars.dataType = vars.fld.dataType
13
+ vars.dt = vars.dataType.type
14
+ vars.enum = model.enums[vars.dt]
15
+ %><%=delim ? delim : ' '%><%
16
+ if vars.enum && vars.enum.is_a?(DataMetaDom::Enum)
17
+ vars.ns, vars.base = DataMetaDom.splitNameSpace(vars.dt)
18
+ values = vars.enum.keys.map{|k| vars.enum[k]} # sort by ordinals to preserve the order
19
+ %>
20
+ {"name": "<%=vars.fld.name%>", "type": <%=wrapReqOptional(vars.fld, %^{ "name": "#{vars.base}", "type": "enum", "symbols" : [#{ values.map{|v| %<"#{v}">}.join(', ')}]}^)%>}
21
+ <%
22
+ elsif vars.enum && vars.enum.is_a?(DataMetaDom::Mappings)
23
+ assertMapKeyType(vars.fld, vars.enum.fromT.type)
24
+ vars.ns, vars.base = DataMetaDom.splitNameSpace(vars.dt)
25
+ %>
26
+ {"name": "<%=vars.fld.name%>", "type": <%=wrapReqOptional(vars.fld, %^{"type": "map", "values": #{avroType(vars.enum.toT)}}^)%>}<%
27
+ elsif vars.dt == DataMetaDom::CHAR
28
+ %>
29
+ {"name": "<%=vars.fld.name%>", "type":<%=wrapReqOptional(vars.fld, %^{"name": "#{vars.fld.name}", "type": "fixed", "size": #{vars.dataType.length}}^)%>}<%
30
+ elsif vars.fld.map?
31
+ assertMapKeyType(vars.fld, vars.dt)
32
+ %>
33
+ {"name": "<%=vars.fld.name%>", "type": <%=wrapReqOptional(vars.fld, %^{"type": "map", "values": #{avroType(vars.fld.trgType)}}^)%>} <%
34
+ elsif vars.fld.aggr%>
35
+ {"name": "<%=vars.fld.name%>", "type": <%=wrapReqOptional(vars.fld, %^{"type": "array", "items": #{avroType(vars.dataType)}}^)%>} <%
36
+ else %>
37
+ {"name" : "<%= vars.fld.name %>", "type": <%= wrapReqOptional(vars.fld, avroType(vars.dataType)) %>} <%
38
+ end
39
+ delim =","
40
+ }
41
+ %>
42
+ ]
43
+ }
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dataMetaAvro
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Michael Bergens
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-02-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: dataMetaDom
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.0.1
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.0'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.0.1
33
+ description: DataMeta DOM to Avro schema generator
34
+ email: michael.bergens@gmail.com
35
+ executables:
36
+ - dataMetaAvroSchemaGen.rb
37
+ extensions: []
38
+ extra_rdoc_files: []
39
+ files:
40
+ - ".yardopts"
41
+ - History.md
42
+ - PostInstall.txt
43
+ - README.md
44
+ - Rakefile
45
+ - bin/dataMetaAvroSchemaGen.rb
46
+ - lib/dataMetaAvro.rb
47
+ - test/sample.dmDom
48
+ - test/test_dataMetaAvro.rb
49
+ - test/test_helper.rb
50
+ - tmpl/dataClass.avsc.erb
51
+ homepage: https://github.com/eBayDataMeta
52
+ licenses:
53
+ - Apache-2.0
54
+ metadata: {}
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 2.1.0
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements:
70
+ - No special requirements
71
+ rubyforge_project:
72
+ rubygems_version: 2.5.1
73
+ signing_key:
74
+ specification_version: 4
75
+ summary: DataMeta Avro
76
+ test_files:
77
+ - test/test_dataMetaAvro.rb