dataMetaAvro 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 499130a7de4920319df3b2432a8ec85a6cf46a42
4
+ data.tar.gz: c01912259141df8d2aba49eb97e8da67b73e8b58
5
+ SHA512:
6
+ metadata.gz: f103e5ef77050cc11c5cd7ae6037197cfa412589a80b4bd1ef84c688c61367624906cdc89e79750e5c71ab74362bebf829fd68d335466f575c6e72f0ded69794
7
+ data.tar.gz: adc11fd55bbba2761ac8b6f860f89da4726d6273e468252fdd2572373c583606cea1710ff8532bee08c0de630a39326c2435feb77f4ccfb9e9e2f4215d1ac93d
data/.yardopts ADDED
@@ -0,0 +1 @@
1
+ --title "DataMeta support for Avro" -r README.md --charset UTF-8 lib/**/* - README.md
data/History.md ADDED
@@ -0,0 +1,5 @@
1
+ # `dataMetaAvro` Release history:
2
+
3
+ ## `1.0.0` released `2017-02-14 Tue`
4
+ * 1 major enhancement:
5
+ * Initial release
data/PostInstall.txt ADDED
@@ -0,0 +1,2 @@
1
+ No special steps
2
+
data/README.md ADDED
@@ -0,0 +1,46 @@
1
+ # DataMetaAvro
2
+
3
+ DataMeta [Avro](http://avro.apache.org/docs/1.7.4) utilities, such as DataMetaDOM source to
4
+ [Avro Schema](http://avro.apache.org/docs/1.7.4/spec.html) converter.
5
+
6
+ References to this gem's:
7
+
8
+ * [Source](https://github.com/eBayDataMeta/DataMeta-gems)
9
+
10
+
11
+ ## DESCRIPTION:
12
+
13
+ See the [DataMeta Project](https://github.com/eBayDataMeta/DataMeta).
14
+
15
+ ## FEATURES/PROBLEMS:
16
+
17
+ Avro support:
18
+
19
+ * ver `1.8.1` or newer
20
+
21
+ Since Avro supports only a limited subset of the DataMetaDOM features, DataMeta features that are not supported by Avro
22
+ cause an error during export.
23
+
24
+ ## SYNOPSIS:
25
+
26
+ ### Avro schema generator
27
+
28
+ * Runnables:
29
+ * <tt>dataMetaAvroSchemaGen.rb</tt> - generate [Avro](http://avro.apache.org/docs/current) [Schemas](http://avro.apache.org/docs/current/spec.html),
30
+ one file per class
31
+
32
+ Usage:
33
+
34
+ dataMetaAvroSchemaGen.rb <DataMetaDOM source> <Avro Schemas target dir>
35
+
36
+ ## REQUIREMENTS:
37
+
38
+ * No special requirements
39
+
40
+ ## INSTALL:
41
+
42
+ gem install dataMetaAvro
43
+
44
+ ## LICENSE:
45
+
46
+ [Apache v 2.0](https://github.com/eBayDataMeta/DataMeta/blob/master/LICENSE.md)
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ %w(yard rake/testtask).each{ |r| require r} # load YARD and Rake's TestTask support
2
+ # Test task: adds the 'test' directory to the libs list used when running the tests.
3
+ Rake::TestTask.new do |t|
4
+ t.libs << 'test'
5
+ end
6
+ # The description below is attached to the default task, which only delegates to the 'docs' task.
7
+ desc 'Regen RDocs'
8
+ task :default => :docs
9
+ # YARD documentation task named 'docs'; '--list-undoc' is handed over as a stats option.
10
+ YARD::Rake::YardocTask.new('docs') {|r|
11
+ r.stats_options = ['--list-undoc']
12
+ }
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ %w( dataMetaDom dataMetaAvro ).each(&method(:require))
4
+
5
+ # sample arguments from the gem's root
6
+ # dataMetaAvroSchemaGen.rb ../../../test/dmDom/showCase.dmDom ../../../../src/test/avsc
7
+
8
+ @source, @target = ARGV
9
+ DataMetaAvro::helpAvroSchemaGen __FILE__ unless @source && @target
10
+ DataMetaAvro::helpAvroSchemaGen(__FILE__, "DataMetaDom source #{@source} is not a file") unless File.file?(@source)
11
+ DataMetaAvro::helpAvroSchemaGen(__FILE__, "Schema destination directory #{@target} is not a dir") unless File.directory?(@target)
12
+
13
+ puts "Generating #{@source} into #{@target}"
14
+
15
+ @parser = DataMetaDom::Model.new
16
+ begin
17
+ @parser.parse(@source)
18
+ DataMetaAvro::genSchema(@parser, @target)
19
+ rescue Exception => e
20
+ puts "ERROR #{e.message}; #{@parser.diagn}"
21
+ puts e.backtrace.inspect
22
+ end
@@ -0,0 +1,147 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'erb'
4
+ require 'fileutils'
5
+ require 'dataMetaDom'
6
+ require 'dataMetaDom/help'
7
+ require 'dataMetaDom/pojo'
8
+ require 'dataMetaDom/record'
9
+ require 'dataMetaDom/util'
10
+ require 'ostruct'
11
+
12
+ =begin rdoc
13
+ DataMetaDOM and {Avro}[http://avro.apache.org/docs/current] {Schemas}[http://avro.apache.org/docs/current/spec.html].
14
+
15
+ For command line details either check the new method's source or the README, the usage section.
16
+ =end
17
+
18
+ module DataMetaAvro
19
+ # Current version
20
+ VERSION = '1.0.0'
21
+
22
+ # The root of the gem.
23
+ GEM_ROOT = File.realpath(File.dirname(__FILE__) + '/../')
24
+
25
+ # Location of templates.
26
+ TMPL_ROOT = File.join(GEM_ROOT, 'tmpl')
27
+
28
+ =begin rdoc
29
+ Mapping from a DataMeta DOM type to a matching renderer of Avro schema JSON.
30
+ Each lambda is called by avroType with the DataMeta DOM data type (not the field) and must return the whole
31
+ specification that you would put under the <tt>"type":</tt> JSON tag, such as:
32
+ "int"
33
+ or, for a type with a size:
34
+ { "type": "fixed", "name": "theFieldName", "size": 16}
35
+
36
+ Note that wrapping this type into optional specification, i.e. unioned with <tt>"null"</tt> is done by calling
37
+ the wrapReqOptional method, not by the avroType method.
38
+ =end
39
+ AVRO_TYPES = {
40
+ DataMetaDom::BOOL => lambda{|dt| %q<"boolean">},
41
+ DataMetaDom::INT => lambda{ |dt|
42
+ len = dt.length # NOTE(review): presumably the size in bytes - confirm
43
+ case
44
+ when len <= 4; %q<"int">
45
+ when len <= 8; %q<"long">
46
+ else; raise "Invalid integer length #{len}"
47
+ end
48
+ },
49
+ DataMetaDom::FLOAT => lambda{|dt|
50
+ len = dt.length # NOTE(review): presumably the size in bytes - confirm
51
+ case
52
+ when len <= 4; %q<"float">
53
+ when len <= 8; %q<"double">
54
+ else; raise "Invalid float length #{len}"
55
+ end
56
+ },
57
+ DataMetaDom::RAW => lambda{|dt| %q<"bytes">},
58
+ DataMetaDom::STRING => lambda{|dt| %q<"string">},
59
+ =begin
60
+ Unlike DataMeta DOM, Avro has no dedicated primitive types for date, time and datetime:
61
+ it uses {integral types}[http://avro.apache.org/docs/current/spec.html#Time+%28millisecond+precision%29] for
62
+ everything temporal, which is why a DataMeta DOM datetime is rendered as a plain "long" below.
63
+ NOTE(review): the linked spec section describes logical types over int/long; confirm whether "logicalType" should be emitted.
64
+ =end
65
+ DataMetaDom::DATETIME => lambda{|dt| %q<"long">},
66
+ # No support for these in this release:
67
+ #NUMERIC => lambda{|t| "BigDecimal"}
68
+ }
69
+
70
+ =begin rdoc
71
+ Converts DataMeta DOM type to Avro schema type by calling the AVRO_TYPES renderer registered for dataMetaType.type; raises a RuntimeError if there is none.
72
+ =end
73
+ def avroType(dataMetaType)
74
+ renderer = AVRO_TYPES[dataMetaType.type]
75
+ raise "Unsupported type #{dataMetaType}" unless renderer
76
+ renderer.call(dataMetaType)
77
+ end
78
+
79
+ # Wraps required/optional in proper enclosure: a required field keeps baseType as is, any other becomes the JSON union [baseType, "null"]
80
+ def wrapReqOptional(field, baseType)
81
+ field.isRequired ? baseType : %Q^[#{baseType}, "null"]^
82
+ end
83
+
84
+ =begin rdoc
85
+ Generates an {Avro Schema}[http://avro.apache.org/docs/current/spec.html] for the given model's record.
86
+
87
+ It makes impression that some parameters are not used, but it is not so: they are used by the ERB template
88
+ as the part of the method's binding.
89
+
90
+ The parameters nameSpace and the base can be derived from rec, but since they are evaluated previously by calling
91
+ assertNamespace, can just as well reuse them.
92
+
93
+ * Params:
94
+ * model - DataMetaDom::Model
95
+ * outFile - output file name
96
+ * rec - DataMetaDom::Record
97
+ * nameSpace - the namespace for the record
98
+ * base - base name of the record
99
+ =end
100
+ def genRecordJson(model, outFile, rec, nameSpace, base)
101
+ vars = OpenStruct.new # for template's local variables. ERB does not make them visible to the binding
102
+ IO.write(outFile, "#{ERB.new(IO.read("#{TMPL_ROOT}/dataClass.avsc.erb"), 0, '-').result(binding)}", {:mode => 'wb'})
103
+ end
104
+
105
+ =begin rdoc
106
+ Splits the full name of a class into the namespace and the base, returns an array of
107
+ the namespace (empty string unless DataMetaDom.validNs? accepts it, e.g. when there is no namespace on the name) and the base name.
108
+
109
+ Examples:
110
+ * <tt>'BaseNameAlone'</tt> -> <tt>['', 'BaseNameAlone']</tt>
111
+ * <tt>'one.package.another.pack.FinallyTheName'</tt> -> <tt>['one.package.another.pack', 'FinallyTheName']</tt>
112
+ =end
113
+ def assertNamespace(fullName)
114
+ ns, base = DataMetaDom::splitNameSpace(fullName)
115
+ [DataMetaDom.validNs?(ns, base) ? ns : '', base]
116
+ end
117
+
118
+ =begin rdoc
119
+ Generates the {Avro Schema}[http://avro.apache.org/docs/current/spec.html], one +avsc+ file per a record.
120
+ =end
121
+ def genSchema(model, outRoot)
122
+ model.records.values.each { |rec| # loop through all the records in the model
123
+ nameSpace, base = assertNamespace(rec.name)
124
+ FileUtils.mkdir_p outRoot # write json files named as one.package.another.package.ClassName.json in one dir
125
+ outFile = File.join(outRoot, "#{rec.name}.avsc")
126
+ case
127
+ when rec.kind_of?(DataMetaDom::Record)
128
+ genRecordJson model, outFile, rec, nameSpace, base
129
+ else # since we are cycling through records, should never get here
130
+ raise "Unsupported Entity: #{rec.inspect}"
131
+ end
132
+ }
133
+ end
134
+
135
+ # Shortcut to help for the Avro Schema generator: delegates to DataMetaDom::help with this gem's version, the usage line and the optional error text.
136
+ def helpAvroSchemaGen(file, errorText=nil)
137
+ DataMetaDom::help(file, "DataMeta DOM Avro Schema Generation ver #{VERSION}",
138
+ '<DataMeta DOM source> <Avro Schemas target dir>', errorText)
139
+ end
140
+
141
+ def assertMapKeyType(fld, type) # guard called from the template: raises ArgumentError unless the map key type is DataMetaDom::STRING
142
+ raise ArgumentError, %<Field "#{fld.name}": Avro supports only strings as map keys, "#{
143
+ type}" is not supported as a map key by Avro> unless type == DataMetaDom::STRING
144
+ end
145
+ module_function :helpAvroSchemaGen, :genSchema, :assertNamespace, :genRecordJson, :avroType, :assertMapKeyType,
146
+ :wrapReqOptional
147
+ end
data/test/sample.dmDom ADDED
@@ -0,0 +1,61 @@
1
+ # DataMetaDOM with all field types accounted for testing.
2
+ namespace org.ebay.datameta.examples.conv.avro
3
+
4
+ ver 1.0.0
5
+
6
+ enum BaseColor
7
+ Red, Green, Blue
8
+ end
9
+
10
+ # Avro supports only strings as map keys
11
+ mapping Timings string[3] datetime
12
+ "now" => DateTime.now,
13
+ 'then' => DateTime.parse("2012-09-29T23:45:59Z")
14
+ end
15
+
16
+ mapping Depths string[16] float[3]
17
+ "shallow" => 0.1,
18
+ "medium" => 353.232,
19
+ "deep" => 787.0
20
+ end
21
+
22
+ record AllTypes
23
+ +int[4] id
24
+ +int[8] count
25
+ +bool isIt
26
+ # this is a very special case in Avro, "fixed", see http://avro.apache.org/docs/current/spec.html#Fixed
27
+ +char[5] code
28
+ -float[4] width
29
+ +float[8] height
30
+ +string anyLength
31
+ +string[16] name
32
+ +set{string} aliases
33
+ -deque{datetime} accesses
34
+ +list{int[4]} quants
35
+ # Avro supports only strings as map keys
36
+ +map{string[32], int[4]} strToInt
37
+ +Depths depths
38
+ +Timings lengths
39
+ +BaseColor color
40
+ identity id
41
+ end
42
+
43
+ # Check how optional fields are rendered
44
+ record Optionals
45
+ +int[4] id
46
+ -int[8] count
47
+ -bool isIt
48
+ -char[5] code
49
+ -float[4] width
50
+ +float[8] height
51
+ -string anyLength
52
+ +string[16] name
53
+ +set{string} aliases
54
+ -deque{datetime} accesses
55
+ -list{int[4]} quants
56
+ -map{string[32], int[4]} strToInt
57
+ -Depths depths
58
+ -Timings lengths
59
+ -BaseColor color
60
+ identity id
61
+ end
@@ -0,0 +1,34 @@
1
+ # keep this underscore naming in the test subdir, it's easier to append files names to test
2
+ require './test/test_helper.rb'
3
+ require 'avro'
4
+
5
+ # Unit test cases for the DataMetaAvro
6
+ # See for instance:
7
+ # - test_parsing
8
+ class TestNewGem < Test::Unit::TestCase
9
+ # NOTE(review): Logger is used here, but neither this file nor test_helper.rb requires 'logger'; presumably it is loaded transitively - confirm.
10
+ L = Logger.new('dataMetaAvroTests.log', 0, 10_000_000)
11
+ L.level = Logger::DEBUG
12
+ L.datetime_format = '%Y-%m-%d %H:%M:%S'
13
+ # Directory the schemas are generated into; test_parsing removes and recreates it on every run.
14
+ GEN_TARGET = '.tmp'
15
+
16
+ # an empty stub for now
17
+ def setup; end
18
+
19
+ # Smoke test: parses sample.dmDom, generates the schemas into GEN_TARGET and has the avro gem parse every generated .avsc file
20
+ def test_parsing
21
+ model = DataMetaDom::Model.new
22
+ model.parse(File.join(File.dirname(__FILE__), 'sample.dmDom'), options={autoNsVer: true}) # NOTE(review): "options=" only assigns a local variable; the hash is passed as a plain positional argument
23
+ L.info(%<Model: #{model}>)
24
+ FileUtils.rmtree(GEN_TARGET) if File.exist?(GEN_TARGET)
25
+ FileUtils.mkpath GEN_TARGET
26
+ DataMetaAvro.genSchema(model, GEN_TARGET)
27
+ Dir.entries(GEN_TARGET).select{|e| e.end_with?('.avsc')}.each{ |e|
28
+ L.info("Verifying schema #{e}")
29
+ schema = IO.read(File.join(GEN_TARGET, e))
30
+ projection = Avro::Schema.parse(schema) # if schema is invalid, this will cause an error
31
+ L.info(projection.inspect)
32
+ }
33
+ end
34
+ end
@@ -0,0 +1,8 @@
1
+ ## keep this underscore naming in the test subdir, it's easier to append files names to test
2
+
3
+ require 'test/unit'
4
+ require 'dataMetaDom'
5
+ require 'fileutils'
6
+
7
+ # this is expected to run from the project root, normally by the rake file
8
+ require './lib/dataMetaAvro'
@@ -0,0 +1,43 @@
1
+ <%#
2
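+ Template for the Avro schema (.avsc) of one record; rendered by genRecordJson, which supplies nameSpace, base, rec, model and vars through its binding.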
3
+
4
+ -%>
5
+ {
6
+ "type": "record", "namespace":"<%= nameSpace %>", "name" : "<%= base %>",
7
+ "fields": [
8
+ <%
9
+
10
+ vars.flds = rec.fields
11
+ delim = nil; rec.fields.each_key { |fldId| vars.fld=rec.fields[fldId]
12
+ vars.dataType = vars.fld.dataType
13
+ vars.dt = vars.dataType.type
14
+ vars.enum = model.enums[vars.dt]
15
+ %><%=delim ? delim : ' '%><%
16
+ if vars.enum && vars.enum.is_a?(DataMetaDom::Enum)
17
+ vars.ns, vars.base = DataMetaDom.splitNameSpace(vars.dt)
18
+ values = vars.enum.keys.map{|k| vars.enum[k]} # sort by ordinals to preserve the order
19
+ %>
20
+ {"name": "<%=vars.fld.name%>", "type": <%=wrapReqOptional(vars.fld, %^{ "name": "#{vars.base}", "type": "enum", "symbols" : [#{ values.map{|v| %<"#{v}">}.join(', ')}]}^)%>}
21
+ <%
22
+ elsif vars.enum && vars.enum.is_a?(DataMetaDom::Mappings)
23
+ assertMapKeyType(vars.fld, vars.enum.fromT.type)
24
+ vars.ns, vars.base = DataMetaDom.splitNameSpace(vars.dt)
25
+ %>
26
+ {"name": "<%=vars.fld.name%>", "type": <%=wrapReqOptional(vars.fld, %^{"type": "map", "values": #{avroType(vars.enum.toT)}}^)%>}<%
27
+ elsif vars.dt == DataMetaDom::CHAR
28
+ %>
29
+ {"name": "<%=vars.fld.name%>", "type":<%=wrapReqOptional(vars.fld, %^{"name": "#{vars.fld.name}", "type": "fixed", "size": #{vars.dataType.length}}^)%>}<%
30
+ elsif vars.fld.map?
31
+ assertMapKeyType(vars.fld, vars.dt)
32
+ %>
33
+ {"name": "<%=vars.fld.name%>", "type": <%=wrapReqOptional(vars.fld, %^{"type": "map", "values": #{avroType(vars.fld.trgType)}}^)%>} <%
34
+ elsif vars.fld.aggr%>
35
+ {"name": "<%=vars.fld.name%>", "type": <%=wrapReqOptional(vars.fld, %^{"type": "array", "items": #{avroType(vars.dataType)}}^)%>} <%
36
+ else %>
37
+ {"name" : "<%= vars.fld.name %>", "type": <%= wrapReqOptional(vars.fld, avroType(vars.dataType)) %>} <%
38
+ end
39
+ delim =","
40
+ }
41
+ %>
42
+ ]
43
+ }
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dataMetaAvro
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Michael Bergens
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-02-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: dataMetaDom
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.0.1
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.0'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.0.1
33
+ description: DataMeta DOM to Avro schema generator
34
+ email: michael.bergens@gmail.com
35
+ executables:
36
+ - dataMetaAvroSchemaGen.rb
37
+ extensions: []
38
+ extra_rdoc_files: []
39
+ files:
40
+ - ".yardopts"
41
+ - History.md
42
+ - PostInstall.txt
43
+ - README.md
44
+ - Rakefile
45
+ - bin/dataMetaAvroSchemaGen.rb
46
+ - lib/dataMetaAvro.rb
47
+ - test/sample.dmDom
48
+ - test/test_dataMetaAvro.rb
49
+ - test/test_helper.rb
50
+ - tmpl/dataClass.avsc.erb
51
+ homepage: https://github.com/eBayDataMeta
52
+ licenses:
53
+ - Apache-2.0
54
+ metadata: {}
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: 2.1.0
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements:
70
+ - No special requirements
71
+ rubyforge_project:
72
+ rubygems_version: 2.5.1
73
+ signing_key:
74
+ specification_version: 4
75
+ summary: DataMeta Avro
76
+ test_files:
77
+ - test/test_dataMetaAvro.rb