dataMetaByteSer 1.0.0

checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4d5201ed98a82b4da2ca29379d7318cb3607e920
4
+ data.tar.gz: edb71acaf79c4111d1a703c9a7044c9a78019fb8
5
+ SHA512:
6
+ metadata.gz: 4d9edb7888e536f006becb9ce5612ef7573798afb5f136e0b36745021caaebc8c1cfad455815e6c1269a25763f45590a0133200ffc3121ababe839791e7cae7d
7
+ data.tar.gz: a5358415079756f869e417d783f069ebdf60aafe46c8fb449dd9014af7ef269ff1f683723f9784f3759b83d79c848aa1ab0ff6303675c1b8fa55f647d2c99639
data/.yardopts ADDED
@@ -0,0 +1 @@
1
+ --title "DataMeta Bytes (de)serialization" -r README.md --charset UTF-8 lib/**/* - README.md
data/History.md ADDED
@@ -0,0 +1,5 @@
1
+ # `dataMetaByteSer` Release history:
2
+
3
+ ## `1.0.0` released `2012-12-17`
4
+ * 1 major enhancement:
5
+ * Initial release
data/PostInstall.txt ADDED
@@ -0,0 +1 @@
1
+ No special steps
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # `dataMetaByteSer` gem
2
+
3
+ Generates byte array (de)serializers from [DataMeta DOM](https://github.com/eBayDataMeta/DataMeta-gems) sources.
4
+
5
+ References for this gem:
6
+
7
+ * [Source](https://github.com/eBayDataMeta/DataMeta-gems)
8
+
9
+
10
+ ## DESCRIPTION:
11
+
12
+ See the [DataMeta Project](https://github.com/eBayDataMeta/DataMeta)
13
+
14
+ ## FEATURES:
15
+
16
+ Generates (de)serializers to and from byte arrays, along with matching Hadoop Writables. The output is optimized for storage size
17
+ first and runtime performance second; both aspects benchmark near the best in class.
18
+
19
+ ## SYNOPSIS:
20
+
21
+ To generate Byte Array serializers in Java, including Hadoop Writables for the DataMeta model, run:
22
+
23
+ dataMetaByteSerGen.rb <DataMeta DOM source> <Target Directory>
24
+
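+ For example, with a hypothetical model file `example.dmDom` and an existing output directory `gen/java`:
+
+     dataMetaByteSerGen.rb example.dmDom gen/java
+
+ For each record in the model, the generator writes a `<RecordName>_InOutable.java` and a `<RecordName>_Writable.java`
+ into the record's package subdirectory under the target directory.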
25
+ ## REQUIREMENTS:
26
+
27
+ * No special requirements
28
+
29
+ ## INSTALL:
30
+
31
+ gem install dataMetaByteSer
32
+
33
+ ## LICENSE:
34
+
35
+ [Apache v 2.0](https://github.com/eBayDataMeta/DataMeta/blob/master/LICENSE.md)
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ %w(yard rdoc/task rake/testtask fileutils ./lib/dataMetaByteSer).each{ |r| require r}
2
+
3
+ Rake::TestTask.new do |t|
4
+ t.libs << 'test'
5
+ end
6
+
7
+ desc 'Regen RDocs'
8
+ task :default => :docs
9
+
10
+ YARD::Rake::YardocTask.new('docs') {|r|
11
+ r.stats_options = ['--list-undoc']
12
+ }
13
+
data/bin/dataMetaByteSerGen.rb ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ %w( dataMetaDom dataMetaByteSer ).each(&method(:require))
3
+
4
+ @source, @target = ARGV
5
+ DataMetaByteSer::helpDataMetaBytesSerGen __FILE__ unless @source && @target
6
+ DataMetaByteSer::helpDataMetaBytesSerGen(__FILE__, "DataMeta DOM source #{@source} is not a file") unless File.file?(@source)
7
+ DataMetaByteSer::helpDataMetaBytesSerGen(__FILE__, "Writables destination directory #{@target} is not a dir") unless File.directory?(@target)
8
+
9
+ @parser = DataMetaDom::Model.new
10
+ begin
11
+ @parser.parse(@source)
12
+ DataMetaByteSer::genWritables(@parser, @target)
13
+ rescue Exception => e
14
+ $stderr.puts "ERROR #{e.message}; #{@parser.diagn}"
15
+ $stderr.puts e.backtrace.inspect
16
+ end
data/lib/dataMetaByteSer/python.rb ADDED
@@ -0,0 +1,387 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'fileutils'
4
+ require 'dataMetaDom'
5
+ require 'dataMetaDom/util'
6
+ require 'dataMetaDom/python'
7
+ require 'dataMetaByteSer/util'
8
+
9
+ module DataMetaByteSer
10
+ # (De)Serialization for Python
11
+ module Py
12
+ include DataMetaDom, DataMetaDom::PythonLexer, DataMetaByteSer
13
+ =begin rdoc
14
+ Builds a class name for a Writable.
15
+ =end
16
+ def writableClassName(baseName); "#{baseName}_Writable" end
17
+ =begin rdoc
18
+ Builds a class name for an InOutable.
19
+ =end
20
+ def inOutablePy(arg)
21
+ klassName = case
22
+ when arg.kind_of?(String)
23
+ arg
24
+ else
25
+ _, s = DataMetaDom.splitNameSpace(arg.fType.type)
26
+ s
27
+ end
28
+ "#{klassName}_InOutable"
29
+ end
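+ # For illustration (hypothetical record name): inOutablePy('SampleRec') yields 'SampleRec_InOutable';
+ # passing a rendering context whose field type is SampleRec yields the same name.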
30
+
31
+ def mapsNotSupported(fld)
32
+ raise ArgumentError, "Field #{fld.name}: maps are not currently supported on Hadoop layer"
33
+ end
34
+
35
+ def aggrNotSupported(fld, forWhat)
36
+ raise ArgumentError, "Field #{fld.name}: aggregate types are not supported for #{forWhat} on Hadoop layer"
37
+ end
38
+
39
+ =begin rdoc
40
+ HDFS Reader and Writer for textual Python types such as str.
41
+ =end
42
+ TEXT_RW_METHODS = DataMetaByteSer::RwHolder.new(
43
+ lambda{|ctx|
44
+ ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}String(di)") : ctx.rw.call('DataMetaHadoopUtil.readText(di)')
45
+ },
46
+ lambda{|ctx|
47
+ ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}String(do, val.#{ctx.valGetter})" : "DataMetaHadoopUtil.writeTextIfAny(do, val.#{ctx.valGetter})"
48
+ }
49
+ )
50
+
51
+ =begin rdoc
52
+ HDFS Reader and Writer for integral Python types.
53
+ =end
54
+ INTEGRAL_RW_METHODS = RwHolder.new(
55
+ lambda{ |ctx|
56
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
57
+ case
58
+ when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Integer(di)") :
59
+ ctx.rw.call('WritableUtils.readVInt(di)')
60
+
61
+ when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Long(di)") : ctx.rw.call('WritableUtils.readVLong(di)')
62
+
63
+ else; raise "Invalid integer field #{ctx.fld}"
64
+ end
65
+ },
66
+ lambda{ |ctx|
67
+ case
68
+ when ctx.fType.length <= 4; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Integer(do, val.#{ctx.valGetter})" :
69
+ "WritableUtils.writeVInt(do, val.#{ctx.valGetter})"
70
+
71
+ when ctx.fType.length <= 8; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Long(do, val.#{ctx.valGetter})" : "WritableUtils.writeVLong(do, val.#{ctx.valGetter})"
72
+
73
+ else; raise "Invalid integer field #{ctx.fld}"
74
+ end
75
+ })
76
+
77
+ =begin rdoc
78
+ HDFS Reader and Writer for Booleans.
79
+ =end
80
+ BOOLEAN_RW_METHODS = RwHolder.new(
81
+ lambda{|ctx|
82
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
83
+ ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Boolean(di)") : ctx.rw.call('di.readBoolean()')
84
+ },
85
+ lambda{|ctx|
86
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
87
+ ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Boolean(do, val.#{ctx.valGetter})" : "do.writeBoolean(val.#{ctx.valGetter})"
88
+ })
89
+
90
+ # Python has no primitivable types
91
+ PRIMITIVABLE_TYPES = Set.new
92
+
93
+ =begin rdoc
94
+ HDFS Reader and Writer for floating point types.
95
+ =end
96
+ FLOAT_RW_METHODS = RwHolder.new(
97
+ lambda{|ctx|
98
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
99
+ case
100
+ when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Float(di)") : ctx.rw.call('di.readFloat()')
101
+ when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Double(di)") : ctx.rw.call('di.readDouble()')
102
+ else; raise "Invalid float field #{ctx.fld}"
103
+ end
104
+ },
105
+ lambda{|ctx|
106
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
107
+ case
108
+ when ctx.fType.length <= 4; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Float(do, val.#{ctx.valGetter})" : "do.writeFloat(val.#{ctx.valGetter})"
109
+ when ctx.fType.length <= 8; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Double(do, val.#{ctx.valGetter})" : "do.writeDouble(val.#{ctx.valGetter})"
110
+ else; raise "Invalid float field #{ctx.fld}"
111
+ end
112
+ })
113
+
114
+ =begin rdoc
115
+ HDFS Reader and Writer for the temporal type, the DateTime
116
+ =end
117
+ DTTM_RW_METHODS = RwHolder.new(
118
+ lambda { |ctx|
119
+ ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}DateTime(di)") : ctx.rw.call('DataMetaHadoopUtil.readDttm(di)')
120
+ },
121
+ lambda { |ctx|
122
+ ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}DateTime(do, val.#{ctx.valGetter})" : "DataMetaHadoopUtil.writeDttm(do, val.#{ctx.valGetter})"
123
+ }
124
+ )
125
+ =begin rdoc
126
+ HDFS Reader and Writer for the variable-size Decimal data type.
127
+ =end
128
+ NUMERIC_RW_METHODS = RwHolder.new(lambda{|ctx| ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}BigDecimal(di)") : ctx.rw.call('DataMetaHadoopUtil.readBigDecimal(di)')},
129
+ lambda{|ctx| "DataMetaHadoopUtil.writeBigDecimal(do, val.#{ctx.valGetter})"})
130
+
131
+ # Full name of a Py aggregate for the given DataMeta DOM aggregate
132
+ def aggrPyFull(aggr)
133
+ case aggr
134
+ when DataMetaDom::Field::LIST
135
+ 'List'
136
+ when DataMetaDom::Field::SET
137
+ 'Set'
138
+ when DataMetaDom::Field::DEQUE
139
+ 'Deque' # note this is different from Java
140
+ else
141
+ raise ArgumentError, "Aggregate type #{aggr} not supported for Python serialization"
142
+ end
143
+ end
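+ # For example, aggrPyFull(DataMetaDom::Field::LIST) yields 'List', which gets interpolated into method
+ # names such as DataMetaHadoopUtil.readListString(di) in the renderers above.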
144
+
145
+ =begin rdoc
146
+ HDFS Reader and Writer for Enums (stored zero-based, matching the Java and Scala encoding).
147
+ =end
148
+ ENUM_RW_METHODS = RwHolder.new(
149
+ lambda{|ctx|
150
+ aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
151
+ _, s = DataMetaDom.splitNameSpace(ctx.fType.type)
152
+ "#{s}(WritableUtils.readVInt(di) + 1)" # Python starts their enums from 1 - we save it starting from 0
153
+ # as Java and Scala do
154
+ },
155
+ lambda { |ctx|
156
+ aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
157
+ # Python enums start at 1; we store the value zero-based, as Java and Scala do
158
+ "WritableUtils.writeVInt(do, val.#{ctx.valGetter}.value - 1)"
159
+ }
160
+ )
161
+ =begin rdoc
162
+ HDFS Reader and Writer for the URL.
163
+ =end
164
+ URL_RW_METHODS = RwHolder.new(
165
+ lambda { |ctx|
166
+ aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
167
+ 'DataMetaHadoopUtil.readText(di)'
168
+ },
169
+ lambda { |ctx|
170
+ aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
171
+ "DataMetaHadoopUtil.writeTextIfAny(do, val.#{ctx.valGetter})"
172
+ }
173
+ )
174
+ # Pseudo-implementers that just raise an error
175
+ NOT_IMPLEMENTED_METHODS = RwHolder.new(
176
+ lambda { |ctx|
177
+ aggrNotSupported(ctx.fld, 'Serialization')
178
+ },
179
+ lambda { |ctx|
180
+ aggrNotSupported(ctx.fld, 'Serialization')
181
+ }
182
+ )
183
+ =begin rdoc
184
+ Read/write methods for the standard data types.
185
+ =end
186
+ STD_RW_METHODS = {
187
+ DataMetaDom::INT => INTEGRAL_RW_METHODS,
188
+ DataMetaDom::STRING => TEXT_RW_METHODS,
189
+ DataMetaDom::DATETIME => DTTM_RW_METHODS,
190
+ DataMetaDom::BOOL => BOOLEAN_RW_METHODS,
191
+ DataMetaDom::CHAR => TEXT_RW_METHODS,
192
+ DataMetaDom::FLOAT => FLOAT_RW_METHODS,
193
+ DataMetaDom::RAW => NOT_IMPLEMENTED_METHODS,
194
+ DataMetaDom::NUMERIC => NUMERIC_RW_METHODS,
195
+ DataMetaDom::URL => URL_RW_METHODS
196
+ }
197
+ # DataMeta DOM object renderer
198
+ RECORD_RW_METHODS = RwHolder.new(
199
+ lambda { |ctx|
200
+ if ctx.fld.aggr
201
+ if ctx.fld.trgType # map
202
+ mapsNotSupported(ctx.fld)
203
+ else # list, set or deque
204
+ "DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}(di, #{
205
+ inOutablePy(ctx)}())"
206
+ end
207
+ else # scalar
208
+ "#{inOutablePy(ctx)}().read(di)"
209
+ end
210
+ },
211
+ lambda { |ctx|
212
+ if ctx.fld.aggr && !ctx.fld.trgType
213
+ if ctx.fld.trgType # map
214
+ mapsNotSupported(ctx.fld)
215
+ else # list, set or deque
216
+ "DataMetaHadoopUtil.writeCollection(val.#{ctx.valGetter}, do, #{inOutablePy(ctx)}())"
217
+ end
218
+ else # scalar
219
+ "#{inOutablePy(ctx)}().write(do, val.#{ctx.valGetter})"
220
+ end
221
+ }
222
+ )
223
+ =begin rdoc
224
+ Read/write methods for the DataMeta DOM Maps, coincidentally all the same as for the standard data types.
225
+ =end
226
+ MAP_RW_METHODS = STD_RW_METHODS
227
+
228
+ # Build the Read/Write operation renderer for the given context:
229
+ def getRwRenderer(ctx)
230
+ dt = ctx.fld.dataType
231
+ ctx.refType = nil # reset to avoid misrendering primitives
232
+ rwRenderer = STD_RW_METHODS[dt.type]
233
+ return rwRenderer if rwRenderer
234
+ refKey = dt.type
235
+ ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
236
+ case
237
+ when ctx.refType.kind_of?(DataMetaDom::Record)
238
+ RECORD_RW_METHODS
239
+ when ctx.refType.kind_of?(DataMetaDom::Enum)
240
+ ENUM_RW_METHODS
241
+ when ctx.refType.kind_of?(DataMetaDom::BitSet)
242
+ NOT_IMPLEMENTED_METHODS
243
+ when ctx.refType.kind_of?(DataMetaDom::Mapping)
244
+ MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
245
+ ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
246
+ else
247
+ raise "No renderer defined for field #{ctx.fld}"
248
+ end
249
+ end
250
+
251
+ # Generates one InOutable; Writables are not currently generated here
252
+ def genWritable(model, wriOut, ioOut, record, pyPackage, baseName)
253
+ enumCount = model.enums.values.select{|e| e.kind_of?(DataMetaDom::Enum)}.size
254
+ recImports = model.records.values.map{|r| # import all records
255
+ p, b, pp = DataMetaDom::PojoLexer::assertNamespace(r.name)
256
+ %|from #{DataMetaXtra::Str.downCaseFirst(b)} import #{b}|
257
+ }.join("\n")
258
+ # ioImports = model.records.values.reject{|r| r.name == record.name}.map{|r| # import all InOutables except of this one
259
+ # p, b, pp = DataMetaDom::PojoLexer::assertNamespace(r.name)
260
+ # # since one InOutable may import another which may import another, and Python can't handle this,
261
+ # # catch the error. It's harmless because if it really failed to import, we'll know
262
+ # %|
263
+ # try:
264
+ # from #{inOutablePy(DataMetaXtra::Str.downCaseFirst(b))} import #{inOutablePy(b)}
265
+ # except ImportError:
266
+ # pass|
267
+ # }.join("\n")
268
+ ctx = RendCtx.new.init(model, record, pyPackage, baseName)
269
+ fields = record.fields
270
+ wriName = nil # writableClassName(baseName)
271
+ ioName = inOutablePy(baseName)
272
+ hasOptional = fields.values.map{|f|
273
+ # !model.records[f.dataType.type] &&
274
+ !f.isRequired
275
+ }.reduce(:|) # true if there is at least one optional field which isn't a record
276
+ keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
277
+ reads = ''
278
+ writes = ''
279
+ writeNullMaskHead = hasOptional ? "nullFlags = bitarray(#{fields.keys.size}); nullFlags.setall(False); fldIndex = -1" : ''
280
+ readNullMaskHead = hasOptional ? 'nullFlags = DataMetaHadoopUtil.readBitArray(di); fldIndex = -1' : ''
281
+ indent = "\n#{' ' * 8}"
282
+ # sorting provides predictable read/write order
283
+ keysInOrder.each { |k|
284
+ f = fields[k]
285
+ ctx.fld = f
286
+ rwRenderer = getRwRenderer(ctx)
287
+ reads << ( indent + (f.isRequired ? '' : "fldIndex += 1#{indent}") + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
288
+ (f.isRequired ? '' : ' None if nullFlags[fldIndex] else ')+ "#{rwRenderer.r.call(ctx)})"
289
+ )
290
+ # noinspection RubyNestedTernaryOperatorsInspection
291
+ writes << (indent + (f.isRequired ?
292
+ (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
293
+ #%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
294
+ "if(val.#{DataMetaDom.getterName(ctx.fld)}() is not None): ") + "#{rwRenderer.w.call(ctx)}")
295
+ unless f.isRequired
296
+ writeNullMaskHead << (indent + "fldIndex += 1#{indent}if(val.#{DataMetaDom.getterName(ctx.fld)}() is None): nullFlags[fldIndex] = True")
297
+ end
298
+ }
299
+ writeNullMaskHead << ( indent + 'DataMetaHadoopUtil.writeBitArray(do, nullFlags)') if hasOptional
300
+
301
+ ioOut.puts <<IN_OUTABLE_CLASS
302
+
303
+ class #{ioName}(InOutable):
304
+
305
+ def write(self, do, val):
306
+ val.verify()
307
+ #{writeNullMaskHead}
308
+ #{writes}
309
+
310
+ def readVal(self, di, val):
311
+ #{readNullMaskHead}
312
+ #{reads}
313
+ return val
314
+
315
+ def read(self, di):
316
+ return self.readVal(di, #{baseName}())
317
+
318
+ IN_OUTABLE_CLASS
319
+ end
320
+
321
+ =begin rdoc
322
+ Generates all the writables for the given model.
323
+ Parameters:
324
+ * +model+ - the model to generate Writables from.
325
+ * +outRoot+ - destination directory name.
326
+ =end
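+ # A hypothetical invocation (the model file name is an example), mirroring the driver in bin/dataMetaByteSerGen.rb:
+ #   model = DataMetaDom::Model.new
+ #   model.parse('example.dmDom')
+ #   DataMetaByteSer::Py.genWritables(model, 'gen/python')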
327
+ def genWritables(model, outRoot)
328
+ firstRecord = model.records.values.first
329
+ pyPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(firstRecord.name)
330
+ # Next: replace dots with underscores. The path is also adjusted accordingly.
331
+ #
332
+ # Rationale for this, quoting PEP 8:
333
+ #
334
+ # Package and Module Names
335
+ #
336
+ # Modules should have short, all-lowercase names. Underscores can be used in the module name if it improves
337
+ # readability. Python packages should also have short, all-lowercase names, although the use of underscores
338
+ # is discouraged.
339
+ #
340
+ # Short and all-lowercase names are the rule, and underscores that would improve readability of long package names
341
+ # in a complex system are merely "discouraged". We can't follow that here: our system is more complicated than strictly religious, "pythonic" Python allows.
342
+ # A tool must be enabling, and in this case, this irrational ruling gets in the way.
343
+ # And dots are a no-no: Python can't resolve imports across complicated package structures.
344
+ #
345
+ # Hence, we opt for long package names with underscores for distinctiveness and readability:
346
+ pyPackage = pyPackage.gsub('.', '_')
347
+ packagePath = packagePath.gsub('/', '_')
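+ # For illustration (hypothetical namespace): 'org.sample.shipping' becomes the Python package 'org_sample_shipping',
+ # and the output path 'org/sample/shipping' likewise becomes 'org_sample_shipping'.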
348
+ destDir = File.join(outRoot, packagePath)
349
+ FileUtils.mkdir_p destDir
350
+ wriOut = nil # File.open(File.join(destDir, "#{writableClassName(base)}.py"), 'wb')
351
+ serFile = File.join(destDir, 'serial.py')
352
+ FileUtils.rm serFile if File.file?(serFile)
353
+ ioOut = File.open(serFile, 'wb') # one huge serialization file
354
+ ioOut.puts %|# This file is generated by DataMeta DOM. Do not edit manually!
355
+ #package #{pyPackage}
356
+
357
+ from hadoop.io import WritableUtils, InputStream, OutputStream, Text
358
+ from ebay_datameta_core.base import DateTime
359
+ from decimal import *
360
+ from collections import *
361
+ from bitarray import bitarray
362
+ from ebay_datameta_hadoop.base import *
363
+ from model import *
364
+
365
+ |
366
+ begin
367
+ model.records.values.each { |e|
368
+ _, base, _ = DataMetaDom::PojoLexer::assertNamespace(e.name)
369
+ case
370
+ when e.kind_of?(DataMetaDom::Record)
371
+ genWritable model, wriOut, ioOut, e, pyPackage, base
372
+ else
373
+ raise "Unsupported Entity: #{e.inspect}"
374
+ end
375
+ }
376
+ ensure
377
+ begin
378
+ ioOut.close
379
+ ensure
380
+ #wriOut.close
381
+ end
382
+ end
383
+ end
384
+ module_function :genWritables, :genWritable, :inOutablePy, :writableClassName, :mapsNotSupported,
385
+ :aggrNotSupported, :getRwRenderer, :aggrPyFull
386
+ end
387
+ end
data/lib/dataMetaByteSer/util.rb ADDED
@@ -0,0 +1,138 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'set'
4
+ require 'logger'
5
+
6
+ module DataMetaByteSer
7
+
8
+ =begin rdoc
9
+ A holder for a read renderer and a write renderer; these come in pairs that must be consistent so the
10
+ data is read and written uniformly.
11
+ =end
12
+ class RwHolder
13
+ =begin rdoc
14
+ Read renderer.
15
+ =end
16
+ attr_reader :r
17
+ =begin rdoc
18
+ Write renderer.
19
+ =end
20
+ attr_reader :w
21
+ =begin rdoc
22
+ Creates a new HDFS Read and Write renderer pair.
23
+ =end
24
+ def initialize(readRenderer, writeRenderer); @r = readRenderer; @w = writeRenderer end
25
+ end
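+ # A minimal usage sketch (illustrative only; the real renderer tables live in dataMetaByteSer.rb and python.rb):
+ # given a rendering context ctx, a pair such as
+ #   RwHolder.new(lambda{|c| c.rw.call('readVInt(in)')}, lambda{|c| "writeVInt(out, val.#{c.valGetter})"})
+ # renders the matching read and write expressions via .r.call(ctx) and .w.call(ctx).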
26
+
27
+ =begin rdoc
28
+ Rendering context with rendering-related properties and settings.
29
+ =end
30
+ class RendCtx
31
+
32
+ =begin rdoc
33
+ DataMeta DOM Model on the context.
34
+ =end
35
+ attr_accessor :model
36
+ =begin rdoc
37
+ Record currently worked on.
38
+ =end
39
+ attr_accessor :rec
40
+
41
+ =begin rdoc
42
+ Set of imports if any, each as symbol.
43
+ =end
44
+ attr_accessor :imps
45
+
46
+ =begin rdoc
47
+ Java package.
48
+ =end
49
+ attr_accessor :pckg
50
+ =begin rdoc
51
+ Base name of the type, without a namespace.
52
+ =end
53
+ attr_accessor :baseName
54
+ =begin rdoc
55
+ The data type of the entity on the context.
56
+ =end
57
+ attr_accessor :refType
58
+ =begin rdoc
59
+ Field currently on the context.
60
+ =end
61
+ attr_reader :fld
62
+
63
+ =begin rdoc
64
+ Creates a new context.
65
+ =end
66
+ def initialize; @imps = Set.new end
67
+
68
+ =begin rdoc
69
+ Setter for the field on the context, the field currently worked on.
70
+ =end
71
+ def fld=(val); @fld = val end
72
+
73
+ =begin rdoc
74
+ Initialize the context with the model, the record, the package and the basename.
75
+ Returns self for call chaining.
76
+ =end
77
+ def init(model, rec, pckg, baseName); @model = model; @rec = rec; @pckg = pckg; @baseName = baseName; self end
78
+
79
+ =begin rdoc
80
+ Add an import to the context, returns self for call chaining.
81
+ =end
82
+ def <<(import)
83
+ @imps << import.to_sym if import
84
+ self
85
+ end
86
+
87
+ =begin rdoc
88
+ Formats imports into Java source, sorted.
89
+ =end
90
+ def importsText
91
+ @imps.to_a.map{|k| "import #{k};"}.sort.join("\n")
92
+ end
93
+
94
+ =begin rdoc
95
+ Determines if the refType is a DataMetaDom::Mapping.
96
+ =end
97
+ def isMapping
98
+ @refType.kind_of?(DataMetaDom::Mapping) && !@refType.kind_of?(DataMetaDom::BitSet)
99
+ end
100
+
101
+ # Effective field type
102
+ def fType
103
+ isMapping ? @refType.fromT : @fld.dataType
104
+ end
105
+
106
+ # Read wrapper: for a Mapping, wraps the read expression in the mapping type's constructor; identity otherwise
107
+ def rw
108
+ isMapping ? lambda{|t| "new #{condenseType(@fld.dataType.type, self)}(#{t})"} : lambda{|t| t}
109
+ end
110
+
111
+ =begin rdoc
112
+ Getter name for the current field, if the type is Mapping, includes <tt>.getKey()</tt> too.
113
+ =end
114
+ def valGetter
115
+ "#{DataMetaDom.getterName(@fld)}()" + ( isMapping ? '.getKey()' : '')
116
+ end
117
+ end # RendCtx
118
+
119
+ =begin rdoc
120
+ Builds a class name for a Writable.
121
+ =end
122
+ def writableClassName(baseName); "#{baseName}_Writable" end
123
+
124
+ =begin rdoc
125
+ Builds a class name for an InOutable.
126
+ =end
127
+ def inOutableClassName(baseName); "#{baseName}_InOutable" end
128
+
129
+ def mapsNotSupported(fld)
130
+ raise ArgumentError, "Field #{fld.name}: maps are not currently supported on Hadoop layer"
131
+ end
132
+
133
+ def aggrNotSupported(fld, forWhat)
134
+ raise ArgumentError, "Field #{fld.name}: aggregate types are not supported for #{forWhat} on Hadoop layer"
135
+ end
136
+
137
+ module_function :writableClassName, :inOutableClassName, :mapsNotSupported, :aggrNotSupported
138
+ end
data/lib/dataMetaByteSer/ver_reads.rb ADDED
@@ -0,0 +1,49 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'dataMetaDom/field'
4
+ require 'dataMetaDom/pojo'
+ %w(erb fileutils ostruct).each(&method(:require)) # ERB, FileUtils and OpenStruct are used below
5
+
6
+ module DataMetaByteSer
7
+ =begin rdoc
8
+ Migration tooling.
9
+
10
+ =end
11
+ module VerReads
12
+ include DataMetaDom, DataMetaDom::PojoLexer
13
+ =begin rdoc
14
+ Generates the versioned read switch that channels a read to the proper migration scenario.
15
+ =end
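+ # A hypothetical call, assuming two version values, a version-to-model lookup lambda and the list of all
+ # known versions (all names here are illustrative):
+ #   VerReads.genVerReadSwitch(verA, verB, lambda{|v| modelsByVer[v]}, allVers, 'gen/java')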
16
+ def genVerReadSwitch(v1, v2, modelForVer, vers, outRoot)
17
+ # v1 = mo1.records.values.first.ver.full
18
+ # v2 = mo2.records.values.first.ver.full
19
+ mo1 = modelForVer.call(v1)
20
+ mo2 = modelForVer.call(v2)
21
+ destDir = outRoot
22
+ javaPackage = '' # set the scope for the var
23
+ vars = OpenStruct.new # for template's local variables. ERB does not make them visible to the binding
24
+ # sort the models by versions out, 2nd to be the latest:
25
+ raise ArgumentError, "Versions on the model are the same: #{v1}" if v1 == v2
26
+ if v1 > v2
27
+ model2 = mo1
28
+ model1 = mo2
29
+ ver1 = v2
30
+ ver2 = v1
31
+ else
32
+ model2 = mo2
33
+ model1 = mo1
34
+ ver1 = v1
35
+ ver2 = v2
36
+ end
37
+ puts "Going from ver #{ver1} to #{ver2}"
38
+ trgE = model2.records.values.first
39
+ javaPackage, baseName, packagePath = assertNamespace(trgE.name)
40
+ javaClassName = "Read__Switch_v#{ver1.toVarName}_to_v#{ver2.toVarName}"
41
+ destDir = File.join(outRoot, packagePath)
42
+ FileUtils.mkdir_p destDir
43
+ IO::write(File.join(destDir, "#{javaClassName}.java"),
44
+ ERB.new(IO.read(File.join(File.dirname(__FILE__), '../../tmpl/readSwitch.erb')),
45
+ $SAFE, '%<>').result(binding), mode: 'wb')
46
+ end
47
+ module_function :genVerReadSwitch
48
+ end
49
+ end
data/lib/dataMetaByteSer.rb ADDED
@@ -0,0 +1,391 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ # Definitions for generating byte array (de)serializers: Hadoop Writables and the InOutable classes
4
+ %w(fileutils dataMetaDom dataMetaDom/pojo dataMetaDom/enum dataMetaDom/record dataMetaDom/help).each(&method(:require))
5
+ require 'set'
6
+ require 'dataMetaByteSer/util'
7
+
8
+ =begin rdoc
9
+ Serialization artifacts generation such as Hadoop Writables etc.
10
+
11
+ TODO: this approach works, but better to use templating next time, such as {ERB}[http://ruby-doc.org/stdlib-1.9.3/libdoc/erb/rdoc/ERB.html].
12
+
13
+ For command line details, check either the source of the generator method or the usage section of the README.
14
+ =end
15
+ module DataMetaByteSer
16
+ # Current version
17
+ VERSION = '1.0.0'
18
+ include DataMetaDom, DataMetaDom::PojoLexer
19
+
20
+ =begin rdoc
21
+ HDFS Reader and Writer for textual Java types such as String.
22
+ =end
23
+ TEXT_RW_METHODS = RwHolder.new(
24
+ lambda{|ctx|
25
+ ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}String(in)") : ctx.rw.call('readText(in)')
26
+ },
27
+ lambda{|ctx|
28
+ ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}String(out, val.#{ctx.valGetter})" : "writeTextIfAny(out, val.#{ctx.valGetter})"
29
+ }
30
+ )
31
+
32
+ =begin rdoc
33
+ HDFS Reader and Writer for integral Java types such as Integer or Long.
34
+ =end
35
+ INTEGRAL_RW_METHODS = RwHolder.new(
36
+ lambda{ |ctx|
37
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
38
+ case
39
+ when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Integer(in)") :
40
+ ctx.rw.call('readVInt(in)')
41
+
42
+ when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Long(in)") : ctx.rw.call('readVLong(in)')
43
+
44
+ else; raise "Invalid integer field #{ctx.fld}"
45
+ end
46
+ },
47
+ lambda{ |ctx|
48
+ case
49
+ when ctx.fType.length <= 4; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Integer(out, val.#{ctx.valGetter})" :
50
+ "writeVInt(out, val.#{ctx.valGetter})"
51
+
52
+ when ctx.fType.length <= 8; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Long(out, val.#{ctx.valGetter})" : "writeVLong(out, val.#{ctx.valGetter})"
53
+
54
+ else; raise "Invalid integer field #{ctx.fld}"
55
+ end
56
+ })
57
+
58
+ =begin rdoc
59
+ HDFS Reader and Writer for floating point Java types such as Float or Double.
60
+ =end
61
+ FLOAT_RW_METHODS = RwHolder.new(
62
+ lambda{|ctx|
63
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
64
+ case
65
+ when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Float(in)") : ctx.rw.call('in.readFloat()')
66
+ when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Double(in)") : ctx.rw.call('in.readDouble()')
67
+ else; raise "Invalid float field #{ctx.fld}"
68
+ end
69
+ },
70
+ lambda{|ctx|
71
+ case
72
+ when ctx.fType.length <= 4; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Float(out, val.#{ctx.valGetter})" : "out.writeFloat(val.#{ctx.valGetter})"
73
+ when ctx.fType.length <= 8; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Double(out, val.#{ctx.valGetter})" : "out.writeDouble(val.#{ctx.valGetter})"
74
+ else; raise "Invalid float field #{ctx.fld}"
75
+ end
76
+ })
77
+
78
+ =begin rdoc
79
+ HDFS Reader and Writer for the temporal type, the DateTime
80
+ =end
81
+ DTTM_RW_METHODS = RwHolder.new(
82
+ lambda { |ctx|
83
+ ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}ZonedDateTime(in)") : ctx.rw.call('readDttm(in)')
84
+ },
85
+ lambda { |ctx|
86
+ ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}ZonedDateTime(out, val.#{ctx.valGetter})" : "writeDttm(out, val.#{ctx.valGetter})"
87
+ }
88
+ )
89
+
90
+ =begin rdoc
91
+ HDFS Reader and Writer for boolean Java type.
92
+ =end
93
+ BOOL_RW_METHODS = RwHolder.new(
94
+ lambda { |ctx|
95
+ aggrNotSupported(ctx.fld, 'Booleans') if ctx.fld.aggr
96
+ ctx.rw.call('in.readBoolean()')
97
+ },
98
+ lambda { |ctx|
99
+ aggrNotSupported(ctx.fld, 'Booleans') if ctx.fld.aggr
100
+ "out.writeBoolean(val.#{ctx.valGetter})"
101
+ }
102
+ )
103
+
104
+ =begin rdoc
105
+ HDFS Reader and Writer for the raw data type, the byte array.
106
+ =end
107
+ RAW_RW_METHODS = RwHolder.new(
108
+ lambda { |ctx|
109
+ aggrNotSupported(ctx.fld, 'Raw Data') if ctx.fld.aggr
110
+ ctx.rw.call('readByteArray(in)')
111
+ },
112
+ lambda { |ctx|
113
+ aggrNotSupported(ctx.fld, 'Raw Data') if ctx.fld.aggr
114
+ "writeByteArray(out, val.#{ctx.valGetter})" }
115
+ )
116
+
117
+ =begin rdoc
118
+ HDFS Reader and Writer for the variable-size Decimal data type.
119
+ =end
120
+ NUMERIC_RW_METHODS = RwHolder.new(lambda{|ctx| ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}BigDecimal(in)") : ctx.rw.call('readBigDecimal(in)')},
121
+ lambda{|ctx| "writeBigDecimal(out, val.#{ctx.valGetter})"})
122
+
123
+ =begin rdoc
124
+ HDFS Reader and Writer for the Java Enums.
125
+ =end
126
+ ENUM_RW_METHODS = RwHolder.new(
127
+ lambda{|ctx|
128
+ aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
129
+ "#{condenseType(ctx.fType.type, ctx.pckg)}.forOrd(readVInt(in))"
130
+ },
131
+ lambda { |ctx|
132
+ aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
133
+ "writeVInt(out, val.#{ctx.valGetter}.ordinal())"
134
+ }
135
+ )
136
+
137
+ =begin rdoc
138
+ HDFS Reader and Writer the BitSet.
139
+ =end
140
+ BITSET_RW_METHODS = RwHolder.new(
141
+ lambda { |ctx|
142
+ aggrNotSupported(ctx.fld, 'BitSets') if ctx.fld.aggr
143
+ "new #{condenseType(ctx.fld.dataType, ctx.pckg)}(readLongArray(in))"
144
+ },
145
+ lambda { |ctx|
146
+ aggrNotSupported(ctx.fld, 'BitSets') if ctx.fld.aggr
147
+ "writeBitSet(out, val.#{ctx.valGetter})"
148
+ }
149
+ )
150
+
151
+ =begin rdoc
152
+ HDFS Reader and Writer the URL.
153
+ =end
154
+ URL_RW_METHODS = RwHolder.new(
155
+ lambda { |ctx|
156
+ aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
157
+ 'new java.net.URL(readText(in))'
158
+ },
159
+ lambda { |ctx|
160
+ aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
161
+ "writeTextIfAny(out, val.#{ctx.valGetter}.toExternalForm())"
162
+ }
163
+ )
164
+ =begin rdoc
165
+ Read/write methods for the standard data types.
166
+ =end
167
+ STD_RW_METHODS = {
168
+ INT => INTEGRAL_RW_METHODS,
169
+ STRING => TEXT_RW_METHODS,
170
+ DATETIME => DTTM_RW_METHODS,
171
+ BOOL => BOOL_RW_METHODS,
172
+ CHAR => TEXT_RW_METHODS,
173
+ FLOAT => FLOAT_RW_METHODS,
174
+ RAW => RAW_RW_METHODS,
175
+ NUMERIC => NUMERIC_RW_METHODS,
176
+ URL => URL_RW_METHODS
177
+ }
178
+ # DataMeta DOM object renderer
179
+ RECORD_RW_METHODS = RwHolder.new(
180
+ lambda { |ctx|
181
+ if ctx.fld.aggr
182
+ if ctx.fld.trgType # map
183
+ mapsNotSupported(ctx.fld)
184
+ else # list, set or deque
185
+ "read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}(in, #{
186
+ inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance())"
187
+ end
188
+ else # scalar
189
+ "#{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance().read(in)"
190
+ end
191
+ },
192
+ lambda { |ctx|
193
+ if ctx.fld.aggr && !ctx.fld.trgType
194
+ if ctx.fld.trgType # map
195
+ mapsNotSupported(ctx.fld)
196
+ else # list, set or deque
197
+ "writeCollection(val.#{ctx.valGetter}, out, #{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance())"
198
+ end
199
+ else # scalar
200
+ "#{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance().write(out, val.#{ctx.valGetter})"
201
+ end
202
+ }
203
+ )
204
+
205
+ # Transforms the given DataMeta DOM aggregate type to full pathed Java class name
206
+ def aggrJavaFull(aggr)
207
+ PojoLexer::AGGR_CLASSES[aggr] || (raise ArgumentError, "No Aggregate classes for type #{aggr}" )
208
+ end
209
+
210
+ # Transforms the given full Java name for the aggregate class into base name to interpolate into methods
211
+ def aggrBaseName(aggr)
212
+ /^(\w+\.)+(\w+)$/.match(aggr)[2]
213
+ end
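+ # For example (hypothetical input): aggrBaseName('java.util.ArrayList') yields 'ArrayList', which is then
+ # interpolated into read/write method names by the renderers above.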
214
+ =begin rdoc
215
+ Read/write methods for the DataMeta DOM Maps, accidentally all the same as for the standard data types.
216
+ =end
217
+ MAP_RW_METHODS = STD_RW_METHODS
218
+
219
+ # Build the Read/Write operation renderer for the given context:
220
+ def getRwRenderer(ctx)
221
+ dt = ctx.fld.dataType
222
+ ctx.refType = nil # reset to avoid misrendering primitives
223
+ rwRenderer = STD_RW_METHODS[dt.type]
224
+ return rwRenderer if rwRenderer
225
+ refKey = dt.type
226
+ ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
227
+ case
228
+ when ctx.refType.kind_of?(DataMetaDom::Record)
229
+ RECORD_RW_METHODS
230
+ when ctx.refType.kind_of?(DataMetaDom::Enum)
231
+ ENUM_RW_METHODS
232
+ when ctx.refType.kind_of?(DataMetaDom::BitSet)
233
+ BITSET_RW_METHODS
234
+ when ctx.refType.kind_of?(DataMetaDom::Mapping)
235
+ MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
236
+ ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
237
+ else
238
+ raise "No renderer defined for field #{ctx.fld}"
239
+ end
240
+ end
241
+
242
+ # Temporary/scratch var -- avoiding collisions at all costs
243
+ def tmpVar(name); "#{'_'*3}#{name}#{'_'*3}" end
244
+
245
+ # generates writable via delegation
246
+ def genWritable(model, wriOut, ioOut, record, javaPackage, baseName)
247
+ ctx = RendCtx.new.init(model, record, javaPackage, baseName)
248
+ fields = record.fields
249
+ wriName = writableClassName(baseName)
250
+ ioName = inOutableClassName(baseName)
251
+ # scan for imports needed
252
+ hasOptional = fields.values.map{|f|
253
+ # !model.records[f.dataType.type] &&
254
+ !f.isRequired
255
+ }.reduce(:|) # true if there is at least one optional field which isn't a record
256
+ #fields.values.each { |f|
257
+ # ctx << DataMetaDom::PojoLexer::JAVA_IMPORTS[f.dataType.type]
258
+ #}
259
+
260
+ # field keys (names) in the order of reading/writing to the in/out record
261
+ keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
262
+ reads = ''
263
+ writes = ''
264
+ writeNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(); int fldIndex = -1;' : ''
265
+ readNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(readLongArray(in), false); int fldIndex = -1;' : ''
266
+ indent = "\n#{' ' * 8}"
267
+ # sorting provides predictable read/write order
268
+ keysInOrder.each { |k|
269
+ f = fields[k]
270
+ ctx.fld = f
271
+ rwRenderer = getRwRenderer(ctx)
272
+ # unless ctx.refType.kind_of?(DataMetaDom::Record)
273
+ reads << (
274
+ indent + (f.isRequired ? '' : 'fldIndex++;') + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
275
+ (f.isRequired ? '' : 'nullFlags.get(fldIndex) ? null : ')+
276
+ "#{rwRenderer.r.call(ctx)});"
277
+ )
278
+ # rendering of noReqFld - using the Veryfiable interface instead
279
+ #=begin
280
+ writes << (indent + (f.isRequired ?
281
+ (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
282
+ #%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
283
+ "if(val.#{DataMetaDom.getterName(ctx.fld)}() != null) ") + "#{rwRenderer.w.call(ctx)};")
284
+ unless f.isRequired
285
+ writeNullMaskHead << (indent + "fldIndex++; if(val.#{DataMetaDom.getterName(ctx.fld)}() == null) nullFlags.set(fldIndex);")
286
+ end
287
+ #=end
288
+ # end
289
+ }
290
+ writeNullMaskHead << ( indent + 'writeBitSet(out, nullFlags);') if hasOptional
291
+ ioOut.puts <<IN_OUTABLE_CLASS
292
+ package #{javaPackage};
293
+ import org.ebay.datameta.dom.*;
294
+ import java.io.*;
295
+ import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
296
+ import static org.apache.hadoop.io.WritableUtils.*;
297
+ import org.ebay.datameta.ser.bytes.InOutable;
298
+ #{ctx.importsText}
299
+ #{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{ioName} extends InOutable<#{baseName}> {
300
+
301
+ private static final #{ioName} INSTANCE = new #{ioName}();
302
+ public static #{ioName} getInstance() { return INSTANCE; }
303
+ private #{ioName}() {}
304
+
305
+ @Override public void write(final DataOutput out, final #{baseName} val) throws IOException {
306
+ val.verify();
307
+ #{writeNullMaskHead}
308
+ #{writes}
309
+ }
310
+
311
+ @Override public #{baseName} read(final DataInput in, final #{baseName} val) throws IOException {
312
+ #{readNullMaskHead}
313
+ #{reads}
314
+ return val;
315
+ }
316
+ @Override public #{baseName} read(final DataInput in) throws IOException {
317
+ return read(in, new #{baseName}());
318
+ }
319
+ }
320
+ IN_OUTABLE_CLASS
321
+ wriOut.puts <<WRITABLE_CLASS
322
+ package #{javaPackage};
323
+ import org.apache.hadoop.io.Writable;
324
+ import org.ebay.datameta.dom.*;
325
+ import java.io.*;
326
+ import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
327
+ import static org.apache.hadoop.io.WritableUtils.*;
328
+ import org.ebay.datameta.ser.bytes.HdfsReadWrite;
329
+ #{ctx.importsText}
330
+ #{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{wriName} extends HdfsReadWrite<#{baseName}> {
331
+
332
+ public #{wriName}(final #{baseName} value) {
333
+ super(value);
334
+ }
335
+
336
+ public #{wriName}() {
337
+ super(new #{baseName}()); // the value must be on the instance at all times,
338
+ // for example, when used with hadoop fs -text, this class will be used with default constructor
339
+ }
340
+
341
+ @Override public void write(final DataOutput out) throws IOException {
342
+ #{ioName}.getInstance().write(out, getVal());
343
+ }
344
+
345
+ @Override public void readFields(final DataInput in) throws IOException {
346
+ #{ioName}.getInstance().read(in, getVal());
347
+ }
348
+ }
349
+ WRITABLE_CLASS
350
+
351
+ ########assertValue();
352
+ end
353
+
354
+ =begin rdoc
355
+ Generates all the writables for the given model.
356
+ Parameters:
357
+ * +model+ - the model to generate Writables from.
358
+ * +outRoot+ - destination directory name.
359
+ =end
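+ # A hypothetical invocation, equivalent to what bin/dataMetaByteSerGen.rb does (the model file name is an example):
+ #   model = DataMetaDom::Model.new
+ #   model.parse('example.dmDom')
+ #   DataMetaByteSer.genWritables(model, 'gen/java')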
360
+ def genWritables(model, outRoot)
361
+ model.records.values.each { |e|
362
+ javaPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(e.name)
363
+ destDir = File.join(outRoot, packagePath)
364
+ FileUtils.mkdir_p destDir
365
+ wriOut = File.open(File.join(destDir, "#{writableClassName(base)}.java"), 'wb')
366
+ ioOut = File.open(File.join(destDir, "#{inOutableClassName(base)}.java"), 'wb')
367
+ begin
368
+ case
369
+ when e.kind_of?(DataMetaDom::Record)
370
+ genWritable model, wriOut, ioOut, e, javaPackage, base
371
+ else
372
+ raise "Unsupported Entity: #{e.inspect}"
373
+ end
374
+ ensure
375
+ begin
376
+ ioOut.close
377
+ ensure
378
+ wriOut.close
379
+ end
380
+ end
381
+ }
382
+ end
383
+
384
+ # Shortcut to help for the Hadoop Writables generator.
385
+ def helpDataMetaBytesSerGen(file, errorText=nil)
386
+ DataMetaDom::help(file, 'DataMeta Serialization to/from Bytes', '<DataMeta DOM source> <Target Directory>', errorText)
387
+ end
388
+
389
+ module_function :helpDataMetaBytesSerGen, :genWritables, :genWritable, :getRwRenderer,
390
+ :aggrBaseName, :aggrJavaFull
391
+ end
data/test/test_dataMetaByteSer.rb ADDED
@@ -0,0 +1,17 @@
1
+ # keep this underscore naming in the test subdir, it's easier to append file names to test
2
+ require './test/test_helper.rb'
3
+
4
+ # Unit test cases for the DataMetaByteSer
5
+ # See for instance:
6
+ # - test_full
7
+ class TestNewGem < Test::Unit::TestCase
8
+
9
+ # an empty stub for now
10
+ def setup;
11
+ end
12
+
13
+ # stub
14
+ def test_true
15
+ assert_equal('a', "a")
16
+ end
17
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,4 @@
1
+ ## keep this underscore naming in the test subdir, it's easier to append file names to test
2
+ %w(stringio test/unit).each { |r| require r }
3
+ # this is expected to run from the project root, normally by the rake file
4
+ require './lib/dataMetaByteSer'
data/tmpl/readSwitch.erb ADDED
@@ -0,0 +1,63 @@
1
+ <%#
2
+ Template for Java migration guides
3
+ %>
4
+ package <%=javaPackage%>;
5
+ /*
6
+ This class is generated by DataMeta DOM. Do not edit manually!
7
+ */
8
+ import org.ebay.datameta.ser.bytes.InOutable;
9
+ import org.ebay.datameta.util.jdk.SemanticVersion;
10
+
11
+ import java.io.DataInput;
12
+ import java.io.IOException;
13
+
14
+ public class <%=javaClassName%> {
15
+
16
+ private static final <%=javaClassName%> INSTANCE = new <%=javaClassName%>();
17
+
18
+ public static <%=javaClassName%> getInstance() { return INSTANCE; }
19
+
20
+ <% model2.records.values.each { |trgE|
21
+ vars.versCases = vers.clone.select{|v| v < ver2}.sort{|x, y| y<=>x}
22
+ vars.javaPackage, vars.baseName, vars.packagePath = assertNamespace(trgE.name)
23
+ srcRecName = flipVer(trgE.name, ver2.toVarName, ver1.toVarName)
24
+ srcE = model1.records[srcRecName]
25
+ if srcE
26
+ %>
27
+ public <%= vars.baseName %> read_<%= vars.baseName %>_versioned(final DataInput in) throws IOException {
28
+ final SemanticVersion ver = InOutable.readVersion(in);
29
+ if(ver.equals(<%=vars.baseName%>.VERSION)) {
30
+ return <%=vars.baseName%>_InOutable.getInstance().read(in);
31
+ <%
32
+ while vars.versCases.length > 1 # loop through the case statement - a version per each
33
+ vars.switchTargVer = vars.versCases.shift
34
+ vars.brackets = ''
35
+ caseObjName = flipVer(trgE.name, ver2.toVarName, vars.switchTargVer.toVarName)
36
+ caseMod = modelForVer.call(vars.switchTargVer)
37
+ next unless caseMod.records.keys.member?(caseObjName.to_sym) # skip cases for the versions where this object's target version does not exist
38
+ %>
39
+ }
40
+ else if(ver.equals(<%=caseObjName%>.VERSION)){<% vars.versMigr = vers.clone.select{|v| v <= ver2}.sort{|x, y| y<=>x}%>
41
+ return <% while vars.versMigr.length > 1 # migration steps loop nested in the case statement loop
42
+ vars.brackets << ')'
43
+ vars.migrTargVer = vars.versMigr.shift # target version for migration loop
44
+ vars.srcVer = vars.versMigr[0]
45
+ vars.srcType = flipVer(trgE.name, ver2.toVarName, vars.srcVer.toVarName)
46
+ migrMod = modelForVer.call(vars.srcVer)
47
+ break unless migrMod.records.keys.member?(vars.srcType.to_sym) # enough if there is no record in the target version
48
+ vars.jpMigr = vars.javaPackage.gsub(".v#{ver2.toVarName}", ".v#{vars.migrTargVer.toVarName}")
49
+ %>
50
+ // substituting in <%=vars.javaPackage%>: ".v<%=ver2.toVarName%>" with ".v<%=vars.migrTargVer.toVarName%>"
51
+ <%=vars.jpMigr%>.<%=migrClass(vars.baseName, vars.srcVer, vars.migrTargVer)%>.getInstance().migrate(<% break if vars.srcVer <= vars.switchTargVer
52
+ end # migration steps loop %>
53
+
54
+ <%=vars.srcType%>_InOutable.getInstance().read(in)
55
+ <%= vars.brackets %>;
56
+ <% end %>
57
+ }
58
+ else throw new IllegalArgumentException("Unsupported version for the record <%=vars.baseName%>: " + ver);
59
+ }
60
+ <% end
61
+ } # records loop %>
62
+ }
63
+
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dataMetaByteSer
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Michael Bergens
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-01-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: dataMetaDom
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.0'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.0.0
33
+ description: Generates serializers of DataMeta objects to/from byte arrays, which
34
+ can be used with Hadoop, BigTable and beyond.
35
+ email: michael.bergens@gmail.com
36
+ executables:
37
+ - dataMetaByteSerGen.rb
38
+ extensions: []
39
+ extra_rdoc_files: []
40
+ files:
41
+ - ".yardopts"
42
+ - History.md
43
+ - PostInstall.txt
44
+ - README.md
45
+ - Rakefile
46
+ - bin/dataMetaByteSerGen.rb
47
+ - lib/dataMetaByteSer.rb
48
+ - lib/dataMetaByteSer/python.rb
49
+ - lib/dataMetaByteSer/util.rb
50
+ - lib/dataMetaByteSer/ver_reads.rb
51
+ - test/test_dataMetaByteSer.rb
52
+ - test/test_helper.rb
53
+ - tmpl/readSwitch.erb
54
+ homepage: https://github.com/eBayDataMeta
55
+ licenses:
56
+ - Apache-2.0
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.1.1
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements:
73
+ - Hadoop libraries
74
+ rubyforge_project:
75
+ rubygems_version: 2.5.1
76
+ signing_key:
77
+ specification_version: 4
78
+ summary: DataMeta Byte Array Serializers Gen
79
+ test_files:
80
+ - test/test_dataMetaByteSer.rb