dataMetaByteSer 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4d5201ed98a82b4da2ca29379d7318cb3607e920
4
+ data.tar.gz: edb71acaf79c4111d1a703c9a7044c9a78019fb8
5
+ SHA512:
6
+ metadata.gz: 4d9edb7888e536f006becb9ce5612ef7573798afb5f136e0b36745021caaebc8c1cfad455815e6c1269a25763f45590a0133200ffc3121ababe839791e7cae7d
7
+ data.tar.gz: a5358415079756f869e417d783f069ebdf60aafe46c8fb449dd9014af7ef269ff1f683723f9784f3759b83d79c848aa1ab0ff6303675c1b8fa55f647d2c99639
data/.yardopts ADDED
@@ -0,0 +1 @@
1
+ --title "DataMeta Bytes (de)serialization" -r README.md --charset UTF-8 lib/**/* - README.md
data/History.md ADDED
@@ -0,0 +1,5 @@
1
+ # `dataMetaByteSer` Release history:
2
+
3
+ ## `1.0.0` released `2012-12-17`
4
+ * 1 major enhancement:
5
+ * Initial release
data/PostInstall.txt ADDED
@@ -0,0 +1 @@
1
+ No special steps
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # `dataMetaByteSer` gem
2
+
3
+ Generates byte array (de)serializers from [DataMeta DOM](https://github.com/eBayDataMeta/DataMeta-gems) sources.
4
+
5
+ References for this gem:
6
+
7
+ * [Source](https://github.com/eBayDataMeta/DataMeta-gems)
8
+
9
+
10
+ ## DESCRIPTION:
11
+
12
+ See the [DataMeta Project](https://github.com/eBayDataMeta/DataMeta)
13
+
14
+ ## FEATURES:
15
+
16
+ Generates (de)serializers to/from byte arrays, along with matching Hadoop Writables. Output is optimized for storage size
17
+ first and runtime performance second; both aspects benchmark near the best in class.
18
+
19
+ ## SYNOPSIS:
20
+
21
+ To generate Byte Array serializers in Java, including Hadoop Writables for the DataMeta model, run:
22
+
23
+ dataMetaByteSerGen.rb <DataMeta DOM source> <Target Directory>
24
+
25
+ ## REQUIREMENTS:
26
+
27
+ * No special requirements
28
+
29
+ ## INSTALL:
30
+
31
+ gem install dataMetaByteSer
32
+
33
+ ## LICENSE:
34
+
35
+ [Apache v 2.0](https://github.com/eBayDataMeta/DataMeta/blob/master/LICENSE.md)
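
To make the SYNOPSIS above concrete, here is an example invocation with hypothetical paths (the model source and the output directory are placeholders, not files shipped with the gem):

    dataMetaByteSerGen.rb ./model/example.dmDom ./target/gen-java
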
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ %w(yard rdoc/task rake/testtask fileutils ./lib/dataMetaByteSer).each{ |r| require r}
2
+
3
+ Rake::TestTask.new do |t|
4
+ t.libs << 'test'
5
+ end
6
+
7
+ desc 'Regen RDocs'
8
+ task :default => :docs
9
+
10
+ YARD::Rake::YardocTask.new('docs') {|r|
11
+ r.stats_options = ['--list-undoc']
12
+ }
13
+
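
The Rakefile above wires the default task to YARD documentation generation and defines a Test::Unit task. Assuming rake, yard, and test-unit are installed, typical invocations are:

    rake        # default task: regenerate the YARD docs
    rake test   # run the unit tests under test/
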
data/bin/dataMetaByteSerGen.rb ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ %w( dataMetaDom dataMetaByteSer ).each(&method(:require))
3
+
4
+ @source, @target = ARGV
5
+ DataMetaByteSer::helpDataMetaBytesSerGen __FILE__ unless @source && @target
6
+ DataMetaByteSer::helpDataMetaBytesSerGen(__FILE__, "DataMeta DOM source #{@source} is not a file") unless File.file?(@source)
7
+ DataMetaByteSer::helpDataMetaBytesSerGen(__FILE__, "Writables destination directory #{@target} is not a dir") unless File.directory?(@target)
8
+
9
+ @parser = DataMetaDom::Model.new
10
+ begin
11
+ @parser.parse(@source)
12
+ DataMetaByteSer::genWritables(@parser, @target)
13
+ rescue Exception => e
14
+ $stderr.puts "ERROR #{e.message}; #{@parser.diagn}"
15
+ $stderr.puts e.backtrace.inspect
16
+ end
data/lib/dataMetaByteSer/python.rb ADDED
@@ -0,0 +1,387 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'fileutils'
4
+ require 'dataMetaDom'
5
+ require 'dataMetaDom/util'
6
+ require 'dataMetaDom/python'
7
+ require 'dataMetaByteSer/util'
8
+
9
+ module DataMetaByteSer
10
+ # (De)Serialization for Python
11
+ module Py
12
+ include DataMetaDom, DataMetaDom::PythonLexer, DataMetaByteSer
13
+ =begin rdoc
14
+ Builds a class name for a Writable.
15
+ =end
16
+ def writableClassName(baseName); "#{baseName}_Writable" end
17
+ =begin rdoc
18
+ Builds a class name for an InOutable.
19
+ =end
20
+ def inOutablePy(arg)
21
+ klassName = case
22
+ when arg.kind_of?(String)
23
+ arg
24
+ else
25
+ _, s = DataMetaDom.splitNameSpace(arg.fType.type)
26
+ s
27
+ end
28
+ "#{klassName}_InOutable"
29
+ end
30
+
31
+ def mapsNotSupported(fld)
32
+ raise ArgumentError, "Field #{fld.name}: maps are not currently supported on Hadoop layer"
33
+ end
34
+
35
+ def aggrNotSupported(fld, forWhat)
36
+ raise ArgumentError, "Field #{fld.name}: aggregate types are not supported for #{forWhat} on Hadoop layer"
37
+ end
38
+
39
+ =begin rdoc
40
+ HDFS Reader and Writer for textual Python types such as str.
41
+ =end
42
+ TEXT_RW_METHODS = DataMetaByteSer::RwHolder.new(
43
+ lambda{|ctx|
44
+ ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}String(di)") : ctx.rw.call('DataMetaHadoopUtil.readText(di)')
45
+ },
46
+ lambda{|ctx|
47
+ ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}String(do, val.#{ctx.valGetter})" : "DataMetaHadoopUtil.writeTextIfAny(do, val.#{ctx.valGetter})"
48
+ }
49
+ )
50
+
51
+ =begin rdoc
52
+ HDFS Reader and Writer for integral Python types.
53
+ =end
54
+ INTEGRAL_RW_METHODS = RwHolder.new(
55
+ lambda{ |ctx|
56
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
57
+ case
58
+ when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Integer(di)") :
59
+ ctx.rw.call('WritableUtils.readVInt(di)')
60
+
61
+ when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Long(di)") : ctx.rw.call('WritableUtils.readVLong(di)')
62
+
63
+ else; raise "Invalid integer field #{ctx.fld}"
64
+ end
65
+ },
66
+ lambda{ |ctx|
67
+ case
68
+ when ctx.fType.length <= 4; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Integer(do, val.#{ctx.valGetter})" :
69
+ "WritableUtils.writeVInt(do, val.#{ctx.valGetter})"
70
+
71
+ when ctx.fType.length <= 8; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Long(do, val.#{ctx.valGetter})" : "WritableUtils.writeVLong(do, val.#{ctx.valGetter})"
72
+
73
+ else; raise "Invalid integer field #{ctx.fld}"
74
+ end
75
+ })
76
+
77
+ =begin rdoc
78
+ HDFS Reader and Writer for Booleans.
79
+ =end
80
+ BOOLEAN_RW_METHODS = RwHolder.new(
81
+ lambda{|ctx|
82
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
83
+ ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Boolean(di)") : ctx.rw.call('di.readBoolean()')
84
+ },
85
+ lambda{|ctx|
86
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
87
+ ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Boolean(do, val.#{ctx.valGetter})" : "do.writeBoolean(val.#{ctx.valGetter})"
88
+ })
89
+
90
+ # Python has no primitivable types
91
+ PRIMITIVABLE_TYPES = Set.new
92
+
93
+ =begin rdoc
94
+ HDFS Reader and Writer for floating point types.
95
+ =end
96
+ FLOAT_RW_METHODS = RwHolder.new(
97
+ lambda{|ctx|
98
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
99
+ case
100
+ when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Float(di)") : ctx.rw.call('di.readFloat()')
101
+ when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Double(di)") : ctx.rw.call('di.readDouble()')
102
+ else; raise "Invalid float field #{ctx.fld}"
103
+ end
104
+ },
105
+ lambda{|ctx|
106
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
107
+ case
108
+ when ctx.fType.length <= 4; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Float(do, val.#{ctx.valGetter})" : "do.writeFloat(val.#{ctx.valGetter})"
109
+ when ctx.fType.length <= 8; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Double(do, val.#{ctx.valGetter})" : "do.writeDouble(val.#{ctx.valGetter})"
110
+ else; raise "Invalid float field #{ctx.fld}"
111
+ end
112
+ })
113
+
114
+ =begin rdoc
115
+ HDFS Reader and Writer for the temporal type, the DateTime
116
+ =end
117
+ DTTM_RW_METHODS = RwHolder.new(
118
+ lambda { |ctx|
119
+ ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}DateTime(di)") : ctx.rw.call('DataMetaHadoopUtil.readDttm(di)')
120
+ },
121
+ lambda { |ctx|
122
+ ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}DateTime(do, val.#{ctx.valGetter})" : "DataMetaHadoopUtil.writeDttm(do, val.#{ctx.valGetter})"
123
+ }
124
+ )
125
+ =begin rdoc
126
+ HDFS Reader and Writer for the variable-size Decimal data type.
127
+ =end
128
+ NUMERIC_RW_METHODS = RwHolder.new(lambda{|ctx| ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}BigDecimal(di)") : ctx.rw.call('DataMetaHadoopUtil.readBigDecimal(di)')},
129
+ lambda{|ctx| "DataMetaHadoopUtil.writeBigDecimal(do, val.#{ctx.valGetter})"})
130
+
131
+ # Full name of a Py aggregate for the given DataMeta DOM aggregate
132
+ def aggrPyFull(aggr)
133
+ case aggr
134
+ when DataMetaDom::Field::LIST
135
+ 'List'
136
+ when DataMetaDom::Field::SET
137
+ 'Set'
138
+ when DataMetaDom::Field::DEQUE
139
+ 'Deque' # note this is different from Java
140
+ else
141
+ raise ArgumentError, "Aggregate type #{aggr} not supported for Python serialization"
142
+ end
143
+ end
144
+
145
+ =begin rdoc
146
+ HDFS Reader and Writer for Enums.
147
+ =end
148
+ ENUM_RW_METHODS = RwHolder.new(
149
+ lambda{|ctx|
150
+ aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
151
+ _, s = DataMetaDom.splitNameSpace(ctx.fType.type)
152
+ "#{s}(WritableUtils.readVInt(di) + 1)" # Python starts their enums from 1 - we save it starting from 0
153
+ # as Java and Scala do
154
+ },
155
+ lambda { |ctx|
156
+ aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
157
+ # Python enums start at 1; we store them 0-based, as Java and Scala do
158
+ "WritableUtils.writeVInt(do, val.#{ctx.valGetter}.value - 1)"
159
+ }
160
+ )
161
+ =begin rdoc
162
+ HDFS Reader and Writer for the URL.
163
+ =end
164
+ URL_RW_METHODS = RwHolder.new(
165
+ lambda { |ctx|
166
+ aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
167
+ 'DataMetaHadoopUtil.readText(di)'
168
+ },
169
+ lambda { |ctx|
170
+ aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
171
+ "DataMetaHadoopUtil.writeTextIfAny(do, val.#{ctx.valGetter})"
172
+ }
173
+ )
174
+ # Pseudo-implementers that just raise an error
175
+ NOT_IMPLEMENTED_METHODS = RwHolder.new(
176
+ lambda { |ctx|
177
+ aggrNotSupported(ctx.fld, 'Serialization')
178
+ },
179
+ lambda { |ctx|
180
+ aggrNotSupported(ctx.fld, 'Serialization')
181
+ }
182
+ )
183
+ =begin rdoc
184
+ Read/write methods for the standard data types.
185
+ =end
186
+ STD_RW_METHODS = {
187
+ DataMetaDom::INT => INTEGRAL_RW_METHODS,
188
+ DataMetaDom::STRING => TEXT_RW_METHODS,
189
+ DataMetaDom::DATETIME => DTTM_RW_METHODS,
190
+ DataMetaDom::BOOL => BOOLEAN_RW_METHODS,
191
+ DataMetaDom::CHAR => TEXT_RW_METHODS,
192
+ DataMetaDom::FLOAT => FLOAT_RW_METHODS,
193
+ DataMetaDom::RAW => NOT_IMPLEMENTED_METHODS,
194
+ DataMetaDom::NUMERIC => NUMERIC_RW_METHODS,
195
+ DataMetaDom::URL => URL_RW_METHODS
196
+ }
197
+ # DataMeta DOM object renderer
198
+ RECORD_RW_METHODS = RwHolder.new(
199
+ lambda { |ctx|
200
+ if ctx.fld.aggr
201
+ if ctx.fld.trgType # map
202
+ mapsNotSupported(ctx.fld)
203
+ else # list, set or deque
204
+ "DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}(di, #{
205
+ inOutablePy(ctx)}())"
206
+ end
207
+ else # scalar
208
+ "#{inOutablePy(ctx)}().read(di)"
209
+ end
210
+ },
211
+ lambda { |ctx|
212
+ if ctx.fld.aggr && !ctx.fld.trgType
213
+ if ctx.fld.trgType # map
214
+ mapsNotSupported(ctx.fld)
215
+ else # list, set or deque
216
+ "DataMetaHadoopUtil.writeCollection(val.#{ctx.valGetter}, do, #{inOutablePy(ctx)}())"
217
+ end
218
+ else # scalar
219
+ "#{inOutablePy(ctx)}().write(do, val.#{ctx.valGetter})"
220
+ end
221
+ }
222
+ )
223
+ =begin rdoc
224
+ Read/write methods for the DataMeta DOM Maps, which happen to be the same as for the standard data types.
225
+ =end
226
+ MAP_RW_METHODS = STD_RW_METHODS
227
+
228
+ # Build the Read/Write operation renderer for the given context:
229
+ def getRwRenderer(ctx)
230
+ dt = ctx.fld.dataType
231
+ ctx.refType = nil # reset to avoid misrendering primitives
232
+ rwRenderer = STD_RW_METHODS[dt.type]
233
+ return rwRenderer if rwRenderer
234
+ refKey = dt.type
235
+ ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
236
+ case
237
+ when ctx.refType.kind_of?(DataMetaDom::Record)
238
+ RECORD_RW_METHODS
239
+ when ctx.refType.kind_of?(DataMetaDom::Enum)
240
+ ENUM_RW_METHODS
241
+ when ctx.refType.kind_of?(DataMetaDom::BitSet)
242
+ NOT_IMPLEMENTED_METHODS
243
+ when ctx.refType.kind_of?(DataMetaDom::Mapping)
244
+ MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
245
+ ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
246
+ else
247
+ raise "No renderer defined for field #{ctx.fld}"
248
+ end
249
+ end
250
+
251
+ # Generates one InOutable; Writables are not currently generated here
252
+ def genWritable(model, wriOut, ioOut, record, pyPackage, baseName)
253
+ enumCount = model.enums.values.select{|e| e.kind_of?(DataMetaDom::Enum)}.size
254
+ recImports = model.records.values.map{|r| # import all records
255
+ p, b, pp = DataMetaDom::PojoLexer::assertNamespace(r.name)
256
+ %|from #{DataMetaXtra::Str.downCaseFirst(b)} import #{b}|
257
+ }.join("\n")
258
+ # ioImports = model.records.values.reject{|r| r.name == record.name}.map{|r| # import all InOutables except of this one
259
+ # p, b, pp = DataMetaDom::PojoLexer::assertNamespace(r.name)
260
+ # # since one InOutable may import another which may import another, and Python can't handle this,
261
+ # # catch the error. It's harmless because if it really failed to import, we'll know
262
+ # %|
263
+ # try:
264
+ # from #{inOutablePy(DataMetaXtra::Str.downCaseFirst(b))} import #{inOutablePy(b)}
265
+ # except ImportError:
266
+ # pass|
267
+ # }.join("\n")
268
+ ctx = RendCtx.new.init(model, record, pyPackage, baseName)
269
+ fields = record.fields
270
+ wriName = nil # writableClassName(baseName)
271
+ ioName = inOutablePy(baseName)
272
+ hasOptional = fields.values.map{|f|
273
+ # !model.records[f.dataType.type] &&
274
+ !f.isRequired
275
+ }.reduce(:|) # true if there is at least one optional field which isn't a record
276
+ keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
277
+ reads = ''
278
+ writes = ''
279
+ writeNullMaskHead = hasOptional ? "nullFlags = bitarray(#{fields.keys.size}); nullFlags.setall(False); fldIndex = -1" : ''
280
+ readNullMaskHead = hasOptional ? 'nullFlags = DataMetaHadoopUtil.readBitArray(di); fldIndex = -1' : ''
281
+ indent = "\n#{' ' * 8}"
282
+ # sorting provides predictable read/write order
283
+ keysInOrder.each { |k|
284
+ f = fields[k]
285
+ ctx.fld = f
286
+ rwRenderer = getRwRenderer(ctx)
287
+ reads << ( indent + (f.isRequired ? '' : "fldIndex += 1#{indent}") + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
288
+ (f.isRequired ? '' : ' None if nullFlags[fldIndex] else ')+ "#{rwRenderer.r.call(ctx)})"
289
+ )
290
+ # noinspection RubyNestedTernaryOperatorsInspection
291
+ writes << (indent + (f.isRequired ?
292
+ (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
293
+ #%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
294
+ "if(val.#{DataMetaDom.getterName(ctx.fld)}() is not None): ") + "#{rwRenderer.w.call(ctx)}")
295
+ unless f.isRequired
296
+ writeNullMaskHead << (indent + "fldIndex += 1#{indent}if(val.#{DataMetaDom.getterName(ctx.fld)}() is None): nullFlags[fldIndex] = True")
297
+ end
298
+ }
299
+ writeNullMaskHead << ( indent + 'DataMetaHadoopUtil.writeBitArray(do, nullFlags)') if hasOptional
300
+
301
+ ioOut.puts <<IN_OUTABLE_CLASS
302
+
303
+ class #{ioName}(InOutable):
304
+
305
+ def write(self, do, val):
306
+ val.verify()
307
+ #{writeNullMaskHead}
308
+ #{writes}
309
+
310
+ def readVal(self, di, val):
311
+ #{readNullMaskHead}
312
+ #{reads}
313
+ return val
314
+
315
+ def read(self, di):
316
+ return self.readVal(di, #{baseName}())
317
+
318
+ IN_OUTABLE_CLASS
319
+ end
320
+
321
+ =begin rdoc
322
+ Generates all the writables for the given model.
323
+ Parameters:
324
+ * +model+ - the model to generate Writables from.
325
+ * +outRoot+ - destination directory name.
326
+ =end
327
+ def genWritables(model, outRoot)
328
+ firstRecord = model.records.values.first
329
+ pyPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(firstRecord.name)
330
+ # Next: replace dots with underscores. The path is also adjusted accordingly.
331
+ #
332
+ # Rationale for this, quoting PEP 8:
333
+ #
334
+ # Package and Module Names
335
+ #
336
+ # Modules should have short, all-lowercase names. Underscores can be used in the module name if it improves
337
+ # readability. Python packages should also have short, all-lowercase names, although the use of underscores
338
+ # is discouraged.
339
+ #
340
+ # So: short, all-lowercase names are required, and using underscores to improve readability when you have a complex system and need long package names
341
+ # is "discouraged". Can't do this here, our system is more complicated for strictly religous, "pythonic" Python.
342
+ # A tool must be enabling, and in this case, this irrational ruling gets in the way.
343
+ # And dots are a no-no: with complicated package structures and imports, Python can't find the packages.
344
+ #
345
+ # Hence, we opt for long package names with underscores for distinctiveness and readability:
346
+ pyPackage = pyPackage.gsub('.', '_')
347
+ packagePath = packagePath.gsub('/', '_')
348
+ destDir = File.join(outRoot, packagePath)
349
+ FileUtils.mkdir_p destDir
350
+ wriOut = nil # File.open(File.join(destDir, "#{writableClassName(base)}.py"), 'wb')
351
+ serFile = File.join(destDir, 'serial.py')
352
+ FileUtils.rm serFile if File.file?(serFile)
353
+ ioOut = File.open(serFile, 'wb') # one huge serialization file
354
+ ioOut.puts %|# This file is generated by DataMeta DOM. Do not edit manually!
355
+ #package #{pyPackage}
356
+
357
+ from hadoop.io import WritableUtils, InputStream, OutputStream, Text
358
+ from ebay_datameta_core.base import DateTime
359
+ from decimal import *
360
+ from collections import *
361
+ from bitarray import bitarray
362
+ from ebay_datameta_hadoop.base import *
363
+ from model import *
364
+
365
+ |
366
+ begin
367
+ model.records.values.each { |e|
368
+ _, base, _ = DataMetaDom::PojoLexer::assertNamespace(e.name)
369
+ case
370
+ when e.kind_of?(DataMetaDom::Record)
371
+ genWritable model, wriOut, ioOut, e, pyPackage, base
372
+ else
373
+ raise "Unsupported Entity: #{e.inspect}"
374
+ end
375
+ }
376
+ ensure
377
+ begin
378
+ ioOut.close
379
+ ensure
380
+ #wriOut.close
381
+ end
382
+ end
383
+ end
384
+ module_function :genWritables, :genWritable, :inOutablePy, :writableClassName, :mapsNotSupported,
385
+ :aggrNotSupported, :getRwRenderer, :aggrPyFull
386
+ end
387
+ end
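
Both the Python generator above and the Java generator later in this diff emit a bit mask of null flags for optional fields, written before the field values themselves (see writeNullMaskHead / readNullMaskHead). A minimal plain-Ruby sketch of that idea, with a hypothetical record rather than the gem's API:

    # Hypothetical record with one optional field left unset.
    values   = { name: 'Ada', nick: nil, age: 36 }
    optional = [:nick]                               # only optional fields get a null-flag bit
    null_flags = optional.map { |k| values[k].nil? } # => [true]
    # On write: the flags go out first, then every non-nil value in sorted field order.
    # On read: the flags are read first; a set bit tells the reader to leave that field nil.
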
data/lib/dataMetaByteSer/util.rb ADDED
@@ -0,0 +1,138 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'set'
4
+ require 'logger'
5
+
6
+ module DataMetaByteSer
7
+
8
+ =begin rdoc
9
+ A holder for a read renderer and a write renderer, those come in pairs that have to be consistent so the
10
+ data is read and written uniformly.
11
+ =end
12
+ class RwHolder
13
+ =begin rdoc
14
+ Read renderer.
15
+ =end
16
+ attr_reader :r
17
+ =begin rdoc
18
+ Write renderer.
19
+ =end
20
+ attr_reader :w
21
+ =begin rdoc
22
+ Creates a new HDFS Reade and Write renderers pair.
23
+ =end
24
+ def initialize(readRenderer, writeRenderer); @r = readRenderer; @w = writeRenderer end
25
+ end
26
+
27
+ =begin rdoc
28
+ Rendering context with rendering-related properties and settings.
29
+ =end
30
+ class RendCtx
31
+
32
+ =begin rdoc
33
+ DataMeta DOM Model on the context.
34
+ =end
35
+ attr_accessor :model
36
+ =begin rdoc
37
+ Record currently worked on.
38
+ =end
39
+ attr_accessor :rec
40
+
41
+ =begin rdoc
42
+ Set of imports if any, each as symbol.
43
+ =end
44
+ attr_accessor :imps
45
+
46
+ =begin rdoc
47
+ Java package.
48
+ =end
49
+ attr_accessor :pckg
50
+ =begin rdoc
51
+ Base name of the type, without a namespace.
52
+ =end
53
+ attr_accessor :baseName
54
+ =begin rdoc
55
+ The data type of the entity on the context.
56
+ =end
57
+ attr_accessor :refType
58
+ =begin rdoc
59
+ Field currently on the context.
60
+ =end
61
+ attr_reader :fld
62
+
63
+ =begin rdoc
64
+ Creates a new context.
65
+ =end
66
+ def initialize; @imps = Set.new end
67
+
68
+ =begin rdoc
69
+ Setter for the field on the context, the field currently worked on.
70
+ =end
71
+ def fld=(val); @fld = val end
72
+
73
+ =begin rdoc
74
+ Initialize the context with the model, the record, the package and the basename.
75
+ Returns self for call chaining.
76
+ =end
77
+ def init(model, rec, pckg, baseName); @model = model; @rec = rec; @pckg = pckg; @baseName = baseName; self end
78
+
79
+ =begin rdoc
80
+ Add an import to the context, returns self for call chaining.
81
+ =end
82
+ def <<(import)
83
+ @imps << import.to_sym if import
84
+ self
85
+ end
86
+
87
+ =begin rdoc
88
+ Formats imports into Java source, sorted.
89
+ =end
90
+ def importsText
91
+ @imps.to_a.map{|k| "import #{k};"}.sort.join("\n")
92
+ end
93
+
94
+ =begin rdoc
95
+ Determines if the refType is a DataMetaDom::Mapping.
96
+ =end
97
+ def isMapping
98
+ @refType.kind_of?(DataMetaDom::Mapping) && !@refType.kind_of?(DataMetaDom::BitSet)
99
+ end
100
+
101
+ # Effective field type
102
+ def fType
103
+ isMapping ? @refType.fromT : @fld.dataType
104
+ end
105
+
106
+ # Readwrap
107
+ def rw
108
+ isMapping ? lambda{|t| "new #{condenseType(@fld.dataType.type, self)}(#{t})"} : lambda{|t| t}
109
+ end
110
+
111
+ =begin rdoc
112
+ Getter name for the current field, if the type is Mapping, includes <tt>.getKey()</tt> too.
113
+ =end
114
+ def valGetter
115
+ "#{DataMetaDom.getterName(@fld)}()" + ( isMapping ? '.getKey()' : '')
116
+ end
117
+ end # RendCtx
118
+
119
+ =begin rdoc
120
+ Builds a class name for a Writable.
121
+ =end
122
+ def writableClassName(baseName); "#{baseName}_Writable" end
123
+
124
+ =begin rdoc
125
+ Builds a class name for a InOutable.
126
+ =end
127
+ def inOutableClassName(baseName); "#{baseName}_InOutable" end
128
+
129
+ def mapsNotSupported(fld)
130
+ raise ArgumentError, "Field #{fld.name}: maps are not currently supported on Hadoop layer"
131
+ end
132
+
133
+ def aggrNotSupported(fld, forWhat)
134
+ raise ArgumentError, "Field #{fld.name}: aggregate types are not supported for #{forWhat} on Hadoop layer"
135
+ end
136
+
137
+ module_function :writableClassName, :inOutableClassName, :mapsNotSupported, :aggrNotSupported
138
+ end
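
The RwHolder / RendCtx pair above drives every type-specific renderer: a holder carries a read lambda and a write lambda, each handed a rendering context and returning a source-code snippet. A minimal standalone sketch of that pattern, where RwPair, StubCtx and the rendered strings are illustrative stand-ins rather than the gem's API:

    # A read/write renderer pair plus a stub context, mirroring RwHolder/RendCtx usage.
    RwPair  = Struct.new(:r, :w)      # stand-in for DataMetaByteSer::RwHolder
    StubCtx = Struct.new(:valGetter)  # stand-in for the rendering context

    text_rw = RwPair.new(
      ->(ctx) { 'readText(in)' },                              # read renderer: snippet for reading
      ->(ctx) { "writeTextIfAny(out, val.#{ctx.valGetter})" }  # write renderer: snippet for writing
    )

    ctx = StubCtx.new('getName()')
    puts text_rw.r.call(ctx)  # => readText(in)
    puts text_rw.w.call(ctx)  # => writeTextIfAny(out, val.getName())
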
data/lib/dataMetaByteSer/ver_reads.rb ADDED
@@ -0,0 +1,49 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'dataMetaDom/field'
4
+ require 'dataMetaDom/pojo'
+ require 'ostruct' # OpenStruct is used below
+ require 'erb'
+ require 'fileutils'
5
+
6
+ module DataMetaByteSer
7
+ =begin rdoc
8
+ Migration tooling.
9
+
10
+ =end
11
+ module VerReads
12
+ include DataMetaDom, DataMetaDom::PojoLexer
13
+ =begin rdoc
14
+ Generates the versioned read switch that channels the read to the proper migration scenario.
15
+ =end
16
+ def genVerReadSwitch(v1, v2, modelForVer, vers, outRoot)
17
+ # v1 = mo1.records.values.first.ver.full
18
+ # v2 = mo2.records.values.first.ver.full
19
+ mo1 = modelForVer.call(v1)
20
+ mo2 = modelForVer.call(v2)
21
+ destDir = outRoot
22
+ javaPackage = '' # set the scope for the var
23
+ vars = OpenStruct.new # for template's local variables. ERB does not make them visible to the binding
24
+ # sort the models by versions out, 2nd to be the latest:
25
+ raise ArgumentError, "Versions on the model are the same: #{v1}" if v1 == v2
26
+ if v1 > v2
27
+ model2 = mo1
28
+ model1 = mo2
29
+ ver1 = v2
30
+ ver2 = v1
31
+ else
32
+ model2 = mo2
33
+ model1 = mo1
34
+ ver1 = v1
35
+ ver2 = v2
36
+ end
37
+ puts "Going from ver #{ver1} to #{ver2}"
38
+ trgE = model2.records.values.first
39
+ javaPackage, baseName, packagePath = assertNamespace(trgE.name)
40
+ javaClassName = "Read__Switch_v#{ver1.toVarName}_to_v#{ver2.toVarName}"
41
+ destDir = File.join(outRoot, packagePath)
42
+ FileUtils.mkdir_p destDir
43
+ IO::write(File.join(destDir, "#{javaClassName}.java"),
44
+ ERB.new(IO.read(File.join(File.dirname(__FILE__), '../../tmpl/readSwitch.erb')),
45
+ $SAFE, '%<>').result(binding), mode: 'wb')
46
+ end
47
+ module_function :genVerReadSwitch
48
+ end
49
+ end
data/lib/dataMetaByteSer.rb ADDED
@@ -0,0 +1,391 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ # Definition for generating Plain Old Java Objects (POJOs)
4
+ %w(fileutils dataMetaDom dataMetaDom/pojo dataMetaDom/enum dataMetaDom/record dataMetaDom/help).each(&method(:require))
5
+ require 'set'
6
+ require 'dataMetaByteSer/util'
7
+
8
+ =begin rdoc
9
+ Serialization artifacts generation such as Hadoop Writables etc.
10
+
11
+ TODO this isn't a bad way, but beter use templating next time such as {ERB}[http://ruby-doc.org/stdlib-1.9.3/libdoc/erb/rdoc/ERB.html].
12
+
13
+ For command line details either check the new method's source or the README.rdoc file, the usage section.
14
+ =end
15
+ module DataMetaByteSer
16
+ # Current version
17
+ VERSION = '1.0.0'
18
+ include DataMetaDom, DataMetaDom::PojoLexer
19
+
20
+ =begin rdoc
21
+ HDFS Reader and Writer for textual Java types such as String.
22
+ =end
23
+ TEXT_RW_METHODS = RwHolder.new(
24
+ lambda{|ctx|
25
+ ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}String(in)") : ctx.rw.call('readText(in)')
26
+ },
27
+ lambda{|ctx|
28
+ ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}String(out, val.#{ctx.valGetter})" : "writeTextIfAny(out, val.#{ctx.valGetter})"
29
+ }
30
+ )
31
+
32
+ =begin rdoc
33
+ HDFS Reader and Writer for integral Java types such as Integer or Long.
34
+ =end
35
+ INTEGRAL_RW_METHODS = RwHolder.new(
36
+ lambda{ |ctx|
37
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
38
+ case
39
+ when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Integer(in)") :
40
+ ctx.rw.call('readVInt(in)')
41
+
42
+ when ctx.fType.length <= 8; ; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Long(in)") : ctx.rw.call('readVLong(in)')
43
+
44
+ else; raise "Invalid integer field #{ctx.fld}"
45
+ end
46
+ },
47
+ lambda{ |ctx|
48
+ case
49
+ when ctx.fType.length <= 4; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Integer(out, val.#{ctx.valGetter})" :
50
+ "writeVInt(out, val.#{ctx.valGetter})"
51
+
52
+ when ctx.fType.length <= 8; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Long(out, val.#{ctx.valGetter})" : "writeVLong(out, val.#{ctx.valGetter})"
53
+
54
+ else; raise "Invalid integer field #{ctx.fld}"
55
+ end
56
+ })
57
+
58
+ =begin rdoc
59
+ HDFS Reader and Writer for floating point Java types such as Float or Double.
60
+ =end
61
+ FLOAT_RW_METHODS = RwHolder.new(
62
+ lambda{|ctx|
63
+ mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
64
+ case
65
+ when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Float(in)") : ctx.rw.call('in.readFloat()')
66
+ when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Double(in)") : ctx.rw.call('in.readDouble()')
67
+ else; raise "Invalid float field #{ctx.fld}"
68
+ end
69
+ },
70
+ lambda{|ctx|
71
+ case
72
+ when ctx.fType.length <= 4; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Float(out, val.#{ctx.valGetter})" : "out.writeFloat(val.#{ctx.valGetter})"
73
+ when ctx.fType.length <= 8; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Double(out, val.#{ctx.valGetter})" : "out.writeDouble(val.#{ctx.valGetter})"
74
+ else; raise "Invalid float field #{ctx.fld}"
75
+ end
76
+ })
77
+
78
+ =begin rdoc
79
+ HDFS Reader and Writer for the temporal type, the DateTime
80
+ =end
81
+ DTTM_RW_METHODS = RwHolder.new(
82
+ lambda { |ctx|
83
+ ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}ZonedDateTime(in)") : ctx.rw.call('readDttm(in)')
84
+ },
85
+ lambda { |ctx|
86
+ ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}ZonedDateTime(out, val.#{ctx.valGetter})" : "writeDttm(out, val.#{ctx.valGetter})"
87
+ }
88
+ )
89
+
90
+ =begin rdoc
91
+ HDFS Reader and Writer for boolean Java type.
92
+ =end
93
+ BOOL_RW_METHODS = RwHolder.new(
94
+ lambda { |ctx|
95
+ aggrNotSupported(ctx.fld, 'Booleans') if ctx.fld.aggr
96
+ ctx.rw.call('in.readBoolean()')
97
+ },
98
+ lambda { |ctx|
99
+ aggrNotSupported(ctx.fld, 'Booleans') if ctx.fld.aggr
100
+ "out.writeBoolean(val.#{ctx.valGetter})"
101
+ }
102
+ )
103
+
104
+ =begin rdoc
105
+ HDFS Reader and Writer the raw data type, the byte array.
106
+ =end
107
+ RAW_RW_METHODS = RwHolder.new(
108
+ lambda { |ctx|
109
+ aggrNotSupported(ctx.fld, 'Raw Data') if ctx.fld.aggr
110
+ ctx.rw.call('readByteArray(in)')
111
+ },
112
+ lambda { |ctx|
113
+ aggrNotSupported(ctx.fld, 'Raw Data') if ctx.fld.aggr
114
+ "writeByteArray(out, val.#{ctx.valGetter})" }
115
+ )
116
+
117
+ =begin rdoc
118
+ HDFS Reader and Writer the variable size Decimal data type.
119
+ =end
120
+ NUMERIC_RW_METHODS = RwHolder.new(lambda{|ctx| ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}BigDecimal(in)") : ctx.rw.call('readBigDecimal(in)')},
121
+ lambda{|ctx| "writeBigDecimal(out, val.#{ctx.valGetter})"})
122
+
123
+ =begin rdoc
124
+ HDFS Reader and Writer the Java Enums.
125
+ =end
126
+ ENUM_RW_METHODS = RwHolder.new(
127
+ lambda{|ctx|
128
+ aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
129
+ "#{condenseType(ctx.fType.type, ctx.pckg)}.forOrd(readVInt(in))"
130
+ },
131
+ lambda { |ctx|
132
+ aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
133
+ "writeVInt(out, val.#{ctx.valGetter}.ordinal())"
134
+ }
135
+ )
136
+
137
+ =begin rdoc
138
+ HDFS Reader and Writer the BitSet.
139
+ =end
140
+ BITSET_RW_METHODS = RwHolder.new(
141
+ lambda { |ctx|
142
+ aggrNotSupported(ctx.fld, 'BitSets') if ctx.fld.aggr
143
+ "new #{condenseType(ctx.fld.dataType, ctx.pckg)}(readLongArray(in))"
144
+ },
145
+ lambda { |ctx|
146
+ aggrNotSupported(ctx.fld, 'BitSets') if ctx.fld.aggr
147
+ "writeBitSet(out, val.#{ctx.valGetter})"
148
+ }
149
+ )
150
+
151
+ =begin rdoc
152
+ HDFS Reader and Writer the URL.
153
+ =end
154
+ URL_RW_METHODS = RwHolder.new(
155
+ lambda { |ctx|
156
+ aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
157
+ 'new java.net.URL(readText(in))'
158
+ },
159
+ lambda { |ctx|
160
+ aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
161
+ "writeTextIfAny(out, val.#{ctx.valGetter}.toExternalForm())"
162
+ }
163
+ )
164
+ =begin rdoc
165
+ Read/write methods for the standard data types.
166
+ =end
167
+ STD_RW_METHODS = {
168
+ INT => INTEGRAL_RW_METHODS,
169
+ STRING => TEXT_RW_METHODS,
170
+ DATETIME => DTTM_RW_METHODS,
171
+ BOOL => BOOL_RW_METHODS,
172
+ CHAR => TEXT_RW_METHODS,
173
+ FLOAT => FLOAT_RW_METHODS,
174
+ RAW => RAW_RW_METHODS,
175
+ NUMERIC => NUMERIC_RW_METHODS,
176
+ URL => URL_RW_METHODS
177
+ }
178
+ # DataMeta DOM object renderer
179
+ RECORD_RW_METHODS = RwHolder.new(
180
+ lambda { |ctx|
181
+ if ctx.fld.aggr
182
+ if ctx.fld.trgType # map
183
+ mapsNotSupported(ctx.fld)
184
+ else # list, set or deque
185
+ "read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}(in, #{
186
+ inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance())"
187
+ end
188
+ else # scalar
189
+ "#{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance().read(in)"
190
+ end
191
+ },
192
+ lambda { |ctx|
193
+ if ctx.fld.aggr && !ctx.fld.trgType
194
+ if ctx.fld.trgType # map
195
+ mapsNotSupported(ctx.fld)
196
+ else # list, set or deque
197
+ "writeCollection(val.#{ctx.valGetter}, out, #{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance())"
198
+ end
199
+ else # scalar
200
+ "#{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance().write(out, val.#{ctx.valGetter})"
201
+ end
202
+ }
203
+ )
204
+
205
+ # Transforms the given DataMeta DOM aggregate type to the fully qualified Java class name
206
+ def aggrJavaFull(aggr)
207
+ PojoLexer::AGGR_CLASSES[aggr] || (raise ArgumentError, "No Aggregate classes for type #{aggr}" )
208
+ end
209
+
210
+ # Transforms the given full Java name of the aggregate class into the base name to interpolate into method names
211
+ def aggrBaseName(aggr)
212
+ /^(\w+\.)+(\w+)$/.match(aggr)[2]
213
+ end
214
+ =begin rdoc
215
+ Read/write methods for the DataMeta DOM Maps, which happen to be the same as for the standard data types.
216
+ =end
217
+ MAP_RW_METHODS = STD_RW_METHODS
218
+
219
+ # Build the Read/Write operation renderer for the given context:
220
+ def getRwRenderer(ctx)
221
+ dt = ctx.fld.dataType
222
+ ctx.refType = nil # reset to avoid misrendering primitives
223
+ rwRenderer = STD_RW_METHODS[dt.type]
224
+ return rwRenderer if rwRenderer
225
+ refKey = dt.type
226
+ ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
227
+ case
228
+ when ctx.refType.kind_of?(DataMetaDom::Record)
229
+ RECORD_RW_METHODS
230
+ when ctx.refType.kind_of?(DataMetaDom::Enum)
231
+ ENUM_RW_METHODS
232
+ when ctx.refType.kind_of?(DataMetaDom::BitSet)
233
+ BITSET_RW_METHODS
234
+ when ctx.refType.kind_of?(DataMetaDom::Mapping)
235
+ MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
236
+ ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
237
+ else
238
+ raise "No renderer defined for field #{ctx.fld}"
239
+ end
240
+ end
241
+
242
+ # Temporary/scratch var -- avoiding collisions at all costs
243
+ def tmpVar(name); "#{'_'*3}#{name}#{'_'*3}" end
244
+
245
+ # Generates a Writable along with its InOutable; the Writable delegates the actual reads/writes to the InOutable
246
+ def genWritable(model, wriOut, ioOut, record, javaPackage, baseName)
247
+ ctx = RendCtx.new.init(model, record, javaPackage, baseName)
248
+ fields = record.fields
249
+ wriName = writableClassName(baseName)
250
+ ioName = inOutableClassName(baseName)
251
+ # scan for imports needed
252
+ hasOptional = fields.values.map{|f|
253
+ # !model.records[f.dataType.type] &&
254
+ !f.isRequired
255
+ }.reduce(:|) # true if there is at least one optional field which isn't a record
256
+ #fields.values.each { |f|
257
+ # ctx << DataMetaDom::PojoLexer::JAVA_IMPORTS[f.dataType.type]
258
+ #}
259
+
260
+ # field keys (names) in the order of reading/writing to the in/out record
261
+ keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
262
+ reads = ''
263
+ writes = ''
264
+ writeNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(); int fldIndex = -1;' : ''
265
+ readNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(readLongArray(in), false); int fldIndex = -1;' : ''
266
+ indent = "\n#{' ' * 8}"
267
+ # sorting provides predictable read/write order
268
+ keysInOrder.each { |k|
269
+ f = fields[k]
270
+ ctx.fld = f
271
+ rwRenderer = getRwRenderer(ctx)
272
+ # unless ctx.refType.kind_of?(DataMetaDom::Record)
273
+ reads << (
274
+ indent + (f.isRequired ? '' : 'fldIndex++;') + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
275
+ (f.isRequired ? '' : 'nullFlags.get(fldIndex) ? null : ')+
276
+ "#{rwRenderer.r.call(ctx)});"
277
+ )
278
+ # rendering of noReqFld - using the Verifiable interface instead
279
+ #=begin
280
+ writes << (indent + (f.isRequired ?
281
+ (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
282
+ #%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
283
+ "if(val.#{DataMetaDom.getterName(ctx.fld)}() != null) ") + "#{rwRenderer.w.call(ctx)};")
284
+ unless f.isRequired
285
+ writeNullMaskHead << (indent + "fldIndex++; if(val.#{DataMetaDom.getterName(ctx.fld)}() == null) nullFlags.set(fldIndex);")
286
+ end
287
+ #=end
288
+ # end
289
+ }
290
+ writeNullMaskHead << ( indent + 'writeBitSet(out, nullFlags);') if hasOptional
291
+ ioOut.puts <<IN_OUTABLE_CLASS
292
+ package #{javaPackage};
293
+ import org.ebay.datameta.dom.*;
294
+ import java.io.*;
295
+ import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
296
+ import static org.apache.hadoop.io.WritableUtils.*;
297
+ import org.ebay.datameta.ser.bytes.InOutable;
298
+ #{ctx.importsText}
299
+ #{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{ioName} extends InOutable<#{baseName}> {
300
+
301
+ private static final #{ioName} INSTANCE = new #{ioName}();
302
+ public static #{ioName} getInstance() { return INSTANCE; }
303
+ private #{ioName}() {}
304
+
305
+ @Override public void write(final DataOutput out, final #{baseName} val) throws IOException {
306
+ val.verify();
307
+ #{writeNullMaskHead}
308
+ #{writes}
309
+ }
310
+
311
+ @Override public #{baseName} read(final DataInput in, final #{baseName} val) throws IOException {
312
+ #{readNullMaskHead}
313
+ #{reads}
314
+ return val;
315
+ }
316
+ @Override public #{baseName} read(final DataInput in) throws IOException {
317
+ return read(in, new #{baseName}());
318
+ }
319
+ }
320
+ IN_OUTABLE_CLASS
321
+ wriOut.puts <<WRITABLE_CLASS
322
+ package #{javaPackage};
323
+ import org.apache.hadoop.io.Writable;
324
+ import org.ebay.datameta.dom.*;
325
+ import java.io.*;
326
+ import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
327
+ import static org.apache.hadoop.io.WritableUtils.*;
328
+ import org.ebay.datameta.ser.bytes.HdfsReadWrite;
329
+ #{ctx.importsText}
330
+ #{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{wriName} extends HdfsReadWrite<#{baseName}> {
331
+
332
+ public #{wriName}(final #{baseName} value) {
333
+ super(value);
334
+ }
335
+
336
+ public #{wriName}() {
337
+ super(new #{baseName}()); // the value must be on the instance at all times,
338
+ // for example, when used with hadoop fs -text, this class will be used with default constructor
339
+ }
340
+
341
+ @Override public void write(final DataOutput out) throws IOException {
342
+ #{ioName}.getInstance().write(out, getVal());
343
+ }
344
+
345
+ @Override public void readFields(final DataInput in) throws IOException {
346
+ #{ioName}.getInstance().read(in, getVal());
347
+ }
348
+ }
349
+ WRITABLE_CLASS
350
+
351
+ ########assertValue();
352
+ end
353
+
354
+ =begin rdoc
355
+ Generates all the writables for the given model.
356
+ Parameters:
357
+ * +model+ - the model to generate Writables from.
358
+ * +outRoot+ - destination directory name.
359
+ =end
360
+ def genWritables(model, outRoot)
361
+ model.records.values.each { |e|
362
+ javaPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(e.name)
363
+ destDir = File.join(outRoot, packagePath)
364
+ FileUtils.mkdir_p destDir
365
+ wriOut = File.open(File.join(destDir, "#{writableClassName(base)}.java"), 'wb')
366
+ ioOut = File.open(File.join(destDir, "#{inOutableClassName(base)}.java"), 'wb')
367
+ begin
368
+ case
369
+ when e.kind_of?(DataMetaDom::Record)
370
+ genWritable model, wriOut, ioOut, e, javaPackage, base
371
+ else
372
+ raise "Unsupported Entity: #{e.inspect}"
373
+ end
374
+ ensure
375
+ begin
376
+ ioOut.close
377
+ ensure
378
+ wriOut.close
379
+ end
380
+ end
381
+ }
382
+ end
383
+
384
+ # Shortcut to help for the Hadoop Writables generator.
385
+ def helpDataMetaBytesSerGen(file, errorText=nil)
386
+ DataMetaDom::help(file, 'DataMeta Serialization to/from Bytes', '<DataMeta DOM source> <Target Directory>', errorText)
387
+ end
388
+
389
+ module_function :helpDataMetaBytesSerGen, :genWritables, :genWritable, :getRwRenderer,
390
+ :aggrBaseName, :aggrJavaFull
391
+ end
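
Putting it together, the bin script earlier in this diff drives this generator roughly as in the following sketch (the paths are hypothetical, and it assumes the dataMetaDom and dataMetaByteSer gems are installed):

    require 'dataMetaDom'
    require 'dataMetaByteSer'

    model = DataMetaDom::Model.new
    model.parse('./model/example.dmDom')              # hypothetical DataMeta DOM source
    DataMetaByteSer::genWritables(model, './target')  # emits *_InOutable.java and *_Writable.java per record
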
data/test/test_dataMetaByteSer.rb ADDED
@@ -0,0 +1,17 @@
1
+ # keep this underscore naming in the test subdir; it makes it easier to append file names to test
2
+ require './test/test_helper.rb'
3
+
4
+ # Unit test cases for the DataMetaByteSer
5
+ # See for instance:
6
+ # - test_true
7
+ class TestNewGem < Test::Unit::TestCase
8
+
9
+ # an empty stub for now
10
+ def setup;
11
+ end
12
+
13
+ # stub
14
+ def test_true
15
+ assert_equal('a', "a")
16
+ end
17
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,4 @@
1
+ ## keep this underscore naming in the test subdir; it makes it easier to append file names to test
2
+ %w(stringio test/unit).each { |r| require r }
3
+ # this is expected to run from the project root, normally by the rake file
4
+ require './lib/dataMetaByteSer'
data/tmpl/readSwitch.erb ADDED
@@ -0,0 +1,63 @@
1
+ <%#
2
+ Template for Java migration guides
3
+ %>
4
+ package <%=javaPackage%>;
5
+ /*
6
+ This class is generated by DataMeta DOM. Do not edit manually!
7
+ */
8
+ import org.ebay.datameta.ser.bytes.InOutable;
9
+ import org.ebay.datameta.util.jdk.SemanticVersion;
10
+
11
+ import java.io.DataInput;
12
+ import java.io.IOException;
13
+
14
+ public class <%=javaClassName%> {
15
+
16
+ private static final <%=javaClassName%> INSTANCE = new <%=javaClassName%>();
17
+
18
+ public static <%=javaClassName%> getInstance() { return INSTANCE; }
19
+
20
+ <% model2.records.values.each { |trgE|
21
+ vars.versCases = vers.clone.select{|v| v < ver2}.sort{|x, y| y<=>x}
22
+ vars.javaPackage, vars.baseName, vars.packagePath = assertNamespace(trgE.name)
23
+ srcRecName = flipVer(trgE.name, ver2.toVarName, ver1.toVarName)
24
+ srcE = model1.records[srcRecName]
25
+ if srcE
26
+ %>
27
+ public <%= vars.baseName %> read_<%= vars.baseName %>_versioned(final DataInput in) throws IOException {
28
+ final SemanticVersion ver = InOutable.readVersion(in);
29
+ if(ver.equals(<%=vars.baseName%>.VERSION)) {
30
+ return <%=vars.baseName%>_InOutable.getInstance().read(in);
31
+ <%
32
+ while vars.versCases.length > 1 # loop through the case statement - a version per each
33
+ vars.switchTargVer = vars.versCases.shift
34
+ vars.brackets = ''
35
+ caseObjName = flipVer(trgE.name, ver2.toVarName, vars.switchTargVer.toVarName)
36
+ caseMod = modelForVer.call(vars.switchTargVer)
37
+ next unless caseMod.records.keys.member?(caseObjName.to_sym) # skip cases for the versions where this object's target version does not exist
38
+ %>
39
+ }
40
+ else if(ver.equals(<%=caseObjName%>.VERSION)){<% vars.versMigr = vers.clone.select{|v| v <= ver2}.sort{|x, y| y<=>x}%>
41
+ return <% while vars.versMigr.length > 1 # migration steps loop nested in the case statement loop
42
+ vars.brackets << ')'
43
+ vars.migrTargVer = vars.versMigr.shift # target version for migration loop
44
+ vars.srcVer = vars.versMigr[0]
45
+ vars.srcType = flipVer(trgE.name, ver2.toVarName, vars.srcVer.toVarName)
46
+ migrMod = modelForVer.call(vars.srcVer)
47
+ break unless migrMod.records.keys.member?(vars.srcType.to_sym) # enough if there is no record in the target version
48
+ vars.jpMigr = vars.javaPackage.gsub(".v#{ver2.toVarName}", ".v#{vars.migrTargVer.toVarName}")
49
+ %>
50
+ // substituting in <%=vars.javaPackage%>: ".v<%=ver2.toVarName%>" with ".v<%=vars.migrTargVer.toVarName%>"
51
+ <%=vars.jpMigr%>.<%=migrClass(vars.baseName, vars.srcVer, vars.migrTargVer)%>.getInstance().migrate(<% break if vars.srcVer <= vars.switchTargVer
52
+ end # migration steps loop %>
53
+
54
+ <%=vars.srcType%>_InOutable.getInstance().read(in)
55
+ <%= vars.brackets %>;
56
+ <% end %>
57
+ }
58
+ else throw new IllegalArgumentException("Unsupported version for the record <%=vars.baseName%>: " + ver);
59
+ }
60
+ <% end
61
+ } # records loop %>
62
+ }
63
+
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dataMetaByteSer
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Michael Bergens
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-01-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: dataMetaDom
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.0'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.0.0
33
+ description: Generates serializers of DataMeta objects to/from byte arrays, which
34
+ can be used with Hadoop, BigTable and beyond.
35
+ email: michael.bergens@gmail.com
36
+ executables:
37
+ - dataMetaByteSerGen.rb
38
+ extensions: []
39
+ extra_rdoc_files: []
40
+ files:
41
+ - ".yardopts"
42
+ - History.md
43
+ - PostInstall.txt
44
+ - README.md
45
+ - Rakefile
46
+ - bin/dataMetaByteSerGen.rb
47
+ - lib/dataMetaByteSer.rb
48
+ - lib/dataMetaByteSer/python.rb
49
+ - lib/dataMetaByteSer/util.rb
50
+ - lib/dataMetaByteSer/ver_reads.rb
51
+ - test/test_dataMetaByteSer.rb
52
+ - test/test_helper.rb
53
+ - tmpl/readSwitch.erb
54
+ homepage: https://github.com/eBayDataMeta
55
+ licenses:
56
+ - Apache-2.0
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.1.1
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements:
73
+ - Hadoop libraries
74
+ rubyforge_project:
75
+ rubygems_version: 2.5.1
76
+ signing_key:
77
+ specification_version: 4
78
+ summary: DataMeta Byte Array Serializers Gen
79
+ test_files:
80
+ - test/test_dataMetaByteSer.rb