dataMetaByteSer 1.0.0
- checksums.yaml +7 -0
- data/.yardopts +1 -0
- data/History.md +5 -0
- data/PostInstall.txt +1 -0
- data/README.md +35 -0
- data/Rakefile +13 -0
- data/bin/dataMetaByteSerGen.rb +16 -0
- data/lib/dataMetaByteSer/python.rb +387 -0
- data/lib/dataMetaByteSer/util.rb +138 -0
- data/lib/dataMetaByteSer/ver_reads.rb +49 -0
- data/lib/dataMetaByteSer.rb +391 -0
- data/test/test_dataMetaByteSer.rb +17 -0
- data/test/test_helper.rb +4 -0
- data/tmpl/readSwitch.erb +63 -0
- metadata +80 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: 4d5201ed98a82b4da2ca29379d7318cb3607e920
  data.tar.gz: edb71acaf79c4111d1a703c9a7044c9a78019fb8
SHA512:
  metadata.gz: 4d9edb7888e536f006becb9ce5612ef7573798afb5f136e0b36745021caaebc8c1cfad455815e6c1269a25763f45590a0133200ffc3121ababe839791e7cae7d
  data.tar.gz: a5358415079756f869e417d783f069ebdf60aafe46c8fb449dd9014af7ef269ff1f683723f9784f3759b83d79c848aa1ab0ff6303675c1b8fa55f647d2c99639
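The two digest families above can be checked locally with Ruby's standard Digest library. A minimal sketch, assuming the extracted metadata.gz and data.tar.gz artifacts sit in the current directory (the file names are the standard gem archive members, not anything specific to this gem):

require 'digest'

# Print local SHA1/SHA512 sums to compare against the published checksums.yaml values.
%w(metadata.gz data.tar.gz).each do |artifact|
    puts "#{artifact} SHA1:   #{Digest::SHA1.file(artifact).hexdigest}"
    puts "#{artifact} SHA512: #{Digest::SHA512.file(artifact).hexdigest}"
end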
data/.yardopts
ADDED
@@ -0,0 +1 @@
--title "DataMeta Bytes (de)serialization" -r README.md --charset UTF-8 lib/**/* - README.md
data/History.md
ADDED
data/PostInstall.txt
ADDED
@@ -0,0 +1 @@
No special steps
data/README.md
ADDED
@@ -0,0 +1,35 @@
# `dataMetaByteSer` gem

Byte array (de)serialization generation from [DataMeta DOM](https://github.com/eBayDataMeta/DataMeta-gems) sources.

References for this gem:

* [Source](https://github.com/eBayDataMeta/DataMeta-gems)


## DESCRIPTION:

See the [DataMeta Project](https://github.com/eBayDataMeta/DataMeta)

## FEATURES:

Generates (de)serializers to/from byte arrays with matching Hadoop Writables. Performance is optimized for storage
size first and runtime speed second; both aspects benchmark near the best in their class.

## SYNOPSIS:

To generate Byte Array serializers in Java, including Hadoop Writables for the DataMeta model, run:

    dataMetaByteSerGen.rb <DataMeta DOM source> <Target Directory>

## REQUIREMENTS:

* No special requirements

## INSTALL:

    gem install dataMetaByteSer

## LICENSE:

[Apache v 2.0](https://github.com/eBayDataMeta/DataMeta/blob/master/LICENSE.md)
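The SYNOPSIS shows the command-line entry point; the same generation can be driven programmatically, mirroring the bin/dataMetaByteSerGen.rb script listed later in this changeset. A minimal sketch — the model file name 'example.dmDom' and the output directory './target' are placeholders, not files shipped with the gem:

require 'dataMetaDom'
require 'dataMetaByteSer'

# Parse a DataMeta DOM source and emit the Java InOutables/Writables,
# which is what the dataMetaByteSerGen.rb executable does under the hood.
model = DataMetaDom::Model.new
model.parse('example.dmDom')                      # hypothetical model source
DataMetaByteSer::genWritables(model, './target')  # hypothetical output directory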
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
%w(yard rdoc/task rake/testtask fileutils ./lib/dataMetaByteSer).each{ |r| require r}

Rake::TestTask.new do |t|
    t.libs << 'test'
end

desc 'Regen RDocs'
task :default => :docs

YARD::Rake::YardocTask.new('docs') {|r|
    r.stats_options = ['--list-undoc']
}
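The Rakefile wires YARD docs up as the default task and declares a bare Rake::TestTask that relies on Rake's default test glob. A hedged sketch of the equivalent task with the file pattern spelled out — the pattern and verbose settings below are assumptions for illustration, not part of the gem's Rakefile:

require 'rake/testtask'

# Equivalent test task with an explicit pattern; the original leaves it at
# Rake's default (test/test*.rb).
Rake::TestTask.new(:test) do |t|
    t.libs << 'test'
    t.pattern = 'test/test_*.rb'
    t.verbose = true
end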
data/bin/dataMetaByteSerGen.rb
ADDED
@@ -0,0 +1,16 @@
#!/usr/bin/env ruby
%w( dataMetaDom dataMetaByteSer ).each(&method(:require))

@source, @target = ARGV
DataMetaByteSer::helpDataMetaBytesSerGen __FILE__ unless @source && @target
DataMetaByteSer::helpDataMetaBytesSerGen(__FILE__, "DataMeta DOM source #{@source} is not a file") unless File.file?(@source)
DataMetaByteSer::helpDataMetaBytesSerGen(__FILE__, "Writables destination directory #{@target} is not a dir") unless File.directory?(@target)

@parser = DataMetaDom::Model.new
begin
    @parser.parse(@source)
    DataMetaByteSer::genWritables(@parser, @target)
rescue Exception => e
    $stderr.puts "ERROR #{e.message}; #{@parser.diagn}"
    $stderr.puts e.backtrace.inspect
end
data/lib/dataMetaByteSer/python.rb
ADDED
@@ -0,0 +1,387 @@
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

require 'fileutils'
require 'dataMetaDom'
require 'dataMetaDom/util'
require 'dataMetaDom/python'
require 'dataMetaByteSer/util'

module DataMetaByteSer
    # (De)Serialization for Python
    module Py
        include DataMetaDom, DataMetaDom::PythonLexer, DataMetaByteSer
=begin rdoc
Builds a class name for a Writable.
=end
        def writableClassName(baseName); "#{baseName}_Writable" end
=begin rdoc
Builds a class name for an InOutable.
=end
        def inOutablePy(arg)
            klassName = case
                when arg.kind_of?(String)
                    arg
                else
                    _, s = DataMetaDom.splitNameSpace(arg.fType.type)
                    s
            end
            "#{klassName}_InOutable"
        end

        def mapsNotSupported(fld)
            raise ArgumentError, "Field #{fld.name}: maps are not currently supported on Hadoop layer"
        end

        def aggrNotSupported(fld, forWhat)
            raise ArgumentError, "Field #{fld.name}: aggregate types are not supported for #{forWhat} on Hadoop layer"
        end

=begin rdoc
HDFS Reader and Writer for textual Python types such as str.
=end
        TEXT_RW_METHODS = DataMetaByteSer::RwHolder.new(
                lambda{|ctx|
                    ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}String(di)") : ctx.rw.call('DataMetaHadoopUtil.readText(di)')
                },
                lambda{|ctx|
                    ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}String(do, val.#{ctx.valGetter})" : "DataMetaHadoopUtil.writeTextIfAny(do, val.#{ctx.valGetter})"
                }
        )

=begin rdoc
HDFS Reader and Writer for the integral Python type.
=end
        INTEGRAL_RW_METHODS = RwHolder.new(
                lambda{ |ctx|
                    mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                    case
                        when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Integer(di)") :
                                ctx.rw.call('WritableUtils.readVInt(di)')

                        when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Long(di)") : ctx.rw.call('WritableUtils.readVLong(di)')

                        else; raise "Invalid integer field #{ctx.fld}"
                    end
                },
                lambda{ |ctx|
                    case
                        when ctx.fType.length <= 4; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Integer(do, val.#{ctx.valGetter})" :
                                "WritableUtils.writeVInt(do, val.#{ctx.valGetter})"

                        when ctx.fType.length <= 8; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Long(do, val.#{ctx.valGetter})" : "WritableUtils.writeVLong(do, val.#{ctx.valGetter})"

                        else; raise "Invalid integer field #{ctx.fld}"
                    end
                })

=begin rdoc
HDFS Reader and Writer for Booleans.
=end
        BOOLEAN_RW_METHODS = RwHolder.new(
                lambda{|ctx|
                    mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                    ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Boolean(di)") : ctx.rw.call('di.readBoolean()')
                },
                lambda{|ctx|
                    mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                    ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Boolean(do, val.#{ctx.valGetter})" : "do.writeBoolean(val.#{ctx.valGetter})"
                })

        # Python has no primitivable types
        PRIMITIVABLE_TYPES = Set.new

=begin rdoc
HDFS Reader and Writer for floating point types.
=end
        FLOAT_RW_METHODS = RwHolder.new(
                lambda{|ctx|
                    mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                    case
                        when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Float(di)") : ctx.rw.call('di.readFloat()')
                        when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Double(di)") : ctx.rw.call('di.readDouble()')
                        else; raise "Invalid float field #{ctx.fld}"
                    end
                },
                lambda{|ctx|
                    mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                    case
                        when ctx.fType.length <= 4; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Float(do, val.#{ctx.valGetter})" : "do.writeFloat(val.#{ctx.valGetter})"
                        when ctx.fType.length <= 8; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Double(do, val.#{ctx.valGetter})" : "do.writeDouble(val.#{ctx.valGetter})"
                        else; raise "Invalid float field #{ctx.fld}"
                    end
                })

=begin rdoc
HDFS Reader and Writer for the temporal type, the DateTime.
=end
        DTTM_RW_METHODS = RwHolder.new(
                lambda { |ctx|
                    ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}DateTime(di)") : ctx.rw.call('DataMetaHadoopUtil.readDttm(di)')
                },
                lambda { |ctx|
                    ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}DateTime(do, val.#{ctx.valGetter})" : "DataMetaHadoopUtil.writeDttm(do, val.#{ctx.valGetter})"
                }
        )
=begin rdoc
HDFS Reader and Writer for the variable size Decimal data type.
=end
        NUMERIC_RW_METHODS = RwHolder.new(lambda{|ctx| ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}BigDecimal(di)") : ctx.rw.call('DataMetaHadoopUtil.readBigDecimal(di)')},
                lambda{|ctx| "DataMetaHadoopUtil.writeBigDecimal(do, val.#{ctx.valGetter})"})

        # Full name of a Py aggregate for the given DataMeta DOM aggregate
        def aggrPyFull(aggr)
            case aggr
                when DataMetaDom::Field::LIST
                    'List'
                when DataMetaDom::Field::SET
                    'Set'
                when DataMetaDom::Field::DEQUE
                    'Deque' # note this is different from Java
                else
                    raise ArgumentError, "Aggregate type #{aggr} not supported for Python serialization"
            end
        end

=begin rdoc
HDFS Reader and Writer for Enums.
=end
        ENUM_RW_METHODS = RwHolder.new(
                lambda{|ctx|
                    aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
                    _, s = DataMetaDom.splitNameSpace(ctx.fType.type)
                    "#{s}(WritableUtils.readVInt(di) + 1)" # Python starts its enums from 1 - we save it starting from 0
                    # as Java and Scala do
                },
                lambda { |ctx|
                    aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
                    # Python starts its enums from 1 - we save it starting from 0 as Java and Scala do
                    "WritableUtils.writeVInt(do, val.#{ctx.valGetter}.value - 1)"
                }
        )
=begin rdoc
HDFS Reader and Writer for the URL.
=end
        URL_RW_METHODS = RwHolder.new(
                lambda { |ctx|
                    aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
                    'DataMetaHadoopUtil.readText(di)'
                },
                lambda { |ctx|
                    aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
                    "DataMetaHadoopUtil.writeTextIfAny(do, val.#{ctx.valGetter})"
                }
        )
        # Pseudo-implementers that just raise an error
        NOT_IMPLEMENTED_METHODS = RwHolder.new(
                lambda { |ctx|
                    aggrNotSupported(ctx.fld, 'Serialization')
                },
                lambda { |ctx|
                    aggrNotSupported(ctx.fld, 'Serialization')
                }
        )
=begin rdoc
Read/write methods for the standard data types.
=end
        STD_RW_METHODS = {
                DataMetaDom::INT => INTEGRAL_RW_METHODS,
                DataMetaDom::STRING => TEXT_RW_METHODS,
                DataMetaDom::DATETIME => DTTM_RW_METHODS,
                DataMetaDom::BOOL => BOOLEAN_RW_METHODS,
                DataMetaDom::CHAR => TEXT_RW_METHODS,
                DataMetaDom::FLOAT => FLOAT_RW_METHODS,
                DataMetaDom::RAW => NOT_IMPLEMENTED_METHODS,
                DataMetaDom::NUMERIC => NUMERIC_RW_METHODS,
                DataMetaDom::URL => URL_RW_METHODS
        }
        # DataMeta DOM object renderer
        RECORD_RW_METHODS = RwHolder.new(
                lambda { |ctx|
                    if ctx.fld.aggr
                        if ctx.fld.trgType # map
                            mapsNotSupported(ctx.fld)
                        else # list, set or deque
                            "DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}(di, #{
                                inOutablePy(ctx)}())"
                        end
                    else # scalar
                        "#{inOutablePy(ctx)}().read(di)"
                    end
                },
                lambda { |ctx|
                    if ctx.fld.aggr && !ctx.fld.trgType
                        if ctx.fld.trgType # map
                            mapsNotSupported(ctx.fld)
                        else # list, set or deque
                            "DataMetaHadoopUtil.writeCollection(val.#{ctx.valGetter}, do, #{inOutablePy(ctx)}())"
                        end
                    else # scalar
                        "#{inOutablePy(ctx)}().write(do, val.#{ctx.valGetter})"
                    end
                }
        )
=begin rdoc
Read/write methods for the DataMeta DOM Maps, coincidentally all the same as for the standard data types.
=end
        MAP_RW_METHODS = STD_RW_METHODS

        # Build the Read/Write operation renderer for the given context:
        def getRwRenderer(ctx)
            dt = ctx.fld.dataType
            ctx.refType = nil # reset to avoid misrendering primitives
            rwRenderer = STD_RW_METHODS[dt.type]
            return rwRenderer if rwRenderer
            refKey = dt.type
            ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
            case
                when ctx.refType.kind_of?(DataMetaDom::Record)
                    RECORD_RW_METHODS
                when ctx.refType.kind_of?(DataMetaDom::Enum)
                    ENUM_RW_METHODS
                when ctx.refType.kind_of?(DataMetaDom::BitSet)
                    NOT_IMPLEMENTED_METHODS
                when ctx.refType.kind_of?(DataMetaDom::Mapping)
                    MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
                        ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
                else
                    raise "No renderer defined for field #{ctx.fld}"
            end
        end

        # Generates one InOutable; Writables are not currently generated here.
        def genWritable(model, wriOut, ioOut, record, pyPackage, baseName)
            enumCount = model.enums.values.select{|e| e.kind_of?(DataMetaDom::Enum)}.size
            recImports = model.records.values.map{|r| # import all records
                p, b, pp = DataMetaDom::PojoLexer::assertNamespace(r.name)
                %|from #{DataMetaXtra::Str.downCaseFirst(b)} import #{b}|
            }.join("\n")
            # ioImports = model.records.values.reject{|r| r.name == record.name}.map{|r| # import all InOutables except of this one
            #     p, b, pp = DataMetaDom::PojoLexer::assertNamespace(r.name)
            #     # since one InOutable may import another which may import another, and Python can't handle this,
            #     # catch the error. It's harmless because if it really failed to import, we'll know
            #     %|
            # try:
            #     from #{inOutablePy(DataMetaXtra::Str.downCaseFirst(b))} import #{inOutablePy(b)}
            # except ImportError:
            #     pass|
            # }.join("\n")
            ctx = RendCtx.new.init(model, record, pyPackage, baseName)
            fields = record.fields
            wriName = nil # writableClassName(baseName)
            ioName = inOutablePy(baseName)
            hasOptional = fields.values.map{|f|
                # !model.records[f.dataType.type] &&
                !f.isRequired
            }.reduce(:|) # true if there is at least one optional field which isn't a record
            keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
            reads = ''
            writes = ''
            writeNullMaskHead = hasOptional ? "nullFlags = bitarray(#{fields.keys.size}); nullFlags.setall(False); fldIndex = -1" : ''
            readNullMaskHead = hasOptional ? 'nullFlags = DataMetaHadoopUtil.readBitArray(di); fldIndex = -1' : ''
            indent = "\n#{' ' * 8}"
            # sorting provides predictable read/write order
            keysInOrder.each { |k|
                f = fields[k]
                ctx.fld = f
                rwRenderer = getRwRenderer(ctx)
                reads << ( indent + (f.isRequired ? '' : "fldIndex += 1#{indent}") + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
                    (f.isRequired ? '' : ' None if nullFlags[fldIndex] else ')+ "#{rwRenderer.r.call(ctx)})"
                )
                # noinspection RubyNestedTernaryOperatorsInspection
                writes << (indent + (f.isRequired ?
                    (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
                    #%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
                    "if(val.#{DataMetaDom.getterName(ctx.fld)}() is not None): ") + "#{rwRenderer.w.call(ctx)}")
                unless f.isRequired
                    writeNullMaskHead << (indent + "fldIndex += 1#{indent}if(val.#{DataMetaDom.getterName(ctx.fld)}() is None): nullFlags[fldIndex] = True")
                end
            }
            writeNullMaskHead << ( indent + 'DataMetaHadoopUtil.writeBitArray(do, nullFlags)') if hasOptional

            ioOut.puts <<IN_OUTABLE_CLASS

class #{ioName}(InOutable):

    def write(self, do, val):
        val.verify()
        #{writeNullMaskHead}
        #{writes}

    def readVal(self, di, val):
        #{readNullMaskHead}
        #{reads}
        return val

    def read(self, di):
        return self.readVal(di, #{baseName}())

IN_OUTABLE_CLASS
        end

=begin rdoc
Generates all the writables for the given model.
Parameters:
* +model+ - the model to generate Writables from.
* +outRoot+ - destination directory name.
=end
        def genWritables(model, outRoot)
            firstRecord = model.records.values.first
            pyPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(firstRecord.name)
            # Next: replace dots with underscores. The path is also adjusted accordingly.
            #
            # Rationale for this, quoting PEP 8:
            #
            # Package and Module Names
            #
            # Modules should have short, all-lowercase names. Underscores can be used in the module name if it improves
            # readability. Python packages should also have short, all-lowercase names, although the use of underscores
            # is discouraged.
            #
            # Short and all-lowercase names, and improving readability if you have a complex system and need long package names,
            # is "discouraged". Can't do this here; our system is more complicated than strictly religious, "pythonic" Python allows for.
            # A tool must be enabling, and in this case this ruling gets in the way.
            # And dots are a no-no: Python can't find packages with complicated package structures and imports.
            #
            # Hence, we opt for long package names with underscores for distinctiveness and readability:
            pyPackage = pyPackage.gsub('.', '_')
            packagePath = packagePath.gsub('/', '_')
            destDir = File.join(outRoot, packagePath)
            FileUtils.mkdir_p destDir
            wriOut = nil # File.open(File.join(destDir, "#{writableClassName(base)}.py"), 'wb')
            serFile = File.join(destDir, 'serial.py')
            FileUtils.rm serFile if File.file?(serFile)
            ioOut = File.open(serFile, 'wb') # one huge serialization file
            ioOut.puts %|# This file is generated by DataMeta DOM. Do not edit manually!
#package #{pyPackage}

from hadoop.io import WritableUtils, InputStream, OutputStream, Text
from ebay_datameta_core.base import DateTime
from decimal import *
from collections import *
from bitarray import bitarray
from ebay_datameta_hadoop.base import *
from model import *

|
            begin
                model.records.values.each { |e|
                    _, base, _ = DataMetaDom::PojoLexer::assertNamespace(e.name)
                    case
                        when e.kind_of?(DataMetaDom::Record)
                            genWritable model, wriOut, ioOut, e, pyPackage, base
                        else
                            raise "Unsupported Entity: #{e.inspect}"
                    end
                }
            ensure
                begin
                    ioOut.close
                ensure
                    #wriOut.close
                end
            end
        end
        module_function :genWritables, :genWritable, :inOutablePy, :writableClassName, :mapsNotSupported,
            :aggrNotSupported, :getRwRenderer, :aggrPyFull
    end
end
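Each of the constants above pairs a read renderer with a write renderer in an RwHolder; given a rendering context, the pair emits matching source fragments for one field. A minimal sketch of that idea with a hand-rolled context stand-in — FakeField and FakeCtx below are illustrative stubs exposing only what the lambdas touch, not the real RendCtx:

require 'dataMetaByteSer/python'

# Illustrative only: minimal stand-ins for the field and the rendering context.
FakeField = Struct.new(:aggr, :trgType)
FakeCtx = Struct.new(:fld, :valGetter) do
    def rw; lambda { |t| t } end    # identity read-wrapper, as for non-Mapping fields
end

ctx = FakeCtx.new(FakeField.new(nil, nil), 'getName()')
holder = DataMetaByteSer::Py::TEXT_RW_METHODS
puts holder.r.call(ctx)   # => DataMetaHadoopUtil.readText(di)
puts holder.w.call(ctx)   # => DataMetaHadoopUtil.writeTextIfAny(do, val.getName())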
data/lib/dataMetaByteSer/util.rb
ADDED
@@ -0,0 +1,138 @@
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

require 'set'
require 'logger'

module DataMetaByteSer

=begin rdoc
A holder for a read renderer and a write renderer; they come in pairs that must be consistent so the
data is read and written uniformly.
=end
    class RwHolder
=begin rdoc
Read renderer.
=end
        attr_reader :r
=begin rdoc
Write renderer.
=end
        attr_reader :w
=begin rdoc
Creates a new HDFS Read and Write renderers pair.
=end
        def initialize(readRenderer, writeRenderer); @r = readRenderer; @w = writeRenderer end
    end

=begin rdoc
Rendering context with rendering-related properties and settings.
=end
    class RendCtx

=begin rdoc
DataMeta DOM Model on the context.
=end
        attr_accessor :model
=begin rdoc
Record currently worked on.
=end
        attr_accessor :rec

=begin rdoc
Set of imports if any, each as a symbol.
=end
        attr_accessor :imps

=begin rdoc
Java package.
=end
        attr_accessor :pckg
=begin rdoc
Base name of the type, without a namespace.
=end
        attr_accessor :baseName
=begin rdoc
The data type of the entity on the context.
=end
        attr_accessor :refType
=begin rdoc
Field currently on the context.
=end
        attr_reader :fld

=begin rdoc
Creates a new context.
=end
        def initialize; @imps = Set.new end

=begin rdoc
Setter for the field on the context, the field currently worked on.
=end
        def fld=(val); @fld = val end

=begin rdoc
Initialize the context with the model, the record, the package and the basename.
Returns self for call chaining.
=end
        def init(model, rec, pckg, baseName); @model = model; @rec = rec; @pckg = pckg; @baseName = baseName; self end

=begin rdoc
Add an import to the context, returns self for call chaining.
=end
        def <<(import)
            @imps << import.to_sym if import
            self
        end

=begin rdoc
Formats imports into Java source, sorted.
=end
        def importsText
            @imps.to_a.map{|k| "import #{k};"}.sort.join("\n")
        end

=begin rdoc
Determines if the refType is a DataMetaDom::Mapping.
=end
        def isMapping
            @refType.kind_of?(DataMetaDom::Mapping) && !@refType.kind_of?(DataMetaDom::BitSet)
        end

        # Effective field type
        def fType
            isMapping ? @refType.fromT : @fld.dataType
        end

        # Read wrapper: for a Mapping, wraps the read expression into the mapping type's constructor; identity otherwise.
        def rw
            isMapping ? lambda{|t| "new #{condenseType(@fld.dataType.type, self)}(#{t})"} : lambda{|t| t}
        end

=begin rdoc
Getter name for the current field; if the type is a Mapping, includes <tt>.getKey()</tt> too.
=end
        def valGetter
            "#{DataMetaDom.getterName(@fld)}()" + ( isMapping ? '.getKey()' : '')
        end
    end # RendCtx

=begin rdoc
Builds a class name for a Writable.
=end
    def writableClassName(baseName); "#{baseName}_Writable" end

=begin rdoc
Builds a class name for an InOutable.
=end
    def inOutableClassName(baseName); "#{baseName}_InOutable" end

    def mapsNotSupported(fld)
        raise ArgumentError, "Field #{fld.name}: maps are not currently supported on Hadoop layer"
    end

    def aggrNotSupported(fld, forWhat)
        raise ArgumentError, "Field #{fld.name}: aggregate types are not supported for #{forWhat} on Hadoop layer"
    end

    module_function :writableClassName, :inOutableClassName, :mapsNotSupported, :aggrNotSupported
end
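RendCtx is the mutable state the generators thread through every field: it carries the model, the current record and field, the target package, and any Java imports the renderers decide they need. A short sketch of its call-chaining API; the model and record are stubbed with nil here purely to show the chaining and the import collection:

require 'dataMetaByteSer/util'

# Build a context, collect a couple of imports, and render the import block.
ctx = DataMetaByteSer::RendCtx.new.init(nil, nil, 'com.example.pkg', 'Sample')
ctx << 'java.util.List' << 'java.math.BigDecimal'
puts ctx.importsText
# => import java.math.BigDecimal;
#    import java.util.List;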
data/lib/dataMetaByteSer/ver_reads.rb
ADDED
@@ -0,0 +1,49 @@
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

require 'dataMetaDom/field'
require 'dataMetaDom/pojo'

module DataMetaByteSer
=begin rdoc
Migration tooling.

=end
    module VerReads
        include DataMetaDom, DataMetaDom::PojoLexer
=begin
Generates a Versioned Read switch that channels the read to the proper migration scenario.
=end
        def genVerReadSwitch(v1, v2, modelForVer, vers, outRoot)
            # v1 = mo1.records.values.first.ver.full
            # v2 = mo2.records.values.first.ver.full
            mo1 = modelForVer.call(v1)
            mo2 = modelForVer.call(v2)
            destDir = outRoot
            javaPackage = '' # set the scope for the var
            vars = OpenStruct.new # for template's local variables. ERB does not make them visible to the binding
            # sort the models by versions out, 2nd to be the latest:
            raise ArgumentError, "Versions on the model are the same: #{v1}" if v1 == v2
            if v1 > v2
                model2 = mo1
                model1 = mo2
                ver1 = v2
                ver2 = v1
            else
                model2 = mo2
                model1 = mo1
                ver1 = v1
                ver2 = v2
            end
            puts "Going from ver #{ver1} to #{ver2}"
            trgE = model2.records.values.first
            javaPackage, baseName, packagePath = assertNamespace(trgE.name)
            javaClassName = "Read__Switch_v#{ver1.toVarName}_to_v#{ver2.toVarName}"
            destDir = File.join(outRoot, packagePath)
            FileUtils.mkdir_p destDir
            IO::write(File.join(destDir, "#{javaClassName}.java"),
                ERB.new(IO.read(File.join(File.dirname(__FILE__), '../../tmpl/readSwitch.erb')),
                    $SAFE, '%<>').result(binding), mode: 'wb')
        end
        module_function :genVerReadSwitch
    end
end
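genVerReadSwitch takes the two version endpoints, a lambda resolving a version to its parsed model, the full list of known versions, and the output root, then renders tmpl/readSwitch.erb into a Read__Switch_v<old>_to_v<new>.java class. A hedged invocation sketch — the version objects are assumed to come from the DataMeta DOM versioning API (comparable values supporting toVarName), so old_ver, new_ver and the models hash below are placeholders:

require 'dataMetaDom'
require 'dataMetaByteSer/ver_reads'

# Hypothetical setup: one parsed DataMetaDom::Model per released schema version.
models = {}                                # version object => model, filled elsewhere
model_for_ver = lambda { |v| models[v] }   # resolves a version to its model

# Renders the versioned-read switch under ./gen, channeling reads of older
# payloads through the chain of generated migration classes.
DataMetaByteSer::VerReads.genVerReadSwitch(
    old_ver, new_ver,                      # placeholders: the two version objects
    model_for_ver, models.keys.sort, './gen')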
data/lib/dataMetaByteSer.rb
ADDED
@@ -0,0 +1,391 @@
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

# Definition for generating Plain Old Java Objects (POJOs)
%w(fileutils dataMetaDom dataMetaDom/pojo dataMetaDom/enum dataMetaDom/record dataMetaDom/help).each(&method(:require))
require 'set'
require 'dataMetaByteSer/util'

=begin rdoc
Generation of serialization artifacts such as Hadoop Writables etc.

TODO: this isn't a bad way, but better to use templating next time, such as {ERB}[http://ruby-doc.org/stdlib-1.9.3/libdoc/erb/rdoc/ERB.html].

For command line details, either check the new method's source or the usage section of the README file.
=end
module DataMetaByteSer
    # Current version
    VERSION = '1.0.0'
    include DataMetaDom, DataMetaDom::PojoLexer

=begin rdoc
HDFS Reader and Writer for textual Java types such as String.
=end
    TEXT_RW_METHODS = RwHolder.new(
            lambda{|ctx|
                ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}String(in)") : ctx.rw.call('readText(in)')
            },
            lambda{|ctx|
                ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}String(out, val.#{ctx.valGetter})" : "writeTextIfAny(out, val.#{ctx.valGetter})"
            }
    )

=begin rdoc
HDFS Reader and Writer for integral Java types such as Integer or Long.
=end
    INTEGRAL_RW_METHODS = RwHolder.new(
            lambda{ |ctx|
                mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                case
                    when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Integer(in)") :
                            ctx.rw.call('readVInt(in)')

                    when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Long(in)") : ctx.rw.call('readVLong(in)')

                    else; raise "Invalid integer field #{ctx.fld}"
                end
            },
            lambda{ |ctx|
                case
                    when ctx.fType.length <= 4; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Integer(out, val.#{ctx.valGetter})" :
                            "writeVInt(out, val.#{ctx.valGetter})"

                    when ctx.fType.length <= 8; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Long(out, val.#{ctx.valGetter})" : "writeVLong(out, val.#{ctx.valGetter})"

                    else; raise "Invalid integer field #{ctx.fld}"
                end
            })

=begin rdoc
HDFS Reader and Writer for floating point Java types such as Float or Double.
=end
    FLOAT_RW_METHODS = RwHolder.new(
            lambda{|ctx|
                mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                case
                    when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Float(in)") : ctx.rw.call('in.readFloat()')
                    when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Double(in)") : ctx.rw.call('in.readDouble()')
                    else; raise "Invalid float field #{ctx.fld}"
                end
            },
            lambda{|ctx|
                case
                    when ctx.fType.length <= 4; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Float(out, val.#{ctx.valGetter})" : "out.writeFloat(val.#{ctx.valGetter})"
                    when ctx.fType.length <= 8; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Double(out, val.#{ctx.valGetter})" : "out.writeDouble(val.#{ctx.valGetter})"
                    else; raise "Invalid float field #{ctx.fld}"
                end
            })

=begin rdoc
HDFS Reader and Writer for the temporal type, the DateTime.
=end
    DTTM_RW_METHODS = RwHolder.new(
            lambda { |ctx|
                ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}ZonedDateTime(in)") : ctx.rw.call('readDttm(in)')
            },
            lambda { |ctx|
                ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}ZonedDateTime(out, val.#{ctx.valGetter})" : "writeDttm(out, val.#{ctx.valGetter})"
            }
    )

=begin rdoc
HDFS Reader and Writer for the boolean Java type.
=end
    BOOL_RW_METHODS = RwHolder.new(
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'Booleans') if ctx.fld.aggr
                ctx.rw.call('in.readBoolean()')
            },
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'Booleans') if ctx.fld.aggr
                "out.writeBoolean(val.#{ctx.valGetter})"
            }
    )

=begin rdoc
HDFS Reader and Writer for the raw data type, the byte array.
=end
    RAW_RW_METHODS = RwHolder.new(
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'Raw Data') if ctx.fld.aggr
                ctx.rw.call('readByteArray(in)')
            },
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'Raw Data') if ctx.fld.aggr
                "writeByteArray(out, val.#{ctx.valGetter})" }
    )

=begin rdoc
HDFS Reader and Writer for the variable size Decimal data type.
=end
    NUMERIC_RW_METHODS = RwHolder.new(lambda{|ctx| ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}BigDecimal(in)") : ctx.rw.call('readBigDecimal(in)')},
            lambda{|ctx| "writeBigDecimal(out, val.#{ctx.valGetter})"})

=begin rdoc
HDFS Reader and Writer for the Java Enums.
=end
    ENUM_RW_METHODS = RwHolder.new(
            lambda{|ctx|
                aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
                "#{condenseType(ctx.fType.type, ctx.pckg)}.forOrd(readVInt(in))"
            },
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
                "writeVInt(out, val.#{ctx.valGetter}.ordinal())"
            }
    )

=begin rdoc
HDFS Reader and Writer for the BitSet.
=end
    BITSET_RW_METHODS = RwHolder.new(
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'BitSets') if ctx.fld.aggr
                "new #{condenseType(ctx.fld.dataType, ctx.pckg)}(readLongArray(in))"
            },
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'BitSets') if ctx.fld.aggr
                "writeBitSet(out, val.#{ctx.valGetter})"
            }
    )

=begin rdoc
HDFS Reader and Writer for the URL.
=end
    URL_RW_METHODS = RwHolder.new(
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
                'new java.net.URL(readText(in))'
            },
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
                "writeTextIfAny(out, val.#{ctx.valGetter}.toExternalForm())"
            }
    )
=begin rdoc
Read/write methods for the standard data types.
=end
    STD_RW_METHODS = {
            INT => INTEGRAL_RW_METHODS,
            STRING => TEXT_RW_METHODS,
            DATETIME => DTTM_RW_METHODS,
            BOOL => BOOL_RW_METHODS,
            CHAR => TEXT_RW_METHODS,
            FLOAT => FLOAT_RW_METHODS,
            RAW => RAW_RW_METHODS,
            NUMERIC => NUMERIC_RW_METHODS,
            URL => URL_RW_METHODS
    }
    # DataMeta DOM object renderer
    RECORD_RW_METHODS = RwHolder.new(
            lambda { |ctx|
                if ctx.fld.aggr
                    if ctx.fld.trgType # map
                        mapsNotSupported(ctx.fld)
                    else # list, set or deque
                        "read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}(in, #{
                            inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance())"
                    end
                else # scalar
                    "#{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance().read(in)"
                end
            },
            lambda { |ctx|
                if ctx.fld.aggr && !ctx.fld.trgType
                    if ctx.fld.trgType # map
                        mapsNotSupported(ctx.fld)
                    else # list, set or deque
                        "writeCollection(val.#{ctx.valGetter}, out, #{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance())"
                    end
                else # scalar
                    "#{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance().write(out, val.#{ctx.valGetter})"
                end
            }
    )

    # Transforms the given DataMeta DOM aggregate type to a fully pathed Java class name
    def aggrJavaFull(aggr)
        PojoLexer::AGGR_CLASSES[aggr] || (raise ArgumentError, "No Aggregate classes for type #{aggr}" )
    end

    # Transforms the given full Java name for the aggregate class into a base name to interpolate into method names
    def aggrBaseName(aggr)
        /^(\w+\.)+(\w+)$/.match(aggr)[2]
    end
=begin rdoc
Read/write methods for the DataMeta DOM Maps, coincidentally all the same as for the standard data types.
=end
    MAP_RW_METHODS = STD_RW_METHODS

    # Build the Read/Write operation renderer for the given context:
    def getRwRenderer(ctx)
        dt = ctx.fld.dataType
        ctx.refType = nil # reset to avoid misrendering primitives
        rwRenderer = STD_RW_METHODS[dt.type]
        return rwRenderer if rwRenderer
        refKey = dt.type
        ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
        case
            when ctx.refType.kind_of?(DataMetaDom::Record)
                RECORD_RW_METHODS
            when ctx.refType.kind_of?(DataMetaDom::Enum)
                ENUM_RW_METHODS
            when ctx.refType.kind_of?(DataMetaDom::BitSet)
                BITSET_RW_METHODS
            when ctx.refType.kind_of?(DataMetaDom::Mapping)
                MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
                    ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
            else
                raise "No renderer defined for field #{ctx.fld}"
        end
    end

    # Temporary/scratch var -- avoiding collisions at all costs
    def tmpVar(name); "#{'_'*3}#{name}#{'_'*3}" end

    # Generates a Writable via delegation
    def genWritable(model, wriOut, ioOut, record, javaPackage, baseName)
        ctx = RendCtx.new.init(model, record, javaPackage, baseName)
        fields = record.fields
        wriName = writableClassName(baseName)
        ioName = inOutableClassName(baseName)
        # scan for imports needed
        hasOptional = fields.values.map{|f|
            # !model.records[f.dataType.type] &&
            !f.isRequired
        }.reduce(:|) # true if there is at least one optional field which isn't a record
        #fields.values.each { |f|
        #    ctx << DataMetaDom::PojoLexer::JAVA_IMPORTS[f.dataType.type]
        #}

        # field keys (names) in the order of reading/writing to the in/out record
        keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
        reads = ''
        writes = ''
        writeNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(); int fldIndex = -1;' : ''
        readNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(readLongArray(in), false); int fldIndex = -1;' : ''
        indent = "\n#{' ' * 8}"
        # sorting provides predictable read/write order
        keysInOrder.each { |k|
            f = fields[k]
            ctx.fld = f
            rwRenderer = getRwRenderer(ctx)
            # unless ctx.refType.kind_of?(DataMetaDom::Record)
            reads << (
                indent + (f.isRequired ? '' : 'fldIndex++;') + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
                    (f.isRequired ? '' : 'nullFlags.get(fldIndex) ? null : ')+
                    "#{rwRenderer.r.call(ctx)});"
            )
            # rendering of noReqFld - using the Veryfiable interface instead
            #=begin
            writes << (indent + (f.isRequired ?
                (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
                #%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
                "if(val.#{DataMetaDom.getterName(ctx.fld)}() != null) ") + "#{rwRenderer.w.call(ctx)};")
            unless f.isRequired
                writeNullMaskHead << (indent + "fldIndex++; if(val.#{DataMetaDom.getterName(ctx.fld)}() == null) nullFlags.set(fldIndex);")
            end
            #=end
            # end
        }
        writeNullMaskHead << ( indent + 'writeBitSet(out, nullFlags);') if hasOptional
        ioOut.puts <<IN_OUTABLE_CLASS
package #{javaPackage};
import org.ebay.datameta.dom.*;
import java.io.*;
import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
import static org.apache.hadoop.io.WritableUtils.*;
import org.ebay.datameta.ser.bytes.InOutable;
#{ctx.importsText}
#{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{ioName} extends InOutable<#{baseName}> {

    private static final #{ioName} INSTANCE = new #{ioName}();
    public static #{ioName} getInstance() { return INSTANCE; }
    private #{ioName}() {}

    @Override public void write(final DataOutput out, final #{baseName} val) throws IOException {
        val.verify();
        #{writeNullMaskHead}
        #{writes}
    }

    @Override public #{baseName} read(final DataInput in, final #{baseName} val) throws IOException {
        #{readNullMaskHead}
        #{reads}
        return val;
    }
    @Override public #{baseName} read(final DataInput in) throws IOException {
        return read(in, new #{baseName}());
    }
}
IN_OUTABLE_CLASS
        wriOut.puts <<WRITABLE_CLASS
package #{javaPackage};
import org.apache.hadoop.io.Writable;
import org.ebay.datameta.dom.*;
import java.io.*;
import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
import static org.apache.hadoop.io.WritableUtils.*;
import org.ebay.datameta.ser.bytes.HdfsReadWrite;
#{ctx.importsText}
#{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{wriName} extends HdfsReadWrite<#{baseName}> {

    public #{wriName}(final #{baseName} value) {
        super(value);
    }

    public #{wriName}() {
        super(new #{baseName}()); // the value must be on the instance at all times,
        // for example, when used with hadoop fs -text, this class will be used with default constructor
    }

    @Override public void write(final DataOutput out) throws IOException {
        #{ioName}.getInstance().write(out, getVal());
    }

    @Override public void readFields(final DataInput in) throws IOException {
        #{ioName}.getInstance().read(in, getVal());
    }
}
WRITABLE_CLASS

        ########assertValue();
    end

=begin rdoc
Generates all the writables for the given model.
Parameters:
* +model+ - the model to generate Writables from.
* +outRoot+ - destination directory name.
=end
    def genWritables(model, outRoot)
        model.records.values.each { |e|
            javaPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(e.name)
            destDir = File.join(outRoot, packagePath)
            FileUtils.mkdir_p destDir
            wriOut = File.open(File.join(destDir, "#{writableClassName(base)}.java"), 'wb')
            ioOut = File.open(File.join(destDir, "#{inOutableClassName(base)}.java"), 'wb')
            begin
                case
                    when e.kind_of?(DataMetaDom::Record)
                        genWritable model, wriOut, ioOut, e, javaPackage, base
                    else
                        raise "Unsupported Entity: #{e.inspect}"
                end
            ensure
                begin
                    ioOut.close
                ensure
                    wriOut.close
                end
            end
        }
    end

    # Shortcut to help for the Hadoop Writables generator.
    def helpDataMetaBytesSerGen(file, errorText=nil)
        DataMetaDom::help(file, 'DataMeta Serialization to/from Bytes', '<DataMeta DOM source> <Target Directory>', errorText)
    end

    module_function :helpDataMetaBytesSerGen, :genWritables, :genWritable, :getRwRenderer,
        :aggrBaseName, :aggrJavaFull
end
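The two aggregate helpers above map a DataMeta DOM aggregate to its Java collection class and then down to the short name spliced into the generated read.../write... method calls. A small sketch of aggrBaseName, which simply captures the last segment of a dotted class name:

require 'dataMetaByteSer'

# aggrBaseName strips the package off a fully qualified Java class name;
# the result is the short name interpolated into the generated method names.
puts DataMetaByteSer.aggrBaseName('java.util.ArrayList')     # => ArrayList
puts DataMetaByteSer.aggrBaseName('java.util.LinkedList')    # => LinkedList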
data/test/test_dataMetaByteSer.rb
ADDED
@@ -0,0 +1,17 @@
# keep this underscore naming in the test subdir, it's easier to append file names to test
require './test/test_helper.rb'

# Unit test cases for DataMetaByteSer
# See for instance:
# - test_full
class TestNewGem < Test::Unit::TestCase

    # an empty stub for now
    def setup;
    end

    # stub
    def test_true
        assert_equal('a', "a")
    end
end
data/test/test_helper.rb
ADDED
data/tmpl/readSwitch.erb
ADDED
@@ -0,0 +1,63 @@
<%#
Template for Java migration guides
%>
package <%=javaPackage%>;
/*
This class is generated by DataMeta DOM. Do not edit manually!
*/
import org.ebay.datameta.ser.bytes.InOutable;
import org.ebay.datameta.util.jdk.SemanticVersion;

import java.io.DataInput;
import java.io.IOException;

public class <%=javaClassName%> {

    private static final <%=javaClassName%> INSTANCE = new <%=javaClassName%>();

    public static <%=javaClassName%> getInstance() { return INSTANCE; }

<% model2.records.values.each { |trgE|
    vars.versCases = vers.clone.select{|v| v < ver2}.sort{|x, y| y<=>x}
    vars.javaPackage, vars.baseName, vars.packagePath = assertNamespace(trgE.name)
    srcRecName = flipVer(trgE.name, ver2.toVarName, ver1.toVarName)
    srcE = model1.records[srcRecName]
    if srcE
%>
    public <%= vars.baseName %> read_<%= vars.baseName %>_versioned(final DataInput in) throws IOException {
        final SemanticVersion ver = InOutable.readVersion(in);
        if(ver.equals(<%=vars.baseName%>.VERSION)) {
            return <%=vars.baseName%>_InOutable.getInstance().read(in);
<%
        while vars.versCases.length > 1 # loop through the case statement - a version per each
            vars.switchTargVer = vars.versCases.shift
            vars.brackets = ''
            caseObjName = flipVer(trgE.name, ver2.toVarName, vars.switchTargVer.toVarName)
            caseMod = modelForVer.call(vars.switchTargVer)
            next unless caseMod.records.keys.member?(caseObjName.to_sym) # skip cases for the versions where this object's target version does not exist
%>
        }
        else if(ver.equals(<%=caseObjName%>.VERSION)){<% vars.versMigr = vers.clone.select{|v| v <= ver2}.sort{|x, y| y<=>x}%>
            return <% while vars.versMigr.length > 1 # migration steps loop nested in the case statement loop
                vars.brackets << ')'
                vars.migrTargVer = vars.versMigr.shift # target version for migration loop
                vars.srcVer = vars.versMigr[0]
                vars.srcType = flipVer(trgE.name, ver2.toVarName, vars.srcVer.toVarName)
                migrMod = modelForVer.call(vars.srcVer)
                break unless migrMod.records.keys.member?(vars.srcType.to_sym) # enough if there is no record in the target version
                vars.jpMigr = vars.javaPackage.gsub(".v#{ver2.toVarName}", ".v#{vars.migrTargVer.toVarName}")
%>
            // substituting in <%=vars.javaPackage%>: ".v<%=ver2.toVarName%>" with ".v<%=vars.migrTargVer.toVarName%>"
            <%=vars.jpMigr%>.<%=migrClass(vars.baseName, vars.srcVer, vars.migrTargVer)%>.getInstance().migrate(<% break if vars.srcVer <= vars.switchTargVer
            end # migration steps loop %>

            <%=vars.srcType%>_InOutable.getInstance().read(in)
            <%= vars.brackets %>;
        <% end %>
        }
        else throw new IllegalArgumentException("Unsupported version for the record <%=vars.baseName%>: " + ver);
    }
<% end
} # records loop %>
}
metadata
ADDED
@@ -0,0 +1,80 @@
--- !ruby/object:Gem::Specification
name: dataMetaByteSer
version: !ruby/object:Gem::Version
  version: 1.0.0
platform: ruby
authors:
- Michael Bergens
autorequire:
bindir: bin
cert_chain: []
date: 2017-01-15 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: dataMetaDom
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.0.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.0.0
description: Generates serializers of DataMeta objects to/from byte arrays, which
  can be used with Hadoop, BigTable and beyond.
email: michael.bergens@gmail.com
executables:
- dataMetaByteSerGen.rb
extensions: []
extra_rdoc_files: []
files:
- ".yardopts"
- History.md
- PostInstall.txt
- README.md
- Rakefile
- bin/dataMetaByteSerGen.rb
- lib/dataMetaByteSer.rb
- lib/dataMetaByteSer/python.rb
- lib/dataMetaByteSer/util.rb
- lib/dataMetaByteSer/ver_reads.rb
- test/test_dataMetaByteSer.rb
- test/test_helper.rb
- tmpl/readSwitch.erb
homepage: https://github.com/eBayDataMeta
licenses:
- Apache-2.0
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: 2.1.1
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements:
- Hadoop libraries
rubyforge_project:
rubygems_version: 2.5.1
signing_key:
specification_version: 4
summary: DataMeta Byte Array Serializers Gen
test_files:
- test/test_dataMetaByteSer.rb
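The gemspec above pins dataMetaDom at "~> 1.0, >= 1.0.0" and requires Ruby 2.1.1 or newer. A minimal Gemfile sketch for consuming this gem; the rubygems.org source line is the conventional default, not something mandated by the gem:

# Gemfile
source 'https://rubygems.org'

# required_ruby_version above is >= 2.1.1; dataMetaDom ~> 1.0 is pulled in transitively.
gem 'dataMetaByteSer', '~> 1.0'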