dataMetaByteSer 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.yardopts +1 -0
- data/History.md +5 -0
- data/PostInstall.txt +1 -0
- data/README.md +35 -0
- data/Rakefile +13 -0
- data/bin/dataMetaByteSerGen.rb +16 -0
- data/lib/dataMetaByteSer/python.rb +387 -0
- data/lib/dataMetaByteSer/util.rb +138 -0
- data/lib/dataMetaByteSer/ver_reads.rb +49 -0
- data/lib/dataMetaByteSer.rb +391 -0
- data/test/test_dataMetaByteSer.rb +17 -0
- data/test/test_helper.rb +4 -0
- data/tmpl/readSwitch.erb +63 -0
- metadata +80 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: 4d5201ed98a82b4da2ca29379d7318cb3607e920
  data.tar.gz: edb71acaf79c4111d1a703c9a7044c9a78019fb8
SHA512:
  metadata.gz: 4d9edb7888e536f006becb9ce5612ef7573798afb5f136e0b36745021caaebc8c1cfad455815e6c1269a25763f45590a0133200ffc3121ababe839791e7cae7d
  data.tar.gz: a5358415079756f869e417d783f069ebdf60aafe46c8fb449dd9014af7ef269ff1f683723f9784f3759b83d79c848aa1ab0ff6303675c1b8fa55f647d2c99639
data/.yardopts
ADDED
@@ -0,0 +1 @@
--title "DataMeta Bytes (de)serialization" -r README.md --charset UTF-8 lib/**/* - README.md
data/History.md
ADDED
data/PostInstall.txt
ADDED
@@ -0,0 +1 @@
No special steps
data/README.md
ADDED
@@ -0,0 +1,35 @@
# `dataMetaByteSer` gem

Byte array (de)serialization generation from [DataMeta DOM](https://github.com/eBayDataMeta/DataMeta-gems) sources.

References to this gem's:

* [Source](https://github.com/eBayDataMeta/DataMeta-gems)


## DESCRIPTION:

See the [DataMeta Project](https://github.com/eBayDataMeta/DataMeta)

## FEATURES:

Generates (de)serializers to/from byte arrays with matching Hadoop writables, performance maximized by storage size
first and runtime performance second, both aspects are clocked to perform around best in the class.

## SYNOPSIS:

To generate Byte Array serializers in Java, including Hadoop Writables for the DataMeta model, run:

    dataMetaByteSerGen.rb <DataMeta DOM source> <Target Directory>

## REQUIREMENTS:

* No special requirements

## INSTALL:

    gem install dataMetaByteSer

## LICENSE:

[Apache v 2.0](https://github.com/eBayDataMeta/DataMeta/blob/master/LICENSE.md)
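
Note: the SYNOPSIS above also maps onto a small programmatic API. The Ruby sketch below mirrors what the bundled bin/dataMetaByteSerGen.rb (shown later in this diff) does; the model path and output directory are hypothetical placeholders, and it assumes the dataMetaDom gem is installed alongside this one.

    # Minimal programmatic sketch (assumed paths; mirrors bin/dataMetaByteSerGen.rb below)
    require 'dataMetaDom'
    require 'dataMetaByteSer'

    model = DataMetaDom::Model.new
    model.parse('model/Example.dmDom')                 # hypothetical DataMeta DOM source file
    DataMetaByteSer::genWritables(model, 'target/gen') # emits *_InOutable.java and *_Writable.java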
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
%w(yard rdoc/task rake/testtask fileutils ./lib/dataMetaByteSer).each{ |r| require r}

Rake::TestTask.new do |t|
    t.libs << 'test'
end

desc 'Regen RDocs'
task :default => :docs

YARD::Rake::YardocTask.new('docs') {|r|
    r.stats_options = ['--list-undoc']
}
data/bin/dataMetaByteSerGen.rb
ADDED
@@ -0,0 +1,16 @@
#!/usr/bin/env ruby
%w( dataMetaDom dataMetaByteSer ).each(&method(:require))

@source, @target = ARGV
DataMetaByteSer::helpDataMetaBytesSerGen __FILE__ unless @source && @target
DataMetaByteSer::helpDataMetaBytesSerGen(__FILE__, "DataMeta DOM source #{@source} is not a file") unless File.file?(@source)
DataMetaByteSer::helpDataMetaBytesSerGen(__FILE__, "Writables destination directory #{@target} is not a dir") unless File.directory?(@target)

@parser = DataMetaDom::Model.new
begin
    @parser.parse(@source)
    DataMetaByteSer::genWritables(@parser, @target)
rescue Exception => e
    $stderr.puts "ERROR #{e.message}; #{@parser.diagn}"
    $stderr.puts e.backtrace.inspect
end
data/lib/dataMetaByteSer/python.rb
ADDED
@@ -0,0 +1,387 @@
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

require 'fileutils'
require 'dataMetaDom'
require 'dataMetaDom/util'
require 'dataMetaDom/python'
require 'dataMetaByteSer/util'

module DataMetaByteSer
    # (De)Serialization for Python
    module Py
        include DataMetaDom, DataMetaDom::PythonLexer, DataMetaByteSer
=begin rdoc
Builds a class name for a Writable.
=end
        def writableClassName(baseName); "#{baseName}_Writable" end
=begin rdoc
Builds a class name for a InOutable.
=end
        def inOutablePy(arg)
            klassName = case
                when arg.kind_of?(String)
                    arg
                else
                    _, s = DataMetaDom.splitNameSpace(arg.fType.type)
                    s
            end
            "#{klassName}_InOutable"
        end

        def mapsNotSupported(fld)
            raise ArgumentError, "Field #{fld.name}: maps are not currently supported on Hadoop layer"
        end

        def aggrNotSupported(fld, forWhat)
            raise ArgumentError, "Field #{fld.name}: aggregate types are not supported for #{forWhat} on Hadoop layer"
        end

=begin rdoc
HDFS Reader and Writer for textual Python types such as str.
=end
        TEXT_RW_METHODS = DataMetaByteSer::RwHolder.new(
            lambda{|ctx|
                ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}String(di)") : ctx.rw.call('DataMetaHadoopUtil.readText(di)')
            },
            lambda{|ctx|
                ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}String(do, val.#{ctx.valGetter})" : "DataMetaHadoopUtil.writeTextIfAny(do, val.#{ctx.valGetter})"
            }
        )

=begin rdoc
HDFS Reader and Writer for integral Python type.
=end
        INTEGRAL_RW_METHODS = RwHolder.new(
            lambda{ |ctx|
                mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                case
                    when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Integer(di)") :
                        ctx.rw.call('WritableUtils.readVInt(di)')

                    when ctx.fType.length <= 8; ; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Long(di)") : ctx.rw.call('WritableUtils.readVLong(di)')

                    else; raise "Invalid integer field #{ctx.fld}"
                end
            },
            lambda{ |ctx|
                case
                    when ctx.fType.length <= 4; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Integer(do, val.#{ctx.valGetter})" :
                        "WritableUtils.writeVInt(do, val.#{ctx.valGetter})"

                    when ctx.fType.length <= 8; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Long(do, val.#{ctx.valGetter})" : "WritableUtils.writeVLong(do, val.#{ctx.valGetter})"

                    else; raise "Invalid integer field #{ctx.fld}"
                end
            })

=begin rdoc
HDFS Reader and Writer for Booleans.
=end
        BOOLEAN_RW_METHODS = RwHolder.new(
            lambda{|ctx|
                mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Boolean(di)") : ctx.rw.call('di.readBoolean()')
            },
            lambda{|ctx|
                mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Boolean(do, val.#{ctx.valGetter})" : "do.writeBoolean(val.#{ctx.valGetter})"
            })

        # Python has no primitivable types
        PRIMITIVABLE_TYPES = Set.new

=begin rdoc
HDFS Reader and Writer for floating point types.
=end
        FLOAT_RW_METHODS = RwHolder.new(
            lambda{|ctx|
                mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                case
                    when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Float(di)") : ctx.rw.call('di.readFloat()')
                    when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}Double(di)") : ctx.rw.call('di.readDouble()')
                    else; raise "Invalid float field #{ctx.fld}"
                end
            },
            lambda{|ctx|
                mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
                case
                    when ctx.fType.length <= 4; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Float(do, val.#{ctx.valGetter})" : "do.writeFloat(val.#{ctx.valGetter})"
                    when ctx.fType.length <= 8; ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}Double(do, val.#{ctx.valGetter})" : "do.writeDouble(val.#{ctx.valGetter})"
                    else; raise "Invalid float field #{ctx.fld}"
                end
            })

=begin rdoc
HDFS Reader and Writer for the temporal type, the DateTime
=end
        DTTM_RW_METHODS = RwHolder.new(
            lambda { |ctx|
                ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}DateTime(di)") : ctx.rw.call('DataMetaHadoopUtil.readDttm(di)')
            },
            lambda { |ctx|
                ctx.fld.aggr ? "DataMetaHadoopUtil.write#{aggrPyFull(ctx.fld.aggr)}DateTime(do, val.#{ctx.valGetter})" : "DataMetaHadoopUtil.writeDttm(do, val.#{ctx.valGetter})"
            }
        )
=begin rdoc
HDFS Reader and Writer the variable size Decimal data type.
=end
        NUMERIC_RW_METHODS = RwHolder.new(lambda{|ctx| ctx.fld.aggr ? ctx.rw.call("DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}BigDecimal(di)") : ctx.rw.call('DataMetaHadoopUtil.readBigDecimal(di)')},
            lambda{|ctx| "DataMetaHadoopUtil.writeBigDecimal(do, val.#{ctx.valGetter})"})

        # Full name of a Py aggregate for the given DataMeta DOM aggregate
        def aggrPyFull(aggr)
            case aggr
                when DataMetaDom::Field::LIST
                    'List'
                when DataMetaDom::Field::SET
                    'Set'
                when DataMetaDom::Field::DEQUE
                    'Deque' # note this is different from Java
                else
                    raise ArgumentError, "Aggregate type #{aggr} not supported for Python serialization"
            end
        end

=begin rdoc
HDFS Reader and Writer the Java Enums.
=end
        ENUM_RW_METHODS = RwHolder.new(
            lambda{|ctx|
                aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
                _, s = DataMetaDom.splitNameSpace(ctx.fType.type)
                "#{s}(WritableUtils.readVInt(di) + 1)" # Python starts their enums from 1 - we save it starting from 0
                # as Java and Scala does
            },
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
                # Python starts their enums from 1 - we save it starting from 0 as Java and Scala
                "WritableUtils.writeVInt(do, val.#{ctx.valGetter}.value - 1)"
            }
        )
=begin rdoc
HDFS Reader and Writer the URL.
=end
        URL_RW_METHODS = RwHolder.new(
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
                'DataMetaHadoopUtil.readText(di)'
            },
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
                "DataMetaHadoopUtil.writeTextIfAny(do, val.#{ctx.valGetter})"
            }
        )
        # Pseudo-implementers that just raise an error
        NOT_IMPLEMENTED_METHODS = RwHolder.new(
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'Serialization')
            },
            lambda { |ctx|
                aggrNotSupported(ctx.fld, 'Serialization')
            }
        )
=begin rdoc
Read/write methods for the standard data types.
=end
        STD_RW_METHODS = {
            DataMetaDom::INT => INTEGRAL_RW_METHODS,
            DataMetaDom::STRING => TEXT_RW_METHODS,
            DataMetaDom::DATETIME => DTTM_RW_METHODS,
            DataMetaDom::BOOL => BOOLEAN_RW_METHODS,
            DataMetaDom::CHAR => TEXT_RW_METHODS,
            DataMetaDom::FLOAT => FLOAT_RW_METHODS,
            DataMetaDom::RAW => NOT_IMPLEMENTED_METHODS,
            DataMetaDom::NUMERIC => NUMERIC_RW_METHODS,
            DataMetaDom::URL => URL_RW_METHODS
        }
        # DataMeta DOM object renderer
        RECORD_RW_METHODS = RwHolder.new(
            lambda { |ctx|
                if ctx.fld.aggr
                    if ctx.fld.trgType # map
                        mapsNotSupported(ctx.fld)
                    else # list, set or deque
                        "DataMetaHadoopUtil.read#{aggrPyFull(ctx.fld.aggr)}(di, #{
                            inOutablePy(ctx)}())"
                    end
                else # scalar
                    "#{inOutablePy(ctx)}().read(di)"
                end
            },
            lambda { |ctx|
                if ctx.fld.aggr && !ctx.fld.trgType
                    if ctx.fld.trgType # map
                        mapsNotSupported(ctx.fld)
                    else # list, set or deque
                        "DataMetaHadoopUtil.writeCollection(val.#{ctx.valGetter}, do, #{inOutablePy(ctx)}())"
                    end
                else # scalar
                    "#{inOutablePy(ctx)}().write(do, val.#{ctx.valGetter})"
                end
            }
        )
=begin rdoc
Read/write methods for the DataMeta DOM Maps, accidentally all the same as for the standard data types.
=end
        MAP_RW_METHODS = STD_RW_METHODS

        # Build the Read/Write operation renderer for the given context:
        def getRwRenderer(ctx)
            dt = ctx.fld.dataType
            ctx.refType = nil # reset to avoid misrendering primitives
            rwRenderer = STD_RW_METHODS[dt.type]
            return rwRenderer if rwRenderer
            refKey = dt.type
            ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
            case
                when ctx.refType.kind_of?(DataMetaDom::Record)
                    RECORD_RW_METHODS
                when ctx.refType.kind_of?(DataMetaDom::Enum)
                    ENUM_RW_METHODS
                when ctx.refType.kind_of?(DataMetaDom::BitSet)
                    NOT_IMPLEMENTED_METHODS
                when ctx.refType.kind_of?(DataMetaDom::Mapping)
                    MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
                        ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
                else
                    raise "No renderer defined for field #{ctx.fld}"
            end
        end

        # Generates one InOutable, Writables here currently are not generated
        def genWritable(model, wriOut, ioOut, record, pyPackage, baseName)
            enumCount = model.enums.values.select{|e| e.kind_of?(DataMetaDom::Enum)}.size
            recImports = model.records.values.map{|r| # import all records
                p, b, pp = DataMetaDom::PojoLexer::assertNamespace(r.name)
                %|from #{DataMetaXtra::Str.downCaseFirst(b)} import #{b}|
            }.join("\n")
#            ioImports = model.records.values.reject{|r| r.name == record.name}.map{|r| # import all InOutables except of this one
#                p, b, pp = DataMetaDom::PojoLexer::assertNamespace(r.name)
#                # since one InOutable may import another which may import another, and Python can't handle this,
#                # catch the error. It's harmless because if it really failed to import, we'll know
#                %|
#try:
#    from #{inOutablePy(DataMetaXtra::Str.downCaseFirst(b))} import #{inOutablePy(b)}
#except ImportError:
#    pass|
#            }.join("\n")
            ctx = RendCtx.new.init(model, record, pyPackage, baseName)
            fields = record.fields
            wriName = nil # writableClassName(baseName)
            ioName = inOutablePy(baseName)
            hasOptional = fields.values.map{|f|
                # !model.records[f.dataType.type] &&
                !f.isRequired
            }.reduce(:|) # true if there is at least one optional field which isn't a record
            keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
            reads = ''
            writes = ''
            writeNullMaskHead = hasOptional ? "nullFlags = bitarray(#{fields.keys.size}); nullFlags.setall(False); fldIndex = -1" : ''
            readNullMaskHead = hasOptional ? 'nullFlags = DataMetaHadoopUtil.readBitArray(di); fldIndex = -1' : ''
            indent = "\n#{' ' * 8}"
            # sorting provides predictable read/write order
            keysInOrder.each { |k|
                f = fields[k]
                ctx.fld = f
                rwRenderer = getRwRenderer(ctx)
                reads << ( indent + (f.isRequired ? '' : "fldIndex += 1#{indent}") + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
                    (f.isRequired ? '' : ' None if nullFlags[fldIndex] else ')+ "#{rwRenderer.r.call(ctx)})"
                )
                # noinspection RubyNestedTernaryOperatorsInspection
                writes << (indent + (f.isRequired ?
                    (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
                    #%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
                    "if(val.#{DataMetaDom.getterName(ctx.fld)}() is not None): ") + "#{rwRenderer.w.call(ctx)}")
                unless f.isRequired
                    writeNullMaskHead << (indent + "fldIndex += 1#{indent}if(val.#{DataMetaDom.getterName(ctx.fld)}() is None): nullFlags[fldIndex] = True")
                end
            }
            writeNullMaskHead << ( indent + 'DataMetaHadoopUtil.writeBitArray(do, nullFlags)') if hasOptional

            ioOut.puts <<IN_OUTABLE_CLASS

class #{ioName}(InOutable):

    def write(self, do, val):
        val.verify()
        #{writeNullMaskHead}
        #{writes}

    def readVal(self, di, val):
        #{readNullMaskHead}
        #{reads}
        return val

    def read(self, di):
        return self.readVal(di, #{baseName}())

IN_OUTABLE_CLASS
        end

=begin rdoc
Generates all the writables for the given model.
Parameters:
* +model+ - the model to generate Writables from.
* +outRoot+ - destination directory name.
=end
        def genWritables(model, outRoot)
            firstRecord = model.records.values.first
            pyPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(firstRecord.name)
            # Next: replace dots with underscores.The path also adjusted accordingly.
            #
            # Rationale for this, quoting PEP 8:
            #
            #    Package and Module Names
            #
            #    Modules should have short, all-lowercase names. Underscores can be used in the module name if it improves
            #    readability. Python packages should also have short, all-lowercase names, although the use of underscores
            #    is discouraged.
            #
            # Short and all-lowercase names, and improving readability if you have complex system and need long package names,
            # is "discouraged". Can't do this here, our system is more complicated for strictly religous, "pythonic" Python.
            # A tool must be enabling, and in this case, this irrational ruling gets in the way.
            # And dots are a no-no, Python can't find packages with complicated package structures and imports.
            #
            # Hence, we opt for long package names with underscores for distinctiveness and readability:
            pyPackage = pyPackage.gsub('.', '_')
            packagePath = packagePath.gsub('/', '_')
            destDir = File.join(outRoot, packagePath)
            FileUtils.mkdir_p destDir
            wriOut = nil # File.open(File.join(destDir, "#{writableClassName(base)}.py"), 'wb')
            serFile = File.join(destDir, 'serial.py')
            FileUtils.rm serFile if File.file?(serFile)
            ioOut = File.open(serFile, 'wb') # one huge serialization file
            ioOut.puts %|# This file is generated by DataMeta DOM. Do not edit manually!
#package #{pyPackage}

from hadoop.io import WritableUtils, InputStream, OutputStream, Text
from ebay_datameta_core.base import DateTime
from decimal import *
from collections import *
from bitarray import bitarray
from ebay_datameta_hadoop.base import *
from model import *

|
            begin
                model.records.values.each { |e|
                    _, base, _ = DataMetaDom::PojoLexer::assertNamespace(e.name)
                    case
                        when e.kind_of?(DataMetaDom::Record)
                            genWritable model, wriOut, ioOut, e, pyPackage, base
                        else
                            raise "Unsupported Entity: #{e.inspect}"
                    end
                }
            ensure
                begin
                    ioOut.close
                ensure
                    #wriOut.close
                end
            end
        end
        module_function :genWritables, :genWritable, :inOutablePy, :writableClassName, :mapsNotSupported,
            :aggrNotSupported, :getRwRenderer, :aggrPyFull
    end
end
data/lib/dataMetaByteSer/util.rb
ADDED
@@ -0,0 +1,138 @@
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

require 'set'
require 'logger'

module DataMetaByteSer

=begin rdoc
A holder for a read renderer and a write renderer, those come in pairs that have to be consistent so the
data is read and written uniformly.
=end
    class RwHolder
=begin rdoc
Read renderer.
=end
        attr_reader :r
=begin rdoc
Write renderer.
=end
        attr_reader :w
=begin rdoc
Creates a new HDFS Reade and Write renderers pair.
=end
        def initialize(readRenderer, writeRenderer); @r = readRenderer; @w = writeRenderer end
    end

=begin rdoc
Rendering context with rendering-related properties and settings.
=end
    class RendCtx

=begin rdoc
DataMeta DOM Model on the context.
=end
        attr_accessor :model
=begin rdoc
Record currently worked on.
=end
        attr_accessor :rec

=begin rdoc
Set of imports if any, each as symbol.
=end
        attr_accessor :imps

=begin rdoc
Java package.
=end
        attr_accessor :pckg
=begin rdoc
Base name of the type, without a namespace.
=end
        attr_accessor :baseName
=begin rdoc
The data type of the entity on the context.
=end
        attr_accessor :refType
=begin rdoc
Field currently on the context.
=end
        attr_reader :fld

=begin rdoc
Creates a new context.
=end
        def initialize; @imps = Set.new end

=begin rdoc
Setter for the field on the context, the field currently worked on.
=end
        def fld=(val); @fld = val end

=begin rdoc
Initialize the context with the model, the record, the package and the basename.
Returns self for call chaining.
=end
        def init(model, rec, pckg, baseName); @model = model; @rec = rec; @pckg = pckg; @baseName = baseName; self end

=begin rdoc
Add an import to the context, returns self for call chaining.
=end
        def <<(import)
            @imps << import.to_sym if import
            self
        end

=begin rdoc
Formats imports into Java source, sorted.
=end
        def importsText
            @imps.to_a.map{|k| "import #{k};"}.sort.join("\n")
        end

=begin rdoc
Determines if the refType is a DataMetaDom::Mapping.
=end
        def isMapping
            @refType.kind_of?(DataMetaDom::Mapping) && !@refType.kind_of?(DataMetaDom::BitSet)
        end

        # Effective field type
        def fType
            isMapping ? @refType.fromT : @fld.dataType
        end

        # Readwrap
        def rw
            isMapping ? lambda{|t| "new #{condenseType(@fld.dataType.type, self)}(#{t})"} : lambda{|t| t}
        end

=begin rdoc
Getter name for the current field, if the type is Mapping, includes <tt>.getKey()</tt> too.
=end
        def valGetter
            "#{DataMetaDom.getterName(@fld)}()" + ( isMapping ? '.getKey()' : '')
        end
    end # RendCtx

=begin rdoc
Builds a class name for a Writable.
=end
    def writableClassName(baseName); "#{baseName}_Writable" end

=begin rdoc
Builds a class name for a InOutable.
=end
    def inOutableClassName(baseName); "#{baseName}_InOutable" end

    def mapsNotSupported(fld)
        raise ArgumentError, "Field #{fld.name}: maps are not currently supported on Hadoop layer"
    end

    def aggrNotSupported(fld, forWhat)
        raise ArgumentError, "Field #{fld.name}: aggregate types are not supported for #{forWhat} on Hadoop layer"
    end

    module_function :writableClassName, :inOutableClassName, :mapsNotSupported, :aggrNotSupported
end
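
The RwHolder/RendCtx pair above is the contract the generator modules elsewhere in this gem (lib/dataMetaByteSer.rb and lib/dataMetaByteSer/python.rb) build on: each data type gets a matched pair of lambdas that return source text for the read side and the write side. A minimal illustrative sketch of that pairing follows; the lambda bodies here are stand-ins, not the gem's real renderers.

    # Illustrative pairing only; the renderer bodies are stand-ins.
    require 'dataMetaByteSer/util'

    holder = DataMetaByteSer::RwHolder.new(
        lambda { |ctx| 'readVInt(in)' },                         # read renderer -> Java expression text
        lambda { |ctx| "writeVInt(out, val.#{ctx.valGetter})" }  # write renderer -> Java statement text
    )
    puts holder.r.call(nil)  # this toy read renderer ignores its context
    # holder.w.call(ctx)     # the write side needs a populated RendCtx (see getRwRenderer)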
data/lib/dataMetaByteSer/ver_reads.rb
ADDED
@@ -0,0 +1,49 @@
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

require 'dataMetaDom/field'
require 'dataMetaDom/pojo'

module DataMetaByteSer
=begin rdoc
Migration tooling.

=end
    module VerReads
        include DataMetaDom, DataMetaDom::PojoLexer
=begin
Generates Versioned Read switch that channels the read to the proper migration scenario.
=end
        def genVerReadSwitch(v1, v2, modelForVer, vers, outRoot)
            # v1 = mo1.records.values.first.ver.full
            # v2 = mo2.records.values.first.ver.full
            mo1 = modelForVer.call(v1)
            mo2 = modelForVer.call(v2)
            destDir = outRoot
            javaPackage = '' # set the scope for the var
            vars = OpenStruct.new # for template's local variables. ERB does not make them visible to the binding
            # sort the models by versions out, 2nd to be the latest:
            raise ArgumentError, "Versions on the model are the same: #{v1}" if v1 == v2
            if v1 > v2
                model2 = mo1
                model1 = mo2
                ver1 = v2
                ver2 = v1
            else
                model2 = mo2
                model1 = mo1
                ver1 = v1
                ver2 = v2
            end
            puts "Going from ver #{ver1} to #{ver2}"
            trgE = model2.records.values.first
            javaPackage, baseName, packagePath = assertNamespace(trgE.name)
            javaClassName = "Read__Switch_v#{ver1.toVarName}_to_v#{ver2.toVarName}"
            destDir = File.join(outRoot, packagePath)
            FileUtils.mkdir_p destDir
            IO::write(File.join(destDir, "#{javaClassName}.java"),
                ERB.new(IO.read(File.join(File.dirname(__FILE__), '../../tmpl/readSwitch.erb')),
                    $SAFE, '%<>').result(binding), mode: 'wb')
        end
        module_function :genVerReadSwitch
    end
end
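
For context, genVerReadSwitch above is driven with two model versions, a lookup from version to parsed model, the full list of known versions, and an output root. A hypothetical driver sketch follows; every name in it other than the method itself is a stand-in, and it assumes the dataMetaDom gem is installed.

    # Hypothetical driver sketch for genVerReadSwitch (argument order per the method above).
    require 'dataMetaByteSer/ver_reads'

    # models = { verA => modelA, verB => modelB }   # parsed DataMetaDom::Model per version
    # DataMetaByteSer::VerReads.genVerReadSwitch(
    #     verA, verB,                # the two versions to bridge (must differ)
    #     lambda { |v| models[v] },  # modelForVer: version -> model lookup
    #     models.keys,               # vers: every known version
    #     'target/gen')              # outRoot for the generated Read__Switch_*.java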
data/lib/dataMetaByteSer.rb
ADDED
@@ -0,0 +1,391 @@
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

# Definition for generating Plain Old Java Objects (POJOs)
%w(fileutils dataMetaDom dataMetaDom/pojo dataMetaDom/enum dataMetaDom/record dataMetaDom/help).each(&method(:require))
require 'set'
require 'dataMetaByteSer/util'

=begin rdoc
Serialization artifacts generation such as Hadoop Writables etc.

TODO this isn't a bad way, but beter use templating next time such as {ERB}[http://ruby-doc.org/stdlib-1.9.3/libdoc/erb/rdoc/ERB.html].

For command line details either check the new method's source or the README.rdoc file, the usage section.
=end
module DataMetaByteSer
    # Current version
    VERSION = '1.0.0'
    include DataMetaDom, DataMetaDom::PojoLexer

=begin rdoc
HDFS Reader and Writer for textual Java types such as String.
=end
    TEXT_RW_METHODS = RwHolder.new(
        lambda{|ctx|
            ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}String(in)") : ctx.rw.call('readText(in)')
        },
        lambda{|ctx|
            ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}String(out, val.#{ctx.valGetter})" : "writeTextIfAny(out, val.#{ctx.valGetter})"
        }
    )

=begin rdoc
HDFS Reader and Writer for integral Java types such as Integer or Long.
=end
    INTEGRAL_RW_METHODS = RwHolder.new(
        lambda{ |ctx|
            mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
            case
                when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Integer(in)") :
                    ctx.rw.call('readVInt(in)')

                when ctx.fType.length <= 8; ; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Long(in)") : ctx.rw.call('readVLong(in)')

                else; raise "Invalid integer field #{ctx.fld}"
            end
        },
        lambda{ |ctx|
            case
                when ctx.fType.length <= 4; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Integer(out, val.#{ctx.valGetter})" :
                    "writeVInt(out, val.#{ctx.valGetter})"

                when ctx.fType.length <= 8; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Long(out, val.#{ctx.valGetter})" : "writeVLong(out, val.#{ctx.valGetter})"

                else; raise "Invalid integer field #{ctx.fld}"
            end
        })

=begin rdoc
HDFS Reader and Writer for floating point Java types such as Float or Double.
=end
    FLOAT_RW_METHODS = RwHolder.new(
        lambda{|ctx|
            mapsNotSupported(ctx.fld) if ctx.fld.trgType # map
            case
                when ctx.fType.length <= 4; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Float(in)") : ctx.rw.call('in.readFloat()')
                when ctx.fType.length <= 8; ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Double(in)") : ctx.rw.call('in.readDouble()')
                else; raise "Invalid float field #{ctx.fld}"
            end
        },
        lambda{|ctx|
            case
                when ctx.fType.length <= 4; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Float(out, val.#{ctx.valGetter})" : "out.writeFloat(val.#{ctx.valGetter})"
                when ctx.fType.length <= 8; ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}Double(out, val.#{ctx.valGetter})" : "out.writeDouble(val.#{ctx.valGetter})"
                else; raise "Invalid float field #{ctx.fld}"
            end
        })

=begin rdoc
HDFS Reader and Writer for the temporal type, the DateTime
=end
    DTTM_RW_METHODS = RwHolder.new(
        lambda { |ctx|
            ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}ZonedDateTime(in)") : ctx.rw.call('readDttm(in)')
        },
        lambda { |ctx|
            ctx.fld.aggr ? "write#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}ZonedDateTime(out, val.#{ctx.valGetter})" : "writeDttm(out, val.#{ctx.valGetter})"
        }
    )

=begin rdoc
HDFS Reader and Writer for boolean Java type.
=end
    BOOL_RW_METHODS = RwHolder.new(
        lambda { |ctx|
            aggrNotSupported(ctx.fld, 'Booleans') if ctx.fld.aggr
            ctx.rw.call('in.readBoolean()')
        },
        lambda { |ctx|
            aggrNotSupported(ctx.fld, 'Booleans') if ctx.fld.aggr
            "out.writeBoolean(val.#{ctx.valGetter})"
        }
    )

=begin rdoc
HDFS Reader and Writer the raw data type, the byte array.
=end
    RAW_RW_METHODS = RwHolder.new(
        lambda { |ctx|
            aggrNotSupported(ctx.fld, 'Raw Data') if ctx.fld.aggr
            ctx.rw.call('readByteArray(in)')
        },
        lambda { |ctx|
            aggrNotSupported(ctx.fld, 'Raw Data') if ctx.fld.aggr
            "writeByteArray(out, val.#{ctx.valGetter})" }
    )

=begin rdoc
HDFS Reader and Writer the variable size Decimal data type.
=end
    NUMERIC_RW_METHODS = RwHolder.new(lambda{|ctx| ctx.fld.aggr ? ctx.rw.call("read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}BigDecimal(in)") : ctx.rw.call('readBigDecimal(in)')},
        lambda{|ctx| "writeBigDecimal(out, val.#{ctx.valGetter})"})

=begin rdoc
HDFS Reader and Writer the Java Enums.
=end
    ENUM_RW_METHODS = RwHolder.new(
        lambda{|ctx|
            aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
            "#{condenseType(ctx.fType.type, ctx.pckg)}.forOrd(readVInt(in))"
        },
        lambda { |ctx|
            aggrNotSupported(ctx.fld, 'Enums') if ctx.fld.aggr
            "writeVInt(out, val.#{ctx.valGetter}.ordinal())"
        }
    )

=begin rdoc
HDFS Reader and Writer the BitSet.
=end
    BITSET_RW_METHODS = RwHolder.new(
        lambda { |ctx|
            aggrNotSupported(ctx.fld, 'BitSets') if ctx.fld.aggr
            "new #{condenseType(ctx.fld.dataType, ctx.pckg)}(readLongArray(in))"
        },
        lambda { |ctx|
            aggrNotSupported(ctx.fld, 'BitSets') if ctx.fld.aggr
            "writeBitSet(out, val.#{ctx.valGetter})"
        }
    )

=begin rdoc
HDFS Reader and Writer the URL.
=end
    URL_RW_METHODS = RwHolder.new(
        lambda { |ctx|
            aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
            'new java.net.URL(readText(in))'
        },
        lambda { |ctx|
            aggrNotSupported(ctx.fld, 'URLs') if ctx.fld.aggr
            "writeTextIfAny(out, val.#{ctx.valGetter}.toExternalForm())"
        }
    )
=begin rdoc
Read/write methods for the standard data types.
=end
    STD_RW_METHODS = {
        INT => INTEGRAL_RW_METHODS,
        STRING => TEXT_RW_METHODS,
        DATETIME => DTTM_RW_METHODS,
        BOOL => BOOL_RW_METHODS,
        CHAR => TEXT_RW_METHODS,
        FLOAT => FLOAT_RW_METHODS,
        RAW => RAW_RW_METHODS,
        NUMERIC => NUMERIC_RW_METHODS,
        URL => URL_RW_METHODS
    }
    # DataMeta DOM object renderer
    RECORD_RW_METHODS = RwHolder.new(
        lambda { |ctx|
            if ctx.fld.aggr
                if ctx.fld.trgType # map
                    mapsNotSupported(ctx.fld)
                else # list, set or deque
                    "read#{aggrBaseName(aggrJavaFull(ctx.fld.aggr))}(in, #{
                        inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance())"
                end
            else # scalar
                "#{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance().read(in)"
            end
        },
        lambda { |ctx|
            if ctx.fld.aggr && !ctx.fld.trgType
                if ctx.fld.trgType # map
                    mapsNotSupported(ctx.fld)
                else # list, set or deque
                    "writeCollection(val.#{ctx.valGetter}, out, #{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance())"
                end
            else # scalar
                "#{inOutableClassName(condenseType(ctx.fType.type, ctx.pckg))}.getInstance().write(out, val.#{ctx.valGetter})"
            end
        }
    )

    # Transforms the given DataMeta DOM aggregate type to full pathed Java class name
    def aggrJavaFull(aggr)
        PojoLexer::AGGR_CLASSES[aggr] || (raise ArgumentError, "No Aggregate classes for type #{aggr}" )
    end

    # Transforms the given full Java name for the aggregate class into base name to interpolate into methods
    def aggrBaseName(aggr)
        /^(\w+\.)+(\w+)$/.match(aggr)[2]
    end
=begin rdoc
Read/write methods for the DataMeta DOM Maps, accidentally all the same as for the standard data types.
=end
    MAP_RW_METHODS = STD_RW_METHODS

    # Build the Read/Write operation renderer for the given context:
    def getRwRenderer(ctx)
        dt = ctx.fld.dataType
        ctx.refType = nil # reset to avoid misrendering primitives
        rwRenderer = STD_RW_METHODS[dt.type]
        return rwRenderer if rwRenderer
        refKey = dt.type
        ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
        case
            when ctx.refType.kind_of?(DataMetaDom::Record)
                RECORD_RW_METHODS
            when ctx.refType.kind_of?(DataMetaDom::Enum)
                ENUM_RW_METHODS
            when ctx.refType.kind_of?(DataMetaDom::BitSet)
                BITSET_RW_METHODS
            when ctx.refType.kind_of?(DataMetaDom::Mapping)
                MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
                    ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
            else
                raise "No renderer defined for field #{ctx.fld}"
        end
    end

    # Temporary/scratch var -- avoiding collisions at all costs
    def tmpVar(name); "#{'_'*3}#{name}#{'_'*3}" end

    # generates writable via delegation
    def genWritable(model, wriOut, ioOut, record, javaPackage, baseName)
        ctx = RendCtx.new.init(model, record, javaPackage, baseName)
        fields = record.fields
        wriName = writableClassName(baseName)
        ioName = inOutableClassName(baseName)
        # scan for imports needed
        hasOptional = fields.values.map{|f|
            # !model.records[f.dataType.type] &&
            !f.isRequired
        }.reduce(:|) # true if there is at least one optional field which isn't a record
        #fields.values.each { |f|
        #    ctx << DataMetaDom::PojoLexer::JAVA_IMPORTS[f.dataType.type]
        #}

        # field keys (names) in the order of reading/writing to the in/out record
        keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
        reads = ''
        writes = ''
        writeNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(); int fldIndex = -1;' : ''
        readNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(readLongArray(in), false); int fldIndex = -1;' : ''
        indent = "\n#{' ' * 8}"
        # sorting provides predictable read/write order
        keysInOrder.each { |k|
            f = fields[k]
            ctx.fld = f
            rwRenderer = getRwRenderer(ctx)
            # unless ctx.refType.kind_of?(DataMetaDom::Record)
            reads << (
                indent + (f.isRequired ? '' : 'fldIndex++;') + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
                    (f.isRequired ? '' : 'nullFlags.get(fldIndex) ? null : ')+
                    "#{rwRenderer.r.call(ctx)});"
            )
            # rendering of noReqFld - using the Veryfiable interface instead
            #=begin
            writes << (indent + (f.isRequired ?
                (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
                #%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
                "if(val.#{DataMetaDom.getterName(ctx.fld)}() != null) ") + "#{rwRenderer.w.call(ctx)};")
            unless f.isRequired
                writeNullMaskHead << (indent + "fldIndex++; if(val.#{DataMetaDom.getterName(ctx.fld)}() == null) nullFlags.set(fldIndex);")
            end
            #=end
            # end
        }
        writeNullMaskHead << ( indent + 'writeBitSet(out, nullFlags);') if hasOptional
        ioOut.puts <<IN_OUTABLE_CLASS
package #{javaPackage};
import org.ebay.datameta.dom.*;
import java.io.*;
import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
import static org.apache.hadoop.io.WritableUtils.*;
import org.ebay.datameta.ser.bytes.InOutable;
#{ctx.importsText}
#{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{ioName} extends InOutable<#{baseName}> {

    private static final #{ioName} INSTANCE = new #{ioName}();
    public static #{ioName} getInstance() { return INSTANCE; }
    private #{ioName}() {}

    @Override public void write(final DataOutput out, final #{baseName} val) throws IOException {
        val.verify();
        #{writeNullMaskHead}
        #{writes}
    }

    @Override public #{baseName} read(final DataInput in, final #{baseName} val) throws IOException {
        #{readNullMaskHead}
        #{reads}
        return val;
    }
    @Override public #{baseName} read(final DataInput in) throws IOException {
        return read(in, new #{baseName}());
    }
}
IN_OUTABLE_CLASS
        wriOut.puts <<WRITABLE_CLASS
package #{javaPackage};
import org.apache.hadoop.io.Writable;
import org.ebay.datameta.dom.*;
import java.io.*;
import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
import static org.apache.hadoop.io.WritableUtils.*;
import org.ebay.datameta.ser.bytes.HdfsReadWrite;
#{ctx.importsText}
#{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{wriName} extends HdfsReadWrite<#{baseName}> {

    public #{wriName}(final #{baseName} value) {
        super(value);
    }

    public #{wriName}() {
        super(new #{baseName}()); // the value must be on the instance at all times,
        // for example, when used with hadoop fs -text, this class will be used with default constructor
    }

    @Override public void write(final DataOutput out) throws IOException {
        #{ioName}.getInstance().write(out, getVal());
    }

    @Override public void readFields(final DataInput in) throws IOException {
        #{ioName}.getInstance().read(in, getVal());
    }
}
WRITABLE_CLASS

        ########assertValue();
    end

=begin rdoc
Generates all the writables for the given model.
Parameters:
* +model+ - the model to generate Writables from.
* +outRoot+ - destination directory name.
=end
    def genWritables(model, outRoot)
        model.records.values.each { |e|
            javaPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(e.name)
            destDir = File.join(outRoot, packagePath)
            FileUtils.mkdir_p destDir
            wriOut = File.open(File.join(destDir, "#{writableClassName(base)}.java"), 'wb')
            ioOut = File.open(File.join(destDir, "#{inOutableClassName(base)}.java"), 'wb')
            begin
                case
                    when e.kind_of?(DataMetaDom::Record)
                        genWritable model, wriOut, ioOut, e, javaPackage, base
                    else
                        raise "Unsupported Entity: #{e.inspect}"
                end
            ensure
                begin
                    ioOut.close
                ensure
                    wriOut.close
                end
            end
        }
    end

    # Shortcut to help for the Hadoop Writables generator.
    def helpDataMetaBytesSerGen(file, errorText=nil)
        DataMetaDom::help(file, 'DataMeta Serialization to/from Bytes', '<DataMeta DOM source> <Target Directory>', errorText)
    end

    module_function :helpDataMetaBytesSerGen, :genWritables, :genWritable, :getRwRenderer,
        :aggrBaseName, :aggrJavaFull
end
data/test/test_dataMetaByteSer.rb
ADDED
@@ -0,0 +1,17 @@
# keep this underscore naming in the test subdir, it's easier to append files names to test
require './test/test_helper.rb'

# Unit test cases for the DataMetaByteSer
# See for instance:
# - test_full
class TestNewGem < Test::Unit::TestCase

    # an empty stub for now
    def setup;
    end

    # stub
    def test_true
        assert_equal('a', "a")
    end
end
data/test/test_helper.rb
ADDED
data/tmpl/readSwitch.erb
ADDED
@@ -0,0 +1,63 @@
<%#
Template for Java migration guides
%>
package <%=javaPackage%>;
/*
This class is generated by DataMeta DOM. Do not edit manually!
*/
import org.ebay.datameta.ser.bytes.InOutable;
import org.ebay.datameta.util.jdk.SemanticVersion;

import java.io.DataInput;
import java.io.IOException;

public class <%=javaClassName%> {

    private static final <%=javaClassName%> INSTANCE = new <%=javaClassName%>();

    public static <%=javaClassName%> getInstance() { return INSTANCE; }

<% model2.records.values.each { |trgE|
    vars.versCases = vers.clone.select{|v| v < ver2}.sort{|x, y| y<=>x}
    vars.javaPackage, vars.baseName, vars.packagePath = assertNamespace(trgE.name)
    srcRecName = flipVer(trgE.name, ver2.toVarName, ver1.toVarName)
    srcE = model1.records[srcRecName]
    if srcE
%>
    public <%= vars.baseName %> read_<%= vars.baseName %>_versioned(final DataInput in) throws IOException {
        final SemanticVersion ver = InOutable.readVersion(in);
        if(ver.equals(<%=vars.baseName%>.VERSION)) {
            return <%=vars.baseName%>_InOutable.getInstance().read(in);
<%
    while vars.versCases.length > 1 # loop through the case statement - a version per each
        vars.switchTargVer = vars.versCases.shift
        vars.brackets = ''
        caseObjName = flipVer(trgE.name, ver2.toVarName, vars.switchTargVer.toVarName)
        caseMod = modelForVer.call(vars.switchTargVer)
        next unless caseMod.records.keys.member?(caseObjName.to_sym) # skip cases for the versions where this object's target version does not exist
%>
        }
        else if(ver.equals(<%=caseObjName%>.VERSION)){<% vars.versMigr = vers.clone.select{|v| v <= ver2}.sort{|x, y| y<=>x}%>
            return <% while vars.versMigr.length > 1 # migration steps loop nested in the case statement loop
                vars.brackets << ')'
                vars.migrTargVer = vars.versMigr.shift # target version for migration loop
                vars.srcVer = vars.versMigr[0]
                vars.srcType = flipVer(trgE.name, ver2.toVarName, vars.srcVer.toVarName)
                migrMod = modelForVer.call(vars.srcVer)
                break unless migrMod.records.keys.member?(vars.srcType.to_sym) # enough if there is no record in the target version
                vars.jpMigr = vars.javaPackage.gsub(".v#{ver2.toVarName}", ".v#{vars.migrTargVer.toVarName}")
%>
            // substituting in <%=vars.javaPackage%>: ".v<%=ver2.toVarName%>" with ".v<%=vars.migrTargVer.toVarName%>"
            <%=vars.jpMigr%>.<%=migrClass(vars.baseName, vars.srcVer, vars.migrTargVer)%>.getInstance().migrate(<% break if vars.srcVer <= vars.switchTargVer
            end # migration steps loop %>

            <%=vars.srcType%>_InOutable.getInstance().read(in)
            <%= vars.brackets %>;
<% end %>
        }
        else throw new IllegalArgumentException("Unsupported version for the record <%=vars.baseName%>: " + ver);
    }
<% end
} # records loop %>
}
metadata
ADDED
@@ -0,0 +1,80 @@
--- !ruby/object:Gem::Specification
name: dataMetaByteSer
version: !ruby/object:Gem::Version
  version: 1.0.0
platform: ruby
authors:
- Michael Bergens
autorequire:
bindir: bin
cert_chain: []
date: 2017-01-15 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: dataMetaDom
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.0.0
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
    - - ">="
      - !ruby/object:Gem::Version
        version: 1.0.0
description: Generates serializers of DataMeta objects to/from byte arrays, which
  can be used with Hadoop, BigTable and beyond.
email: michael.bergens@gmail.com
executables:
- dataMetaByteSerGen.rb
extensions: []
extra_rdoc_files: []
files:
- ".yardopts"
- History.md
- PostInstall.txt
- README.md
- Rakefile
- bin/dataMetaByteSerGen.rb
- lib/dataMetaByteSer.rb
- lib/dataMetaByteSer/python.rb
- lib/dataMetaByteSer/util.rb
- lib/dataMetaByteSer/ver_reads.rb
- test/test_dataMetaByteSer.rb
- test/test_helper.rb
- tmpl/readSwitch.erb
homepage: https://github.com/eBayDataMeta
licenses:
- Apache-2.0
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: 2.1.1
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements:
- Hadoop libraries
rubyforge_project:
rubygems_version: 2.5.1
signing_key:
specification_version: 4
summary: DataMeta Byte Array Serializers Gen
test_files:
- test/test_dataMetaByteSer.rb