tensor_stream-opencl 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/benchmark.json +704 -0
- data/benchmark/benchmark.rb +67 -46
- data/lib/tensor_stream/opencl/opencl_evaluator.rb +32 -11
- data/lib/tensor_stream/opencl/version.rb +1 -1
- data/tensor_stream-opencl.gemspec +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d2a9a1d1add56659374f7246f35cd956eba86c9416a1e012abc44fe9bdb857ab
|
4
|
+
data.tar.gz: 6acf877d4b85f93facc750d9541221b0e2a8775d608e7160654c2421411d7a3b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c9d3d3391d2bc228f5daed33a83fe750ca61bde20b71ce07fe2a73df32661fb910d1bcac5cbcb6a025e04808ac90174a0040dd13f23bc40b134a3196cef50ee
|
7
|
+
data.tar.gz: 6aabe002d353b02647810c8aea9e521ad1bb4029ccfd2c7cfd78f8609f6826c4260ef5f3ff02170d3a2f55f588f68aad8a1613efac8b586f0751c72e492287f0
|
data/benchmark.json
ADDED
@@ -0,0 +1,704 @@
|
|
1
|
+
{
|
2
|
+
"ruby-2.5.5/windows/AMD Ryzen 3 1300X Quad-Core Processor/NVIDIA CUDA GeForce GTX 1060 6GB": {
|
3
|
+
"ruby": {
|
4
|
+
"argmin": {
|
5
|
+
"real": 0.5717836269999452,
|
6
|
+
"stime": 0.0,
|
7
|
+
"total": 0.5780000000000003,
|
8
|
+
"utime": 0.5780000000000003
|
9
|
+
},
|
10
|
+
"bias_add_grad": {
|
11
|
+
"real": 2.280308563999938,
|
12
|
+
"stime": 0.016000000000000014,
|
13
|
+
"total": 2.234,
|
14
|
+
"utime": 2.218
|
15
|
+
},
|
16
|
+
"bias_add": {
|
17
|
+
"real": 2.081161492000092,
|
18
|
+
"stime": 0.0,
|
19
|
+
"total": 2.0470000000000006,
|
20
|
+
"utime": 2.0470000000000006
|
21
|
+
},
|
22
|
+
"conv2d_backprop": {
|
23
|
+
"real": 3.9330320810000785,
|
24
|
+
"stime": 0.015000000000000124,
|
25
|
+
"total": 3.9210000000000007,
|
26
|
+
"utime": 3.9060000000000006
|
27
|
+
},
|
28
|
+
"conv2d": {
|
29
|
+
"real": 0.8119420420000552,
|
30
|
+
"stime": 0.0,
|
31
|
+
"total": 0.8119999999999994,
|
32
|
+
"utime": 0.8119999999999994
|
33
|
+
},
|
34
|
+
"index": {
|
35
|
+
"real": 0.005713835000051404,
|
36
|
+
"stime": 0.0,
|
37
|
+
"total": 0.0,
|
38
|
+
"utime": 0.0
|
39
|
+
},
|
40
|
+
"min": {
|
41
|
+
"real": 3.6236803629999486,
|
42
|
+
"stime": 0.014999999999999902,
|
43
|
+
"total": 3.5300000000000002,
|
44
|
+
"utime": 3.5150000000000006
|
45
|
+
},
|
46
|
+
"sum": {
|
47
|
+
"real": 3.5159757579999678,
|
48
|
+
"stime": 0.0,
|
49
|
+
"total": 3.1709999999999994,
|
50
|
+
"utime": 3.1709999999999994
|
51
|
+
},
|
52
|
+
"sum axis 1": {
|
53
|
+
"real": 3.204440863000059,
|
54
|
+
"stime": 0.0,
|
55
|
+
"total": 3.0790000000000006,
|
56
|
+
"utime": 3.0790000000000006
|
57
|
+
},
|
58
|
+
"split": {
|
59
|
+
"real": 0.020213428000033673,
|
60
|
+
"stime": 0.0,
|
61
|
+
"total": 0.030999999999998806,
|
62
|
+
"utime": 0.030999999999998806
|
63
|
+
},
|
64
|
+
"add_n": {
|
65
|
+
"real": 0.15371632300002602,
|
66
|
+
"stime": 0.0,
|
67
|
+
"total": 0.1559999999999988,
|
68
|
+
"utime": 0.1559999999999988
|
69
|
+
},
|
70
|
+
"out of order matmul": {
|
71
|
+
"real": 1.598296256000026,
|
72
|
+
"stime": 0.0,
|
73
|
+
"total": 1.5629999999999988,
|
74
|
+
"utime": 1.5629999999999988
|
75
|
+
},
|
76
|
+
"softmax": {
|
77
|
+
"real": 0.03161882200004129,
|
78
|
+
"stime": 0.0,
|
79
|
+
"total": 0.01600000000000179,
|
80
|
+
"utime": 0.01600000000000179
|
81
|
+
},
|
82
|
+
"matmul": {
|
83
|
+
"real": 0.8320086150000634,
|
84
|
+
"stime": 0.0,
|
85
|
+
"total": 0.75,
|
86
|
+
"utime": 0.75
|
87
|
+
},
|
88
|
+
"test model": {
|
89
|
+
"real": 2.7918018939999456,
|
90
|
+
"stime": 0.0,
|
91
|
+
"total": 2.7029999999999994,
|
92
|
+
"utime": 2.7029999999999994
|
93
|
+
},
|
94
|
+
"single function test": {
|
95
|
+
"real": 0.41654277299994646,
|
96
|
+
"stime": 0.0,
|
97
|
+
"total": 0.375,
|
98
|
+
"utime": 0.375
|
99
|
+
},
|
100
|
+
"pow (float)": {
|
101
|
+
"real": 0.10249757899998713,
|
102
|
+
"stime": 0.0,
|
103
|
+
"total": 0.06299999999999883,
|
104
|
+
"utime": 0.06299999999999883
|
105
|
+
},
|
106
|
+
"pow (int)": {
|
107
|
+
"real": 0.030574132999959147,
|
108
|
+
"stime": 0.0,
|
109
|
+
"total": 0.03200000000000003,
|
110
|
+
"utime": 0.03200000000000003
|
111
|
+
},
|
112
|
+
"dropout": {
|
113
|
+
"real": 13.77498282299996,
|
114
|
+
"stime": 0.062000000000000055,
|
115
|
+
"total": 12.719,
|
116
|
+
"utime": 12.657
|
117
|
+
}
|
118
|
+
},
|
119
|
+
"opencl": {
|
120
|
+
"argmin": {
|
121
|
+
"real": 0.015615803000059714,
|
122
|
+
"stime": 0.0,
|
123
|
+
"total": 0.016000000000000014,
|
124
|
+
"utime": 0.016000000000000014
|
125
|
+
},
|
126
|
+
"bias_add_grad": {
|
127
|
+
"real": 0.013771769999948447,
|
128
|
+
"stime": 0.014999999999999902,
|
129
|
+
"total": 0.014999999999999902,
|
130
|
+
"utime": 0.0
|
131
|
+
},
|
132
|
+
"bias_add": {
|
133
|
+
"real": 0.1256369549998908,
|
134
|
+
"stime": 0.016000000000000014,
|
135
|
+
"total": 0.12599999999999945,
|
136
|
+
"utime": 0.10999999999999943
|
137
|
+
},
|
138
|
+
"conv2d_backprop": {
|
139
|
+
"real": 0.04058953899993867,
|
140
|
+
"stime": 0.03200000000000003,
|
141
|
+
"total": 0.03200000000000003,
|
142
|
+
"utime": 0.0
|
143
|
+
},
|
144
|
+
"conv2d": {
|
145
|
+
"real": 0.02783402499994736,
|
146
|
+
"stime": 0.0,
|
147
|
+
"total": 0.03200000000000003,
|
148
|
+
"utime": 0.03200000000000003
|
149
|
+
},
|
150
|
+
"index": {
|
151
|
+
"real": 0.015506175999917104,
|
152
|
+
"stime": 0.0,
|
153
|
+
"total": 0.0,
|
154
|
+
"utime": 0.0
|
155
|
+
},
|
156
|
+
"min": {
|
157
|
+
"real": 0.19373339399999168,
|
158
|
+
"stime": 0.03200000000000003,
|
159
|
+
"total": 0.14199999999999946,
|
160
|
+
"utime": 0.10999999999999943
|
161
|
+
},
|
162
|
+
"sum": {
|
163
|
+
"real": 0.01319783600001756,
|
164
|
+
"stime": 0.0,
|
165
|
+
"total": 0.0,
|
166
|
+
"utime": 0.0
|
167
|
+
},
|
168
|
+
"sum axis 1": {
|
169
|
+
"real": 0.01374040599989712,
|
170
|
+
"stime": 0.0,
|
171
|
+
"total": 0.0,
|
172
|
+
"utime": 0.0
|
173
|
+
},
|
174
|
+
"split": {
|
175
|
+
"real": 0.05617720299994744,
|
176
|
+
"stime": 0.014999999999999902,
|
177
|
+
"total": 0.04600000000000226,
|
178
|
+
"utime": 0.03100000000000236
|
179
|
+
},
|
180
|
+
"add_n": {
|
181
|
+
"real": 0.015245883000034155,
|
182
|
+
"stime": 0.016000000000000014,
|
183
|
+
"total": 0.016000000000000014,
|
184
|
+
"utime": 0.0
|
185
|
+
},
|
186
|
+
"out of order matmul": {
|
187
|
+
"real": 0.018022044999952413,
|
188
|
+
"stime": 0.015000000000000124,
|
189
|
+
"total": 0.030000000000000693,
|
190
|
+
"utime": 0.015000000000000568
|
191
|
+
},
|
192
|
+
"softmax": {
|
193
|
+
"real": 0.017924141999969834,
|
194
|
+
"stime": 0.016000000000000014,
|
195
|
+
"total": 0.016000000000000014,
|
196
|
+
"utime": 0.0
|
197
|
+
},
|
198
|
+
"matmul": {
|
199
|
+
"real": 0.014557924000087041,
|
200
|
+
"stime": 0.016000000000000014,
|
201
|
+
"total": 0.016000000000000014,
|
202
|
+
"utime": 0.0
|
203
|
+
},
|
204
|
+
"test model": {
|
205
|
+
"real": 0.2164292770000884,
|
206
|
+
"stime": 0.014999999999999902,
|
207
|
+
"total": 0.2179999999999993,
|
208
|
+
"utime": 0.2029999999999994
|
209
|
+
},
|
210
|
+
"single function test": {
|
211
|
+
"real": 0.13157883999997466,
|
212
|
+
"stime": 0.016000000000000014,
|
213
|
+
"total": 0.14100000000000001,
|
214
|
+
"utime": 0.125
|
215
|
+
},
|
216
|
+
"pow (float)": {
|
217
|
+
"real": 0.025822618000006514,
|
218
|
+
"stime": 0.0,
|
219
|
+
"total": 0.015000000000000568,
|
220
|
+
"utime": 0.015000000000000568
|
221
|
+
},
|
222
|
+
"pow (int)": {
|
223
|
+
"real": 0.02038636999998289,
|
224
|
+
"stime": 0.0,
|
225
|
+
"total": 0.015000000000000568,
|
226
|
+
"utime": 0.015000000000000568
|
227
|
+
},
|
228
|
+
"dropout": {
|
229
|
+
"real": 0.286617729999989,
|
230
|
+
"stime": 0.0,
|
231
|
+
"total": 0.21800000000000352,
|
232
|
+
"utime": 0.21800000000000352
|
233
|
+
}
|
234
|
+
}
|
235
|
+
},
|
236
|
+
"ruby-2.4.0/macosx/Intel(R) Core(TM) i5-5575R CPU @ 2.80GHz/Apple Intel(R) Iris(TM) Pro Graphics 6200": {
|
237
|
+
"ruby": {
|
238
|
+
"argmin": {
|
239
|
+
"real": 0.8880120001267642,
|
240
|
+
"stime": 0.0,
|
241
|
+
"total": 0.8800000000000008,
|
242
|
+
"utime": 0.8800000000000008
|
243
|
+
},
|
244
|
+
"bias_add_grad": {
|
245
|
+
"real": 2.3806210001930594,
|
246
|
+
"stime": 0.040000000000000036,
|
247
|
+
"total": 2.369999999999999,
|
248
|
+
"utime": 2.329999999999999
|
249
|
+
},
|
250
|
+
"bias_add": {
|
251
|
+
"real": 2.6210350000765175,
|
252
|
+
"stime": 0.09000000000000008,
|
253
|
+
"total": 2.5999999999999996,
|
254
|
+
"utime": 2.51
|
255
|
+
},
|
256
|
+
"conv2d_backprop": {
|
257
|
+
"real": 4.102318000048399,
|
258
|
+
"stime": 0.010000000000000009,
|
259
|
+
"total": 4.07,
|
260
|
+
"utime": 4.0600000000000005
|
261
|
+
},
|
262
|
+
"conv2d": {
|
263
|
+
"real": 0.9380730001721531,
|
264
|
+
"stime": 0.0,
|
265
|
+
"total": 0.9400000000000013,
|
266
|
+
"utime": 0.9400000000000013
|
267
|
+
},
|
268
|
+
"index": {
|
269
|
+
"real": 0.004769999999552965,
|
270
|
+
"stime": 0.0,
|
271
|
+
"total": 0.010000000000001563,
|
272
|
+
"utime": 0.010000000000001563
|
273
|
+
},
|
274
|
+
"min": {
|
275
|
+
"real": 3.7127469999250025,
|
276
|
+
"stime": 0.010000000000000009,
|
277
|
+
"total": 3.619999999999999,
|
278
|
+
"utime": 3.6099999999999994
|
279
|
+
},
|
280
|
+
"sum": {
|
281
|
+
"real": 7.684902000008151,
|
282
|
+
"stime": 0.04999999999999993,
|
283
|
+
"total": 7.589999999999999,
|
284
|
+
"utime": 7.539999999999999
|
285
|
+
},
|
286
|
+
"sum axis 1": {
|
287
|
+
"real": 7.972897999919951,
|
288
|
+
"stime": 0.06000000000000005,
|
289
|
+
"total": 7.679999999999998,
|
290
|
+
"utime": 7.619999999999997
|
291
|
+
},
|
292
|
+
"split": {
|
293
|
+
"real": 0.02342700003646314,
|
294
|
+
"stime": 0.0,
|
295
|
+
"total": 0.030000000000001137,
|
296
|
+
"utime": 0.030000000000001137
|
297
|
+
},
|
298
|
+
"add_n": {
|
299
|
+
"real": 0.16519299987703562,
|
300
|
+
"stime": 0.0,
|
301
|
+
"total": 0.1599999999999966,
|
302
|
+
"utime": 0.1599999999999966
|
303
|
+
},
|
304
|
+
"out of order matmul": {
|
305
|
+
"real": 1.6924950000829995,
|
306
|
+
"stime": 0.010000000000000009,
|
307
|
+
"total": 1.6299999999999975,
|
308
|
+
"utime": 1.6199999999999974
|
309
|
+
},
|
310
|
+
"softmax": {
|
311
|
+
"real": 0.03491799999028444,
|
312
|
+
"stime": 0.0,
|
313
|
+
"total": 0.03999999999999915,
|
314
|
+
"utime": 0.03999999999999915
|
315
|
+
},
|
316
|
+
"matmul": {
|
317
|
+
"real": 0.7962330000009388,
|
318
|
+
"stime": 0.0,
|
319
|
+
"total": 0.7899999999999991,
|
320
|
+
"utime": 0.7899999999999991
|
321
|
+
},
|
322
|
+
"test model": {
|
323
|
+
"real": 2.7632220000959933,
|
324
|
+
"stime": 0.010000000000000009,
|
325
|
+
"total": 2.7499999999999947,
|
326
|
+
"utime": 2.739999999999995
|
327
|
+
},
|
328
|
+
"single function test": {
|
329
|
+
"real": 0.37590500013902783,
|
330
|
+
"stime": 0.0,
|
331
|
+
"total": 0.38000000000000256,
|
332
|
+
"utime": 0.38000000000000256
|
333
|
+
},
|
334
|
+
"pow (float)": {
|
335
|
+
"real": 0.08679500012658536,
|
336
|
+
"stime": 0.0,
|
337
|
+
"total": 0.09000000000000341,
|
338
|
+
"utime": 0.09000000000000341
|
339
|
+
},
|
340
|
+
"pow (int)": {
|
341
|
+
"real": 0.023215000052005053,
|
342
|
+
"stime": 0.0,
|
343
|
+
"total": 0.030000000000001137,
|
344
|
+
"utime": 0.030000000000001137
|
345
|
+
},
|
346
|
+
"dropout": {
|
347
|
+
"real": 13.250881999963894,
|
348
|
+
"stime": 0.050000000000000044,
|
349
|
+
"total": 12.810000000000006,
|
350
|
+
"utime": 12.760000000000005
|
351
|
+
}
|
352
|
+
},
|
353
|
+
"opencl": {
|
354
|
+
"argmin": {
|
355
|
+
"real": 0.024038000032305717,
|
356
|
+
"stime": 0.020000000000000018,
|
357
|
+
"total": 0.03999999999999959,
|
358
|
+
"utime": 0.019999999999999574
|
359
|
+
},
|
360
|
+
"bias_add_grad": {
|
361
|
+
"real": 0.027796000009402633,
|
362
|
+
"stime": 0.009999999999999898,
|
363
|
+
"total": 0.02000000000000146,
|
364
|
+
"utime": 0.010000000000001563
|
365
|
+
},
|
366
|
+
"bias_add": {
|
367
|
+
"real": 0.1939310000743717,
|
368
|
+
"stime": 0.029999999999999916,
|
369
|
+
"total": 0.19000000000000006,
|
370
|
+
"utime": 0.16000000000000014
|
371
|
+
},
|
372
|
+
"conv2d_backprop": {
|
373
|
+
"real": 0.07214100006967783,
|
374
|
+
"stime": 0.030000000000000027,
|
375
|
+
"total": 0.05999999999999761,
|
376
|
+
"utime": 0.029999999999997584
|
377
|
+
},
|
378
|
+
"conv2d": {
|
379
|
+
"real": 0.03793899994343519,
|
380
|
+
"stime": 0.010000000000000009,
|
381
|
+
"total": 0.029999999999999583,
|
382
|
+
"utime": 0.019999999999999574
|
383
|
+
},
|
384
|
+
"index": {
|
385
|
+
"real": 0.021783999865874648,
|
386
|
+
"stime": 0.010000000000000009,
|
387
|
+
"total": 0.01999999999999802,
|
388
|
+
"utime": 0.00999999999999801
|
389
|
+
},
|
390
|
+
"min": {
|
391
|
+
"real": 0.16943500004708767,
|
392
|
+
"stime": 0.020000000000000018,
|
393
|
+
"total": 0.17000000000000215,
|
394
|
+
"utime": 0.15000000000000213
|
395
|
+
},
|
396
|
+
"sum": {
|
397
|
+
"real": 0.029592999955639243,
|
398
|
+
"stime": 0.010000000000000009,
|
399
|
+
"total": 0.020000000000001572,
|
400
|
+
"utime": 0.010000000000001563
|
401
|
+
},
|
402
|
+
"sum axis 1": {
|
403
|
+
"real": 0.023158999858424067,
|
404
|
+
"stime": 0.010000000000000009,
|
405
|
+
"total": 0.01999999999999802,
|
406
|
+
"utime": 0.00999999999999801
|
407
|
+
},
|
408
|
+
"split": {
|
409
|
+
"real": 0.09163099993020296,
|
410
|
+
"stime": 0.04999999999999993,
|
411
|
+
"total": 0.1000000000000042,
|
412
|
+
"utime": 0.05000000000000426
|
413
|
+
},
|
414
|
+
"add_n": {
|
415
|
+
"real": 0.032472999999299645,
|
416
|
+
"stime": 0.010000000000000009,
|
417
|
+
"total": 0.030000000000003135,
|
418
|
+
"utime": 0.020000000000003126
|
419
|
+
},
|
420
|
+
"out of order matmul": {
|
421
|
+
"real": 0.028477999847382307,
|
422
|
+
"stime": 0.020000000000000018,
|
423
|
+
"total": 0.029999999999998028,
|
424
|
+
"utime": 0.00999999999999801
|
425
|
+
},
|
426
|
+
"softmax": {
|
427
|
+
"real": 0.03417199989780784,
|
428
|
+
"stime": 0.010000000000000009,
|
429
|
+
"total": 0.030000000000003135,
|
430
|
+
"utime": 0.020000000000003126
|
431
|
+
},
|
432
|
+
"matmul": {
|
433
|
+
"real": 0.02589399996213615,
|
434
|
+
"stime": 0.010000000000000009,
|
435
|
+
"total": 0.030000000000003135,
|
436
|
+
"utime": 0.020000000000003126
|
437
|
+
},
|
438
|
+
"test model": {
|
439
|
+
"real": 0.2408190001733601,
|
440
|
+
"stime": 0.06000000000000005,
|
441
|
+
"total": 0.29000000000000403,
|
442
|
+
"utime": 0.23000000000000398
|
443
|
+
},
|
444
|
+
"single function test": {
|
445
|
+
"real": 0.15275100013241172,
|
446
|
+
"stime": 0.04999999999999982,
|
447
|
+
"total": 0.18999999999999329,
|
448
|
+
"utime": 0.13999999999999346
|
449
|
+
},
|
450
|
+
"pow (float)": {
|
451
|
+
"real": 0.029078999999910593,
|
452
|
+
"stime": 0.020000000000000018,
|
453
|
+
"total": 0.029999999999998028,
|
454
|
+
"utime": 0.00999999999999801
|
455
|
+
},
|
456
|
+
"pow (int)": {
|
457
|
+
"real": 0.029101000167429447,
|
458
|
+
"stime": 0.010000000000000009,
|
459
|
+
"total": 0.01999999999999802,
|
460
|
+
"utime": 0.00999999999999801
|
461
|
+
},
|
462
|
+
"dropout": {
|
463
|
+
"real": 0.35077799996361136,
|
464
|
+
"stime": 0.06000000000000005,
|
465
|
+
"total": 0.3299999999999961,
|
466
|
+
"utime": 0.269999999999996
|
467
|
+
}
|
468
|
+
}
|
469
|
+
},
|
470
|
+
"ruby-2.7.0/macosx/Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz/Apple Intel(R) Iris(TM) Plus Graphics 640": {
|
471
|
+
"ruby": {
|
472
|
+
"argmin": {
|
473
|
+
"real": 0.5259610000066459,
|
474
|
+
"stime": 0.011098000000000052,
|
475
|
+
"total": 0.5211289999999997,
|
476
|
+
"utime": 0.5100309999999997
|
477
|
+
},
|
478
|
+
"bias_add_grad": {
|
479
|
+
"real": 1.7163370000198483,
|
480
|
+
"stime": 0.024986000000000064,
|
481
|
+
"total": 1.6818499999999998,
|
482
|
+
"utime": 1.6568639999999997
|
483
|
+
},
|
484
|
+
"bias_add": {
|
485
|
+
"real": 1.863698000088334,
|
486
|
+
"stime": 0.013836999999999988,
|
487
|
+
"total": 1.8362060000000002,
|
488
|
+
"utime": 1.8223690000000001
|
489
|
+
},
|
490
|
+
"conv2d_backprop": {
|
491
|
+
"real": 2.9946660000132397,
|
492
|
+
"stime": 0.022391999999999967,
|
493
|
+
"total": 2.965606000000001,
|
494
|
+
"utime": 2.943214000000001
|
495
|
+
},
|
496
|
+
"conv2d": {
|
497
|
+
"real": 0.6393570000072941,
|
498
|
+
"stime": 0.0023859999999999992,
|
499
|
+
"total": 0.6361250000000003,
|
500
|
+
"utime": 0.6337390000000003
|
501
|
+
},
|
502
|
+
"index": {
|
503
|
+
"real": 0.002068999921903014,
|
504
|
+
"stime": 2.0000000000575113e-06,
|
505
|
+
"total": 0.002067000000000041,
|
506
|
+
"utime": 0.0020649999999999835
|
507
|
+
},
|
508
|
+
"min": {
|
509
|
+
"real": 2.6769400000339374,
|
510
|
+
"stime": 0.01394899999999999,
|
511
|
+
"total": 2.6575489999999995,
|
512
|
+
"utime": 2.6435999999999993
|
513
|
+
},
|
514
|
+
"sum": {
|
515
|
+
"real": 2.6265199999324977,
|
516
|
+
"stime": 0.020106999999999986,
|
517
|
+
"total": 2.6121140000000005,
|
518
|
+
"utime": 2.5920070000000006
|
519
|
+
},
|
520
|
+
"sum axis 1": {
|
521
|
+
"real": 2.6128099999623373,
|
522
|
+
"stime": 0.009680000000000022,
|
523
|
+
"total": 2.598789999999998,
|
524
|
+
"utime": 2.589109999999998
|
525
|
+
},
|
526
|
+
"split": {
|
527
|
+
"real": 0.01055200002156198,
|
528
|
+
"stime": 0.0005780000000000785,
|
529
|
+
"total": 0.010544999999999694,
|
530
|
+
"utime": 0.009966999999999615
|
531
|
+
},
|
532
|
+
"add_n": {
|
533
|
+
"real": 0.10583600006066263,
|
534
|
+
"stime": 0.00046999999999997044,
|
535
|
+
"total": 0.10540599999999856,
|
536
|
+
"utime": 0.10493599999999859
|
537
|
+
},
|
538
|
+
"out of order matmul": {
|
539
|
+
"real": 1.2986479999963194,
|
540
|
+
"stime": 0.003817999999999988,
|
541
|
+
"total": 1.2841700000000005,
|
542
|
+
"utime": 1.2803520000000006
|
543
|
+
},
|
544
|
+
"softmax": {
|
545
|
+
"real": 0.021060000057332218,
|
546
|
+
"stime": 8.300000000005525e-05,
|
547
|
+
"total": 0.020976000000000994,
|
548
|
+
"utime": 0.02089300000000094
|
549
|
+
},
|
550
|
+
"matmul": {
|
551
|
+
"real": 0.6281420000595972,
|
552
|
+
"stime": 0.0021889999999999965,
|
553
|
+
"total": 0.6234960000000016,
|
554
|
+
"utime": 0.6213070000000016
|
555
|
+
},
|
556
|
+
"test model": {
|
557
|
+
"real": 2.2064549999777228,
|
558
|
+
"stime": 0.007272999999999974,
|
559
|
+
"total": 2.1945329999999985,
|
560
|
+
"utime": 2.1872599999999984
|
561
|
+
},
|
562
|
+
"single function test": {
|
563
|
+
"real": 0.2703520000213757,
|
564
|
+
"stime": 0.0007090000000000707,
|
565
|
+
"total": 0.26878800000000025,
|
566
|
+
"utime": 0.2680790000000002
|
567
|
+
},
|
568
|
+
"pow (float)": {
|
569
|
+
"real": 0.07231099996715784,
|
570
|
+
"stime": 0.0002369999999999317,
|
571
|
+
"total": 0.07210500000000197,
|
572
|
+
"utime": 0.07186800000000204
|
573
|
+
},
|
574
|
+
"pow (int)": {
|
575
|
+
"real": 0.013833999983035028,
|
576
|
+
"stime": 3.6000000000036e-05,
|
577
|
+
"total": 0.013797999999999866,
|
578
|
+
"utime": 0.01376199999999983
|
579
|
+
},
|
580
|
+
"dropout": {
|
581
|
+
"real": 9.595753000001423,
|
582
|
+
"stime": 0.048594000000000026,
|
583
|
+
"total": 9.504062,
|
584
|
+
"utime": 9.455468
|
585
|
+
}
|
586
|
+
},
|
587
|
+
"opencl": {
|
588
|
+
"argmin": {
|
589
|
+
"real": 0.031660000095143914,
|
590
|
+
"stime": 0.010885000000000034,
|
591
|
+
"total": 0.023446999999999996,
|
592
|
+
"utime": 0.012561999999999962
|
593
|
+
},
|
594
|
+
"bias_add_grad": {
|
595
|
+
"real": 0.029562999960035086,
|
596
|
+
"stime": 0.010082999999999953,
|
597
|
+
"total": 0.02101999999999926,
|
598
|
+
"utime": 0.010936999999999308
|
599
|
+
},
|
600
|
+
"bias_add": {
|
601
|
+
"real": 0.17451599997002631,
|
602
|
+
"stime": 0.03910800000000003,
|
603
|
+
"total": 0.15842599999999984,
|
604
|
+
"utime": 0.11931799999999981
|
605
|
+
},
|
606
|
+
"conv2d_backprop": {
|
607
|
+
"real": 0.060010000015608966,
|
608
|
+
"stime": 0.019299999999999984,
|
609
|
+
"total": 0.04448700000000072,
|
610
|
+
"utime": 0.025187000000000737
|
611
|
+
},
|
612
|
+
"conv2d": {
|
613
|
+
"real": 0.035195000004023314,
|
614
|
+
"stime": 0.009983000000000075,
|
615
|
+
"total": 0.026590000000000558,
|
616
|
+
"utime": 0.016607000000000482
|
617
|
+
},
|
618
|
+
"index": {
|
619
|
+
"real": 0.028608000022359192,
|
620
|
+
"stime": 0.010353000000000057,
|
621
|
+
"total": 0.02094600000000013,
|
622
|
+
"utime": 0.010593000000000075
|
623
|
+
},
|
624
|
+
"min": {
|
625
|
+
"real": 0.17778700008057058,
|
626
|
+
"stime": 0.037808000000000064,
|
627
|
+
"total": 0.16168200000000077,
|
628
|
+
"utime": 0.1238740000000007
|
629
|
+
},
|
630
|
+
"sum": {
|
631
|
+
"real": 0.026511000003665686,
|
632
|
+
"stime": 0.00942299999999996,
|
633
|
+
"total": 0.01839499999999994,
|
634
|
+
"utime": 0.00897199999999998
|
635
|
+
},
|
636
|
+
"sum axis 1": {
|
637
|
+
"real": 0.026868000044487417,
|
638
|
+
"stime": 0.009302000000000032,
|
639
|
+
"total": 0.018642999999999188,
|
640
|
+
"utime": 0.009340999999999156
|
641
|
+
},
|
642
|
+
"split": {
|
643
|
+
"real": 0.10158700007013977,
|
644
|
+
"stime": 0.03593499999999994,
|
645
|
+
"total": 0.07264700000000135,
|
646
|
+
"utime": 0.03671200000000141
|
647
|
+
},
|
648
|
+
"add_n": {
|
649
|
+
"real": 0.02838599996175617,
|
650
|
+
"stime": 0.009464000000000028,
|
651
|
+
"total": 0.02101599999999826,
|
652
|
+
"utime": 0.01155199999999823
|
653
|
+
},
|
654
|
+
"out of order matmul": {
|
655
|
+
"real": 0.0278630000539124,
|
656
|
+
"stime": 0.009276000000000062,
|
657
|
+
"total": 0.019607999999998293,
|
658
|
+
"utime": 0.010331999999998231
|
659
|
+
},
|
660
|
+
"softmax": {
|
661
|
+
"real": 0.02873000002000481,
|
662
|
+
"stime": 0.009530999999999956,
|
663
|
+
"total": 0.02050600000000191,
|
664
|
+
"utime": 0.010975000000001955
|
665
|
+
},
|
666
|
+
"matmul": {
|
667
|
+
"real": 0.03137700003571808,
|
668
|
+
"stime": 0.011123000000000105,
|
669
|
+
"total": 0.023513000000000006,
|
670
|
+
"utime": 0.012389999999999901
|
671
|
+
},
|
672
|
+
"test model": {
|
673
|
+
"real": 0.20114699995610863,
|
674
|
+
"stime": 0.05457400000000001,
|
675
|
+
"total": 0.21827399999999864,
|
676
|
+
"utime": 0.16369999999999862
|
677
|
+
},
|
678
|
+
"single function test": {
|
679
|
+
"real": 0.11842899990733713,
|
680
|
+
"stime": 0.036387999999999976,
|
681
|
+
"total": 0.13410199999999983,
|
682
|
+
"utime": 0.09771399999999986
|
683
|
+
},
|
684
|
+
"pow (float)": {
|
685
|
+
"real": 0.029842999996617436,
|
686
|
+
"stime": 0.009394000000000013,
|
687
|
+
"total": 0.021471999999998936,
|
688
|
+
"utime": 0.012077999999998923
|
689
|
+
},
|
690
|
+
"pow (int)": {
|
691
|
+
"real": 0.028302000020630658,
|
692
|
+
"stime": 0.009438999999999975,
|
693
|
+
"total": 0.019182000000000254,
|
694
|
+
"utime": 0.00974300000000028
|
695
|
+
},
|
696
|
+
"dropout": {
|
697
|
+
"real": 0.3020259999902919,
|
698
|
+
"stime": 0.0603800000000001,
|
699
|
+
"total": 0.2593999999999974,
|
700
|
+
"utime": 0.1990199999999973
|
701
|
+
}
|
702
|
+
}
|
703
|
+
}
|
704
|
+
}
|
data/benchmark/benchmark.rb
CHANGED
@@ -102,50 +102,71 @@ puts TensorStream::Evaluator.default_evaluators
|
|
102
102
|
|
103
103
|
sess2 = tf.session
|
104
104
|
|
105
|
-
if os == :macosx
|
106
|
-
|
107
|
-
else
|
108
|
-
|
109
|
-
end
|
105
|
+
cpu = if os == :macosx
|
106
|
+
`sysctl -n machdep.cpu.brand_string`
|
107
|
+
else
|
108
|
+
`cat /proc/cpuinfo | grep "model name" | head -1`
|
109
|
+
end
|
110
|
+
|
110
111
|
device = TensorStream::Evaluator::OpenclEvaluator.default_device.native_device
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
112
|
+
cl_device = "#{device.platform.to_s} #{device.name}"
|
113
|
+
|
114
|
+
tests = {
|
115
|
+
"argmin" => argmin,
|
116
|
+
"bias_add_grad" => bias_add_grad,
|
117
|
+
"bias_add" => bias_add,
|
118
|
+
"conv2d_backprop" => conv2d_grad,
|
119
|
+
"conv2d" => conv2d,
|
120
|
+
"index" =>index,
|
121
|
+
"min" => min,
|
122
|
+
"sum" => sum,
|
123
|
+
"sum axis 1" => sum_axis_1,
|
124
|
+
"split" => split,
|
125
|
+
"add_n" => add_n,
|
126
|
+
"out of order matmul" => out_of_order,
|
127
|
+
"softmax" => softmax,
|
128
|
+
"matmul" => matmul,
|
129
|
+
"test model" => ->(sess) { sess.run(model, feed_dict: { p => rand, q => rand }) },
|
130
|
+
"single function test" => ->(sess) { sess.run(single_function_test, feed_dict: { p => rand, q => rand }) },
|
131
|
+
"pow (float)" => ->(sess) { sess.run(pow_f, feed_dict: { p => rand, q => rand }) },
|
132
|
+
"pow (int)" => ->(sess) { sess.run(pow_i, feed_dict: { p => rand, q => rand }) },
|
133
|
+
"dropout" => dropout
|
134
|
+
}
|
135
|
+
|
136
|
+
stats = {
|
137
|
+
"ruby" => {},
|
138
|
+
"opencl" => {},
|
139
|
+
}
|
140
|
+
|
141
|
+
puts "rehersal"
|
142
|
+
tests.each do |k, v|
|
143
|
+
if v.is_a?(Proc)
|
144
|
+
r = Benchmark.measure("ruby #{k}") { 10.times do v.call(sess) end }
|
145
|
+
r = Benchmark.measure("opencl #{k}") { 10.times do v.call(sess2) end }
|
146
|
+
else
|
147
|
+
r = Benchmark.measure("ruby #{k}") { 10.times do sess.run(v) end }
|
148
|
+
r = Benchmark.measure("opencl #{k}") { 10.times do sess2.run(v) end }
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
puts "writing benchmark"
|
153
|
+
|
154
|
+
tests.each do |k, v|
|
155
|
+
if v.is_a?(Proc)
|
156
|
+
r = Benchmark.measure(k) { 100.times do v.call(sess) end }
|
157
|
+
stats["ruby"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
|
158
|
+
r = Benchmark.measure(k) { 100.times do v.call(sess2) end }
|
159
|
+
stats["opencl"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
|
160
|
+
else
|
161
|
+
r = Benchmark.measure(k) { 100.times do sess.run(v) end }
|
162
|
+
stats["ruby"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
|
163
|
+
r = Benchmark.measure(k) { 100.times do sess2.run(v) end }
|
164
|
+
stats["opencl"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
output = {
|
169
|
+
"#{RUBY_ENGINE }-#{RUBY_VERSION}/#{os}/#{cpu.strip.gsub("model name\t: ", "")}/#{cl_device.strip}" => stats
|
170
|
+
}
|
171
|
+
current_benchmark = JSON.parse(File.read('benchmark.json'))
|
172
|
+
File.write("benchmark_#{Time.now.strftime('%Y%m%d%H%M')}.json", JSON.pretty_generate(current_benchmark.merge(output)))
|
@@ -622,15 +622,15 @@ module TensorStream
|
|
622
622
|
|
623
623
|
if assign.container_buffer
|
624
624
|
event_wait_list = build_event_wait_list([buffer, assign.container_buffer])
|
625
|
-
assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
|
626
|
-
_opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
|
627
|
-
else
|
628
|
-
buffer.op
|
629
|
-
end
|
630
625
|
else
|
631
|
-
|
632
|
-
assign.options[:container].buffer =
|
633
|
-
|
626
|
+
var_buffer = _create_result_buffer(buffer.data_type, buffer.shape, tensor.name)
|
627
|
+
assign.options[:container].buffer = var_buffer
|
628
|
+
end
|
629
|
+
|
630
|
+
assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
|
631
|
+
_opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
|
632
|
+
else
|
633
|
+
buffer.op
|
634
634
|
end
|
635
635
|
|
636
636
|
assign.container_buffer.dirty = true
|
@@ -797,7 +797,7 @@ module TensorStream
|
|
797
797
|
|
798
798
|
return nil if buffer.nil?
|
799
799
|
|
800
|
-
cl_buffer = unless value
|
800
|
+
cl_buffer = unless array_fast_empty?(value)
|
801
801
|
cl_buffer_size = 1 if cl_buffer_size.zero?
|
802
802
|
_opencl_context.create_buffer(cl_buffer_size * buffer.element_size)
|
803
803
|
end
|
@@ -810,8 +810,8 @@ module TensorStream
|
|
810
810
|
cl_object.buffer[index] = c
|
811
811
|
end
|
812
812
|
elsif value.is_a?(Array)
|
813
|
-
value.flatten.each_with_index do |element, index|
|
814
|
-
|
813
|
+
cast_value = value.flatten.each_with_index.map do |element, index|
|
814
|
+
if element.is_a?(Tensor)
|
815
815
|
read_final_result(complete_eval(element, {}))
|
816
816
|
elsif data_type == :boolean
|
817
817
|
element ? 1 : 0
|
@@ -819,6 +819,10 @@ module TensorStream
|
|
819
819
|
Tensor.cast_dtype(element, data_type)
|
820
820
|
end
|
821
821
|
end
|
822
|
+
|
823
|
+
cast_value.each_with_index do |v, index|
|
824
|
+
cl_object.buffer[index] = v
|
825
|
+
end
|
822
826
|
elsif value.is_a?(NArray)
|
823
827
|
cl_object.buffer = value
|
824
828
|
elsif data_type == :boolean
|
@@ -998,6 +1002,23 @@ module TensorStream
|
|
998
1002
|
|
999
1003
|
arr != 0
|
1000
1004
|
end
|
1005
|
+
|
1006
|
+
##
# Fast way to determine if an array is "empty", counting nested arrays
# that hold no leaf values (e.g. [[], [[]]]) as empty too.
#
# Returns as soon as the first non-Array element is found.
#
# @param arr [#size, #each] the (possibly nested) array to inspect
# @return [Boolean] true when no non-Array element exists at any depth
def array_fast_empty?(arr)
  return true if arr.size.zero?

  arr.each do |element|
    # A leaf value, or a nested array containing one, means "not empty".
    return false unless element.is_a?(Array) && array_fast_empty?(element)
  end

  true
end
|
1001
1022
|
end
|
1002
1023
|
end
|
1003
1024
|
end
|
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
33
33
|
spec.require_paths = ["lib"]
|
34
34
|
|
35
|
-
spec.add_development_dependency "bundler"
|
35
|
+
spec.add_development_dependency "bundler"
|
36
36
|
spec.add_development_dependency "rake", "~> 10.0"
|
37
37
|
spec.add_development_dependency "rspec", "~> 3.0"
|
38
38
|
spec.add_development_dependency "pry-byebug"
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tensor_stream-opencl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Dayo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-06-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -166,6 +166,7 @@ files:
|
|
166
166
|
- LICENSE.txt
|
167
167
|
- README.md
|
168
168
|
- Rakefile
|
169
|
+
- benchmark.json
|
169
170
|
- benchmark/benchmark.rb
|
170
171
|
- benchmark_imac2015_iris.txt
|
171
172
|
- benchmark_intel.txt
|