tensor_stream-opencl 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b433e9e7ab38a517c21b57065e5a43b112640fd7c419fb7baa2f3319128cdacf
4
- data.tar.gz: fab7d48513cb0f8481e151d18b088782918cb1539b59586613a00c4d5f5aeed2
3
+ metadata.gz: d2a9a1d1add56659374f7246f35cd956eba86c9416a1e012abc44fe9bdb857ab
4
+ data.tar.gz: 6acf877d4b85f93facc750d9541221b0e2a8775d608e7160654c2421411d7a3b
5
5
  SHA512:
6
- metadata.gz: 04d106f5ee5fac49eba20ff143bb2212a1cafd5140fc04cee20958ffea0c5909d352824948badf16ec5bc8ca2a7b13b4dcf7748eb03cbd6dc8a466c6ae0f5040
7
- data.tar.gz: e17171f28641ce3496c0b338b6913c96e10d9fd5ce93b7980dae6edef00e63e5f7c4dcb60ed04fed5271a474b4940d069ebcf6a00bbfd3c4e6eafa2c0c4f26ed
6
+ metadata.gz: 7c9d3d3391d2bc228f5daed33a83fe750ca61bde20b71ce07fe2a73df32661fb910d1bcac5cbcb6a025e04808ac90174a0040dd13f23bc40b134a3196cef50ee
7
+ data.tar.gz: 6aabe002d353b02647810c8aea9e521ad1bb4029ccfd2c7cfd78f8609f6826c4260ef5f3ff02170d3a2f55f588f68aad8a1613efac8b586f0751c72e492287f0
@@ -0,0 +1,704 @@
1
+ {
2
+ "ruby-2.5.5/windows/AMD Ryzen 3 1300X Quad-Core Processor/NVIDIA CUDA GeForce GTX 1060 6GB": {
3
+ "ruby": {
4
+ "argmin": {
5
+ "real": 0.5717836269999452,
6
+ "stime": 0.0,
7
+ "total": 0.5780000000000003,
8
+ "utime": 0.5780000000000003
9
+ },
10
+ "bias_add_grad": {
11
+ "real": 2.280308563999938,
12
+ "stime": 0.016000000000000014,
13
+ "total": 2.234,
14
+ "utime": 2.218
15
+ },
16
+ "bias_add": {
17
+ "real": 2.081161492000092,
18
+ "stime": 0.0,
19
+ "total": 2.0470000000000006,
20
+ "utime": 2.0470000000000006
21
+ },
22
+ "conv2d_backprop": {
23
+ "real": 3.9330320810000785,
24
+ "stime": 0.015000000000000124,
25
+ "total": 3.9210000000000007,
26
+ "utime": 3.9060000000000006
27
+ },
28
+ "conv2d": {
29
+ "real": 0.8119420420000552,
30
+ "stime": 0.0,
31
+ "total": 0.8119999999999994,
32
+ "utime": 0.8119999999999994
33
+ },
34
+ "index": {
35
+ "real": 0.005713835000051404,
36
+ "stime": 0.0,
37
+ "total": 0.0,
38
+ "utime": 0.0
39
+ },
40
+ "min": {
41
+ "real": 3.6236803629999486,
42
+ "stime": 0.014999999999999902,
43
+ "total": 3.5300000000000002,
44
+ "utime": 3.5150000000000006
45
+ },
46
+ "sum": {
47
+ "real": 3.5159757579999678,
48
+ "stime": 0.0,
49
+ "total": 3.1709999999999994,
50
+ "utime": 3.1709999999999994
51
+ },
52
+ "sum axis 1": {
53
+ "real": 3.204440863000059,
54
+ "stime": 0.0,
55
+ "total": 3.0790000000000006,
56
+ "utime": 3.0790000000000006
57
+ },
58
+ "split": {
59
+ "real": 0.020213428000033673,
60
+ "stime": 0.0,
61
+ "total": 0.030999999999998806,
62
+ "utime": 0.030999999999998806
63
+ },
64
+ "add_n": {
65
+ "real": 0.15371632300002602,
66
+ "stime": 0.0,
67
+ "total": 0.1559999999999988,
68
+ "utime": 0.1559999999999988
69
+ },
70
+ "out of order matmul": {
71
+ "real": 1.598296256000026,
72
+ "stime": 0.0,
73
+ "total": 1.5629999999999988,
74
+ "utime": 1.5629999999999988
75
+ },
76
+ "softmax": {
77
+ "real": 0.03161882200004129,
78
+ "stime": 0.0,
79
+ "total": 0.01600000000000179,
80
+ "utime": 0.01600000000000179
81
+ },
82
+ "matmul": {
83
+ "real": 0.8320086150000634,
84
+ "stime": 0.0,
85
+ "total": 0.75,
86
+ "utime": 0.75
87
+ },
88
+ "test model": {
89
+ "real": 2.7918018939999456,
90
+ "stime": 0.0,
91
+ "total": 2.7029999999999994,
92
+ "utime": 2.7029999999999994
93
+ },
94
+ "single function test": {
95
+ "real": 0.41654277299994646,
96
+ "stime": 0.0,
97
+ "total": 0.375,
98
+ "utime": 0.375
99
+ },
100
+ "pow (float)": {
101
+ "real": 0.10249757899998713,
102
+ "stime": 0.0,
103
+ "total": 0.06299999999999883,
104
+ "utime": 0.06299999999999883
105
+ },
106
+ "pow (int)": {
107
+ "real": 0.030574132999959147,
108
+ "stime": 0.0,
109
+ "total": 0.03200000000000003,
110
+ "utime": 0.03200000000000003
111
+ },
112
+ "dropout": {
113
+ "real": 13.77498282299996,
114
+ "stime": 0.062000000000000055,
115
+ "total": 12.719,
116
+ "utime": 12.657
117
+ }
118
+ },
119
+ "opencl": {
120
+ "argmin": {
121
+ "real": 0.015615803000059714,
122
+ "stime": 0.0,
123
+ "total": 0.016000000000000014,
124
+ "utime": 0.016000000000000014
125
+ },
126
+ "bias_add_grad": {
127
+ "real": 0.013771769999948447,
128
+ "stime": 0.014999999999999902,
129
+ "total": 0.014999999999999902,
130
+ "utime": 0.0
131
+ },
132
+ "bias_add": {
133
+ "real": 0.1256369549998908,
134
+ "stime": 0.016000000000000014,
135
+ "total": 0.12599999999999945,
136
+ "utime": 0.10999999999999943
137
+ },
138
+ "conv2d_backprop": {
139
+ "real": 0.04058953899993867,
140
+ "stime": 0.03200000000000003,
141
+ "total": 0.03200000000000003,
142
+ "utime": 0.0
143
+ },
144
+ "conv2d": {
145
+ "real": 0.02783402499994736,
146
+ "stime": 0.0,
147
+ "total": 0.03200000000000003,
148
+ "utime": 0.03200000000000003
149
+ },
150
+ "index": {
151
+ "real": 0.015506175999917104,
152
+ "stime": 0.0,
153
+ "total": 0.0,
154
+ "utime": 0.0
155
+ },
156
+ "min": {
157
+ "real": 0.19373339399999168,
158
+ "stime": 0.03200000000000003,
159
+ "total": 0.14199999999999946,
160
+ "utime": 0.10999999999999943
161
+ },
162
+ "sum": {
163
+ "real": 0.01319783600001756,
164
+ "stime": 0.0,
165
+ "total": 0.0,
166
+ "utime": 0.0
167
+ },
168
+ "sum axis 1": {
169
+ "real": 0.01374040599989712,
170
+ "stime": 0.0,
171
+ "total": 0.0,
172
+ "utime": 0.0
173
+ },
174
+ "split": {
175
+ "real": 0.05617720299994744,
176
+ "stime": 0.014999999999999902,
177
+ "total": 0.04600000000000226,
178
+ "utime": 0.03100000000000236
179
+ },
180
+ "add_n": {
181
+ "real": 0.015245883000034155,
182
+ "stime": 0.016000000000000014,
183
+ "total": 0.016000000000000014,
184
+ "utime": 0.0
185
+ },
186
+ "out of order matmul": {
187
+ "real": 0.018022044999952413,
188
+ "stime": 0.015000000000000124,
189
+ "total": 0.030000000000000693,
190
+ "utime": 0.015000000000000568
191
+ },
192
+ "softmax": {
193
+ "real": 0.017924141999969834,
194
+ "stime": 0.016000000000000014,
195
+ "total": 0.016000000000000014,
196
+ "utime": 0.0
197
+ },
198
+ "matmul": {
199
+ "real": 0.014557924000087041,
200
+ "stime": 0.016000000000000014,
201
+ "total": 0.016000000000000014,
202
+ "utime": 0.0
203
+ },
204
+ "test model": {
205
+ "real": 0.2164292770000884,
206
+ "stime": 0.014999999999999902,
207
+ "total": 0.2179999999999993,
208
+ "utime": 0.2029999999999994
209
+ },
210
+ "single function test": {
211
+ "real": 0.13157883999997466,
212
+ "stime": 0.016000000000000014,
213
+ "total": 0.14100000000000001,
214
+ "utime": 0.125
215
+ },
216
+ "pow (float)": {
217
+ "real": 0.025822618000006514,
218
+ "stime": 0.0,
219
+ "total": 0.015000000000000568,
220
+ "utime": 0.015000000000000568
221
+ },
222
+ "pow (int)": {
223
+ "real": 0.02038636999998289,
224
+ "stime": 0.0,
225
+ "total": 0.015000000000000568,
226
+ "utime": 0.015000000000000568
227
+ },
228
+ "dropout": {
229
+ "real": 0.286617729999989,
230
+ "stime": 0.0,
231
+ "total": 0.21800000000000352,
232
+ "utime": 0.21800000000000352
233
+ }
234
+ }
235
+ },
236
+ "ruby-2.4.0/macosx/Intel(R) Core(TM) i5-5575R CPU @ 2.80GHz/Apple Intel(R) Iris(TM) Pro Graphics 6200": {
237
+ "ruby": {
238
+ "argmin": {
239
+ "real": 0.8880120001267642,
240
+ "stime": 0.0,
241
+ "total": 0.8800000000000008,
242
+ "utime": 0.8800000000000008
243
+ },
244
+ "bias_add_grad": {
245
+ "real": 2.3806210001930594,
246
+ "stime": 0.040000000000000036,
247
+ "total": 2.369999999999999,
248
+ "utime": 2.329999999999999
249
+ },
250
+ "bias_add": {
251
+ "real": 2.6210350000765175,
252
+ "stime": 0.09000000000000008,
253
+ "total": 2.5999999999999996,
254
+ "utime": 2.51
255
+ },
256
+ "conv2d_backprop": {
257
+ "real": 4.102318000048399,
258
+ "stime": 0.010000000000000009,
259
+ "total": 4.07,
260
+ "utime": 4.0600000000000005
261
+ },
262
+ "conv2d": {
263
+ "real": 0.9380730001721531,
264
+ "stime": 0.0,
265
+ "total": 0.9400000000000013,
266
+ "utime": 0.9400000000000013
267
+ },
268
+ "index": {
269
+ "real": 0.004769999999552965,
270
+ "stime": 0.0,
271
+ "total": 0.010000000000001563,
272
+ "utime": 0.010000000000001563
273
+ },
274
+ "min": {
275
+ "real": 3.7127469999250025,
276
+ "stime": 0.010000000000000009,
277
+ "total": 3.619999999999999,
278
+ "utime": 3.6099999999999994
279
+ },
280
+ "sum": {
281
+ "real": 7.684902000008151,
282
+ "stime": 0.04999999999999993,
283
+ "total": 7.589999999999999,
284
+ "utime": 7.539999999999999
285
+ },
286
+ "sum axis 1": {
287
+ "real": 7.972897999919951,
288
+ "stime": 0.06000000000000005,
289
+ "total": 7.679999999999998,
290
+ "utime": 7.619999999999997
291
+ },
292
+ "split": {
293
+ "real": 0.02342700003646314,
294
+ "stime": 0.0,
295
+ "total": 0.030000000000001137,
296
+ "utime": 0.030000000000001137
297
+ },
298
+ "add_n": {
299
+ "real": 0.16519299987703562,
300
+ "stime": 0.0,
301
+ "total": 0.1599999999999966,
302
+ "utime": 0.1599999999999966
303
+ },
304
+ "out of order matmul": {
305
+ "real": 1.6924950000829995,
306
+ "stime": 0.010000000000000009,
307
+ "total": 1.6299999999999975,
308
+ "utime": 1.6199999999999974
309
+ },
310
+ "softmax": {
311
+ "real": 0.03491799999028444,
312
+ "stime": 0.0,
313
+ "total": 0.03999999999999915,
314
+ "utime": 0.03999999999999915
315
+ },
316
+ "matmul": {
317
+ "real": 0.7962330000009388,
318
+ "stime": 0.0,
319
+ "total": 0.7899999999999991,
320
+ "utime": 0.7899999999999991
321
+ },
322
+ "test model": {
323
+ "real": 2.7632220000959933,
324
+ "stime": 0.010000000000000009,
325
+ "total": 2.7499999999999947,
326
+ "utime": 2.739999999999995
327
+ },
328
+ "single function test": {
329
+ "real": 0.37590500013902783,
330
+ "stime": 0.0,
331
+ "total": 0.38000000000000256,
332
+ "utime": 0.38000000000000256
333
+ },
334
+ "pow (float)": {
335
+ "real": 0.08679500012658536,
336
+ "stime": 0.0,
337
+ "total": 0.09000000000000341,
338
+ "utime": 0.09000000000000341
339
+ },
340
+ "pow (int)": {
341
+ "real": 0.023215000052005053,
342
+ "stime": 0.0,
343
+ "total": 0.030000000000001137,
344
+ "utime": 0.030000000000001137
345
+ },
346
+ "dropout": {
347
+ "real": 13.250881999963894,
348
+ "stime": 0.050000000000000044,
349
+ "total": 12.810000000000006,
350
+ "utime": 12.760000000000005
351
+ }
352
+ },
353
+ "opencl": {
354
+ "argmin": {
355
+ "real": 0.024038000032305717,
356
+ "stime": 0.020000000000000018,
357
+ "total": 0.03999999999999959,
358
+ "utime": 0.019999999999999574
359
+ },
360
+ "bias_add_grad": {
361
+ "real": 0.027796000009402633,
362
+ "stime": 0.009999999999999898,
363
+ "total": 0.02000000000000146,
364
+ "utime": 0.010000000000001563
365
+ },
366
+ "bias_add": {
367
+ "real": 0.1939310000743717,
368
+ "stime": 0.029999999999999916,
369
+ "total": 0.19000000000000006,
370
+ "utime": 0.16000000000000014
371
+ },
372
+ "conv2d_backprop": {
373
+ "real": 0.07214100006967783,
374
+ "stime": 0.030000000000000027,
375
+ "total": 0.05999999999999761,
376
+ "utime": 0.029999999999997584
377
+ },
378
+ "conv2d": {
379
+ "real": 0.03793899994343519,
380
+ "stime": 0.010000000000000009,
381
+ "total": 0.029999999999999583,
382
+ "utime": 0.019999999999999574
383
+ },
384
+ "index": {
385
+ "real": 0.021783999865874648,
386
+ "stime": 0.010000000000000009,
387
+ "total": 0.01999999999999802,
388
+ "utime": 0.00999999999999801
389
+ },
390
+ "min": {
391
+ "real": 0.16943500004708767,
392
+ "stime": 0.020000000000000018,
393
+ "total": 0.17000000000000215,
394
+ "utime": 0.15000000000000213
395
+ },
396
+ "sum": {
397
+ "real": 0.029592999955639243,
398
+ "stime": 0.010000000000000009,
399
+ "total": 0.020000000000001572,
400
+ "utime": 0.010000000000001563
401
+ },
402
+ "sum axis 1": {
403
+ "real": 0.023158999858424067,
404
+ "stime": 0.010000000000000009,
405
+ "total": 0.01999999999999802,
406
+ "utime": 0.00999999999999801
407
+ },
408
+ "split": {
409
+ "real": 0.09163099993020296,
410
+ "stime": 0.04999999999999993,
411
+ "total": 0.1000000000000042,
412
+ "utime": 0.05000000000000426
413
+ },
414
+ "add_n": {
415
+ "real": 0.032472999999299645,
416
+ "stime": 0.010000000000000009,
417
+ "total": 0.030000000000003135,
418
+ "utime": 0.020000000000003126
419
+ },
420
+ "out of order matmul": {
421
+ "real": 0.028477999847382307,
422
+ "stime": 0.020000000000000018,
423
+ "total": 0.029999999999998028,
424
+ "utime": 0.00999999999999801
425
+ },
426
+ "softmax": {
427
+ "real": 0.03417199989780784,
428
+ "stime": 0.010000000000000009,
429
+ "total": 0.030000000000003135,
430
+ "utime": 0.020000000000003126
431
+ },
432
+ "matmul": {
433
+ "real": 0.02589399996213615,
434
+ "stime": 0.010000000000000009,
435
+ "total": 0.030000000000003135,
436
+ "utime": 0.020000000000003126
437
+ },
438
+ "test model": {
439
+ "real": 0.2408190001733601,
440
+ "stime": 0.06000000000000005,
441
+ "total": 0.29000000000000403,
442
+ "utime": 0.23000000000000398
443
+ },
444
+ "single function test": {
445
+ "real": 0.15275100013241172,
446
+ "stime": 0.04999999999999982,
447
+ "total": 0.18999999999999329,
448
+ "utime": 0.13999999999999346
449
+ },
450
+ "pow (float)": {
451
+ "real": 0.029078999999910593,
452
+ "stime": 0.020000000000000018,
453
+ "total": 0.029999999999998028,
454
+ "utime": 0.00999999999999801
455
+ },
456
+ "pow (int)": {
457
+ "real": 0.029101000167429447,
458
+ "stime": 0.010000000000000009,
459
+ "total": 0.01999999999999802,
460
+ "utime": 0.00999999999999801
461
+ },
462
+ "dropout": {
463
+ "real": 0.35077799996361136,
464
+ "stime": 0.06000000000000005,
465
+ "total": 0.3299999999999961,
466
+ "utime": 0.269999999999996
467
+ }
468
+ }
469
+ },
470
+ "ruby-2.7.0/macosx/Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz/Apple Intel(R) Iris(TM) Plus Graphics 640": {
471
+ "ruby": {
472
+ "argmin": {
473
+ "real": 0.5259610000066459,
474
+ "stime": 0.011098000000000052,
475
+ "total": 0.5211289999999997,
476
+ "utime": 0.5100309999999997
477
+ },
478
+ "bias_add_grad": {
479
+ "real": 1.7163370000198483,
480
+ "stime": 0.024986000000000064,
481
+ "total": 1.6818499999999998,
482
+ "utime": 1.6568639999999997
483
+ },
484
+ "bias_add": {
485
+ "real": 1.863698000088334,
486
+ "stime": 0.013836999999999988,
487
+ "total": 1.8362060000000002,
488
+ "utime": 1.8223690000000001
489
+ },
490
+ "conv2d_backprop": {
491
+ "real": 2.9946660000132397,
492
+ "stime": 0.022391999999999967,
493
+ "total": 2.965606000000001,
494
+ "utime": 2.943214000000001
495
+ },
496
+ "conv2d": {
497
+ "real": 0.6393570000072941,
498
+ "stime": 0.0023859999999999992,
499
+ "total": 0.6361250000000003,
500
+ "utime": 0.6337390000000003
501
+ },
502
+ "index": {
503
+ "real": 0.002068999921903014,
504
+ "stime": 2.0000000000575113e-06,
505
+ "total": 0.002067000000000041,
506
+ "utime": 0.0020649999999999835
507
+ },
508
+ "min": {
509
+ "real": 2.6769400000339374,
510
+ "stime": 0.01394899999999999,
511
+ "total": 2.6575489999999995,
512
+ "utime": 2.6435999999999993
513
+ },
514
+ "sum": {
515
+ "real": 2.6265199999324977,
516
+ "stime": 0.020106999999999986,
517
+ "total": 2.6121140000000005,
518
+ "utime": 2.5920070000000006
519
+ },
520
+ "sum axis 1": {
521
+ "real": 2.6128099999623373,
522
+ "stime": 0.009680000000000022,
523
+ "total": 2.598789999999998,
524
+ "utime": 2.589109999999998
525
+ },
526
+ "split": {
527
+ "real": 0.01055200002156198,
528
+ "stime": 0.0005780000000000785,
529
+ "total": 0.010544999999999694,
530
+ "utime": 0.009966999999999615
531
+ },
532
+ "add_n": {
533
+ "real": 0.10583600006066263,
534
+ "stime": 0.00046999999999997044,
535
+ "total": 0.10540599999999856,
536
+ "utime": 0.10493599999999859
537
+ },
538
+ "out of order matmul": {
539
+ "real": 1.2986479999963194,
540
+ "stime": 0.003817999999999988,
541
+ "total": 1.2841700000000005,
542
+ "utime": 1.2803520000000006
543
+ },
544
+ "softmax": {
545
+ "real": 0.021060000057332218,
546
+ "stime": 8.300000000005525e-05,
547
+ "total": 0.020976000000000994,
548
+ "utime": 0.02089300000000094
549
+ },
550
+ "matmul": {
551
+ "real": 0.6281420000595972,
552
+ "stime": 0.0021889999999999965,
553
+ "total": 0.6234960000000016,
554
+ "utime": 0.6213070000000016
555
+ },
556
+ "test model": {
557
+ "real": 2.2064549999777228,
558
+ "stime": 0.007272999999999974,
559
+ "total": 2.1945329999999985,
560
+ "utime": 2.1872599999999984
561
+ },
562
+ "single function test": {
563
+ "real": 0.2703520000213757,
564
+ "stime": 0.0007090000000000707,
565
+ "total": 0.26878800000000025,
566
+ "utime": 0.2680790000000002
567
+ },
568
+ "pow (float)": {
569
+ "real": 0.07231099996715784,
570
+ "stime": 0.0002369999999999317,
571
+ "total": 0.07210500000000197,
572
+ "utime": 0.07186800000000204
573
+ },
574
+ "pow (int)": {
575
+ "real": 0.013833999983035028,
576
+ "stime": 3.6000000000036e-05,
577
+ "total": 0.013797999999999866,
578
+ "utime": 0.01376199999999983
579
+ },
580
+ "dropout": {
581
+ "real": 9.595753000001423,
582
+ "stime": 0.048594000000000026,
583
+ "total": 9.504062,
584
+ "utime": 9.455468
585
+ }
586
+ },
587
+ "opencl": {
588
+ "argmin": {
589
+ "real": 0.031660000095143914,
590
+ "stime": 0.010885000000000034,
591
+ "total": 0.023446999999999996,
592
+ "utime": 0.012561999999999962
593
+ },
594
+ "bias_add_grad": {
595
+ "real": 0.029562999960035086,
596
+ "stime": 0.010082999999999953,
597
+ "total": 0.02101999999999926,
598
+ "utime": 0.010936999999999308
599
+ },
600
+ "bias_add": {
601
+ "real": 0.17451599997002631,
602
+ "stime": 0.03910800000000003,
603
+ "total": 0.15842599999999984,
604
+ "utime": 0.11931799999999981
605
+ },
606
+ "conv2d_backprop": {
607
+ "real": 0.060010000015608966,
608
+ "stime": 0.019299999999999984,
609
+ "total": 0.04448700000000072,
610
+ "utime": 0.025187000000000737
611
+ },
612
+ "conv2d": {
613
+ "real": 0.035195000004023314,
614
+ "stime": 0.009983000000000075,
615
+ "total": 0.026590000000000558,
616
+ "utime": 0.016607000000000482
617
+ },
618
+ "index": {
619
+ "real": 0.028608000022359192,
620
+ "stime": 0.010353000000000057,
621
+ "total": 0.02094600000000013,
622
+ "utime": 0.010593000000000075
623
+ },
624
+ "min": {
625
+ "real": 0.17778700008057058,
626
+ "stime": 0.037808000000000064,
627
+ "total": 0.16168200000000077,
628
+ "utime": 0.1238740000000007
629
+ },
630
+ "sum": {
631
+ "real": 0.026511000003665686,
632
+ "stime": 0.00942299999999996,
633
+ "total": 0.01839499999999994,
634
+ "utime": 0.00897199999999998
635
+ },
636
+ "sum axis 1": {
637
+ "real": 0.026868000044487417,
638
+ "stime": 0.009302000000000032,
639
+ "total": 0.018642999999999188,
640
+ "utime": 0.009340999999999156
641
+ },
642
+ "split": {
643
+ "real": 0.10158700007013977,
644
+ "stime": 0.03593499999999994,
645
+ "total": 0.07264700000000135,
646
+ "utime": 0.03671200000000141
647
+ },
648
+ "add_n": {
649
+ "real": 0.02838599996175617,
650
+ "stime": 0.009464000000000028,
651
+ "total": 0.02101599999999826,
652
+ "utime": 0.01155199999999823
653
+ },
654
+ "out of order matmul": {
655
+ "real": 0.0278630000539124,
656
+ "stime": 0.009276000000000062,
657
+ "total": 0.019607999999998293,
658
+ "utime": 0.010331999999998231
659
+ },
660
+ "softmax": {
661
+ "real": 0.02873000002000481,
662
+ "stime": 0.009530999999999956,
663
+ "total": 0.02050600000000191,
664
+ "utime": 0.010975000000001955
665
+ },
666
+ "matmul": {
667
+ "real": 0.03137700003571808,
668
+ "stime": 0.011123000000000105,
669
+ "total": 0.023513000000000006,
670
+ "utime": 0.012389999999999901
671
+ },
672
+ "test model": {
673
+ "real": 0.20114699995610863,
674
+ "stime": 0.05457400000000001,
675
+ "total": 0.21827399999999864,
676
+ "utime": 0.16369999999999862
677
+ },
678
+ "single function test": {
679
+ "real": 0.11842899990733713,
680
+ "stime": 0.036387999999999976,
681
+ "total": 0.13410199999999983,
682
+ "utime": 0.09771399999999986
683
+ },
684
+ "pow (float)": {
685
+ "real": 0.029842999996617436,
686
+ "stime": 0.009394000000000013,
687
+ "total": 0.021471999999998936,
688
+ "utime": 0.012077999999998923
689
+ },
690
+ "pow (int)": {
691
+ "real": 0.028302000020630658,
692
+ "stime": 0.009438999999999975,
693
+ "total": 0.019182000000000254,
694
+ "utime": 0.00974300000000028
695
+ },
696
+ "dropout": {
697
+ "real": 0.3020259999902919,
698
+ "stime": 0.0603800000000001,
699
+ "total": 0.2593999999999974,
700
+ "utime": 0.1990199999999973
701
+ }
702
+ }
703
+ }
704
+ }
@@ -102,50 +102,71 @@ puts TensorStream::Evaluator.default_evaluators
102
102
 
103
103
  sess2 = tf.session
104
104
 
105
- if os == :macosx
106
- puts `sysctl -n machdep.cpu.brand_string`
107
- else
108
- puts `cat /proc/cpuinfo | grep "model name" | head -1`
109
- end
105
+ cpu = if os == :macosx
106
+ `sysctl -n machdep.cpu.brand_string`
107
+ else
108
+ `cat /proc/cpuinfo | grep "model name" | head -1`
109
+ end
110
+
110
111
  device = TensorStream::Evaluator::OpenclEvaluator.default_device.native_device
111
- puts "OpenCL device #{device.platform.to_s} #{device.name}"
112
- Benchmark.bmbm do |x|
113
- x.report("ruby argmin :") { 100.times do sess.run(argmin) end }
114
- x.report("opencl argmin :") { 100.times do sess2.run(argmin) end }
115
- x.report("ruby bias_add_grad :") { 100.times do sess.run(bias_add_grad) end }
116
- x.report("opencl bias_add_grad :") { 100.times do sess2.run(bias_add_grad) end }
117
- x.report("ruby bias_add :") { 100.times do sess.run(bias_add) end }
118
- x.report("opencl bias_add :") { 100.times do sess2.run(bias_add) end }
119
- x.report("ruby conv2d_backprop :") { 100.times do sess.run(conv2d_grad) end }
120
- x.report("opencl conv2d_backprop :") { 100.times do sess2.run(conv2d_grad) end }
121
- x.report("ruby conv2d :") { 100.times do sess.run(conv2d) end }
122
- x.report("opencl conv2d :") { 100.times do sess2.run(conv2d) end }
123
- x.report("ruby arr index :") { 100.times do sess.run(index) end }
124
- x.report("opencl arr index :") { 100.times do sess2.run(index) end }
125
- x.report("ruby min :") { 100.times do sess.run(min) end }
126
- x.report("opencl min :") { 100.times do sess2.run(min) end }
127
- x.report("ruby sum :") { 100.times do sess.run(sum) end }
128
- x.report("opencl sum :") { 100.times do sess2.run(sum) end }
129
- x.report("ruby sum axis 1 :") { 100.times do sess.run(sum_axis_1) end }
130
- x.report("opencl sum axis 1 :") { 100.times do sess2.run(sum_axis_1) end }
131
- x.report("ruby split :") { 100.times do sess.run(split) end }
132
- x.report("opencl split :") { 100.times do sess2.run(split) end }
133
- x.report("ruby add_n :") { 100.times do sess.run(add_n) end }
134
- x.report("opencl add_n :") { 100.times do sess2.run(add_n) end }
135
- x.report("ruby ooo matmul :") { 100.times do sess.run(out_of_order) end }
136
- x.report("opencl ooo matmul :") { 100.times do sess2.run(out_of_order) end }
137
- x.report("ruby softmax :") { 100.times do sess.run(softmax) end }
138
- x.report("opencl softmax :") { 100.times do sess2.run(softmax) end }
139
- x.report("ruby matmul :") { 100.times do sess.run(matmul) end }
140
- x.report("opencl matmul :") { 100.times do sess2.run(matmul) end }
141
- x.report("ruby :") { 100.times do sess.run(model, feed_dict: { p => rand, q => rand }) end }
142
- x.report("opencl :") { 100.times do sess2.run(model, feed_dict: { p => rand, q => rand }) end }
143
- x.report("ruby single function :") { 100.times do sess.run(single_function_test, feed_dict: { p => rand, q => rand }) end }
144
- x.report("opencl single function :") { 100.times do sess2.run(single_function_test, feed_dict: { p => rand, q => rand }) end }
145
- x.report("ruby pow float :") { 100.times do sess.run(pow_f, feed_dict: { p => rand, q => rand }) end }
146
- x.report("opencl pow float :") { 100.times do sess2.run(pow_f, feed_dict: { p => rand, q => rand }) end }
147
- x.report("ruby pow int :") { 100.times do sess.run(pow_i, feed_dict: { p => rand, q => rand }) end }
148
- x.report("opencl pow int :") { 100.times do sess2.run(pow_i, feed_dict: { p => rand, q => rand }) end }
149
- x.report("ruby dropout :") { 100.times do sess.run(dropout) end }
150
- x.report("opencl dropout :") { 100.times do sess2.run(dropout) end }
151
- end
112
+ cl_device = "#{device.platform.to_s} #{device.name}"
113
+
114
+ tests = {
115
+ "argmin" => argmin,
116
+ "bias_add_grad" => bias_add_grad,
117
+ "bias_add" => bias_add,
118
+ "conv2d_backprop" => conv2d_grad,
119
+ "conv2d" => conv2d,
120
+ "index" =>index,
121
+ "min" => min,
122
+ "sum" => sum,
123
+ "sum axis 1" => sum_axis_1,
124
+ "split" => split,
125
+ "add_n" => add_n,
126
+ "out of order matmul" => out_of_order,
127
+ "softmax" => softmax,
128
+ "matmul" => matmul,
129
+ "test model" => ->(sess) { sess.run(model, feed_dict: { p => rand, q => rand }) },
130
+ "single function test" => ->(sess) { sess.run(single_function_test, feed_dict: { p => rand, q => rand }) },
131
+ "pow (float)" => ->(sess) { sess.run(pow_f, feed_dict: { p => rand, q => rand }) },
132
+ "pow (int)" => ->(sess) { sess.run(pow_i, feed_dict: { p => rand, q => rand }) },
133
+ "dropout" => dropout
134
+ }
135
+
136
+ stats = {
137
+ "ruby" => {},
138
+ "opencl" => {},
139
+ }
140
+
141
+ puts "rehersal"
142
+ tests.each do |k, v|
143
+ if v.is_a?(Proc)
144
+ r = Benchmark.measure("ruby #{k}") { 10.times do v.call(sess) end }
145
+ r = Benchmark.measure("opencl #{k}") { 10.times do v.call(sess2) end }
146
+ else
147
+ r = Benchmark.measure("ruby #{k}") { 10.times do sess.run(v) end }
148
+ r = Benchmark.measure("opencl #{k}") { 10.times do sess2.run(v) end }
149
+ end
150
+ end
151
+
152
+ puts "writing benchmark"
153
+
154
+ tests.each do |k, v|
155
+ if v.is_a?(Proc)
156
+ r = Benchmark.measure(k) { 100.times do v.call(sess) end }
157
+ stats["ruby"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
158
+ r = Benchmark.measure(k) { 100.times do v.call(sess2) end }
159
+ stats["opencl"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
160
+ else
161
+ r = Benchmark.measure(k) { 100.times do sess.run(v) end }
162
+ stats["ruby"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
163
+ r = Benchmark.measure(k) { 100.times do sess2.run(v) end }
164
+ stats["opencl"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
165
+ end
166
+ end
167
+
168
+ output = {
169
+ "#{RUBY_ENGINE }-#{RUBY_VERSION}/#{os}/#{cpu.strip.gsub("model name\t: ", "")}/#{cl_device.strip}" => stats
170
+ }
171
+ current_benchmark = JSON.parse(File.read('benchmark.json'))
172
+ File.write("benchmark_#{Time.now.strftime('%Y%m%d%H%M')}.json", JSON.pretty_generate(current_benchmark.merge(output)))
@@ -622,15 +622,15 @@ module TensorStream
622
622
 
623
623
  if assign.container_buffer
624
624
  event_wait_list = build_event_wait_list([buffer, assign.container_buffer])
625
- assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
626
- _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
627
- else
628
- buffer.op
629
- end
630
625
  else
631
- value = read_final_result(buffer)
632
- assign.options[:container].buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
633
- assign.options[:container].value = value
626
+ var_buffer = _create_result_buffer(buffer.data_type, buffer.shape, tensor.name)
627
+ assign.options[:container].buffer = var_buffer
628
+ end
629
+
630
+ assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
631
+ _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
632
+ else
633
+ buffer.op
634
634
  end
635
635
 
636
636
  assign.container_buffer.dirty = true
@@ -797,7 +797,7 @@ module TensorStream
797
797
 
798
798
  return nil if buffer.nil?
799
799
 
800
- cl_buffer = unless value.flatten.empty?
800
+ cl_buffer = unless array_fast_empty?(value)
801
801
  cl_buffer_size = 1 if cl_buffer_size.zero?
802
802
  _opencl_context.create_buffer(cl_buffer_size * buffer.element_size)
803
803
  end
@@ -810,8 +810,8 @@ module TensorStream
810
810
  cl_object.buffer[index] = c
811
811
  end
812
812
  elsif value.is_a?(Array)
813
- value.flatten.each_with_index do |element, index|
814
- cl_object.buffer[index] = if element.is_a?(Tensor)
813
+ cast_value = value.flatten.each_with_index.map do |element, index|
814
+ if element.is_a?(Tensor)
815
815
  read_final_result(complete_eval(element, {}))
816
816
  elsif data_type == :boolean
817
817
  element ? 1 : 0
@@ -819,6 +819,10 @@ module TensorStream
819
819
  Tensor.cast_dtype(element, data_type)
820
820
  end
821
821
  end
822
+
823
+ cast_value.each_with_index do |v, index|
824
+ cl_object.buffer[index] = v
825
+ end
822
826
  elsif value.is_a?(NArray)
823
827
  cl_object.buffer = value
824
828
  elsif data_type == :boolean
@@ -998,6 +1002,23 @@ module TensorStream
998
1002
 
999
1003
  arr != 0
1000
1004
  end
1005
+
1006
+ ##
1007
+ # Fast way to determine if array is "empty" by including nested elements
1008
+ def array_fast_empty?(arr)
1009
+ return true if arr.size.zero?
1010
+
1011
+ arr.each do |a|
1012
+ if a.is_a?(Array)
1013
+ return false if !array_fast_empty?(a)
1014
+
1015
+ next
1016
+ end
1017
+ return false
1018
+ end
1019
+
1020
+ true
1021
+ end
1001
1022
  end
1002
1023
  end
1003
1024
  end
@@ -1,5 +1,5 @@
1
1
  module TensorStream
2
2
  module Opencl
3
- VERSION = "0.3.1"
3
+ VERSION = "0.3.2"
4
4
  end
5
5
  end
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
32
32
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
33
33
  spec.require_paths = ["lib"]
34
34
 
35
- spec.add_development_dependency "bundler", "~> 1.16"
35
+ spec.add_development_dependency "bundler"
36
36
  spec.add_development_dependency "rake", "~> 10.0"
37
37
  spec.add_development_dependency "rspec", "~> 3.0"
38
38
  spec.add_development_dependency "pry-byebug"
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tensor_stream-opencl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joseph Dayo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-09 00:00:00.000000000 Z
11
+ date: 2019-06-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.16'
19
+ version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '1.16'
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -166,6 +166,7 @@ files:
166
166
  - LICENSE.txt
167
167
  - README.md
168
168
  - Rakefile
169
+ - benchmark.json
169
170
  - benchmark/benchmark.rb
170
171
  - benchmark_imac2015_iris.txt
171
172
  - benchmark_intel.txt