tensor_stream-opencl 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b433e9e7ab38a517c21b57065e5a43b112640fd7c419fb7baa2f3319128cdacf
4
- data.tar.gz: fab7d48513cb0f8481e151d18b088782918cb1539b59586613a00c4d5f5aeed2
3
+ metadata.gz: d2a9a1d1add56659374f7246f35cd956eba86c9416a1e012abc44fe9bdb857ab
4
+ data.tar.gz: 6acf877d4b85f93facc750d9541221b0e2a8775d608e7160654c2421411d7a3b
5
5
  SHA512:
6
- metadata.gz: 04d106f5ee5fac49eba20ff143bb2212a1cafd5140fc04cee20958ffea0c5909d352824948badf16ec5bc8ca2a7b13b4dcf7748eb03cbd6dc8a466c6ae0f5040
7
- data.tar.gz: e17171f28641ce3496c0b338b6913c96e10d9fd5ce93b7980dae6edef00e63e5f7c4dcb60ed04fed5271a474b4940d069ebcf6a00bbfd3c4e6eafa2c0c4f26ed
6
+ metadata.gz: 7c9d3d3391d2bc228f5daed33a83fe750ca61bde20b71ce07fe2a73df32661fb910d1bcac5cbcb6a025e04808ac90174a0040dd13f23bc40b134a3196cef50ee
7
+ data.tar.gz: 6aabe002d353b02647810c8aea9e521ad1bb4029ccfd2c7cfd78f8609f6826c4260ef5f3ff02170d3a2f55f588f68aad8a1613efac8b586f0751c72e492287f0
@@ -0,0 +1,704 @@
1
+ {
2
+ "ruby-2.5.5/windows/AMD Ryzen 3 1300X Quad-Core Processor/NVIDIA CUDA GeForce GTX 1060 6GB": {
3
+ "ruby": {
4
+ "argmin": {
5
+ "real": 0.5717836269999452,
6
+ "stime": 0.0,
7
+ "total": 0.5780000000000003,
8
+ "utime": 0.5780000000000003
9
+ },
10
+ "bias_add_grad": {
11
+ "real": 2.280308563999938,
12
+ "stime": 0.016000000000000014,
13
+ "total": 2.234,
14
+ "utime": 2.218
15
+ },
16
+ "bias_add": {
17
+ "real": 2.081161492000092,
18
+ "stime": 0.0,
19
+ "total": 2.0470000000000006,
20
+ "utime": 2.0470000000000006
21
+ },
22
+ "conv2d_backprop": {
23
+ "real": 3.9330320810000785,
24
+ "stime": 0.015000000000000124,
25
+ "total": 3.9210000000000007,
26
+ "utime": 3.9060000000000006
27
+ },
28
+ "conv2d": {
29
+ "real": 0.8119420420000552,
30
+ "stime": 0.0,
31
+ "total": 0.8119999999999994,
32
+ "utime": 0.8119999999999994
33
+ },
34
+ "index": {
35
+ "real": 0.005713835000051404,
36
+ "stime": 0.0,
37
+ "total": 0.0,
38
+ "utime": 0.0
39
+ },
40
+ "min": {
41
+ "real": 3.6236803629999486,
42
+ "stime": 0.014999999999999902,
43
+ "total": 3.5300000000000002,
44
+ "utime": 3.5150000000000006
45
+ },
46
+ "sum": {
47
+ "real": 3.5159757579999678,
48
+ "stime": 0.0,
49
+ "total": 3.1709999999999994,
50
+ "utime": 3.1709999999999994
51
+ },
52
+ "sum axis 1": {
53
+ "real": 3.204440863000059,
54
+ "stime": 0.0,
55
+ "total": 3.0790000000000006,
56
+ "utime": 3.0790000000000006
57
+ },
58
+ "split": {
59
+ "real": 0.020213428000033673,
60
+ "stime": 0.0,
61
+ "total": 0.030999999999998806,
62
+ "utime": 0.030999999999998806
63
+ },
64
+ "add_n": {
65
+ "real": 0.15371632300002602,
66
+ "stime": 0.0,
67
+ "total": 0.1559999999999988,
68
+ "utime": 0.1559999999999988
69
+ },
70
+ "out of order matmul": {
71
+ "real": 1.598296256000026,
72
+ "stime": 0.0,
73
+ "total": 1.5629999999999988,
74
+ "utime": 1.5629999999999988
75
+ },
76
+ "softmax": {
77
+ "real": 0.03161882200004129,
78
+ "stime": 0.0,
79
+ "total": 0.01600000000000179,
80
+ "utime": 0.01600000000000179
81
+ },
82
+ "matmul": {
83
+ "real": 0.8320086150000634,
84
+ "stime": 0.0,
85
+ "total": 0.75,
86
+ "utime": 0.75
87
+ },
88
+ "test model": {
89
+ "real": 2.7918018939999456,
90
+ "stime": 0.0,
91
+ "total": 2.7029999999999994,
92
+ "utime": 2.7029999999999994
93
+ },
94
+ "single function test": {
95
+ "real": 0.41654277299994646,
96
+ "stime": 0.0,
97
+ "total": 0.375,
98
+ "utime": 0.375
99
+ },
100
+ "pow (float)": {
101
+ "real": 0.10249757899998713,
102
+ "stime": 0.0,
103
+ "total": 0.06299999999999883,
104
+ "utime": 0.06299999999999883
105
+ },
106
+ "pow (int)": {
107
+ "real": 0.030574132999959147,
108
+ "stime": 0.0,
109
+ "total": 0.03200000000000003,
110
+ "utime": 0.03200000000000003
111
+ },
112
+ "dropout": {
113
+ "real": 13.77498282299996,
114
+ "stime": 0.062000000000000055,
115
+ "total": 12.719,
116
+ "utime": 12.657
117
+ }
118
+ },
119
+ "opencl": {
120
+ "argmin": {
121
+ "real": 0.015615803000059714,
122
+ "stime": 0.0,
123
+ "total": 0.016000000000000014,
124
+ "utime": 0.016000000000000014
125
+ },
126
+ "bias_add_grad": {
127
+ "real": 0.013771769999948447,
128
+ "stime": 0.014999999999999902,
129
+ "total": 0.014999999999999902,
130
+ "utime": 0.0
131
+ },
132
+ "bias_add": {
133
+ "real": 0.1256369549998908,
134
+ "stime": 0.016000000000000014,
135
+ "total": 0.12599999999999945,
136
+ "utime": 0.10999999999999943
137
+ },
138
+ "conv2d_backprop": {
139
+ "real": 0.04058953899993867,
140
+ "stime": 0.03200000000000003,
141
+ "total": 0.03200000000000003,
142
+ "utime": 0.0
143
+ },
144
+ "conv2d": {
145
+ "real": 0.02783402499994736,
146
+ "stime": 0.0,
147
+ "total": 0.03200000000000003,
148
+ "utime": 0.03200000000000003
149
+ },
150
+ "index": {
151
+ "real": 0.015506175999917104,
152
+ "stime": 0.0,
153
+ "total": 0.0,
154
+ "utime": 0.0
155
+ },
156
+ "min": {
157
+ "real": 0.19373339399999168,
158
+ "stime": 0.03200000000000003,
159
+ "total": 0.14199999999999946,
160
+ "utime": 0.10999999999999943
161
+ },
162
+ "sum": {
163
+ "real": 0.01319783600001756,
164
+ "stime": 0.0,
165
+ "total": 0.0,
166
+ "utime": 0.0
167
+ },
168
+ "sum axis 1": {
169
+ "real": 0.01374040599989712,
170
+ "stime": 0.0,
171
+ "total": 0.0,
172
+ "utime": 0.0
173
+ },
174
+ "split": {
175
+ "real": 0.05617720299994744,
176
+ "stime": 0.014999999999999902,
177
+ "total": 0.04600000000000226,
178
+ "utime": 0.03100000000000236
179
+ },
180
+ "add_n": {
181
+ "real": 0.015245883000034155,
182
+ "stime": 0.016000000000000014,
183
+ "total": 0.016000000000000014,
184
+ "utime": 0.0
185
+ },
186
+ "out of order matmul": {
187
+ "real": 0.018022044999952413,
188
+ "stime": 0.015000000000000124,
189
+ "total": 0.030000000000000693,
190
+ "utime": 0.015000000000000568
191
+ },
192
+ "softmax": {
193
+ "real": 0.017924141999969834,
194
+ "stime": 0.016000000000000014,
195
+ "total": 0.016000000000000014,
196
+ "utime": 0.0
197
+ },
198
+ "matmul": {
199
+ "real": 0.014557924000087041,
200
+ "stime": 0.016000000000000014,
201
+ "total": 0.016000000000000014,
202
+ "utime": 0.0
203
+ },
204
+ "test model": {
205
+ "real": 0.2164292770000884,
206
+ "stime": 0.014999999999999902,
207
+ "total": 0.2179999999999993,
208
+ "utime": 0.2029999999999994
209
+ },
210
+ "single function test": {
211
+ "real": 0.13157883999997466,
212
+ "stime": 0.016000000000000014,
213
+ "total": 0.14100000000000001,
214
+ "utime": 0.125
215
+ },
216
+ "pow (float)": {
217
+ "real": 0.025822618000006514,
218
+ "stime": 0.0,
219
+ "total": 0.015000000000000568,
220
+ "utime": 0.015000000000000568
221
+ },
222
+ "pow (int)": {
223
+ "real": 0.02038636999998289,
224
+ "stime": 0.0,
225
+ "total": 0.015000000000000568,
226
+ "utime": 0.015000000000000568
227
+ },
228
+ "dropout": {
229
+ "real": 0.286617729999989,
230
+ "stime": 0.0,
231
+ "total": 0.21800000000000352,
232
+ "utime": 0.21800000000000352
233
+ }
234
+ }
235
+ },
236
+ "ruby-2.4.0/macosx/Intel(R) Core(TM) i5-5575R CPU @ 2.80GHz/Apple Intel(R) Iris(TM) Pro Graphics 6200": {
237
+ "ruby": {
238
+ "argmin": {
239
+ "real": 0.8880120001267642,
240
+ "stime": 0.0,
241
+ "total": 0.8800000000000008,
242
+ "utime": 0.8800000000000008
243
+ },
244
+ "bias_add_grad": {
245
+ "real": 2.3806210001930594,
246
+ "stime": 0.040000000000000036,
247
+ "total": 2.369999999999999,
248
+ "utime": 2.329999999999999
249
+ },
250
+ "bias_add": {
251
+ "real": 2.6210350000765175,
252
+ "stime": 0.09000000000000008,
253
+ "total": 2.5999999999999996,
254
+ "utime": 2.51
255
+ },
256
+ "conv2d_backprop": {
257
+ "real": 4.102318000048399,
258
+ "stime": 0.010000000000000009,
259
+ "total": 4.07,
260
+ "utime": 4.0600000000000005
261
+ },
262
+ "conv2d": {
263
+ "real": 0.9380730001721531,
264
+ "stime": 0.0,
265
+ "total": 0.9400000000000013,
266
+ "utime": 0.9400000000000013
267
+ },
268
+ "index": {
269
+ "real": 0.004769999999552965,
270
+ "stime": 0.0,
271
+ "total": 0.010000000000001563,
272
+ "utime": 0.010000000000001563
273
+ },
274
+ "min": {
275
+ "real": 3.7127469999250025,
276
+ "stime": 0.010000000000000009,
277
+ "total": 3.619999999999999,
278
+ "utime": 3.6099999999999994
279
+ },
280
+ "sum": {
281
+ "real": 7.684902000008151,
282
+ "stime": 0.04999999999999993,
283
+ "total": 7.589999999999999,
284
+ "utime": 7.539999999999999
285
+ },
286
+ "sum axis 1": {
287
+ "real": 7.972897999919951,
288
+ "stime": 0.06000000000000005,
289
+ "total": 7.679999999999998,
290
+ "utime": 7.619999999999997
291
+ },
292
+ "split": {
293
+ "real": 0.02342700003646314,
294
+ "stime": 0.0,
295
+ "total": 0.030000000000001137,
296
+ "utime": 0.030000000000001137
297
+ },
298
+ "add_n": {
299
+ "real": 0.16519299987703562,
300
+ "stime": 0.0,
301
+ "total": 0.1599999999999966,
302
+ "utime": 0.1599999999999966
303
+ },
304
+ "out of order matmul": {
305
+ "real": 1.6924950000829995,
306
+ "stime": 0.010000000000000009,
307
+ "total": 1.6299999999999975,
308
+ "utime": 1.6199999999999974
309
+ },
310
+ "softmax": {
311
+ "real": 0.03491799999028444,
312
+ "stime": 0.0,
313
+ "total": 0.03999999999999915,
314
+ "utime": 0.03999999999999915
315
+ },
316
+ "matmul": {
317
+ "real": 0.7962330000009388,
318
+ "stime": 0.0,
319
+ "total": 0.7899999999999991,
320
+ "utime": 0.7899999999999991
321
+ },
322
+ "test model": {
323
+ "real": 2.7632220000959933,
324
+ "stime": 0.010000000000000009,
325
+ "total": 2.7499999999999947,
326
+ "utime": 2.739999999999995
327
+ },
328
+ "single function test": {
329
+ "real": 0.37590500013902783,
330
+ "stime": 0.0,
331
+ "total": 0.38000000000000256,
332
+ "utime": 0.38000000000000256
333
+ },
334
+ "pow (float)": {
335
+ "real": 0.08679500012658536,
336
+ "stime": 0.0,
337
+ "total": 0.09000000000000341,
338
+ "utime": 0.09000000000000341
339
+ },
340
+ "pow (int)": {
341
+ "real": 0.023215000052005053,
342
+ "stime": 0.0,
343
+ "total": 0.030000000000001137,
344
+ "utime": 0.030000000000001137
345
+ },
346
+ "dropout": {
347
+ "real": 13.250881999963894,
348
+ "stime": 0.050000000000000044,
349
+ "total": 12.810000000000006,
350
+ "utime": 12.760000000000005
351
+ }
352
+ },
353
+ "opencl": {
354
+ "argmin": {
355
+ "real": 0.024038000032305717,
356
+ "stime": 0.020000000000000018,
357
+ "total": 0.03999999999999959,
358
+ "utime": 0.019999999999999574
359
+ },
360
+ "bias_add_grad": {
361
+ "real": 0.027796000009402633,
362
+ "stime": 0.009999999999999898,
363
+ "total": 0.02000000000000146,
364
+ "utime": 0.010000000000001563
365
+ },
366
+ "bias_add": {
367
+ "real": 0.1939310000743717,
368
+ "stime": 0.029999999999999916,
369
+ "total": 0.19000000000000006,
370
+ "utime": 0.16000000000000014
371
+ },
372
+ "conv2d_backprop": {
373
+ "real": 0.07214100006967783,
374
+ "stime": 0.030000000000000027,
375
+ "total": 0.05999999999999761,
376
+ "utime": 0.029999999999997584
377
+ },
378
+ "conv2d": {
379
+ "real": 0.03793899994343519,
380
+ "stime": 0.010000000000000009,
381
+ "total": 0.029999999999999583,
382
+ "utime": 0.019999999999999574
383
+ },
384
+ "index": {
385
+ "real": 0.021783999865874648,
386
+ "stime": 0.010000000000000009,
387
+ "total": 0.01999999999999802,
388
+ "utime": 0.00999999999999801
389
+ },
390
+ "min": {
391
+ "real": 0.16943500004708767,
392
+ "stime": 0.020000000000000018,
393
+ "total": 0.17000000000000215,
394
+ "utime": 0.15000000000000213
395
+ },
396
+ "sum": {
397
+ "real": 0.029592999955639243,
398
+ "stime": 0.010000000000000009,
399
+ "total": 0.020000000000001572,
400
+ "utime": 0.010000000000001563
401
+ },
402
+ "sum axis 1": {
403
+ "real": 0.023158999858424067,
404
+ "stime": 0.010000000000000009,
405
+ "total": 0.01999999999999802,
406
+ "utime": 0.00999999999999801
407
+ },
408
+ "split": {
409
+ "real": 0.09163099993020296,
410
+ "stime": 0.04999999999999993,
411
+ "total": 0.1000000000000042,
412
+ "utime": 0.05000000000000426
413
+ },
414
+ "add_n": {
415
+ "real": 0.032472999999299645,
416
+ "stime": 0.010000000000000009,
417
+ "total": 0.030000000000003135,
418
+ "utime": 0.020000000000003126
419
+ },
420
+ "out of order matmul": {
421
+ "real": 0.028477999847382307,
422
+ "stime": 0.020000000000000018,
423
+ "total": 0.029999999999998028,
424
+ "utime": 0.00999999999999801
425
+ },
426
+ "softmax": {
427
+ "real": 0.03417199989780784,
428
+ "stime": 0.010000000000000009,
429
+ "total": 0.030000000000003135,
430
+ "utime": 0.020000000000003126
431
+ },
432
+ "matmul": {
433
+ "real": 0.02589399996213615,
434
+ "stime": 0.010000000000000009,
435
+ "total": 0.030000000000003135,
436
+ "utime": 0.020000000000003126
437
+ },
438
+ "test model": {
439
+ "real": 0.2408190001733601,
440
+ "stime": 0.06000000000000005,
441
+ "total": 0.29000000000000403,
442
+ "utime": 0.23000000000000398
443
+ },
444
+ "single function test": {
445
+ "real": 0.15275100013241172,
446
+ "stime": 0.04999999999999982,
447
+ "total": 0.18999999999999329,
448
+ "utime": 0.13999999999999346
449
+ },
450
+ "pow (float)": {
451
+ "real": 0.029078999999910593,
452
+ "stime": 0.020000000000000018,
453
+ "total": 0.029999999999998028,
454
+ "utime": 0.00999999999999801
455
+ },
456
+ "pow (int)": {
457
+ "real": 0.029101000167429447,
458
+ "stime": 0.010000000000000009,
459
+ "total": 0.01999999999999802,
460
+ "utime": 0.00999999999999801
461
+ },
462
+ "dropout": {
463
+ "real": 0.35077799996361136,
464
+ "stime": 0.06000000000000005,
465
+ "total": 0.3299999999999961,
466
+ "utime": 0.269999999999996
467
+ }
468
+ }
469
+ },
470
+ "ruby-2.7.0/macosx/Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz/Apple Intel(R) Iris(TM) Plus Graphics 640": {
471
+ "ruby": {
472
+ "argmin": {
473
+ "real": 0.5259610000066459,
474
+ "stime": 0.011098000000000052,
475
+ "total": 0.5211289999999997,
476
+ "utime": 0.5100309999999997
477
+ },
478
+ "bias_add_grad": {
479
+ "real": 1.7163370000198483,
480
+ "stime": 0.024986000000000064,
481
+ "total": 1.6818499999999998,
482
+ "utime": 1.6568639999999997
483
+ },
484
+ "bias_add": {
485
+ "real": 1.863698000088334,
486
+ "stime": 0.013836999999999988,
487
+ "total": 1.8362060000000002,
488
+ "utime": 1.8223690000000001
489
+ },
490
+ "conv2d_backprop": {
491
+ "real": 2.9946660000132397,
492
+ "stime": 0.022391999999999967,
493
+ "total": 2.965606000000001,
494
+ "utime": 2.943214000000001
495
+ },
496
+ "conv2d": {
497
+ "real": 0.6393570000072941,
498
+ "stime": 0.0023859999999999992,
499
+ "total": 0.6361250000000003,
500
+ "utime": 0.6337390000000003
501
+ },
502
+ "index": {
503
+ "real": 0.002068999921903014,
504
+ "stime": 2.0000000000575113e-06,
505
+ "total": 0.002067000000000041,
506
+ "utime": 0.0020649999999999835
507
+ },
508
+ "min": {
509
+ "real": 2.6769400000339374,
510
+ "stime": 0.01394899999999999,
511
+ "total": 2.6575489999999995,
512
+ "utime": 2.6435999999999993
513
+ },
514
+ "sum": {
515
+ "real": 2.6265199999324977,
516
+ "stime": 0.020106999999999986,
517
+ "total": 2.6121140000000005,
518
+ "utime": 2.5920070000000006
519
+ },
520
+ "sum axis 1": {
521
+ "real": 2.6128099999623373,
522
+ "stime": 0.009680000000000022,
523
+ "total": 2.598789999999998,
524
+ "utime": 2.589109999999998
525
+ },
526
+ "split": {
527
+ "real": 0.01055200002156198,
528
+ "stime": 0.0005780000000000785,
529
+ "total": 0.010544999999999694,
530
+ "utime": 0.009966999999999615
531
+ },
532
+ "add_n": {
533
+ "real": 0.10583600006066263,
534
+ "stime": 0.00046999999999997044,
535
+ "total": 0.10540599999999856,
536
+ "utime": 0.10493599999999859
537
+ },
538
+ "out of order matmul": {
539
+ "real": 1.2986479999963194,
540
+ "stime": 0.003817999999999988,
541
+ "total": 1.2841700000000005,
542
+ "utime": 1.2803520000000006
543
+ },
544
+ "softmax": {
545
+ "real": 0.021060000057332218,
546
+ "stime": 8.300000000005525e-05,
547
+ "total": 0.020976000000000994,
548
+ "utime": 0.02089300000000094
549
+ },
550
+ "matmul": {
551
+ "real": 0.6281420000595972,
552
+ "stime": 0.0021889999999999965,
553
+ "total": 0.6234960000000016,
554
+ "utime": 0.6213070000000016
555
+ },
556
+ "test model": {
557
+ "real": 2.2064549999777228,
558
+ "stime": 0.007272999999999974,
559
+ "total": 2.1945329999999985,
560
+ "utime": 2.1872599999999984
561
+ },
562
+ "single function test": {
563
+ "real": 0.2703520000213757,
564
+ "stime": 0.0007090000000000707,
565
+ "total": 0.26878800000000025,
566
+ "utime": 0.2680790000000002
567
+ },
568
+ "pow (float)": {
569
+ "real": 0.07231099996715784,
570
+ "stime": 0.0002369999999999317,
571
+ "total": 0.07210500000000197,
572
+ "utime": 0.07186800000000204
573
+ },
574
+ "pow (int)": {
575
+ "real": 0.013833999983035028,
576
+ "stime": 3.6000000000036e-05,
577
+ "total": 0.013797999999999866,
578
+ "utime": 0.01376199999999983
579
+ },
580
+ "dropout": {
581
+ "real": 9.595753000001423,
582
+ "stime": 0.048594000000000026,
583
+ "total": 9.504062,
584
+ "utime": 9.455468
585
+ }
586
+ },
587
+ "opencl": {
588
+ "argmin": {
589
+ "real": 0.031660000095143914,
590
+ "stime": 0.010885000000000034,
591
+ "total": 0.023446999999999996,
592
+ "utime": 0.012561999999999962
593
+ },
594
+ "bias_add_grad": {
595
+ "real": 0.029562999960035086,
596
+ "stime": 0.010082999999999953,
597
+ "total": 0.02101999999999926,
598
+ "utime": 0.010936999999999308
599
+ },
600
+ "bias_add": {
601
+ "real": 0.17451599997002631,
602
+ "stime": 0.03910800000000003,
603
+ "total": 0.15842599999999984,
604
+ "utime": 0.11931799999999981
605
+ },
606
+ "conv2d_backprop": {
607
+ "real": 0.060010000015608966,
608
+ "stime": 0.019299999999999984,
609
+ "total": 0.04448700000000072,
610
+ "utime": 0.025187000000000737
611
+ },
612
+ "conv2d": {
613
+ "real": 0.035195000004023314,
614
+ "stime": 0.009983000000000075,
615
+ "total": 0.026590000000000558,
616
+ "utime": 0.016607000000000482
617
+ },
618
+ "index": {
619
+ "real": 0.028608000022359192,
620
+ "stime": 0.010353000000000057,
621
+ "total": 0.02094600000000013,
622
+ "utime": 0.010593000000000075
623
+ },
624
+ "min": {
625
+ "real": 0.17778700008057058,
626
+ "stime": 0.037808000000000064,
627
+ "total": 0.16168200000000077,
628
+ "utime": 0.1238740000000007
629
+ },
630
+ "sum": {
631
+ "real": 0.026511000003665686,
632
+ "stime": 0.00942299999999996,
633
+ "total": 0.01839499999999994,
634
+ "utime": 0.00897199999999998
635
+ },
636
+ "sum axis 1": {
637
+ "real": 0.026868000044487417,
638
+ "stime": 0.009302000000000032,
639
+ "total": 0.018642999999999188,
640
+ "utime": 0.009340999999999156
641
+ },
642
+ "split": {
643
+ "real": 0.10158700007013977,
644
+ "stime": 0.03593499999999994,
645
+ "total": 0.07264700000000135,
646
+ "utime": 0.03671200000000141
647
+ },
648
+ "add_n": {
649
+ "real": 0.02838599996175617,
650
+ "stime": 0.009464000000000028,
651
+ "total": 0.02101599999999826,
652
+ "utime": 0.01155199999999823
653
+ },
654
+ "out of order matmul": {
655
+ "real": 0.0278630000539124,
656
+ "stime": 0.009276000000000062,
657
+ "total": 0.019607999999998293,
658
+ "utime": 0.010331999999998231
659
+ },
660
+ "softmax": {
661
+ "real": 0.02873000002000481,
662
+ "stime": 0.009530999999999956,
663
+ "total": 0.02050600000000191,
664
+ "utime": 0.010975000000001955
665
+ },
666
+ "matmul": {
667
+ "real": 0.03137700003571808,
668
+ "stime": 0.011123000000000105,
669
+ "total": 0.023513000000000006,
670
+ "utime": 0.012389999999999901
671
+ },
672
+ "test model": {
673
+ "real": 0.20114699995610863,
674
+ "stime": 0.05457400000000001,
675
+ "total": 0.21827399999999864,
676
+ "utime": 0.16369999999999862
677
+ },
678
+ "single function test": {
679
+ "real": 0.11842899990733713,
680
+ "stime": 0.036387999999999976,
681
+ "total": 0.13410199999999983,
682
+ "utime": 0.09771399999999986
683
+ },
684
+ "pow (float)": {
685
+ "real": 0.029842999996617436,
686
+ "stime": 0.009394000000000013,
687
+ "total": 0.021471999999998936,
688
+ "utime": 0.012077999999998923
689
+ },
690
+ "pow (int)": {
691
+ "real": 0.028302000020630658,
692
+ "stime": 0.009438999999999975,
693
+ "total": 0.019182000000000254,
694
+ "utime": 0.00974300000000028
695
+ },
696
+ "dropout": {
697
+ "real": 0.3020259999902919,
698
+ "stime": 0.0603800000000001,
699
+ "total": 0.2593999999999974,
700
+ "utime": 0.1990199999999973
701
+ }
702
+ }
703
+ }
704
+ }
@@ -102,50 +102,71 @@ puts TensorStream::Evaluator.default_evaluators
102
102
 
103
103
  sess2 = tf.session
104
104
 
105
- if os == :macosx
106
- puts `sysctl -n machdep.cpu.brand_string`
107
- else
108
- puts `cat /proc/cpuinfo | grep "model name" | head -1`
109
- end
105
+ cpu = if os == :macosx
106
+ `sysctl -n machdep.cpu.brand_string`
107
+ else
108
+ `cat /proc/cpuinfo | grep "model name" | head -1`
109
+ end
110
+
110
111
  device = TensorStream::Evaluator::OpenclEvaluator.default_device.native_device
111
- puts "OpenCL device #{device.platform.to_s} #{device.name}"
112
- Benchmark.bmbm do |x|
113
- x.report("ruby argmin :") { 100.times do sess.run(argmin) end }
114
- x.report("opencl argmin :") { 100.times do sess2.run(argmin) end }
115
- x.report("ruby bias_add_grad :") { 100.times do sess.run(bias_add_grad) end }
116
- x.report("opencl bias_add_grad :") { 100.times do sess2.run(bias_add_grad) end }
117
- x.report("ruby bias_add :") { 100.times do sess.run(bias_add) end }
118
- x.report("opencl bias_add :") { 100.times do sess2.run(bias_add) end }
119
- x.report("ruby conv2d_backprop :") { 100.times do sess.run(conv2d_grad) end }
120
- x.report("opencl conv2d_backprop :") { 100.times do sess2.run(conv2d_grad) end }
121
- x.report("ruby conv2d :") { 100.times do sess.run(conv2d) end }
122
- x.report("opencl conv2d :") { 100.times do sess2.run(conv2d) end }
123
- x.report("ruby arr index :") { 100.times do sess.run(index) end }
124
- x.report("opencl arr index :") { 100.times do sess2.run(index) end }
125
- x.report("ruby min :") { 100.times do sess.run(min) end }
126
- x.report("opencl min :") { 100.times do sess2.run(min) end }
127
- x.report("ruby sum :") { 100.times do sess.run(sum) end }
128
- x.report("opencl sum :") { 100.times do sess2.run(sum) end }
129
- x.report("ruby sum axis 1 :") { 100.times do sess.run(sum_axis_1) end }
130
- x.report("opencl sum axis 1 :") { 100.times do sess2.run(sum_axis_1) end }
131
- x.report("ruby split :") { 100.times do sess.run(split) end }
132
- x.report("opencl split :") { 100.times do sess2.run(split) end }
133
- x.report("ruby add_n :") { 100.times do sess.run(add_n) end }
134
- x.report("opencl add_n :") { 100.times do sess2.run(add_n) end }
135
- x.report("ruby ooo matmul :") { 100.times do sess.run(out_of_order) end }
136
- x.report("opencl ooo matmul :") { 100.times do sess2.run(out_of_order) end }
137
- x.report("ruby softmax :") { 100.times do sess.run(softmax) end }
138
- x.report("opencl softmax :") { 100.times do sess2.run(softmax) end }
139
- x.report("ruby matmul :") { 100.times do sess.run(matmul) end }
140
- x.report("opencl matmul :") { 100.times do sess2.run(matmul) end }
141
- x.report("ruby :") { 100.times do sess.run(model, feed_dict: { p => rand, q => rand }) end }
142
- x.report("opencl :") { 100.times do sess2.run(model, feed_dict: { p => rand, q => rand }) end }
143
- x.report("ruby single function :") { 100.times do sess.run(single_function_test, feed_dict: { p => rand, q => rand }) end }
144
- x.report("opencl single function :") { 100.times do sess2.run(single_function_test, feed_dict: { p => rand, q => rand }) end }
145
- x.report("ruby pow float :") { 100.times do sess.run(pow_f, feed_dict: { p => rand, q => rand }) end }
146
- x.report("opencl pow float :") { 100.times do sess2.run(pow_f, feed_dict: { p => rand, q => rand }) end }
147
- x.report("ruby pow int :") { 100.times do sess.run(pow_i, feed_dict: { p => rand, q => rand }) end }
148
- x.report("opencl pow int :") { 100.times do sess2.run(pow_i, feed_dict: { p => rand, q => rand }) end }
149
- x.report("ruby dropout :") { 100.times do sess.run(dropout) end }
150
- x.report("opencl dropout :") { 100.times do sess2.run(dropout) end }
151
- end
112
+ cl_device = "#{device.platform.to_s} #{device.name}"
113
+
114
+ tests = {
115
+ "argmin" => argmin,
116
+ "bias_add_grad" => bias_add_grad,
117
+ "bias_add" => bias_add,
118
+ "conv2d_backprop" => conv2d_grad,
119
+ "conv2d" => conv2d,
120
+ "index" =>index,
121
+ "min" => min,
122
+ "sum" => sum,
123
+ "sum axis 1" => sum_axis_1,
124
+ "split" => split,
125
+ "add_n" => add_n,
126
+ "out of order matmul" => out_of_order,
127
+ "softmax" => softmax,
128
+ "matmul" => matmul,
129
+ "test model" => ->(sess) { sess.run(model, feed_dict: { p => rand, q => rand }) },
130
+ "single function test" => ->(sess) { sess.run(single_function_test, feed_dict: { p => rand, q => rand }) },
131
+ "pow (float)" => ->(sess) { sess.run(pow_f, feed_dict: { p => rand, q => rand }) },
132
+ "pow (int)" => ->(sess) { sess.run(pow_i, feed_dict: { p => rand, q => rand }) },
133
+ "dropout" => dropout
134
+ }
135
+
136
+ stats = {
137
+ "ruby" => {},
138
+ "opencl" => {},
139
+ }
140
+
141
+ puts "rehersal"
142
+ tests.each do |k, v|
143
+ if v.is_a?(Proc)
144
+ r = Benchmark.measure("ruby #{k}") { 10.times do v.call(sess) end }
145
+ r = Benchmark.measure("opencl #{k}") { 10.times do v.call(sess2) end }
146
+ else
147
+ r = Benchmark.measure("ruby #{k}") { 10.times do sess.run(v) end }
148
+ r = Benchmark.measure("opencl #{k}") { 10.times do sess2.run(v) end }
149
+ end
150
+ end
151
+
152
+ puts "writing benchmark"
153
+
154
+ tests.each do |k, v|
155
+ if v.is_a?(Proc)
156
+ r = Benchmark.measure(k) { 100.times do v.call(sess) end }
157
+ stats["ruby"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
158
+ r = Benchmark.measure(k) { 100.times do v.call(sess2) end }
159
+ stats["opencl"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
160
+ else
161
+ r = Benchmark.measure(k) { 100.times do sess.run(v) end }
162
+ stats["ruby"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
163
+ r = Benchmark.measure(k) { 100.times do sess2.run(v) end }
164
+ stats["opencl"][r.label] = { real: r.real, stime: r.stime, total: r.total, utime: r.utime }
165
+ end
166
+ end
167
+
168
+ output = {
169
+ "#{RUBY_ENGINE }-#{RUBY_VERSION}/#{os}/#{cpu.strip.gsub("model name\t: ", "")}/#{cl_device.strip}" => stats
170
+ }
171
+ current_benchmark = JSON.parse(File.read('benchmark.json'))
172
+ File.write("benchmark_#{Time.now.strftime('%Y%m%d%H%M')}.json", JSON.pretty_generate(current_benchmark.merge(output)))
@@ -622,15 +622,15 @@ module TensorStream
622
622
 
623
623
  if assign.container_buffer
624
624
  event_wait_list = build_event_wait_list([buffer, assign.container_buffer])
625
- assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
626
- _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
627
- else
628
- buffer.op
629
- end
630
625
  else
631
- value = read_final_result(buffer)
632
- assign.options[:container].buffer = convert_to_opencl(value, buffer.shape, data_type: tensor.data_type, name: assign.name)
633
- assign.options[:container].value = value
626
+ var_buffer = _create_result_buffer(buffer.data_type, buffer.shape, tensor.name)
627
+ assign.options[:container].buffer = var_buffer
628
+ end
629
+
630
+ assign.container_buffer.op = if assign.container_buffer.cl_buffer != buffer.cl_buffer
631
+ _opencl_queue.enqueue_copy_buffer(buffer.cl_buffer, assign.container_buffer.cl_buffer, event_wait_list: event_wait_list)
632
+ else
633
+ buffer.op
634
634
  end
635
635
 
636
636
  assign.container_buffer.dirty = true
@@ -797,7 +797,7 @@ module TensorStream
797
797
 
798
798
  return nil if buffer.nil?
799
799
 
800
- cl_buffer = unless value.flatten.empty?
800
+ cl_buffer = unless array_fast_empty?(value)
801
801
  cl_buffer_size = 1 if cl_buffer_size.zero?
802
802
  _opencl_context.create_buffer(cl_buffer_size * buffer.element_size)
803
803
  end
@@ -810,8 +810,8 @@ module TensorStream
810
810
  cl_object.buffer[index] = c
811
811
  end
812
812
  elsif value.is_a?(Array)
813
- value.flatten.each_with_index do |element, index|
814
- cl_object.buffer[index] = if element.is_a?(Tensor)
813
+ cast_value = value.flatten.each_with_index.map do |element, index|
814
+ if element.is_a?(Tensor)
815
815
  read_final_result(complete_eval(element, {}))
816
816
  elsif data_type == :boolean
817
817
  element ? 1 : 0
@@ -819,6 +819,10 @@ module TensorStream
819
819
  Tensor.cast_dtype(element, data_type)
820
820
  end
821
821
  end
822
+
823
+ cast_value.each_with_index do |v, index|
824
+ cl_object.buffer[index] = v
825
+ end
822
826
  elsif value.is_a?(NArray)
823
827
  cl_object.buffer = value
824
828
  elsif data_type == :boolean
@@ -998,6 +1002,23 @@ module TensorStream
998
1002
 
999
1003
  arr != 0
1000
1004
  end
1005
+
1006
+ ##
1007
+ # Fast way to determine if array is "empty" by including nested elements
1008
+ def array_fast_empty?(arr)
1009
+ return true if arr.size.zero?
1010
+
1011
+ arr.each do |a|
1012
+ if a.is_a?(Array)
1013
+ return false if !array_fast_empty?(a)
1014
+
1015
+ next
1016
+ end
1017
+ return false
1018
+ end
1019
+
1020
+ true
1021
+ end
1001
1022
  end
1002
1023
  end
1003
1024
  end
@@ -1,5 +1,5 @@
1
1
  module TensorStream
2
2
  module Opencl
3
- VERSION = "0.3.1"
3
+ VERSION = "0.3.2"
4
4
  end
5
5
  end
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
32
32
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
33
33
  spec.require_paths = ["lib"]
34
34
 
35
- spec.add_development_dependency "bundler", "~> 1.16"
35
+ spec.add_development_dependency "bundler"
36
36
  spec.add_development_dependency "rake", "~> 10.0"
37
37
  spec.add_development_dependency "rspec", "~> 3.0"
38
38
  spec.add_development_dependency "pry-byebug"
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tensor_stream-opencl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joseph Dayo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-04-09 00:00:00.000000000 Z
11
+ date: 2019-06-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.16'
19
+ version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '1.16'
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -166,6 +166,7 @@ files:
166
166
  - LICENSE.txt
167
167
  - README.md
168
168
  - Rakefile
169
+ - benchmark.json
169
170
  - benchmark/benchmark.rb
170
171
  - benchmark_imac2015_iris.txt
171
172
  - benchmark_intel.txt