bones-compiler 1.1.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
@@ -0,0 +1,228 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // demonstrates the use of Bones for an example application: 'Hotspot', taken from
4
+ // the Rodinia benchmark suite. For more information on the application or on Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on Hotspot
8
+ // Article............http://dx.doi.org/10.1109/TVLSI.2006.876103
9
+ // Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........applications/hotspot.c
17
+ // Authors............Cedric Nugteren
18
+ // Last modified on...10-Aug-2012
19
+ //
20
+
21
+ //########################################################################
22
+ //### Includes
23
+ //########################################################################
24
+
25
+ #include <stdio.h>
26
+ #include <stdlib.h>
27
+ #include <math.h>
28
+
29
+ //########################################################################
30
+ //### Input parameters
31
+ //########################################################################
32
+
33
+ #define GRID_ROWS 64 // Number of rows in the grid (positive integer)
34
+ #define GRID_COLS 64 // Number of columns in the grid (positive integer)
35
+ #define SIM_TIME 2 // Number of iterations
36
+ #define TEMPERATURE_FILE "data/hotspot_temperature_64.txt" // Name of the file containing the initial temperature values of each cell
37
+ #define POWER_FILE "data/hotspot_power_64.txt" // Name of the file containing the dissipated power values of each cell
38
+
39
+ //########################################################################
40
+ //### Defines
41
+ //########################################################################
42
+
43
+ #define STRING_SIZE 256 // Length of the strings in the temperature and power files
44
+ #define MAX_PD (3.0e6) // Maximum power density possible (say 300W for a 10mm x 10mm chip)
45
+ #define PRECISION 0.001 // Required precision in degrees
46
+ #define SPEC_HEAT_SI 1.75e6 // Volumetric specific heat of silicon (scales the per-cell thermal capacitance 'cap')
47
+ #define K_SI 100 // Thermal conductivity of silicon (used for the per-cell thermal resistances Rx, Ry and Rz)
48
+ #define FACTOR_CHIP 0.5 // Capacitance fitting factor
49
+ #define T_CHIP 0.0005 // Chip thickness (a length, not a temperature: it multiplies the cell area in 'cap')
50
+ #define CHIP_HEIGHT 0.016 // Chip height
51
+ #define CHIP_WIDTH 0.016 // Chip width
52
+ #define AMB_TEMP 80.0 // Ambient temperature, assuming no package at all
53
+
54
+ //########################################################################
55
+ //### Forward declarations
56
+ //########################################################################
57
+
58
+ void read_input(double* array, const char* filename);
59
+
60
+ //########################################################################
61
+ //### Start of the main function
62
+ //########################################################################
63
+
64
+ int main(void) {
65
+
66
+ // Declare the loop iterators
67
+ int r,c,iter;
68
+
69
+ // Declare other/helper variables
70
+ int index;
71
+ double delta;
72
+ int row = GRID_ROWS;
73
+ int col = GRID_COLS;
74
+ double grid_height = CHIP_HEIGHT/row;
75
+ double grid_width = CHIP_WIDTH/col;
76
+
77
+ // Set domain variables
78
+ double cap = FACTOR_CHIP*SPEC_HEAT_SI*T_CHIP*grid_width*grid_height;
79
+ double Rx = grid_width / (2.0*K_SI*T_CHIP*grid_height);
80
+ double Ry = grid_height / (2.0*K_SI*T_CHIP*grid_width);
81
+ double Rz = T_CHIP / (K_SI*grid_height*grid_width);
82
+ double max_slope = MAX_PD / (FACTOR_CHIP*T_CHIP*SPEC_HEAT_SI);
83
+ double step = PRECISION / max_slope;
84
+
85
+ // Initialising memory
86
+ printf("\n[hotspot] Initialising memory"); fflush(stdout);
87
+ double* temperature = (double*) calloc(row*col, sizeof(double));
88
+ double* power = (double*) calloc(row*col, sizeof(double));
89
+ double* result = (double*) calloc(row*col, sizeof(double));
90
+
91
+ // Read initial temperature and power arrays
92
+ printf("\n[hotspot] Populating memory"); fflush(stdout);
93
+ read_input(temperature, TEMPERATURE_FILE);
94
+ read_input(power, POWER_FILE);
95
+
96
+ // Perform the computation a given number of times
97
+ printf("\n[hotspot] Performing the computation %d times",SIM_TIME); fflush(stdout);
98
+ #pragma scop
99
+ for (iter=0; iter<SIM_TIME; iter++) {
100
+
101
+ // Transient solver driver routine: convert the heat transfer differential equations to difference equations
102
+ // and solve the difference equations by iterating
103
+ for (r=0; r<row; r++) {
104
+ for (c=0; c<col; c++) {
105
+
106
+ // Corner 1
107
+ if ( (r == 0) && (c == 0) ) {
108
+ delta = (step / cap) * (power[0] +
109
+ (temperature[1] - temperature[0]) / Rx +
110
+ (temperature[col] - temperature[0]) / Ry +
111
+ (AMB_TEMP - temperature[0]) / Rz);
112
+ }
113
+ // Corner 2
114
+ else if ((r == 0) && (c == col-1)) {
115
+ delta = (step / cap) * (power[c] +
116
+ (temperature[c-1] - temperature[c]) / Rx +
117
+ (temperature[c+col] - temperature[c]) / Ry +
118
+ (AMB_TEMP - temperature[c]) / Rz);
119
+ }
120
+ // Corner 3
121
+ else if ((r == row-1) && (c == col-1)) {
122
+ delta = (step / cap) * (power[r*col+c] +
123
+ (temperature[r*col+c-1] - temperature[r*col+c]) / Rx +
124
+ (temperature[(r-1)*col+c] - temperature[r*col+c]) / Ry +
125
+ (AMB_TEMP - temperature[r*col+c]) / Rz);
126
+ }
127
+ // Corner 4
128
+ else if ((r == row-1) && (c == 0)) {
129
+ delta = (step / cap) * (power[r*col] +
130
+ (temperature[r*col+1] - temperature[r*col]) / Rx +
131
+ (temperature[(r-1)*col] - temperature[r*col]) / Ry +
132
+ (AMB_TEMP - temperature[r*col]) / Rz);
133
+ }
134
+ // Edge 1
135
+ else if (r == 0) {
136
+ delta = (step / cap) * (power[c] +
137
+ (temperature[c+1] + temperature[c-1] - 2.0*temperature[c]) / Rx +
138
+ (temperature[col+c] - temperature[c]) / Ry +
139
+ (AMB_TEMP - temperature[c]) / Rz);
140
+ }
141
+ // Edge 2
142
+ else if (c == col-1) {
143
+ delta = (step / cap) * (power[r*col+c] +
144
+ (temperature[(r+1)*col+c] + temperature[(r-1)*col+c] - 2.0*temperature[r*col+c]) / Ry +
145
+ (temperature[r*col+c-1] - temperature[r*col+c]) / Rx +
146
+ (AMB_TEMP - temperature[r*col+c]) / Rz);
147
+ }
148
+ // Edge 3
149
+ else if (r == row-1) {
150
+ delta = (step / cap) * (power[r*col+c] +
151
+ (temperature[r*col+c+1] + temperature[r*col+c-1] - 2.0*temperature[r*col+c]) / Rx +
152
+ (temperature[(r-1)*col+c] - temperature[r*col+c]) / Ry +
153
+ (AMB_TEMP - temperature[r*col+c]) / Rz);
154
+ }
155
+ // Edge 4
156
+ else if (c == 0) {
157
+ delta = (step / cap) * (power[r*col] +
158
+ (temperature[(r+1)*col] + temperature[(r-1)*col] - 2.0*temperature[r*col]) / Ry +
159
+ (temperature[r*col+1] - temperature[r*col]) / Rx +
160
+ (AMB_TEMP - temperature[r*col]) / Rz);
161
+ }
162
+ // Inside the chip
163
+ else {
164
+ delta = (step / cap) * (power[r*col+c] +
165
+ (temperature[(r+1)*col+c] + temperature[(r-1)*col+c] - 2.0*temperature[r*col+c]) / Ry +
166
+ (temperature[r*col+c+1] + temperature[r*col+c-1] - 2.0*temperature[r*col+c]) / Rx +
167
+ (AMB_TEMP - temperature[r*col+c]) / Rz);
168
+ }
169
+
170
+ // Update the temperatures
171
+ result[r*col+c] = temperature[r*col+c] + delta;
172
+ }
173
+ }
174
+
175
+ // Copy the result as the new temperatures
176
+ for (r=0; r<row; r++) {
177
+ for (c=0; c<col; c++) {
178
+ temperature[r*col+c] = result[r*col+c];
179
+ }
180
+ }
181
+ }
182
+ #pragma endscop
183
+
184
+ // Print the values matrix
185
+ printf("\n[hotspot] Printing the final temperatures:\n\n"); fflush(stdout);
186
+ for (r=0; r<row; r++) {
187
+ for (c=0; c<col; c++) {
188
+ index = r*col+c;
189
+ printf("%6d: %.3lf ", index, temperature[index]);
190
+ }
191
+ printf("\n");
192
+ }
193
+
194
+ // Clean-up and exit
195
+ printf("\n[hotspot] Completed\n\n"); fflush(stdout);
196
+ free(temperature); free(power); free(result);
197
+ fflush(stdout);
198
+ return 0;
199
+ }
200
+
201
+ //########################################################################
202
+ //### Function to read an input file (power or temperature values)
203
+ //########################################################################
204
+
205
+ void read_input(double* array, const char* filename) {
206
+ int r, c;
207
+ char string[STRING_SIZE];
208
+ double value;
209
+
210
+ // Open the file
211
+ FILE* file_pointer = fopen(filename, "r");
212
+ if (!file_pointer) { printf("\n[hotspot] Error: file '%s' could not be opened for reading\n\n", filename); fflush(stdout); exit(1); }
213
+
214
+ // Process the file
215
+ for (r=0; r<GRID_ROWS; r++) {
216
+ for (c=0; c<GRID_COLS; c++) {
217
+ fgets(string, STRING_SIZE, file_pointer);
218
+ if (feof(file_pointer)) { printf("\n[hotspot] Error: not enough lines in file '%s'\n\n", filename); fflush(stdout); exit(1); }
219
+ if ((sscanf(string, "%lf", &value) != 1) ) { printf("\n[hotspot] Error: invalid file format for '%s'\n\n", filename); fflush(stdout); exit(1); }
220
+ array[r*GRID_COLS+c] = value;
221
+ }
222
+ }
223
+
224
+ // Clean-up and return
225
+ fclose(file_pointer);
226
+ }
227
+
228
+ //########################################################################
@@ -0,0 +1,164 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // demonstrates the use of Bones for an example application: 'K-means clustering',
4
+ // as also available in the Rodinia benchmark suite. For more information on the
5
+ // application or on Bones please use the contact information below.
6
+ //
7
+ // == This implementation of K-means clustering is inspired by:
8
+ // Author.............Roger Zhang
9
+ // Web address........http://cs.smu.ca/~r_zhang/code/kmeans.c
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........applications/kmeans.c
17
+ // Authors............Cedric Nugteren
18
+ // Last modified on...10-Aug-2012
19
+ //
20
+
21
+ //########################################################################
22
+ //### Includes
23
+ //########################################################################
24
+
25
+ #include <stdio.h>
26
+ #include <math.h>
27
+ #include <float.h>
28
+
29
+ //########################################################################
30
+ //### Defines
31
+ //########################################################################
32
+
33
+ #define SIZE 512
34
+ #define NUM_CLUSTERS 20
35
+ #define DIMENSIONS 2
36
+ #define THRESHOLD 0.0001
37
+
38
+ //########################################################################
39
+ //### Start of the main function
40
+ //########################################################################
41
+ int main(void) {
42
+
43
+ // Declare the loop iterators
44
+ int i,j,k;
45
+
46
+ // Declare the error variables
47
+ double error = DBL_MAX;
48
+ double old_error;
49
+ int iterations = 0;
50
+
51
+ // Declare the distance variables and arrays
52
+ double distance[1];
53
+ double min_distance[1];
54
+ double distances[SIZE];
55
+
56
+ // Initialising memory
57
+ printf("\n[k-means] Initialising memory"); fflush(stdout);
58
+ double input[SIZE][DIMENSIONS];
59
+ double centroids[NUM_CLUSTERS][DIMENSIONS];
60
+ double centroids_temp[NUM_CLUSTERS][DIMENSIONS];
61
+ int output[SIZE];
62
+ int counts[NUM_CLUSTERS];
63
+
64
+ // Set the input data
65
+ printf("\n[k-means] Populating memory"); fflush(stdout);
66
+ for (i=0; i<SIZE; i++) {
67
+ input[i][0] = (i/16);
68
+ input[i][1] = i%4;
69
+ }
70
+
71
+ // Pick k initial centroids
72
+ printf("\n[k-means] Setting 'k' initial centroids"); fflush(stdout);
73
+ for (k=0; k<NUM_CLUSTERS; k++) {
74
+ for (j=0; j<DIMENSIONS; j++) {
75
+ centroids[k][j] = input[(SIZE/NUM_CLUSTERS)*k][j];
76
+ }
77
+ }
78
+
79
+ // Perform the k-means clustering algorithm, end when the error is not becoming smaller
80
+ printf("\n[k-means] Perform the clustering algorithm"); fflush(stdout);
81
+ do {
82
+ #pragma scop
83
+
84
+ // Save the error from the last step
85
+ old_error = error;
86
+ error = 0;
87
+
88
+ // Clear old counts and temporary centroids
89
+ for (k=0; k<NUM_CLUSTERS; k++) {
90
+ counts[k] = 0;
91
+ for (j=0; j<DIMENSIONS; j++) {
92
+ centroids_temp[k][j] = 0;
93
+ }
94
+ }
95
+
96
+ // Iterate over all data points
97
+ for (i=0; i<SIZE; i++) {
98
+
99
+ // Find the closest cluster
100
+ min_distance[0] = DBL_MAX;
101
+ for (k=0; k<NUM_CLUSTERS; k++) {
102
+ distance[0] = 0;
103
+ for (j=0; j<DIMENSIONS; j++) {
104
+ distance[0] += pow(input[i][j]-centroids[k][j],2);
105
+ }
106
+ if (distance[0] < min_distance[0]) {
107
+ output[i] = k;
108
+ min_distance[0] = distance[0];
109
+ }
110
+ }
111
+
112
+ // Update the size and temporary centroid of the destination cluster
113
+ for (j=0; j<DIMENSIONS; j++) {
114
+ centroids_temp[output[i]][j] += input[i][j];
115
+ }
116
+ counts[output[i]] += 1;
117
+
118
+ // Store the resulting distance
119
+ distances[i] = min_distance[0];
120
+ }
121
+
122
+ // Update the standard error
123
+ for (i=0; i<SIZE; i++) {
124
+ error += distances[i];
125
+ }
126
+
127
+ // Update all centroids
128
+ for (k=0; k<NUM_CLUSTERS; k++) {
129
+ for (j=0; j<DIMENSIONS; j++) {
130
+ if (counts[k]) {
131
+ centroids[k][j] = centroids_temp[k][j] / counts[k];
132
+ }
133
+ else {
134
+ centroids[k][j] = centroids_temp[k][j];
135
+ }
136
+ }
137
+ }
138
+
139
+ // Go to the next iteration
140
+ iterations += 1;
141
+
142
+ #pragma endscop
143
+ } while (fabs(error-old_error) > THRESHOLD);
144
+
145
+ // Print the results
146
+ printf("\n[k-means] Algorithm finished in %d iterations with an error of %.3lf", iterations, error); fflush(stdout);
147
+ printf("\n[k-means] Printing the results: \n\n"); fflush(stdout);
148
+ for (k=0; k<NUM_CLUSTERS; k++) {
149
+ printf("Cluster %2i: ", k);
150
+ for (i=0; i<SIZE; i++) {
151
+ if (output[i] == k) {
152
+ printf("%3i ", i);
153
+ }
154
+ }
155
+ printf("\n");
156
+ }
157
+
158
+ // Clean-up and exit the function
159
+ printf("\n[k-means] Completed\n\n"); fflush(stdout);
160
+ fflush(stdout);
161
+ return 0;
162
+ }
163
+
164
+ //########################################################################
@@ -0,0 +1,188 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // demonstrates the use of Bones for an example application: 'Speckle Reducing
4
+ // Anisotropic Diffusion' or 'SRAD', taken from the Rodinia benchmark suite. For
5
+ // more information on the application or on Bones please use the contact infor-
6
+ // mation below.
7
+ //
8
+ // == More information on SRAD (Speckle Reducing Anisotropic Diffusion):
9
+ // Article............http://dx.doi.org/10.1109/TIP.2002.804276
10
+ // Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
11
+ //
12
+ // == More information on Bones
13
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
14
+ // Web address........http://parse.ele.tue.nl/bones/
15
+ //
16
+ // == File information
17
+ // Filename...........applications/srad.c
18
+ // Authors............Cedric Nugteren
19
+ // Original authors...Rob Janiczek, Drew Gilliam, Lukasz Szafaryn
20
+ // Last modified on...10-Aug-2012
21
+ //
22
+
23
+ //########################################################################
24
+ //### Includes
25
+ //########################################################################
26
+
27
+ #include <stdio.h>
28
+ #include <stdlib.h>
29
+ #include <math.h>
30
+
31
+ //########################################################################
32
+ //### Defines
33
+ //########################################################################
34
+
35
+ #define ROWS 128 // Number of ROWS in the domain
36
+ #define COLS 128 // Number of COLS in the domain
37
+ #define R1 0 // y1 position of the speckle
38
+ #define R2 31 // y2 position of the speckle
39
+ #define C1 0 // x1 position of the speckle
40
+ #define C2 31 // x2 position of the speckle
41
+ #define LAMBDA 0.5 // Lambda value
42
+ #define NITER 2 // Number of iterations
43
+
44
+ //########################################################################
45
+ //### Start of the main function
46
+ //########################################################################
47
+
48
+ int main(void) {
49
+
50
+ // Declare the loop iterators
51
+ int i,j,iter;
52
+
53
+ // Declare domain variables
54
+ float mean_roi, var_roi;
55
+ float q0s, qs;
56
+ float divergence;
57
+ float cN, cS, cW, cE;
58
+ float G2, L;
59
+
60
+ // Declare other/helper variables
61
+ int index;
62
+ float temp_value;
63
+ float sum1, sum2;
64
+ float current_value;
65
+ float temp_a, temp_b;
66
+
67
+ // Check for valid row and column sizes
68
+ if ((ROWS%16 != 0 ) || (COLS%16 != 0)) {
69
+ printf("[srad] Error: the number of rows and columns must be multiples of 16\n");
70
+ fflush(stdout); exit(1);
71
+ }
72
+
73
+ // Initialising memory
74
+ printf("\n[srad] Initialising memory"); fflush(stdout);
75
+ int size = COLS*ROWS;
76
+ int size_roi = (R2-R1+1)*(C2-C1+1);
77
+ float* values = (float*) malloc(sizeof(float)*size);
78
+ float* coefficent = (float*) malloc(sizeof(float)*size);
79
+ float* dN = (float*) malloc(sizeof(float)*size);
80
+ float* dS = (float*) malloc(sizeof(float)*size);
81
+ float* dW = (float*) malloc(sizeof(float)*size);
82
+ float* dE = (float*) malloc(sizeof(float)*size);
83
+
84
+ // Populate the input matrix
85
+ printf("\n[srad] Populating the input matrix with random values"); fflush(stdout);
86
+ for (i=0; i<ROWS; i++) {
87
+ for (j=0; j<COLS; j++) {
88
+ temp_value = rand()/(float)RAND_MAX;
89
+ values[i*COLS+j] = (float)exp(temp_value);
90
+ }
91
+ }
92
+
93
+ // Perform the computation a given number of times
94
+ printf("\n[srad] Performing the computation %d times",NITER); fflush(stdout);
95
+ for (iter=0; iter<NITER; iter++) {
96
+
97
+ // Compute the mean, the variance and the speckle scale function (q0s) of the region of interest (ROI)
98
+ sum1 = 0;
99
+ sum2 = 0;
100
+ for (i=R1; i<=R2; i++) {
101
+ for (j=C1; j<=C2; j++) {
102
+ temp_value = values[i*COLS+j];
103
+ sum1 += temp_value;
104
+ sum2 += temp_value*temp_value;
105
+ }
106
+ }
107
+ mean_roi = sum1/size_roi;
108
+ var_roi = (sum2/size_roi) - mean_roi*mean_roi;
109
+ q0s = var_roi / (mean_roi*mean_roi);
110
+
111
+ // Iterate over the full image and compute
112
+ #pragma scop
113
+ for (i=0; i<ROWS; i++) {
114
+ for (j=0; j<COLS; j++) {
115
+ index = i*COLS+j;
116
+ current_value = values[index];
117
+
118
+ // Compute the directional derivates (N,S,W,E)
119
+ if (i==0) { dN[index] = 0; }
120
+ else { dN[index] = values[(i-1)*COLS + j ] - current_value; }
121
+ if (i==ROWS-1) { dS[index] = 0; }
122
+ else { dS[index] = values[(i+1)*COLS + j ] - current_value; }
123
+ if (j==0) { dW[index] = 0; }
124
+ else { dW[index] = values[i *COLS + (j-1)] - current_value; }
125
+ if (j==COLS-1) { dE[index] = 0; }
126
+ else { dE[index] = values[i *COLS + (j+1)] - current_value; }
127
+
128
+ // Compute the instantaneous coefficient of variation (qs) (equation 35)
129
+ G2 = (dN[index]*dN[index] + dS[index]*dS[index] + dW[index]*dW[index] + dE[index]*dE[index]) / (current_value*current_value);
130
+ L = (dN[index] + dS[index] + dW[index] + dE[index] ) / (current_value );
131
+ temp_a = (0.5*G2)-((1.0/16.0)*(L*L));
132
+ temp_b = 1+(0.25*L);
133
+ qs = temp_a/(temp_b*temp_b);
134
+
135
+ // Set the diffusion coefficent (equation 33)
136
+ coefficent[index] = 1.0 / (1.0+( (qs-q0s)/(q0s*(1+q0s)) ));
137
+
138
+ // Saturate the diffusion coefficent
139
+ if (coefficent[index] < 0) {
140
+ coefficent[index] = 0;
141
+ }
142
+ else if (coefficent[index] > 1) {
143
+ coefficent[index] = 1;
144
+ }
145
+ }
146
+ }
147
+
148
+ // Iterate over the full image again and compute the final values
149
+ for (i=0; i<ROWS; i++) {
150
+ for (j=0; j<COLS; j++) {
151
+ index = i*COLS+j;
152
+
153
+ // Calculate the diffusion coefficent
154
+ cN = coefficent[i *COLS+j ];
155
+ if (i==ROWS-1) { cS = 0; }
156
+ else { cS = coefficent[(i+1)*COLS+j ]; }
157
+ cW = coefficent[i *COLS+j ];
158
+ if (j==COLS-1) { cE = 0; }
159
+ else { cE = coefficent[i *COLS+(j+1)]; }
160
+
161
+ // Calculate the divergence (equation 58)
162
+ divergence = cN*dN[index] + cS*dS[index] + cW*dW[index] + cE*dE[index];
163
+
164
+ // Update the image accordingly (equation 61)
165
+ values[index] = values[index] + 0.25*LAMBDA*divergence;
166
+ }
167
+ }
168
+ #pragma endscop
169
+ }
170
+
171
+ // Print the values matrix
172
+ printf("\n[srad] Printing the output matrix:\n\n"); fflush(stdout);
173
+ for (i=0; i<ROWS; i++) {
174
+ for (j=0; j<COLS; j++) {
175
+ printf("%.5f ", values[i*COLS+j]);
176
+ }
177
+ printf("\n");
178
+ }
179
+
180
+ // Clean-up and exit
181
+ printf("\n[srad] Completed\n\n"); fflush(stdout);
182
+ free(values); free(coefficent);
183
+ free(dN); free(dS); free(dW); free(dE);
184
+ fflush(stdout);
185
+ return 0;
186
+ }
187
+
188
+ //########################################################################