bones-compiler 1.1.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
@@ -0,0 +1,228 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // demonstrates the use of Bones for an example application: 'Hotspot', taken from
4
+ // the Rodinia benchmark suite. For more information on the application or on Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on Hotspot
8
+ // Article............http://dx.doi.org/10.1109/TVLSI.2006.876103
9
+ // Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........applications/hotspot.c
17
+ // Authors............Cedric Nugteren
18
+ // Last modified on...10-Aug-2012
19
+ //
20
+
21
+ //########################################################################
22
+ //### Includes
23
+ //########################################################################
24
+
25
+ #include <stdio.h>
26
+ #include <stdlib.h>
27
+ #include <math.h>
28
+
29
+ //########################################################################
30
+ //### Input parameters
31
+ //########################################################################
32
+
33
+ #define GRID_ROWS 64 // Number of rows in the grid (positive integer)
34
+ #define GRID_COLS 64 // Number of columns in the grid (positive integer)
35
+ #define SIM_TIME 2 // Number of iterations
36
+ #define TEMPERATURE_FILE "data/hotspot_temperature_64.txt" // Name of the file containing the initial temperature values of each cell
37
+ #define POWER_FILE "data/hotspot_power_64.txt" // Name of the file containing the dissipated power values of each cell
38
+
39
+ //########################################################################
40
+ //### Defines
41
+ //########################################################################
42
+
43
+ #define STRING_SIZE 256 // Length of the strings in the temperature and power files
44
+ #define MAX_PD (3.0e6) // Maximum power density possible (say 300W for a 10mm x 10mm chip)
45
+ #define PRECISION 0.001 // Required precision in degrees
46
+ #define SPEC_HEAT_SI 1.75e6 // Specific heat of silicon in J/(m^3*K)
47
+ #define K_SI 100 // Thermal conductivity of silicon in W/(m*K)
48
+ #define FACTOR_CHIP 0.5 // Capacitance fitting factor
49
+ #define T_CHIP 0.0005 // Chip thickness
50
+ #define CHIP_HEIGHT 0.016 // Chip height
51
+ #define CHIP_WIDTH 0.016 // Chip width
52
+ #define AMB_TEMP 80.0 // Ambient temperature, assuming no package at all
53
+
54
+ //########################################################################
55
+ //### Forward declarations
56
+ //########################################################################
57
+
58
+ void read_input(double* array, const char* filename);
59
+
60
+ //########################################################################
61
+ //### Start of the main function
62
+ //########################################################################
63
+
64
+ int main(void) {
65
+
66
+ // Declare the loop iterators
67
+ int r,c,iter;
68
+
69
+ // Declare other/helper variables
70
+ int index;
71
+ double delta;
72
+ int row = GRID_ROWS;
73
+ int col = GRID_COLS;
74
+ double grid_height = CHIP_HEIGHT/row;
75
+ double grid_width = CHIP_WIDTH/col;
76
+
77
+ // Set domain variables
78
+ double cap = FACTOR_CHIP*SPEC_HEAT_SI*T_CHIP*grid_width*grid_height;
79
+ double Rx = grid_width / (2.0*K_SI*T_CHIP*grid_height);
80
+ double Ry = grid_height / (2.0*K_SI*T_CHIP*grid_width);
81
+ double Rz = T_CHIP / (K_SI*grid_height*grid_width);
82
+ double max_slope = MAX_PD / (FACTOR_CHIP*T_CHIP*SPEC_HEAT_SI);
83
+ double step = PRECISION / max_slope;
84
+
85
+ // Initialising memory
86
+ printf("\n[hotspot] Initialising memory"); fflush(stdout);
87
+ double* temperature = (double*) calloc(row*col, sizeof(double));
88
+ double* power = (double*) calloc(row*col, sizeof(double));
89
+ double* result = (double*) calloc(row*col, sizeof(double));
90
+
91
+ // Read initial temperature and power arrays
92
+ printf("\n[hotspot] Populating memory"); fflush(stdout);
93
+ read_input(temperature, TEMPERATURE_FILE);
94
+ read_input(power, POWER_FILE);
95
+
96
+ // Perform the computation a given number of times
97
+ printf("\n[hotspot] Performing the computation %d times",SIM_TIME); fflush(stdout);
98
+ #pragma scop
99
+ for (iter=0; iter<SIM_TIME; iter++) {
100
+
101
+ // Transient solver driver routine: convert the heat transfer differential equations to difference equations
102
+ // and solve the difference equations by iterating
103
+ for (r=0; r<row; r++) {
104
+ for (c=0; c<col; c++) {
105
+
106
+ // Corner 1
107
+ if ( (r == 0) && (c == 0) ) {
108
+ delta = (step / cap) * (power[0] +
109
+ (temperature[1] - temperature[0]) / Rx +
110
+ (temperature[col] - temperature[0]) / Ry +
111
+ (AMB_TEMP - temperature[0]) / Rz);
112
+ }
113
+ // Corner 2
114
+ else if ((r == 0) && (c == col-1)) {
115
+ delta = (step / cap) * (power[c] +
116
+ (temperature[c-1] - temperature[c]) / Rx +
117
+ (temperature[c+col] - temperature[c]) / Ry +
118
+ (AMB_TEMP - temperature[c]) / Rz);
119
+ }
120
+ // Corner 3
121
+ else if ((r == row-1) && (c == col-1)) {
122
+ delta = (step / cap) * (power[r*col+c] +
123
+ (temperature[r*col+c-1] - temperature[r*col+c]) / Rx +
124
+ (temperature[(r-1)*col+c] - temperature[r*col+c]) / Ry +
125
+ (AMB_TEMP - temperature[r*col+c]) / Rz);
126
+ }
127
+ // Corner 4
128
+ else if ((r == row-1) && (c == 0)) {
129
+ delta = (step / cap) * (power[r*col] +
130
+ (temperature[r*col+1] - temperature[r*col]) / Rx +
131
+ (temperature[(r-1)*col] - temperature[r*col]) / Ry +
132
+ (AMB_TEMP - temperature[r*col]) / Rz);
133
+ }
134
+ // Edge 1
135
+ else if (r == 0) {
136
+ delta = (step / cap) * (power[c] +
137
+ (temperature[c+1] + temperature[c-1] - 2.0*temperature[c]) / Rx +
138
+ (temperature[col+c] - temperature[c]) / Ry +
139
+ (AMB_TEMP - temperature[c]) / Rz);
140
+ }
141
+ // Edge 2
142
+ else if (c == col-1) {
143
+ delta = (step / cap) * (power[r*col+c] +
144
+ (temperature[(r+1)*col+c] + temperature[(r-1)*col+c] - 2.0*temperature[r*col+c]) / Ry +
145
+ (temperature[r*col+c-1] - temperature[r*col+c]) / Rx +
146
+ (AMB_TEMP - temperature[r*col+c]) / Rz);
147
+ }
148
+ // Edge 3
149
+ else if (r == row-1) {
150
+ delta = (step / cap) * (power[r*col+c] +
151
+ (temperature[r*col+c+1] + temperature[r*col+c-1] - 2.0*temperature[r*col+c]) / Rx +
152
+ (temperature[(r-1)*col+c] - temperature[r*col+c]) / Ry +
153
+ (AMB_TEMP - temperature[r*col+c]) / Rz);
154
+ }
155
+ // Edge 4
156
+ else if (c == 0) {
157
+ delta = (step / cap) * (power[r*col] +
158
+ (temperature[(r+1)*col] + temperature[(r-1)*col] - 2.0*temperature[r*col]) / Ry +
159
+ (temperature[r*col+1] - temperature[r*col]) / Rx +
160
+ (AMB_TEMP - temperature[r*col]) / Rz);
161
+ }
162
+ // Inside the chip
163
+ else {
164
+ delta = (step / cap) * (power[r*col+c] +
165
+ (temperature[(r+1)*col+c] + temperature[(r-1)*col+c] - 2.0*temperature[r*col+c]) / Ry +
166
+ (temperature[r*col+c+1] + temperature[r*col+c-1] - 2.0*temperature[r*col+c]) / Rx +
167
+ (AMB_TEMP - temperature[r*col+c]) / Rz);
168
+ }
169
+
170
+ // Update the temperatures
171
+ result[r*col+c] = temperature[r*col+c] + delta;
172
+ }
173
+ }
174
+
175
+ // Copy the result as the new temperatures
176
+ for (r=0; r<row; r++) {
177
+ for (c=0; c<col; c++) {
178
+ temperature[r*col+c] = result[r*col+c];
179
+ }
180
+ }
181
+ }
182
+ #pragma endscop
183
+
184
+ // Print the values matrix
185
+ printf("\n[hotspot] Printing the final temperatures:\n\n"); fflush(stdout);
186
+ for (r=0; r<row; r++) {
187
+ for (c=0; c<col; c++) {
188
+ index = r*col+c;
189
+ printf("%6d: %.3lf ", index, temperature[index]);
190
+ }
191
+ printf("\n");
192
+ }
193
+
194
+ // Clean-up and exit
195
+ printf("\n[hotspot] Completed\n\n"); fflush(stdout);
196
+ free(temperature); free(power); free(result);
197
+ fflush(stdout);
198
+ return 0;
199
+ }
200
+
201
+ //########################################################################
202
+ //### Function to read an input file (power or temperature values)
203
+ //########################################################################
204
+
205
+ void read_input(double* array, const char* filename) {
206
+ int r, c;
207
+ char string[STRING_SIZE];
208
+ double value;
209
+
210
+ // Open the file
211
+ FILE* file_pointer = fopen(filename, "r");
212
+ if (!file_pointer) { printf("\n[hotspot] Error: file '%s' could not be opened for reading\n\n", filename); fflush(stdout); exit(1); }
213
+
214
+ // Process the file
215
+ for (r=0; r<GRID_ROWS; r++) {
216
+ for (c=0; c<GRID_COLS; c++) {
217
+ fgets(string, STRING_SIZE, file_pointer);
218
+ if (feof(file_pointer)) { printf("\n[hotspot] Error: not enough lines in file '%s'\n\n", filename); fflush(stdout); exit(1); }
219
+ if ((sscanf(string, "%lf", &value) != 1) ) { printf("\n[hotspot] Error: invalid file format for '%s'\n\n", filename); fflush(stdout); exit(1); }
220
+ array[r*GRID_COLS+c] = value;
221
+ }
222
+ }
223
+
224
+ // Clean-up and return
225
+ fclose(file_pointer);
226
+ }
227
+
228
+ //########################################################################
@@ -0,0 +1,164 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // demonstrates the use of Bones for an example application: 'K-means clustering',
4
+ // as also available in the Rodinia benchmark suite. For more information on the
5
+ // application or on Bones please use the contact information below.
6
+ //
7
+ // == This implementation of K-means clustering is inspired by:
8
+ // Author.............Roger Zhang
9
+ // Web address........http://cs.smu.ca/~r_zhang/code/kmeans.c
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........applications/kmeans.c
17
+ // Authors............Cedric Nugteren
18
+ // Last modified on...10-Aug-2012
19
+ //
20
+
21
+ //########################################################################
22
+ //### Includes
23
+ //########################################################################
24
+
25
+ #include <stdio.h>
26
+ #include <math.h>
27
+ #include <float.h>
28
+
29
+ //########################################################################
30
+ //### Defines
31
+ //########################################################################
32
+
33
+ #define SIZE 512
34
+ #define NUM_CLUSTERS 20
35
+ #define DIMENSIONS 2
36
+ #define THRESHOLD 0.0001
37
+
38
+ //########################################################################
39
+ //### Start of the main function
40
+ //########################################################################
41
+ int main(void) {
42
+
43
+ // Declare the loop iterators
44
+ int i,j,k;
45
+
46
+ // Declare the error variables
47
+ double error = DBL_MAX;
48
+ double old_error;
49
+ int iterations = 0;
50
+
51
+ // Declare the distance variables and arrays
52
+ double distance[1];
53
+ double min_distance[1];
54
+ double distances[SIZE];
55
+
56
+ // Initialising memory
57
+ printf("\n[k-means] Initialising memory"); fflush(stdout);
58
+ double input[SIZE][DIMENSIONS];
59
+ double centroids[NUM_CLUSTERS][DIMENSIONS];
60
+ double centroids_temp[NUM_CLUSTERS][DIMENSIONS];
61
+ int output[SIZE];
62
+ int counts[NUM_CLUSTERS];
63
+
64
+ // Set the input data
65
+ printf("\n[k-means] Populating memory"); fflush(stdout);
66
+ for (i=0; i<SIZE; i++) {
67
+ input[i][0] = (i/16);
68
+ input[i][1] = i%4;
69
+ }
70
+
71
+ // Pick k initial centroids
72
+ printf("\n[k-means] Setting 'k' initial centroids"); fflush(stdout);
73
+ for (k=0; k<NUM_CLUSTERS; k++) {
74
+ for (j=0; j<DIMENSIONS; j++) {
75
+ centroids[k][j] = input[(SIZE/NUM_CLUSTERS)*k][j];
76
+ }
77
+ }
78
+
79
+ // Perform the k-means clustering algorithm, end when the error is not becoming smaller
80
+ printf("\n[k-means] Perform the clustering algorithm"); fflush(stdout);
81
+ do {
82
+ #pragma scop
83
+
84
+ // Save the error from the last step
85
+ old_error = error;
86
+ error = 0;
87
+
88
+ // Clear old counts and temporary centroids
89
+ for (k=0; k<NUM_CLUSTERS; k++) {
90
+ counts[k] = 0;
91
+ for (j=0; j<DIMENSIONS; j++) {
92
+ centroids_temp[k][j] = 0;
93
+ }
94
+ }
95
+
96
+ // Iterate over all data points
97
+ for (i=0; i<SIZE; i++) {
98
+
99
+ // Find the closest cluster
100
+ min_distance[0] = DBL_MAX;
101
+ for (k=0; k<NUM_CLUSTERS; k++) {
102
+ distance[0] = 0;
103
+ for (j=0; j<DIMENSIONS; j++) {
104
+ distance[0] += pow(input[i][j]-centroids[k][j],2);
105
+ }
106
+ if (distance[0] < min_distance[0]) {
107
+ output[i] = k;
108
+ min_distance[0] = distance[0];
109
+ }
110
+ }
111
+
112
+ // Update the size and temporary centroid of the destination cluster
113
+ for (j=0; j<DIMENSIONS; j++) {
114
+ centroids_temp[output[i]][j] += input[i][j];
115
+ }
116
+ counts[output[i]] += 1;
117
+
118
+ // Store the resulting distance
119
+ distances[i] = min_distance[0];
120
+ }
121
+
122
+ // Update the standard error
123
+ for (i=0; i<SIZE; i++) {
124
+ error += distances[i];
125
+ }
126
+
127
+ // Update all centroids
128
+ for (k=0; k<NUM_CLUSTERS; k++) {
129
+ for (j=0; j<DIMENSIONS; j++) {
130
+ if (counts[k]) {
131
+ centroids[k][j] = centroids_temp[k][j] / counts[k];
132
+ }
133
+ else {
134
+ centroids[k][j] = centroids_temp[k][j];
135
+ }
136
+ }
137
+ }
138
+
139
+ // Go to the next iteration
140
+ iterations += 1;
141
+
142
+ #pragma endscop
143
+ } while (fabs(error-old_error) > THRESHOLD);
144
+
145
+ // Print the results
146
+ printf("\n[k-means] Algorithm finished in %d iterations with an error of %.3lf", iterations, error); fflush(stdout);
147
+ printf("\n[k-means] Printing the results: \n\n"); fflush(stdout);
148
+ for (k=0; k<NUM_CLUSTERS; k++) {
149
+ printf("Cluster %2i: ", k);
150
+ for (i=0; i<SIZE; i++) {
151
+ if (output[i] == k) {
152
+ printf("%3i ", i);
153
+ }
154
+ }
155
+ printf("\n");
156
+ }
157
+
158
+ // Clean-up and exit the function
159
+ printf("\n[k-means] Completed\n\n"); fflush(stdout);
160
+ fflush(stdout);
161
+ return 0;
162
+ }
163
+
164
+ //########################################################################
@@ -0,0 +1,188 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // demonstrates the use of Bones for an example application: 'Speckle Reducing
4
+ // Anisotropic Diffusion' or 'SRAD', taken from the Rodinia benchmark suite. For
5
+ // more information on the application or on Bones please use the contact infor-
6
+ // mation below.
7
+ //
8
+ // == More information on SRAD (Speckle Reducing Anisotropic Diffusion):
9
+ // Article............http://dx.doi.org/10.1109/TIP.2002.804276
10
+ // Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
11
+ //
12
+ // == More information on Bones
13
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
14
+ // Web address........http://parse.ele.tue.nl/bones/
15
+ //
16
+ // == File information
17
+ // Filename...........applications/srad.c
18
+ // Authors............Cedric Nugteren
19
+ // Original authors...Rob Janiczek, Drew Gilliam, Lukasz Szafaryn
20
+ // Last modified on...10-Aug-2012
21
+ //
22
+
23
+ //########################################################################
24
+ //### Includes
25
+ //########################################################################
26
+
27
+ #include <stdio.h>
28
+ #include <stdlib.h>
29
+ #include <math.h>
30
+
31
+ //########################################################################
32
+ //### Defines
33
+ //########################################################################
34
+
35
+ #define ROWS 128 // Number of ROWS in the domain
36
+ #define COLS 128 // Number of COLS in the domain
37
+ #define R1 0 // y1 position of the speckle
38
+ #define R2 31 // y2 position of the speckle
39
+ #define C1 0 // x1 position of the speckle
40
+ #define C2 31 // x2 position of the speckle
41
+ #define LAMBDA 0.5 // Lambda value
42
+ #define NITER 2 // Number of iterations
43
+
44
+ //########################################################################
45
+ //### Start of the main function
46
+ //########################################################################
47
+
48
+ int main(void) {
49
+
50
+ // Declare the loop iterators
51
+ int i,j,iter;
52
+
53
+ // Declare domain variables
54
+ float mean_roi, var_roi;
55
+ float q0s, qs;
56
+ float divergence;
57
+ float cN, cS, cW, cE;
58
+ float G2, L;
59
+
60
+ // Declare other/helper variables
61
+ int index;
62
+ float temp_value;
63
+ float sum1, sum2;
64
+ float current_value;
65
+ float temp_a, temp_b;
66
+
67
+ // Check for valid row and column sizes
68
+ if ((ROWS%16 != 0 ) || (COLS%16 != 0)) {
69
+ printf("[srad] Error: the number of rows and columns must be multiples of 16\n");
70
+ fflush(stdout); exit(1);
71
+ }
72
+
73
+ // Initialising memory
74
+ printf("\n[srad] Initialising memory"); fflush(stdout);
75
+ int size = COLS*ROWS;
76
+ int size_roi = (R2-R1+1)*(C2-C1+1);
77
+ float* values = (float*) malloc(sizeof(float)*size);
78
+ float* coefficent = (float*) malloc(sizeof(float)*size);
79
+ float* dN = (float*) malloc(sizeof(float)*size);
80
+ float* dS = (float*) malloc(sizeof(float)*size);
81
+ float* dW = (float*) malloc(sizeof(float)*size);
82
+ float* dE = (float*) malloc(sizeof(float)*size);
83
+
84
+ // Populate the input matrix
85
+ printf("\n[srad] Populating the input matrix with random values"); fflush(stdout);
86
+ for (i=0; i<ROWS; i++) {
87
+ for (j=0; j<COLS; j++) {
88
+ temp_value = rand()/(float)RAND_MAX;
89
+ values[i*COLS+j] = (float)exp(temp_value);
90
+ }
91
+ }
92
+
93
+ // Perform the computation a given number of times
94
+ printf("\n[srad] Performing the computation %d times",NITER); fflush(stdout);
95
+ for (iter=0; iter<NITER; iter++) {
96
+
97
+ // Compute the mean, the variance and the speckle scale function (q0s) of the region of interest (ROI)
98
+ sum1 = 0;
99
+ sum2 = 0;
100
+ for (i=R1; i<=R2; i++) {
101
+ for (j=C1; j<=C2; j++) {
102
+ temp_value = values[i*COLS+j];
103
+ sum1 += temp_value;
104
+ sum2 += temp_value*temp_value;
105
+ }
106
+ }
107
+ mean_roi = sum1/size_roi;
108
+ var_roi = (sum2/size_roi) - mean_roi*mean_roi;
109
+ q0s = var_roi / (mean_roi*mean_roi);
110
+
111
+ // Iterate over the full image and compute
112
+ #pragma scop
113
+ for (i=0; i<ROWS; i++) {
114
+ for (j=0; j<COLS; j++) {
115
+ index = i*COLS+j;
116
+ current_value = values[index];
117
+
118
+ // Compute the directional derivates (N,S,W,E)
119
+ if (i==0) { dN[index] = 0; }
120
+ else { dN[index] = values[(i-1)*COLS + j ] - current_value; }
121
+ if (i==ROWS-1) { dS[index] = 0; }
122
+ else { dS[index] = values[(i+1)*COLS + j ] - current_value; }
123
+ if (j==0) { dW[index] = 0; }
124
+ else { dW[index] = values[i *COLS + (j-1)] - current_value; }
125
+ if (j==COLS-1) { dE[index] = 0; }
126
+ else { dE[index] = values[i *COLS + (j+1)] - current_value; }
127
+
128
+ // Compute the instantaneous coefficient of variation (qs) (equation 35)
129
+ G2 = (dN[index]*dN[index] + dS[index]*dS[index] + dW[index]*dW[index] + dE[index]*dE[index]) / (current_value*current_value);
130
+ L = (dN[index] + dS[index] + dW[index] + dE[index] ) / (current_value );
131
+ temp_a = (0.5*G2)-((1.0/16.0)*(L*L));
132
+ temp_b = 1+(0.25*L);
133
+ qs = temp_a/(temp_b*temp_b);
134
+
135
+ // Set the diffusion coefficent (equation 33)
136
+ coefficent[index] = 1.0 / (1.0+( (qs-q0s)/(q0s*(1+q0s)) ));
137
+
138
+ // Saturate the diffusion coefficent
139
+ if (coefficent[index] < 0) {
140
+ coefficent[index] = 0;
141
+ }
142
+ else if (coefficent[index] > 1) {
143
+ coefficent[index] = 1;
144
+ }
145
+ }
146
+ }
147
+
148
+ // Iterate over the full image again and compute the final values
149
+ for (i=0; i<ROWS; i++) {
150
+ for (j=0; j<COLS; j++) {
151
+ index = i*COLS+j;
152
+
153
+ // Calculate the diffusion coefficent
154
+ cN = coefficent[i *COLS+j ];
155
+ if (i==ROWS-1) { cS = 0; }
156
+ else { cS = coefficent[(i+1)*COLS+j ]; }
157
+ cW = coefficent[i *COLS+j ];
158
+ if (j==COLS-1) { cE = 0; }
159
+ else { cE = coefficent[i *COLS+(j+1)]; }
160
+
161
+ // Calculate the divergence (equation 58)
162
+ divergence = cN*dN[index] + cS*dS[index] + cW*dW[index] + cE*dE[index];
163
+
164
+ // Update the image accordingly (equation 61)
165
+ values[index] = values[index] + 0.25*LAMBDA*divergence;
166
+ }
167
+ }
168
+ #pragma endscop
169
+ }
170
+
171
+ // Print the values matrix
172
+ printf("\n[srad] Printing the output matrix:\n\n"); fflush(stdout);
173
+ for (i=0; i<ROWS; i++) {
174
+ for (j=0; j<COLS; j++) {
175
+ printf("%.5f ", values[i*COLS+j]);
176
+ }
177
+ printf("\n");
178
+ }
179
+
180
+ // Clean-up and exit
181
+ printf("\n[srad] Completed\n\n"); fflush(stdout);
182
+ free(values); free(coefficent);
183
+ free(dN); free(dS); free(dW); free(dE);
184
+ fflush(stdout);
185
+ return 0;
186
+ }
187
+
188
+ //########################################################################