bones-compiler 1.1.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
@@ -0,0 +1,228 @@
|
|
1
|
+
//
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
3
|
+
// demonstrates the use of Bones for an example application: 'Hotspot', taken from
|
4
|
+
// the Rodinia benchmark suite. For more information on the application or on Bones
|
5
|
+
// please use the contact information below.
|
6
|
+
//
|
7
|
+
// == More information on Hotspot
|
8
|
+
// Article............http://dx.doi.org/10.1109/TVLSI.2006.876103
|
9
|
+
// Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
|
10
|
+
//
|
11
|
+
// == More information on Bones
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
14
|
+
//
|
15
|
+
// == File information
|
16
|
+
// Filename...........applications/hotspot.c
|
17
|
+
// Authors............Cedric Nugteren
|
18
|
+
// Last modified on...10-Aug-2012
|
19
|
+
//
|
20
|
+
|
21
|
+
//########################################################################
|
22
|
+
//### Includes
|
23
|
+
//########################################################################
|
24
|
+
|
25
|
+
#include <stdio.h>
|
26
|
+
#include <stdlib.h>
|
27
|
+
#include <math.h>
|
28
|
+
|
29
|
+
//########################################################################
|
30
|
+
//### Input parameters
|
31
|
+
//########################################################################
|
32
|
+
|
33
|
+
#define GRID_ROWS 64 // Number of rows in the grid (positive integer)
|
34
|
+
#define GRID_COLS 64 // Number of columns in the grid (positive integer)
|
35
|
+
#define SIM_TIME 2 // Number of iterations
|
36
|
+
#define TEMPERATURE_FILE "data/hotspot_temperature_64.txt" // Name of the file containing the initial temperature values of each cell
|
37
|
+
#define POWER_FILE "data/hotspot_power_64.txt" // Name of the file containing the dissipated power values of each cell
|
38
|
+
|
39
|
+
//########################################################################
|
40
|
+
//### Defines
|
41
|
+
//########################################################################
|
42
|
+
|
43
|
+
#define STRING_SIZE 256 // Length of the strings in the temperature and power files
|
44
|
+
#define MAX_PD (3.0e6) // Maximum power density possible (say 300W for a 10mm x 10mm chip)
|
45
|
+
#define PRECISION 0.001 // Required precision in degrees
|
46
|
+
#define SPEC_HEAT_SI 1.75e6 //
|
47
|
+
#define K_SI 100 //
|
48
|
+
#define FACTOR_CHIP 0.5 // Capacitance fitting factor
|
49
|
+
#define T_CHIP 0.0005 // Chip thickness (multiplied with the cell width/height in 'cap' and 'Rz', so a length rather than a temperature)
|
50
|
+
#define CHIP_HEIGHT 0.016 // Chip height
|
51
|
+
#define CHIP_WIDTH 0.016 // Chip width
|
52
|
+
#define AMB_TEMP 80.0 // Ambient temperature, assuming no package at all
|
53
|
+
|
54
|
+
//########################################################################
|
55
|
+
//### Forward declarations
|
56
|
+
//########################################################################
|
57
|
+
|
58
|
+
void read_input(double* array, const char* filename);
|
59
|
+
|
60
|
+
//########################################################################
|
61
|
+
//### Start of the main function
|
62
|
+
//########################################################################
|
63
|
+
|
64
|
+
int main(void) {
|
65
|
+
|
66
|
+
// Declare the loop iterators
|
67
|
+
int r,c,iter;
|
68
|
+
|
69
|
+
// Declare other/helper variables
|
70
|
+
int index;
|
71
|
+
double delta;
|
72
|
+
int row = GRID_ROWS;
|
73
|
+
int col = GRID_COLS;
|
74
|
+
double grid_height = CHIP_HEIGHT/row;
|
75
|
+
double grid_width = CHIP_WIDTH/col;
|
76
|
+
|
77
|
+
// Set domain variables
|
78
|
+
double cap = FACTOR_CHIP*SPEC_HEAT_SI*T_CHIP*grid_width*grid_height;
|
79
|
+
double Rx = grid_width / (2.0*K_SI*T_CHIP*grid_height);
|
80
|
+
double Ry = grid_height / (2.0*K_SI*T_CHIP*grid_width);
|
81
|
+
double Rz = T_CHIP / (K_SI*grid_height*grid_width);
|
82
|
+
double max_slope = MAX_PD / (FACTOR_CHIP*T_CHIP*SPEC_HEAT_SI);
|
83
|
+
double step = PRECISION / max_slope;
|
84
|
+
|
85
|
+
// Initialising memory
|
86
|
+
printf("\n[hotspot] Initialising memory"); fflush(stdout);
|
87
|
+
double* temperature = (double*) calloc(row*col, sizeof(double));
|
88
|
+
double* power = (double*) calloc(row*col, sizeof(double));
|
89
|
+
double* result = (double*) calloc(row*col, sizeof(double));
|
90
|
+
|
91
|
+
// Read initial temperature and power arrays
|
92
|
+
printf("\n[hotspot] Populating memory"); fflush(stdout);
|
93
|
+
read_input(temperature, TEMPERATURE_FILE);
|
94
|
+
read_input(power, POWER_FILE);
|
95
|
+
|
96
|
+
// Perform the computation a given number of times
|
97
|
+
printf("\n[hotspot] Performing the computation %d times",SIM_TIME); fflush(stdout);
|
98
|
+
#pragma scop
|
99
|
+
for (iter=0; iter<SIM_TIME; iter++) {
|
100
|
+
|
101
|
+
// Transient solver driver routine: convert the heat transfer differential equations to difference equations
|
102
|
+
// and solve the difference equations by iterating
|
103
|
+
for (r=0; r<row; r++) {
|
104
|
+
for (c=0; c<col; c++) {
|
105
|
+
|
106
|
+
// Corner 1
|
107
|
+
if ( (r == 0) && (c == 0) ) {
|
108
|
+
delta = (step / cap) * (power[0] +
|
109
|
+
(temperature[1] - temperature[0]) / Rx +
|
110
|
+
(temperature[col] - temperature[0]) / Ry +
|
111
|
+
(AMB_TEMP - temperature[0]) / Rz);
|
112
|
+
}
|
113
|
+
// Corner 2
|
114
|
+
else if ((r == 0) && (c == col-1)) {
|
115
|
+
delta = (step / cap) * (power[c] +
|
116
|
+
(temperature[c-1] - temperature[c]) / Rx +
|
117
|
+
(temperature[c+col] - temperature[c]) / Ry +
|
118
|
+
(AMB_TEMP - temperature[c]) / Rz);
|
119
|
+
}
|
120
|
+
// Corner 3
|
121
|
+
else if ((r == row-1) && (c == col-1)) {
|
122
|
+
delta = (step / cap) * (power[r*col+c] +
|
123
|
+
(temperature[r*col+c-1] - temperature[r*col+c]) / Rx +
|
124
|
+
(temperature[(r-1)*col+c] - temperature[r*col+c]) / Ry +
|
125
|
+
(AMB_TEMP - temperature[r*col+c]) / Rz);
|
126
|
+
}
|
127
|
+
// Corner 4
|
128
|
+
else if ((r == row-1) && (c == 0)) {
|
129
|
+
delta = (step / cap) * (power[r*col] +
|
130
|
+
(temperature[r*col+1] - temperature[r*col]) / Rx +
|
131
|
+
(temperature[(r-1)*col] - temperature[r*col]) / Ry +
|
132
|
+
(AMB_TEMP - temperature[r*col]) / Rz);
|
133
|
+
}
|
134
|
+
// Edge 1
|
135
|
+
else if (r == 0) {
|
136
|
+
delta = (step / cap) * (power[c] +
|
137
|
+
(temperature[c+1] + temperature[c-1] - 2.0*temperature[c]) / Rx +
|
138
|
+
(temperature[col+c] - temperature[c]) / Ry +
|
139
|
+
(AMB_TEMP - temperature[c]) / Rz);
|
140
|
+
}
|
141
|
+
// Edge 2
|
142
|
+
else if (c == col-1) {
|
143
|
+
delta = (step / cap) * (power[r*col+c] +
|
144
|
+
(temperature[(r+1)*col+c] + temperature[(r-1)*col+c] - 2.0*temperature[r*col+c]) / Ry +
|
145
|
+
(temperature[r*col+c-1] - temperature[r*col+c]) / Rx +
|
146
|
+
(AMB_TEMP - temperature[r*col+c]) / Rz);
|
147
|
+
}
|
148
|
+
// Edge 3
|
149
|
+
else if (r == row-1) {
|
150
|
+
delta = (step / cap) * (power[r*col+c] +
|
151
|
+
(temperature[r*col+c+1] + temperature[r*col+c-1] - 2.0*temperature[r*col+c]) / Rx +
|
152
|
+
(temperature[(r-1)*col+c] - temperature[r*col+c]) / Ry +
|
153
|
+
(AMB_TEMP - temperature[r*col+c]) / Rz);
|
154
|
+
}
|
155
|
+
// Edge 4
|
156
|
+
else if (c == 0) {
|
157
|
+
delta = (step / cap) * (power[r*col] +
|
158
|
+
(temperature[(r+1)*col] + temperature[(r-1)*col] - 2.0*temperature[r*col]) / Ry +
|
159
|
+
(temperature[r*col+1] - temperature[r*col]) / Rx +
|
160
|
+
(AMB_TEMP - temperature[r*col]) / Rz);
|
161
|
+
}
|
162
|
+
// Inside the chip
|
163
|
+
else {
|
164
|
+
delta = (step / cap) * (power[r*col+c] +
|
165
|
+
(temperature[(r+1)*col+c] + temperature[(r-1)*col+c] - 2.0*temperature[r*col+c]) / Ry +
|
166
|
+
(temperature[r*col+c+1] + temperature[r*col+c-1] - 2.0*temperature[r*col+c]) / Rx +
|
167
|
+
(AMB_TEMP - temperature[r*col+c]) / Rz);
|
168
|
+
}
|
169
|
+
|
170
|
+
// Update the temperatures
|
171
|
+
result[r*col+c] = temperature[r*col+c] + delta;
|
172
|
+
}
|
173
|
+
}
|
174
|
+
|
175
|
+
// Copy the result as the new temperatures
|
176
|
+
for (r=0; r<row; r++) {
|
177
|
+
for (c=0; c<col; c++) {
|
178
|
+
temperature[r*col+c] = result[r*col+c];
|
179
|
+
}
|
180
|
+
}
|
181
|
+
}
|
182
|
+
#pragma endscop
|
183
|
+
|
184
|
+
// Print the values matrix
|
185
|
+
printf("\n[hotspot] Printing the final temperatures:\n\n"); fflush(stdout);
|
186
|
+
for (r=0; r<row; r++) {
|
187
|
+
for (c=0; c<col; c++) {
|
188
|
+
index = r*col+c;
|
189
|
+
printf("%6d: %.3lf ", index, temperature[index]);
|
190
|
+
}
|
191
|
+
printf("\n");
|
192
|
+
}
|
193
|
+
|
194
|
+
// Clean-up and exit
|
195
|
+
printf("\n[hotspot] Completed\n\n"); fflush(stdout);
|
196
|
+
free(temperature); free(power); free(result);
|
197
|
+
fflush(stdout);
|
198
|
+
return 0;
|
199
|
+
}
|
200
|
+
|
201
|
+
//########################################################################
|
202
|
+
//### Function to read an input file (power or temperature values)
|
203
|
+
//########################################################################
|
204
|
+
|
205
|
+
void read_input(double* array, const char* filename) {
|
206
|
+
int r, c;
|
207
|
+
char string[STRING_SIZE];
|
208
|
+
double value;
|
209
|
+
|
210
|
+
// Open the file
|
211
|
+
FILE* file_pointer = fopen(filename, "r");
|
212
|
+
if (!file_pointer) { printf("\n[hotspot] Error: file '%s' could not be opened for reading\n\n", filename); fflush(stdout); exit(1); }
|
213
|
+
|
214
|
+
// Process the file
|
215
|
+
for (r=0; r<GRID_ROWS; r++) {
|
216
|
+
for (c=0; c<GRID_COLS; c++) {
|
217
|
+
fgets(string, STRING_SIZE, file_pointer);
|
218
|
+
if (feof(file_pointer)) { printf("\n[hotspot] Error: not enough lines in file '%s'\n\n", filename); fflush(stdout); exit(1); }
|
219
|
+
if ((sscanf(string, "%lf", &value) != 1) ) { printf("\n[hotspot] Error: invalid file format for '%s'\n\n", filename); fflush(stdout); exit(1); }
|
220
|
+
array[r*GRID_COLS+c] = value;
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
// Clean-up and return
|
225
|
+
fclose(file_pointer);
|
226
|
+
}
|
227
|
+
|
228
|
+
//########################################################################
|
@@ -0,0 +1,164 @@
|
|
1
|
+
//
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
3
|
+
// demonstrates the use of Bones for an example application: 'K-means clustering',
|
4
|
+
// as also available in the Rodinia benchmark suite. For more information on the
|
5
|
+
// application or on Bones please use the contact information below.
|
6
|
+
//
|
7
|
+
// == This implementation of K-means clustering is inspired by:
|
8
|
+
// Author.............Roger Zhang
|
9
|
+
// Web address........http://cs.smu.ca/~r_zhang/code/kmeans.c
|
10
|
+
//
|
11
|
+
// == More information on Bones
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
14
|
+
//
|
15
|
+
// == File information
|
16
|
+
// Filename...........applications/kmeans.c
|
17
|
+
// Authors............Cedric Nugteren
|
18
|
+
// Last modified on...10-Aug-2012
|
19
|
+
//
|
20
|
+
|
21
|
+
//########################################################################
|
22
|
+
//### Includes
|
23
|
+
//########################################################################
|
24
|
+
|
25
|
+
#include <stdio.h>
|
26
|
+
#include <math.h>
|
27
|
+
#include <float.h>
|
28
|
+
|
29
|
+
//########################################################################
|
30
|
+
//### Defines
|
31
|
+
//########################################################################
|
32
|
+
|
33
|
+
#define SIZE 512
|
34
|
+
#define NUM_CLUSTERS 20
|
35
|
+
#define DIMENSIONS 2
|
36
|
+
#define THRESHOLD 0.0001
|
37
|
+
|
38
|
+
//########################################################################
|
39
|
+
//### Start of the main function
|
40
|
+
//########################################################################
|
41
|
+
int main(void) {
|
42
|
+
|
43
|
+
// Declare the loop iterators
|
44
|
+
int i,j,k;
|
45
|
+
|
46
|
+
// Declare the error variables
|
47
|
+
double error = DBL_MAX;
|
48
|
+
double old_error;
|
49
|
+
int iterations = 0;
|
50
|
+
|
51
|
+
// Declare the distance variables and arrays
|
52
|
+
double distance[1];
|
53
|
+
double min_distance[1];
|
54
|
+
double distances[SIZE];
|
55
|
+
|
56
|
+
// Initialising memory
|
57
|
+
printf("\n[k-means] Initialising memory"); fflush(stdout);
|
58
|
+
double input[SIZE][DIMENSIONS];
|
59
|
+
double centroids[NUM_CLUSTERS][DIMENSIONS];
|
60
|
+
double centroids_temp[NUM_CLUSTERS][DIMENSIONS];
|
61
|
+
int output[SIZE];
|
62
|
+
int counts[NUM_CLUSTERS];
|
63
|
+
|
64
|
+
// Set the input data
|
65
|
+
printf("\n[k-means] Populating memory"); fflush(stdout);
|
66
|
+
for (i=0; i<SIZE; i++) {
|
67
|
+
input[i][0] = (i/16);
|
68
|
+
input[i][1] = i%4;
|
69
|
+
}
|
70
|
+
|
71
|
+
// Pick k initial centroids
|
72
|
+
printf("\n[k-means] Setting 'k' initial centroids"); fflush(stdout);
|
73
|
+
for (k=0; k<NUM_CLUSTERS; k++) {
|
74
|
+
for (j=0; j<DIMENSIONS; j++) {
|
75
|
+
centroids[k][j] = input[(SIZE/NUM_CLUSTERS)*k][j];
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
// Perform the k-means clustering algorithm, end when the error is not becoming smaller
|
80
|
+
printf("\n[k-means] Perform the clustering algorithm"); fflush(stdout);
|
81
|
+
do {
|
82
|
+
#pragma scop
|
83
|
+
|
84
|
+
// Save the error from the last step
|
85
|
+
old_error = error;
|
86
|
+
error = 0;
|
87
|
+
|
88
|
+
// Clear old counts and temporary centroids
|
89
|
+
for (k=0; k<NUM_CLUSTERS; k++) {
|
90
|
+
counts[k] = 0;
|
91
|
+
for (j=0; j<DIMENSIONS; j++) {
|
92
|
+
centroids_temp[k][j] = 0;
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
// Iterate over all data points
|
97
|
+
for (i=0; i<SIZE; i++) {
|
98
|
+
|
99
|
+
// Find the closest cluster
|
100
|
+
min_distance[0] = DBL_MAX;
|
101
|
+
for (k=0; k<NUM_CLUSTERS; k++) {
|
102
|
+
distance[0] = 0;
|
103
|
+
for (j=0; j<DIMENSIONS; j++) {
|
104
|
+
distance[0] += pow(input[i][j]-centroids[k][j],2);
|
105
|
+
}
|
106
|
+
if (distance[0] < min_distance[0]) {
|
107
|
+
output[i] = k;
|
108
|
+
min_distance[0] = distance[0];
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
// Update the size and temporary centroid of the destination cluster
|
113
|
+
for (j=0; j<DIMENSIONS; j++) {
|
114
|
+
centroids_temp[output[i]][j] += input[i][j];
|
115
|
+
}
|
116
|
+
counts[output[i]] += 1;
|
117
|
+
|
118
|
+
// Store the resulting distance
|
119
|
+
distances[i] = min_distance[0];
|
120
|
+
}
|
121
|
+
|
122
|
+
// Update the standard error
|
123
|
+
for (i=0; i<SIZE; i++) {
|
124
|
+
error += distances[i];
|
125
|
+
}
|
126
|
+
|
127
|
+
// Update all centroids
|
128
|
+
for (k=0; k<NUM_CLUSTERS; k++) {
|
129
|
+
for (j=0; j<DIMENSIONS; j++) {
|
130
|
+
if (counts[k]) {
|
131
|
+
centroids[k][j] = centroids_temp[k][j] / counts[k];
|
132
|
+
}
|
133
|
+
else {
|
134
|
+
centroids[k][j] = centroids_temp[k][j];
|
135
|
+
}
|
136
|
+
}
|
137
|
+
}
|
138
|
+
|
139
|
+
// Go to the next iteration
|
140
|
+
iterations += 1;
|
141
|
+
|
142
|
+
#pragma endscop
|
143
|
+
} while (fabs(error-old_error) > THRESHOLD);
|
144
|
+
|
145
|
+
// Print the results
|
146
|
+
printf("\n[k-means] Algorithm finished in %d iterations with an error of %.3lf", iterations, error); fflush(stdout);
|
147
|
+
printf("\n[k-means] Printing the results: \n\n"); fflush(stdout);
|
148
|
+
for (k=0; k<NUM_CLUSTERS; k++) {
|
149
|
+
printf("Cluster %2i: ", k);
|
150
|
+
for (i=0; i<SIZE; i++) {
|
151
|
+
if (output[i] == k) {
|
152
|
+
printf("%3i ", i);
|
153
|
+
}
|
154
|
+
}
|
155
|
+
printf("\n");
|
156
|
+
}
|
157
|
+
|
158
|
+
// Clean-up and exit the function
|
159
|
+
printf("\n[k-means] Completed\n\n"); fflush(stdout);
|
160
|
+
fflush(stdout);
|
161
|
+
return 0;
|
162
|
+
}
|
163
|
+
|
164
|
+
//########################################################################
|
@@ -0,0 +1,188 @@
|
|
1
|
+
//
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
3
|
+
// demonstrates the use of Bones for an example application: 'Speckle Reducing
|
4
|
+
// Anisotropic Diffusion' or 'SRAD', taken from the Rodinia benchmark suite. For
|
5
|
+
// more information on the application or on Bones please use the contact infor-
|
6
|
+
// mation below.
|
7
|
+
//
|
8
|
+
// == More information on SRAD (Speckle Reducing Anisotropic Diffusion):
|
9
|
+
// Article............http://dx.doi.org/10.1109/TIP.2002.804276
|
10
|
+
// Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
|
11
|
+
//
|
12
|
+
// == More information on Bones
|
13
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
14
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
15
|
+
//
|
16
|
+
// == File information
|
17
|
+
// Filename...........applications/srad.c
|
18
|
+
// Authors............Cedric Nugteren
|
19
|
+
// Original authors...Rob Janiczek, Drew Gilliam, Lukasz Szafaryn
|
20
|
+
// Last modified on...10-Aug-2012
|
21
|
+
//
|
22
|
+
|
23
|
+
//########################################################################
|
24
|
+
//### Includes
|
25
|
+
//########################################################################
|
26
|
+
|
27
|
+
#include <stdio.h>
|
28
|
+
#include <stdlib.h>
|
29
|
+
#include <math.h>
|
30
|
+
|
31
|
+
//########################################################################
|
32
|
+
//### Defines
|
33
|
+
//########################################################################
|
34
|
+
|
35
|
+
#define ROWS 128 // Number of ROWS in the domain
|
36
|
+
#define COLS 128 // Number of COLS in the domain
|
37
|
+
#define R1 0 // y1 position of the speckle
|
38
|
+
#define R2 31 // y2 position of the speckle
|
39
|
+
#define C1 0 // x1 position of the speckle
|
40
|
+
#define C2 31 // x2 position of the speckle
|
41
|
+
#define LAMBDA 0.5 // Lambda value
|
42
|
+
#define NITER 2 // Number of iterations
|
43
|
+
|
44
|
+
//########################################################################
|
45
|
+
//### Start of the main function
|
46
|
+
//########################################################################
|
47
|
+
|
48
|
+
int main(void) {
|
49
|
+
|
50
|
+
// Declare the loop iterators
|
51
|
+
int i,j,iter;
|
52
|
+
|
53
|
+
// Declare domain variables
|
54
|
+
float mean_roi, var_roi;
|
55
|
+
float q0s, qs;
|
56
|
+
float divergence;
|
57
|
+
float cN, cS, cW, cE;
|
58
|
+
float G2, L;
|
59
|
+
|
60
|
+
// Declare other/helper variables
|
61
|
+
int index;
|
62
|
+
float temp_value;
|
63
|
+
float sum1, sum2;
|
64
|
+
float current_value;
|
65
|
+
float temp_a, temp_b;
|
66
|
+
|
67
|
+
// Check for valid row and column sizes
|
68
|
+
if ((ROWS%16 != 0 ) || (COLS%16 != 0)) {
|
69
|
+
printf("[srad] Error: the number of rows and columns must be multiples of 16\n");
|
70
|
+
fflush(stdout); exit(1);
|
71
|
+
}
|
72
|
+
|
73
|
+
// Initialising memory
|
74
|
+
printf("\n[srad] Initialising memory"); fflush(stdout);
|
75
|
+
int size = COLS*ROWS;
|
76
|
+
int size_roi = (R2-R1+1)*(C2-C1+1);
|
77
|
+
float* values = (float*) malloc(sizeof(float)*size);
|
78
|
+
float* coefficent = (float*) malloc(sizeof(float)*size);
|
79
|
+
float* dN = (float*) malloc(sizeof(float)*size);
|
80
|
+
float* dS = (float*) malloc(sizeof(float)*size);
|
81
|
+
float* dW = (float*) malloc(sizeof(float)*size);
|
82
|
+
float* dE = (float*) malloc(sizeof(float)*size);
|
83
|
+
|
84
|
+
// Populate the input matrix
|
85
|
+
printf("\n[srad] Populating the input matrix with random values"); fflush(stdout);
|
86
|
+
for (i=0; i<ROWS; i++) {
|
87
|
+
for (j=0; j<COLS; j++) {
|
88
|
+
temp_value = rand()/(float)RAND_MAX;
|
89
|
+
values[i*COLS+j] = (float)exp(temp_value);
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
// Perform the computation a given number of times
|
94
|
+
printf("\n[srad] Performing the computation %d times",NITER); fflush(stdout);
|
95
|
+
for (iter=0; iter<NITER; iter++) {
|
96
|
+
|
97
|
+
// Compute the mean, the variance and the speckle scale function (q0s) of the region of interest (ROI)
|
98
|
+
sum1 = 0;
|
99
|
+
sum2 = 0;
|
100
|
+
for (i=R1; i<=R2; i++) {
|
101
|
+
for (j=C1; j<=C2; j++) {
|
102
|
+
temp_value = values[i*COLS+j];
|
103
|
+
sum1 += temp_value;
|
104
|
+
sum2 += temp_value*temp_value;
|
105
|
+
}
|
106
|
+
}
|
107
|
+
mean_roi = sum1/size_roi;
|
108
|
+
var_roi = (sum2/size_roi) - mean_roi*mean_roi;
|
109
|
+
q0s = var_roi / (mean_roi*mean_roi);
|
110
|
+
|
111
|
+
// Iterate over the full image and compute
|
112
|
+
#pragma scop
|
113
|
+
for (i=0; i<ROWS; i++) {
|
114
|
+
for (j=0; j<COLS; j++) {
|
115
|
+
index = i*COLS+j;
|
116
|
+
current_value = values[index];
|
117
|
+
|
118
|
+
// Compute the directional derivates (N,S,W,E)
|
119
|
+
if (i==0) { dN[index] = 0; }
|
120
|
+
else { dN[index] = values[(i-1)*COLS + j ] - current_value; }
|
121
|
+
if (i==ROWS-1) { dS[index] = 0; }
|
122
|
+
else { dS[index] = values[(i+1)*COLS + j ] - current_value; }
|
123
|
+
if (j==0) { dW[index] = 0; }
|
124
|
+
else { dW[index] = values[i *COLS + (j-1)] - current_value; }
|
125
|
+
if (j==COLS-1) { dE[index] = 0; }
|
126
|
+
else { dE[index] = values[i *COLS + (j+1)] - current_value; }
|
127
|
+
|
128
|
+
// Compute the instantaneous coefficient of variation (qs) (equation 35)
|
129
|
+
G2 = (dN[index]*dN[index] + dS[index]*dS[index] + dW[index]*dW[index] + dE[index]*dE[index]) / (current_value*current_value);
|
130
|
+
L = (dN[index] + dS[index] + dW[index] + dE[index] ) / (current_value );
|
131
|
+
temp_a = (0.5*G2)-((1.0/16.0)*(L*L));
|
132
|
+
temp_b = 1+(0.25*L);
|
133
|
+
qs = temp_a/(temp_b*temp_b);
|
134
|
+
|
135
|
+
// Set the diffusion coefficent (equation 33)
|
136
|
+
coefficent[index] = 1.0 / (1.0+( (qs-q0s)/(q0s*(1+q0s)) ));
|
137
|
+
|
138
|
+
// Saturate the diffusion coefficent
|
139
|
+
if (coefficent[index] < 0) {
|
140
|
+
coefficent[index] = 0;
|
141
|
+
}
|
142
|
+
else if (coefficent[index] > 1) {
|
143
|
+
coefficent[index] = 1;
|
144
|
+
}
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
// Iterate over the full image again and compute the final values
|
149
|
+
for (i=0; i<ROWS; i++) {
|
150
|
+
for (j=0; j<COLS; j++) {
|
151
|
+
index = i*COLS+j;
|
152
|
+
|
153
|
+
// Calculate the diffusion coefficent
|
154
|
+
cN = coefficent[i *COLS+j ];
|
155
|
+
if (i==ROWS-1) { cS = 0; }
|
156
|
+
else { cS = coefficent[(i+1)*COLS+j ]; }
|
157
|
+
cW = coefficent[i *COLS+j ];
|
158
|
+
if (j==COLS-1) { cE = 0; }
|
159
|
+
else { cE = coefficent[i *COLS+(j+1)]; }
|
160
|
+
|
161
|
+
// Calculate the divergence (equation 58)
|
162
|
+
divergence = cN*dN[index] + cS*dS[index] + cW*dW[index] + cE*dE[index];
|
163
|
+
|
164
|
+
// Update the image accordingly (equation 61)
|
165
|
+
values[index] = values[index] + 0.25*LAMBDA*divergence;
|
166
|
+
}
|
167
|
+
}
|
168
|
+
#pragma endscop
|
169
|
+
}
|
170
|
+
|
171
|
+
// Print the values matrix
|
172
|
+
printf("\n[srad] Printing the output matrix:\n\n"); fflush(stdout);
|
173
|
+
for (i=0; i<ROWS; i++) {
|
174
|
+
for (j=0; j<COLS; j++) {
|
175
|
+
printf("%.5f ", values[i*COLS+j]);
|
176
|
+
}
|
177
|
+
printf("\n");
|
178
|
+
}
|
179
|
+
|
180
|
+
// Clean-up and exit
|
181
|
+
printf("\n[srad] Completed\n\n"); fflush(stdout);
|
182
|
+
free(values); free(coefficent);
|
183
|
+
free(dN); free(dS); free(dW); free(dE);
|
184
|
+
fflush(stdout);
|
185
|
+
return 0;
|
186
|
+
}
|
187
|
+
|
188
|
+
//########################################################################
|