bones-compiler 1.1.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// demonstrates the use of Bones for an example application: 'Hotspot', taken from
|
|
4
|
+
// the Rodinia benchmark suite. For more information on the application or on Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on Hotspot
|
|
8
|
+
// Article............http://dx.doi.org/10.1109/TVLSI.2006.876103
|
|
9
|
+
// Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........applications/hotspot.c
|
|
17
|
+
// Authors............Cedric Nugteren
|
|
18
|
+
// Last modified on...10-Aug-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
//########################################################################
|
|
22
|
+
//### Includes
|
|
23
|
+
//########################################################################
|
|
24
|
+
|
|
25
|
+
#include <stdio.h>
|
|
26
|
+
#include <stdlib.h>
|
|
27
|
+
#include <math.h>
|
|
28
|
+
|
|
29
|
+
//########################################################################
|
|
30
|
+
//### Input parameters
|
|
31
|
+
//########################################################################
|
|
32
|
+
|
|
33
|
+
#define GRID_ROWS 64 // Number of rows in the grid (positive integer)
|
|
34
|
+
#define GRID_COLS 64 // Number of columns in the grid (positive integer)
|
|
35
|
+
#define SIM_TIME 2 // Number of iterations
|
|
36
|
+
#define TEMPERATURE_FILE "data/hotspot_temperature_64.txt" // Name of the file containing the initial temperature values of each cell
|
|
37
|
+
#define POWER_FILE "data/hotspot_power_64.txt" // Name of the file containing the dissipated power values of each cell
|
|
38
|
+
|
|
39
|
+
//########################################################################
|
|
40
|
+
//### Defines
|
|
41
|
+
//########################################################################
|
|
42
|
+
|
|
43
|
+
#define STRING_SIZE 256 // Length of the strings in the temperature and power files
|
|
44
|
+
#define MAX_PD (3.0e6) // Maximum power density possible (say 300W for a 10mm x 10mm chip)
|
|
45
|
+
#define PRECISION 0.001 // Required precision in degrees
|
|
46
|
+
#define SPEC_HEAT_SI 1.75e6 //
|
|
47
|
+
#define K_SI 100 //
|
|
48
|
+
#define FACTOR_CHIP 0.5 // Capacitance fitting factor
|
|
49
|
+
#define T_CHIP 0.0005 // Chip thickness (used as a length in 'cap' and 'Rz', not a temperature)
|
|
50
|
+
#define CHIP_HEIGHT 0.016 // Chip height
|
|
51
|
+
#define CHIP_WIDTH 0.016 // Chip width
|
|
52
|
+
#define AMB_TEMP 80.0 // Ambient temperature, assuming no package at all
|
|
53
|
+
|
|
54
|
+
//########################################################################
|
|
55
|
+
//### Forward declarations
|
|
56
|
+
//########################################################################
|
|
57
|
+
|
|
58
|
+
void read_input(double* array, const char* filename);
|
|
59
|
+
|
|
60
|
+
//########################################################################
|
|
61
|
+
//### Start of the main function
|
|
62
|
+
//########################################################################
|
|
63
|
+
|
|
64
|
+
int main(void) {
|
|
65
|
+
|
|
66
|
+
// Declare the loop iterators
|
|
67
|
+
int r,c,iter;
|
|
68
|
+
|
|
69
|
+
// Declare other/helper variables
|
|
70
|
+
int index;
|
|
71
|
+
double delta;
|
|
72
|
+
int row = GRID_ROWS;
|
|
73
|
+
int col = GRID_COLS;
|
|
74
|
+
double grid_height = CHIP_HEIGHT/row;
|
|
75
|
+
double grid_width = CHIP_WIDTH/col;
|
|
76
|
+
|
|
77
|
+
// Set domain variables
|
|
78
|
+
double cap = FACTOR_CHIP*SPEC_HEAT_SI*T_CHIP*grid_width*grid_height;
|
|
79
|
+
double Rx = grid_width / (2.0*K_SI*T_CHIP*grid_height);
|
|
80
|
+
double Ry = grid_height / (2.0*K_SI*T_CHIP*grid_width);
|
|
81
|
+
double Rz = T_CHIP / (K_SI*grid_height*grid_width);
|
|
82
|
+
double max_slope = MAX_PD / (FACTOR_CHIP*T_CHIP*SPEC_HEAT_SI);
|
|
83
|
+
double step = PRECISION / max_slope;
|
|
84
|
+
|
|
85
|
+
// Initialising memory
|
|
86
|
+
printf("\n[hotspot] Initialising memory"); fflush(stdout);
|
|
87
|
+
double* temperature = (double*) calloc(row*col, sizeof(double));
|
|
88
|
+
double* power = (double*) calloc(row*col, sizeof(double));
|
|
89
|
+
double* result = (double*) calloc(row*col, sizeof(double));
|
|
90
|
+
|
|
91
|
+
// Read initial temperature and power arrays
|
|
92
|
+
printf("\n[hotspot] Populating memory"); fflush(stdout);
|
|
93
|
+
read_input(temperature, TEMPERATURE_FILE);
|
|
94
|
+
read_input(power, POWER_FILE);
|
|
95
|
+
|
|
96
|
+
// Perform the computation a given number of times
|
|
97
|
+
printf("\n[hotspot] Performing the computation %d times",SIM_TIME); fflush(stdout);
|
|
98
|
+
#pragma scop
|
|
99
|
+
for (iter=0; iter<SIM_TIME; iter++) {
|
|
100
|
+
|
|
101
|
+
// Transient solver driver routine: convert the heat transfer differential equations to difference equations
|
|
102
|
+
// and solve the difference equations by iterating
|
|
103
|
+
for (r=0; r<row; r++) {
|
|
104
|
+
for (c=0; c<col; c++) {
|
|
105
|
+
|
|
106
|
+
// Corner 1
|
|
107
|
+
if ( (r == 0) && (c == 0) ) {
|
|
108
|
+
delta = (step / cap) * (power[0] +
|
|
109
|
+
(temperature[1] - temperature[0]) / Rx +
|
|
110
|
+
(temperature[col] - temperature[0]) / Ry +
|
|
111
|
+
(AMB_TEMP - temperature[0]) / Rz);
|
|
112
|
+
}
|
|
113
|
+
// Corner 2
|
|
114
|
+
else if ((r == 0) && (c == col-1)) {
|
|
115
|
+
delta = (step / cap) * (power[c] +
|
|
116
|
+
(temperature[c-1] - temperature[c]) / Rx +
|
|
117
|
+
(temperature[c+col] - temperature[c]) / Ry +
|
|
118
|
+
(AMB_TEMP - temperature[c]) / Rz);
|
|
119
|
+
}
|
|
120
|
+
// Corner 3
|
|
121
|
+
else if ((r == row-1) && (c == col-1)) {
|
|
122
|
+
delta = (step / cap) * (power[r*col+c] +
|
|
123
|
+
(temperature[r*col+c-1] - temperature[r*col+c]) / Rx +
|
|
124
|
+
(temperature[(r-1)*col+c] - temperature[r*col+c]) / Ry +
|
|
125
|
+
(AMB_TEMP - temperature[r*col+c]) / Rz);
|
|
126
|
+
}
|
|
127
|
+
// Corner 4
|
|
128
|
+
else if ((r == row-1) && (c == 0)) {
|
|
129
|
+
delta = (step / cap) * (power[r*col] +
|
|
130
|
+
(temperature[r*col+1] - temperature[r*col]) / Rx +
|
|
131
|
+
(temperature[(r-1)*col] - temperature[r*col]) / Ry +
|
|
132
|
+
(AMB_TEMP - temperature[r*col]) / Rz);
|
|
133
|
+
}
|
|
134
|
+
// Edge 1
|
|
135
|
+
else if (r == 0) {
|
|
136
|
+
delta = (step / cap) * (power[c] +
|
|
137
|
+
(temperature[c+1] + temperature[c-1] - 2.0*temperature[c]) / Rx +
|
|
138
|
+
(temperature[col+c] - temperature[c]) / Ry +
|
|
139
|
+
(AMB_TEMP - temperature[c]) / Rz);
|
|
140
|
+
}
|
|
141
|
+
// Edge 2
|
|
142
|
+
else if (c == col-1) {
|
|
143
|
+
delta = (step / cap) * (power[r*col+c] +
|
|
144
|
+
(temperature[(r+1)*col+c] + temperature[(r-1)*col+c] - 2.0*temperature[r*col+c]) / Ry +
|
|
145
|
+
(temperature[r*col+c-1] - temperature[r*col+c]) / Rx +
|
|
146
|
+
(AMB_TEMP - temperature[r*col+c]) / Rz);
|
|
147
|
+
}
|
|
148
|
+
// Edge 3
|
|
149
|
+
else if (r == row-1) {
|
|
150
|
+
delta = (step / cap) * (power[r*col+c] +
|
|
151
|
+
(temperature[r*col+c+1] + temperature[r*col+c-1] - 2.0*temperature[r*col+c]) / Rx +
|
|
152
|
+
(temperature[(r-1)*col+c] - temperature[r*col+c]) / Ry +
|
|
153
|
+
(AMB_TEMP - temperature[r*col+c]) / Rz);
|
|
154
|
+
}
|
|
155
|
+
// Edge 4
|
|
156
|
+
else if (c == 0) {
|
|
157
|
+
delta = (step / cap) * (power[r*col] +
|
|
158
|
+
(temperature[(r+1)*col] + temperature[(r-1)*col] - 2.0*temperature[r*col]) / Ry +
|
|
159
|
+
(temperature[r*col+1] - temperature[r*col]) / Rx +
|
|
160
|
+
(AMB_TEMP - temperature[r*col]) / Rz);
|
|
161
|
+
}
|
|
162
|
+
// Inside the chip
|
|
163
|
+
else {
|
|
164
|
+
delta = (step / cap) * (power[r*col+c] +
|
|
165
|
+
(temperature[(r+1)*col+c] + temperature[(r-1)*col+c] - 2.0*temperature[r*col+c]) / Ry +
|
|
166
|
+
(temperature[r*col+c+1] + temperature[r*col+c-1] - 2.0*temperature[r*col+c]) / Rx +
|
|
167
|
+
(AMB_TEMP - temperature[r*col+c]) / Rz);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
// Update the temperatures
|
|
171
|
+
result[r*col+c] = temperature[r*col+c] + delta;
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
// Copy the result as the new temperatures
|
|
176
|
+
for (r=0; r<row; r++) {
|
|
177
|
+
for (c=0; c<col; c++) {
|
|
178
|
+
temperature[r*col+c] = result[r*col+c];
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
#pragma endscop
|
|
183
|
+
|
|
184
|
+
// Print the values matrix
|
|
185
|
+
printf("\n[hotspot] Printing the final temperatures:\n\n"); fflush(stdout);
|
|
186
|
+
for (r=0; r<row; r++) {
|
|
187
|
+
for (c=0; c<col; c++) {
|
|
188
|
+
index = r*col+c;
|
|
189
|
+
printf("%6d: %.3lf ", index, temperature[index]);
|
|
190
|
+
}
|
|
191
|
+
printf("\n");
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
// Clean-up and exit
|
|
195
|
+
printf("\n[hotspot] Completed\n\n"); fflush(stdout);
|
|
196
|
+
free(temperature); free(power); free(result);
|
|
197
|
+
fflush(stdout);
|
|
198
|
+
return 0;
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
//########################################################################
|
|
202
|
+
//### Function to read an input file (power or temperature values)
|
|
203
|
+
//########################################################################
|
|
204
|
+
|
|
205
|
+
void read_input(double* array, const char* filename) {
|
|
206
|
+
int r, c;
|
|
207
|
+
char string[STRING_SIZE];
|
|
208
|
+
double value;
|
|
209
|
+
|
|
210
|
+
// Open the file
|
|
211
|
+
FILE* file_pointer = fopen(filename, "r");
|
|
212
|
+
if (!file_pointer) { printf("\n[hotspot] Error: file '%s' could not be opened for reading\n\n", filename); fflush(stdout); exit(1); }
|
|
213
|
+
|
|
214
|
+
// Process the file
|
|
215
|
+
for (r=0; r<GRID_ROWS; r++) {
|
|
216
|
+
for (c=0; c<GRID_COLS; c++) {
|
|
217
|
+
fgets(string, STRING_SIZE, file_pointer);
|
|
218
|
+
if (feof(file_pointer)) { printf("\n[hotspot] Error: not enough lines in file '%s'\n\n", filename); fflush(stdout); exit(1); }
|
|
219
|
+
if ((sscanf(string, "%lf", &value) != 1) ) { printf("\n[hotspot] Error: invalid file format for '%s'\n\n", filename); fflush(stdout); exit(1); }
|
|
220
|
+
array[r*GRID_COLS+c] = value;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Clean-up and return
|
|
225
|
+
fclose(file_pointer);
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
//########################################################################
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// demonstrates the use of Bones for an example application: 'K-means clustering',
|
|
4
|
+
// as also available in the Rodinia benchmark suite. For more information on the
|
|
5
|
+
// application or on Bones please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == This implementation of K-means clustering is inspired by:
|
|
8
|
+
// Author.............Roger Zhang
|
|
9
|
+
// Web address........http://cs.smu.ca/~r_zhang/code/kmeans.c
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........applications/kmeans.c
|
|
17
|
+
// Authors............Cedric Nugteren
|
|
18
|
+
// Last modified on...10-Aug-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
//########################################################################
|
|
22
|
+
//### Includes
|
|
23
|
+
//########################################################################
|
|
24
|
+
|
|
25
|
+
#include <stdio.h>
|
|
26
|
+
#include <math.h>
|
|
27
|
+
#include <float.h>
|
|
28
|
+
|
|
29
|
+
//########################################################################
|
|
30
|
+
//### Defines
|
|
31
|
+
//########################################################################
|
|
32
|
+
|
|
33
|
+
#define SIZE 512
|
|
34
|
+
#define NUM_CLUSTERS 20
|
|
35
|
+
#define DIMENSIONS 2
|
|
36
|
+
#define THRESHOLD 0.0001
|
|
37
|
+
|
|
38
|
+
//########################################################################
|
|
39
|
+
//### Start of the main function
|
|
40
|
+
//########################################################################
|
|
41
|
+
int main(void) {
|
|
42
|
+
|
|
43
|
+
// Declare the loop iterators
|
|
44
|
+
int i,j,k;
|
|
45
|
+
|
|
46
|
+
// Declare the error variables
|
|
47
|
+
double error = DBL_MAX;
|
|
48
|
+
double old_error;
|
|
49
|
+
int iterations = 0;
|
|
50
|
+
|
|
51
|
+
// Declare the distance variables and arrays
|
|
52
|
+
double distance[1];
|
|
53
|
+
double min_distance[1];
|
|
54
|
+
double distances[SIZE];
|
|
55
|
+
|
|
56
|
+
// Initialising memory
|
|
57
|
+
printf("\n[k-means] Initialising memory"); fflush(stdout);
|
|
58
|
+
double input[SIZE][DIMENSIONS];
|
|
59
|
+
double centroids[NUM_CLUSTERS][DIMENSIONS];
|
|
60
|
+
double centroids_temp[NUM_CLUSTERS][DIMENSIONS];
|
|
61
|
+
int output[SIZE];
|
|
62
|
+
int counts[NUM_CLUSTERS];
|
|
63
|
+
|
|
64
|
+
// Set the input data
|
|
65
|
+
printf("\n[k-means] Populating memory"); fflush(stdout);
|
|
66
|
+
for (i=0; i<SIZE; i++) {
|
|
67
|
+
input[i][0] = (i/16);
|
|
68
|
+
input[i][1] = i%4;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Pick k initial centroids
|
|
72
|
+
printf("\n[k-means] Setting 'k' initial centroids"); fflush(stdout);
|
|
73
|
+
for (k=0; k<NUM_CLUSTERS; k++) {
|
|
74
|
+
for (j=0; j<DIMENSIONS; j++) {
|
|
75
|
+
centroids[k][j] = input[(SIZE/NUM_CLUSTERS)*k][j];
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// Perform the k-means clustering algorithm, end when the error is not becoming smaller
|
|
80
|
+
printf("\n[k-means] Perform the clustering algorithm"); fflush(stdout);
|
|
81
|
+
do {
|
|
82
|
+
#pragma scop
|
|
83
|
+
|
|
84
|
+
// Save the error from the last step
|
|
85
|
+
old_error = error;
|
|
86
|
+
error = 0;
|
|
87
|
+
|
|
88
|
+
// Clear old counts and temporary centroids
|
|
89
|
+
for (k=0; k<NUM_CLUSTERS; k++) {
|
|
90
|
+
counts[k] = 0;
|
|
91
|
+
for (j=0; j<DIMENSIONS; j++) {
|
|
92
|
+
centroids_temp[k][j] = 0;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Iterate over all data points
|
|
97
|
+
for (i=0; i<SIZE; i++) {
|
|
98
|
+
|
|
99
|
+
// Find the closest cluster
|
|
100
|
+
min_distance[0] = DBL_MAX;
|
|
101
|
+
for (k=0; k<NUM_CLUSTERS; k++) {
|
|
102
|
+
distance[0] = 0;
|
|
103
|
+
for (j=0; j<DIMENSIONS; j++) {
|
|
104
|
+
distance[0] += pow(input[i][j]-centroids[k][j],2);
|
|
105
|
+
}
|
|
106
|
+
if (distance[0] < min_distance[0]) {
|
|
107
|
+
output[i] = k;
|
|
108
|
+
min_distance[0] = distance[0];
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Update the size and temporary centroid of the destination cluster
|
|
113
|
+
for (j=0; j<DIMENSIONS; j++) {
|
|
114
|
+
centroids_temp[output[i]][j] += input[i][j];
|
|
115
|
+
}
|
|
116
|
+
counts[output[i]] += 1;
|
|
117
|
+
|
|
118
|
+
// Store the resulting distance
|
|
119
|
+
distances[i] = min_distance[0];
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Update the standard error
|
|
123
|
+
for (i=0; i<SIZE; i++) {
|
|
124
|
+
error += distances[i];
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// Update all centroids
|
|
128
|
+
for (k=0; k<NUM_CLUSTERS; k++) {
|
|
129
|
+
for (j=0; j<DIMENSIONS; j++) {
|
|
130
|
+
if (counts[k]) {
|
|
131
|
+
centroids[k][j] = centroids_temp[k][j] / counts[k];
|
|
132
|
+
}
|
|
133
|
+
else {
|
|
134
|
+
centroids[k][j] = centroids_temp[k][j];
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
// Go to the next iteration
|
|
140
|
+
iterations += 1;
|
|
141
|
+
|
|
142
|
+
#pragma endscop
|
|
143
|
+
} while (fabs(error-old_error) > THRESHOLD);
|
|
144
|
+
|
|
145
|
+
// Print the results
|
|
146
|
+
printf("\n[k-means] Algorithm finished in %d iterations with an error of %.3lf", iterations, error); fflush(stdout);
|
|
147
|
+
printf("\n[k-means] Printing the results: \n\n"); fflush(stdout);
|
|
148
|
+
for (k=0; k<NUM_CLUSTERS; k++) {
|
|
149
|
+
printf("Cluster %2i: ", k);
|
|
150
|
+
for (i=0; i<SIZE; i++) {
|
|
151
|
+
if (output[i] == k) {
|
|
152
|
+
printf("%3i ", i);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
printf("\n");
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// Clean-up and exit the function
|
|
159
|
+
printf("\n[k-means] Completed\n\n"); fflush(stdout);
|
|
160
|
+
fflush(stdout);
|
|
161
|
+
return 0;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
//########################################################################
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// demonstrates the use of Bones for an example application: 'Speckle Reducing
|
|
4
|
+
// Anisotropic Diffusion' or 'SRAD', taken from the Rodinia benchmark suite. For
|
|
5
|
+
// more information on the application or on Bones please use the contact infor-
|
|
6
|
+
// mation below.
|
|
7
|
+
//
|
|
8
|
+
// == More information on SRAD (Speckle Reducing Anisotropic Diffusion):
|
|
9
|
+
// Article............http://dx.doi.org/10.1109/TIP.2002.804276
|
|
10
|
+
// Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
|
|
11
|
+
//
|
|
12
|
+
// == More information on Bones
|
|
13
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
14
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
15
|
+
//
|
|
16
|
+
// == File information
|
|
17
|
+
// Filename...........applications/srad.c
|
|
18
|
+
// Authors............Cedric Nugteren
|
|
19
|
+
// Original authors...Rob Janiczek, Drew Gilliam, Lukasz Szafaryn
|
|
20
|
+
// Last modified on...10-Aug-2012
|
|
21
|
+
//
|
|
22
|
+
|
|
23
|
+
//########################################################################
|
|
24
|
+
//### Includes
|
|
25
|
+
//########################################################################
|
|
26
|
+
|
|
27
|
+
#include <stdio.h>
|
|
28
|
+
#include <stdlib.h>
|
|
29
|
+
#include <math.h>
|
|
30
|
+
|
|
31
|
+
//########################################################################
|
|
32
|
+
//### Defines
|
|
33
|
+
//########################################################################
|
|
34
|
+
|
|
35
|
+
#define ROWS 128 // Number of ROWS in the domain
|
|
36
|
+
#define COLS 128 // Number of COLS in the domain
|
|
37
|
+
#define R1 0 // y1 position of the speckle
|
|
38
|
+
#define R2 31 // y2 position of the speckle
|
|
39
|
+
#define C1 0 // x1 position of the speckle
|
|
40
|
+
#define C2 31 // x2 position of the speckle
|
|
41
|
+
#define LAMBDA 0.5 // Lambda value
|
|
42
|
+
#define NITER 2 // Number of iterations
|
|
43
|
+
|
|
44
|
+
//########################################################################
|
|
45
|
+
//### Start of the main function
|
|
46
|
+
//########################################################################
|
|
47
|
+
|
|
48
|
+
int main(void) {
|
|
49
|
+
|
|
50
|
+
// Declare the loop iterators
|
|
51
|
+
int i,j,iter;
|
|
52
|
+
|
|
53
|
+
// Declare domain variables
|
|
54
|
+
float mean_roi, var_roi;
|
|
55
|
+
float q0s, qs;
|
|
56
|
+
float divergence;
|
|
57
|
+
float cN, cS, cW, cE;
|
|
58
|
+
float G2, L;
|
|
59
|
+
|
|
60
|
+
// Declare other/helper variables
|
|
61
|
+
int index;
|
|
62
|
+
float temp_value;
|
|
63
|
+
float sum1, sum2;
|
|
64
|
+
float current_value;
|
|
65
|
+
float temp_a, temp_b;
|
|
66
|
+
|
|
67
|
+
// Check for valid row and column sizes
|
|
68
|
+
if ((ROWS%16 != 0 ) || (COLS%16 != 0)) {
|
|
69
|
+
printf("[srad] Error: the number of rows and columns must be multiples of 16\n");
|
|
70
|
+
fflush(stdout); exit(1);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Initialising memory
|
|
74
|
+
printf("\n[srad] Initialising memory"); fflush(stdout);
|
|
75
|
+
int size = COLS*ROWS;
|
|
76
|
+
int size_roi = (R2-R1+1)*(C2-C1+1);
|
|
77
|
+
float* values = (float*) malloc(sizeof(float)*size);
|
|
78
|
+
float* coefficent = (float*) malloc(sizeof(float)*size);
|
|
79
|
+
float* dN = (float*) malloc(sizeof(float)*size);
|
|
80
|
+
float* dS = (float*) malloc(sizeof(float)*size);
|
|
81
|
+
float* dW = (float*) malloc(sizeof(float)*size);
|
|
82
|
+
float* dE = (float*) malloc(sizeof(float)*size);
|
|
83
|
+
|
|
84
|
+
// Populate the input matrix
|
|
85
|
+
printf("\n[srad] Populating the input matrix with random values"); fflush(stdout);
|
|
86
|
+
for (i=0; i<ROWS; i++) {
|
|
87
|
+
for (j=0; j<COLS; j++) {
|
|
88
|
+
temp_value = rand()/(float)RAND_MAX;
|
|
89
|
+
values[i*COLS+j] = (float)exp(temp_value);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// Perform the computation a given number of times
|
|
94
|
+
printf("\n[srad] Performing the computation %d times",NITER); fflush(stdout);
|
|
95
|
+
for (iter=0; iter<NITER; iter++) {
|
|
96
|
+
|
|
97
|
+
// Compute the mean, the variance and the speckle scale function (q0s) of the region of interest (ROI)
|
|
98
|
+
sum1 = 0;
|
|
99
|
+
sum2 = 0;
|
|
100
|
+
for (i=R1; i<=R2; i++) {
|
|
101
|
+
for (j=C1; j<=C2; j++) {
|
|
102
|
+
temp_value = values[i*COLS+j];
|
|
103
|
+
sum1 += temp_value;
|
|
104
|
+
sum2 += temp_value*temp_value;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
mean_roi = sum1/size_roi;
|
|
108
|
+
var_roi = (sum2/size_roi) - mean_roi*mean_roi;
|
|
109
|
+
q0s = var_roi / (mean_roi*mean_roi);
|
|
110
|
+
|
|
111
|
+
// Iterate over the full image and compute
|
|
112
|
+
#pragma scop
|
|
113
|
+
for (i=0; i<ROWS; i++) {
|
|
114
|
+
for (j=0; j<COLS; j++) {
|
|
115
|
+
index = i*COLS+j;
|
|
116
|
+
current_value = values[index];
|
|
117
|
+
|
|
118
|
+
// Compute the directional derivates (N,S,W,E)
|
|
119
|
+
if (i==0) { dN[index] = 0; }
|
|
120
|
+
else { dN[index] = values[(i-1)*COLS + j ] - current_value; }
|
|
121
|
+
if (i==ROWS-1) { dS[index] = 0; }
|
|
122
|
+
else { dS[index] = values[(i+1)*COLS + j ] - current_value; }
|
|
123
|
+
if (j==0) { dW[index] = 0; }
|
|
124
|
+
else { dW[index] = values[i *COLS + (j-1)] - current_value; }
|
|
125
|
+
if (j==COLS-1) { dE[index] = 0; }
|
|
126
|
+
else { dE[index] = values[i *COLS + (j+1)] - current_value; }
|
|
127
|
+
|
|
128
|
+
// Compute the instantaneous coefficient of variation (qs) (equation 35)
|
|
129
|
+
G2 = (dN[index]*dN[index] + dS[index]*dS[index] + dW[index]*dW[index] + dE[index]*dE[index]) / (current_value*current_value);
|
|
130
|
+
L = (dN[index] + dS[index] + dW[index] + dE[index] ) / (current_value );
|
|
131
|
+
temp_a = (0.5*G2)-((1.0/16.0)*(L*L));
|
|
132
|
+
temp_b = 1+(0.25*L);
|
|
133
|
+
qs = temp_a/(temp_b*temp_b);
|
|
134
|
+
|
|
135
|
+
// Set the diffusion coefficent (equation 33)
|
|
136
|
+
coefficent[index] = 1.0 / (1.0+( (qs-q0s)/(q0s*(1+q0s)) ));
|
|
137
|
+
|
|
138
|
+
// Saturate the diffusion coefficent
|
|
139
|
+
if (coefficent[index] < 0) {
|
|
140
|
+
coefficent[index] = 0;
|
|
141
|
+
}
|
|
142
|
+
else if (coefficent[index] > 1) {
|
|
143
|
+
coefficent[index] = 1;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// Iterate over the full image again and compute the final values
|
|
149
|
+
for (i=0; i<ROWS; i++) {
|
|
150
|
+
for (j=0; j<COLS; j++) {
|
|
151
|
+
index = i*COLS+j;
|
|
152
|
+
|
|
153
|
+
// Calculate the diffusion coefficent
|
|
154
|
+
cN = coefficent[i *COLS+j ];
|
|
155
|
+
if (i==ROWS-1) { cS = 0; }
|
|
156
|
+
else { cS = coefficent[(i+1)*COLS+j ]; }
|
|
157
|
+
cW = coefficent[i *COLS+j ];
|
|
158
|
+
if (j==COLS-1) { cE = 0; }
|
|
159
|
+
else { cE = coefficent[i *COLS+(j+1)]; }
|
|
160
|
+
|
|
161
|
+
// Calculate the divergence (equation 58)
|
|
162
|
+
divergence = cN*dN[index] + cS*dS[index] + cW*dW[index] + cE*dE[index];
|
|
163
|
+
|
|
164
|
+
// Update the image accordingly (equation 61)
|
|
165
|
+
values[index] = values[index] + 0.25*LAMBDA*divergence;
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
#pragma endscop
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Print the values matrix
|
|
172
|
+
printf("\n[srad] Printing the output matrix:\n\n"); fflush(stdout);
|
|
173
|
+
for (i=0; i<ROWS; i++) {
|
|
174
|
+
for (j=0; j<COLS; j++) {
|
|
175
|
+
printf("%.5f ", values[i*COLS+j]);
|
|
176
|
+
}
|
|
177
|
+
printf("\n");
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// Clean-up and exit
|
|
181
|
+
printf("\n[srad] Completed\n\n"); fflush(stdout);
|
|
182
|
+
free(values); free(coefficent);
|
|
183
|
+
free(dN); free(dS); free(dW); free(dE);
|
|
184
|
+
fflush(stdout);
|
|
185
|
+
return 0;
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
//########################################################################
|