BOAST 0.9995 → 0.9996
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/BOAST.gemspec +3 -1
 - data/lib/BOAST/Algorithm.rb +82 -189
 - data/lib/BOAST/Arithmetic.rb +16 -16
 - data/lib/BOAST/BOAST_OpenCL.rb +44 -41
 - data/lib/BOAST/CKernel.rb +471 -344
 - data/lib/BOAST/Case.rb +21 -23
 - data/lib/BOAST/ControlStructure.rb +9 -1
 - data/lib/BOAST/DataTypes.rb +70 -51
 - data/lib/BOAST/Expression.rb +22 -17
 - data/lib/BOAST/For.rb +37 -23
 - data/lib/BOAST/FuncCall.rb +11 -10
 - data/lib/BOAST/Functors.rb +29 -4
 - data/lib/BOAST/If.rb +21 -23
 - data/lib/BOAST/Index.rb +14 -13
 - data/lib/BOAST/Inspectable.rb +6 -13
 - data/lib/BOAST/Operators.rb +82 -61
 - data/lib/BOAST/Parens.rb +4 -25
 - data/lib/BOAST/Pragma.rb +7 -6
 - data/lib/BOAST/Print.rb +7 -0
 - data/lib/BOAST/Procedure.rb +46 -34
 - data/lib/BOAST/State.rb +79 -0
 - data/lib/BOAST/Transitions.rb +13 -5
 - data/lib/BOAST/Variable.rb +104 -68
 - data/lib/BOAST/While.rb +15 -17
 - data/lib/BOAST.rb +2 -0
 - metadata +4 -2
 
    
        data/lib/BOAST/CKernel.rb
    CHANGED
    
    | 
         @@ -8,7 +8,6 @@ require 'yaml' 
     | 
|
| 
       8 
8 
     | 
    
         
             
            require 'pathname'
         
     | 
| 
       9 
9 
     | 
    
         | 
| 
       10 
10 
     | 
    
         
             
            module BOAST
         
     | 
| 
       11 
     | 
    
         
            -
              @@verbose = false
         
     | 
| 
       12 
11 
     | 
    
         
             
              @@compiler_default_options = {
         
     | 
| 
       13 
12 
     | 
    
         
             
                :FC => 'gfortran',
         
     | 
| 
       14 
13 
     | 
    
         
             
                :FCFLAGS => '-O2 -Wall',
         
     | 
| 
         @@ -36,7 +35,16 @@ module BOAST 
     | 
|
| 
       36 
35 
     | 
    
         
             
                "icpc" => "-openmp"
         
     | 
| 
       37 
36 
     | 
    
         
             
              }
         
     | 
| 
       38 
37 
     | 
    
         | 
| 
       39 
     | 
    
         
            -
               
     | 
| 
      
 38 
     | 
    
         
            +
              module PrivateStateAccessor
         
     | 
| 
      
 39 
     | 
    
         
            +
                private_boolean_state_accessor :verbose
         
     | 
| 
      
 40 
     | 
    
         
            +
              end
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
              boolean_state_accessor :verbose
         
     | 
| 
      
 43 
     | 
    
         
            +
              @@verbose = false
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
              module_function
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
              def read_boast_config
         
     | 
| 
       40 
48 
     | 
    
         
             
                home_config_dir = ENV["XDG_CONFIG_HOME"]
         
     | 
| 
       41 
49 
     | 
    
         
             
                home_config_dir = "#{Dir.home}/.config" if not home_config_dir
         
     | 
| 
       42 
50 
     | 
    
         
             
                Dir.mkdir( home_config_dir ) if not File::exist?( home_config_dir )
         
     | 
| 
         @@ -70,36 +78,22 @@ module BOAST 
     | 
|
| 
       70 
78 
     | 
    
         
             
                @@verbose = ENV["VERBOSE"] if ENV["VERBOSE"]
         
     | 
| 
       71 
79 
     | 
    
         
             
              end
         
     | 
| 
       72 
80 
     | 
    
         | 
| 
       73 
     | 
    
         
            -
               
     | 
| 
      
 81 
     | 
    
         
            +
              read_boast_config
         
     | 
| 
       74 
82 
     | 
    
         | 
| 
       75 
     | 
    
         
            -
              def  
     | 
| 
      
 83 
     | 
    
         
            +
              def get_openmp_flags
         
     | 
| 
       76 
84 
     | 
    
         
             
                return @@openmp_default_flags.clone
         
     | 
| 
       77 
85 
     | 
    
         
             
              end
         
     | 
| 
       78 
86 
     | 
    
         | 
| 
       79 
     | 
    
         
            -
              def  
     | 
| 
      
 87 
     | 
    
         
            +
              def get_compiler_options
         
     | 
| 
       80 
88 
     | 
    
         
             
                return @@compiler_default_options.clone
         
     | 
| 
       81 
89 
     | 
    
         
             
              end
         
     | 
| 
       82 
90 
     | 
    
         | 
| 
       83 
     | 
    
         
            -
              def self.verbose
         
     | 
| 
       84 
     | 
    
         
            -
                return @@verbose
         
     | 
| 
       85 
     | 
    
         
            -
              end
         
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
       88 
     | 
    
         
            -
              def self.get_verbose
         
     | 
| 
       89 
     | 
    
         
            -
                return @@verbose
         
     | 
| 
       90 
     | 
    
         
            -
              end
         
     | 
| 
       91 
     | 
    
         
            -
             
     | 
| 
       92 
     | 
    
         
            -
              def self.verbose=(verbose)
         
     | 
| 
       93 
     | 
    
         
            -
                @@verbose = verbose
         
     | 
| 
       94 
     | 
    
         
            -
              end
         
     | 
| 
       95 
     | 
    
         
            -
             
     | 
| 
       96 
     | 
    
         
            -
              def self.set_verbose(verbose)
         
     | 
| 
       97 
     | 
    
         
            -
                @@verbose = verbose
         
     | 
| 
       98 
     | 
    
         
            -
              end
         
     | 
| 
       99 
     | 
    
         
            -
             
     | 
| 
       100 
91 
     | 
    
         
             
              class CKernel
         
     | 
| 
       101 
92 
     | 
    
         
             
                include Rake::DSL
         
     | 
| 
       102 
     | 
    
         
            -
                include  
     | 
| 
      
 93 
     | 
    
         
            +
                include Inspectable
         
     | 
| 
      
 94 
     | 
    
         
            +
                include PrivateStateAccessor
         
     | 
| 
      
 95 
     | 
    
         
            +
                include TypeTransition
         
     | 
| 
      
 96 
     | 
    
         
            +
             
     | 
| 
       103 
97 
     | 
    
         
             
                attr_accessor :code
         
     | 
| 
       104 
98 
     | 
    
         
             
                attr_accessor :procedure
         
     | 
| 
       105 
99 
     | 
    
         
             
                attr_accessor :lang
         
     | 
| 
         @@ -110,13 +104,13 @@ module BOAST 
     | 
|
| 
       110 
104 
     | 
    
         
             
                def initialize(options={})
         
     | 
| 
       111 
105 
     | 
    
         
             
                  if options[:code] then
         
     | 
| 
       112 
106 
     | 
    
         
             
                    @code = options[:code]
         
     | 
| 
       113 
     | 
    
         
            -
                  elsif  
     | 
| 
       114 
     | 
    
         
            -
                    @code =  
     | 
| 
      
 107 
     | 
    
         
            +
                  elsif get_chain_code
         
     | 
| 
      
 108 
     | 
    
         
            +
                    @code = get_output
         
     | 
| 
       115 
109 
     | 
    
         
             
                    @code.seek(0,SEEK_END)
         
     | 
| 
       116 
110 
     | 
    
         
             
                  else
         
     | 
| 
       117 
111 
     | 
    
         
             
                    @code = StringIO::new
         
     | 
| 
       118 
112 
     | 
    
         
             
                  end
         
     | 
| 
       119 
     | 
    
         
            -
                   
     | 
| 
      
 113 
     | 
    
         
            +
                  set_output(@code)
         
     | 
| 
       120 
114 
     | 
    
         
             
                  if options[:kernels] then
         
     | 
| 
       121 
115 
     | 
    
         
             
                    @kernels = options[:kernels]
         
     | 
| 
       122 
116 
     | 
    
         
             
                  else
         
     | 
| 
         @@ -125,7 +119,7 @@ module BOAST 
     | 
|
| 
       125 
119 
     | 
    
         
             
                  if options[:lang] then
         
     | 
| 
       126 
120 
     | 
    
         
             
                    @lang = options[:lang]
         
     | 
| 
       127 
121 
     | 
    
         
             
                  else
         
     | 
| 
       128 
     | 
    
         
            -
                    @lang =  
     | 
| 
      
 122 
     | 
    
         
            +
                    @lang = get_lang
         
     | 
| 
       129 
123 
     | 
    
         
             
                  end
         
     | 
| 
       130 
124 
     | 
    
         
             
                end
         
     | 
| 
       131 
125 
     | 
    
         | 
| 
         @@ -139,29 +133,27 @@ module BOAST 
     | 
|
| 
       139 
133 
     | 
    
         
             
                  return code.read
         
     | 
| 
       140 
134 
     | 
    
         
             
                end
         
     | 
| 
       141 
135 
     | 
    
         | 
| 
       142 
     | 
    
         
            -
                def setup_compiler(options = {})
         
     | 
| 
       143 
     | 
    
         
            -
                  Rake::Task::clear
         
     | 
| 
       144 
     | 
    
         
            -
                  verbose = options[:verbose]
         
     | 
| 
       145 
     | 
    
         
            -
                  verbose = BOAST::get_verbose if not verbose
         
     | 
| 
       146 
     | 
    
         
            -
                  Rake::verbose(verbose)
         
     | 
| 
       147 
     | 
    
         
            -
                  Rake::FileUtilsExt.verbose_flag=verbose
         
     | 
| 
       148 
     | 
    
         
            -
                  f_compiler = options[:FC]
         
     | 
| 
       149 
     | 
    
         
            -
                  c_compiler = options[:CC]
         
     | 
| 
       150 
     | 
    
         
            -
                  cxx_compiler = options[:CXX]
         
     | 
| 
       151 
     | 
    
         
            -
                  cuda_compiler = options[:NVCC]
         
     | 
| 
       152 
     | 
    
         
            -
                  f_flags = options[:FCFLAGS]
         
     | 
| 
       153 
     | 
    
         
            -
                  f_flags += " -fPIC"
         
     | 
| 
       154 
     | 
    
         
            -
                  f_flags += " -fno-second-underscore" if f_compiler == 'g95'
         
     | 
| 
       155 
     | 
    
         
            -
                  ld_flags = options[:LDFLAGS]
         
     | 
| 
       156 
     | 
    
         
            -
                  cuda_flags = options[:NVCCFLAGS]
         
     | 
| 
       157 
     | 
    
         
            -
                  cuda_flags += " --compiler-options '-fPIC'"
         
     | 
| 
       158 
136 
     | 
    
         | 
| 
      
 137 
     | 
    
         
            +
                def get_openmp_flags(compiler)
         
     | 
| 
      
 138 
     | 
    
         
            +
                  openmp_flags = BOAST::get_openmp_flags[compiler]
         
     | 
| 
      
 139 
     | 
    
         
            +
                  if not openmp_flags then
         
     | 
| 
      
 140 
     | 
    
         
            +
                    keys = BOAST::get_openmp_flags.keys
         
     | 
| 
      
 141 
     | 
    
         
            +
                    keys.each { |k|
         
     | 
| 
      
 142 
     | 
    
         
            +
                      openmp_flags = BOAST::get_openmp_flags[k] if compiler.match(k)
         
     | 
| 
      
 143 
     | 
    
         
            +
                    }
         
     | 
| 
      
 144 
     | 
    
         
            +
                  end
         
     | 
| 
      
 145 
     | 
    
         
            +
                  return openmp_flags
         
     | 
| 
      
 146 
     | 
    
         
            +
                end
         
     | 
| 
       159 
147 
     | 
    
         | 
| 
      
 148 
     | 
    
         
            +
                def get_includes(narray_path)
         
     | 
| 
       160 
149 
     | 
    
         
             
                  includes = "-I#{RbConfig::CONFIG["archdir"]}"
         
     | 
| 
       161 
150 
     | 
    
         
             
                  includes += " -I#{RbConfig::CONFIG["rubyhdrdir"]} -I#{RbConfig::CONFIG["rubyhdrdir"]}/#{RbConfig::CONFIG["arch"]}"
         
     | 
| 
       162 
151 
     | 
    
         
             
                  includes += " -I#{RbConfig::CONFIG["rubyarchhdrdir"]}" if RbConfig::CONFIG["rubyarchhdrdir"]
         
     | 
| 
       163 
     | 
    
         
            -
                   
     | 
| 
       164 
     | 
    
         
            -
                   
     | 
| 
      
 152 
     | 
    
         
            +
                  includes += " -I#{narray_path}" if narray_path
         
     | 
| 
      
 153 
     | 
    
         
            +
                  return includes
         
     | 
| 
      
 154 
     | 
    
         
            +
                end
         
     | 
| 
      
 155 
     | 
    
         
            +
             
     | 
| 
      
 156 
     | 
    
         
            +
                def get_narray_path
         
     | 
| 
       165 
157 
     | 
    
         
             
                  narray_path = nil
         
     | 
| 
       166 
158 
     | 
    
         
             
                  begin
         
     | 
| 
       167 
159 
     | 
    
         
             
                    spec = Gem::Specification::find_by_name('narray')
         
     | 
| 
         @@ -174,49 +166,96 @@ module BOAST 
     | 
|
| 
       174 
166 
     | 
    
         
             
                      narray_path = Gem.loaded_specs['narray'].full_gem_path
         
     | 
| 
       175 
167 
     | 
    
         
             
                    end
         
     | 
| 
       176 
168 
     | 
    
         
             
                  end
         
     | 
| 
       177 
     | 
    
         
            -
             
     | 
| 
      
 169 
     | 
    
         
            +
                end
         
     | 
| 
      
 170 
     | 
    
         
            +
             
     | 
| 
      
 171 
     | 
    
         
            +
                def setup_c_compiler(options, includes, narray_path, runner)
         
     | 
| 
      
 172 
     | 
    
         
            +
                  c_compiler = options[:CC]
         
     | 
| 
       178 
173 
     | 
    
         
             
                  cflags = options[:CFLAGS]
         
     | 
| 
       179 
     | 
    
         
            -
                  cxxflags = options[:CXXFLAGS]
         
     | 
| 
       180 
174 
     | 
    
         
             
                  cflags += " -fPIC #{includes}"
         
     | 
| 
       181 
     | 
    
         
            -
                  cxxflags += " -fPIC #{includes}"
         
     | 
| 
       182 
175 
     | 
    
         
             
                  cflags += " -DHAVE_NARRAY_H" if narray_path
         
     | 
| 
       183 
     | 
    
         
            -
                   
     | 
| 
       184 
     | 
    
         
            -
             
     | 
| 
      
 176 
     | 
    
         
            +
                  if options[:openmp] and @lang == C then
         
     | 
| 
      
 177 
     | 
    
         
            +
                      openmp_cflags = get_openmp_flags(c_compiler)
         
     | 
| 
      
 178 
     | 
    
         
            +
                      raise "unkwown openmp flags for: #{c_compiler}" if not openmp_cflags
         
     | 
| 
      
 179 
     | 
    
         
            +
                      cflags += " #{openmp_cflags}"
         
     | 
| 
      
 180 
     | 
    
         
            +
                  end
         
     | 
| 
       185 
181 
     | 
    
         | 
| 
      
 182 
     | 
    
         
            +
                  rule '.o' => '.c' do |t|
         
     | 
| 
      
 183 
     | 
    
         
            +
                    c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
         
     | 
| 
      
 184 
     | 
    
         
            +
                    runner.call(t, c_call_string)
         
     | 
| 
      
 185 
     | 
    
         
            +
                  end
         
     | 
| 
      
 186 
     | 
    
         
            +
                end
         
     | 
| 
      
 187 
     | 
    
         
            +
             
     | 
| 
      
 188 
     | 
    
         
            +
                def setup_cxx_compiler(options, includes, runner)
         
     | 
| 
      
 189 
     | 
    
         
            +
                  cxx_compiler = options[:CXX]
         
     | 
| 
      
 190 
     | 
    
         
            +
                  cxxflags = options[:CXXFLAGS]
         
     | 
| 
      
 191 
     | 
    
         
            +
                  cxxflags += " -fPIC #{includes}"
         
     | 
| 
      
 192 
     | 
    
         
            +
                  if options[:openmp] and @lang == C then
         
     | 
| 
      
 193 
     | 
    
         
            +
                      openmp_cxxflags = get_openmp_flags(cxx_compiler)
         
     | 
| 
      
 194 
     | 
    
         
            +
                      raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxxflags
         
     | 
| 
      
 195 
     | 
    
         
            +
                      cxxflags += " #{openmp_cxxflags}"
         
     | 
| 
      
 196 
     | 
    
         
            +
                  end
         
     | 
| 
      
 197 
     | 
    
         
            +
             
     | 
| 
      
 198 
     | 
    
         
            +
                  rule '.o' => '.cpp' do |t|
         
     | 
| 
      
 199 
     | 
    
         
            +
                    cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
         
     | 
| 
      
 200 
     | 
    
         
            +
                    runner.call(t, cxx_call_string)
         
     | 
| 
      
 201 
     | 
    
         
            +
                  end
         
     | 
| 
      
 202 
     | 
    
         
            +
                end
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
      
 204 
     | 
    
         
            +
                def setup_fortran_compiler(options, runner)
         
     | 
| 
      
 205 
     | 
    
         
            +
                  f_compiler = options[:FC]
         
     | 
| 
      
 206 
     | 
    
         
            +
                  fcflags = options[:FCFLAGS]
         
     | 
| 
      
 207 
     | 
    
         
            +
                  fcflags += " -fPIC"
         
     | 
| 
      
 208 
     | 
    
         
            +
                  fcflags += " -fno-second-underscore" if f_compiler == 'g95'
         
     | 
| 
      
 209 
     | 
    
         
            +
                  if options[:openmp] and @lang == FORTRAN then
         
     | 
| 
      
 210 
     | 
    
         
            +
                      openmp_fcflags = get_openmp_flags(f_compiler)
         
     | 
| 
      
 211 
     | 
    
         
            +
                      raise "unkwown openmp flags for: #{f_compiler}" if not openmp_fcflags
         
     | 
| 
      
 212 
     | 
    
         
            +
                      fcflags += " #{openmp_fcflags}"
         
     | 
| 
      
 213 
     | 
    
         
            +
                  end
         
     | 
| 
      
 214 
     | 
    
         
            +
             
     | 
| 
      
 215 
     | 
    
         
            +
                  rule '.o' => '.f90' do |t|
         
     | 
| 
      
 216 
     | 
    
         
            +
                    f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
         
     | 
| 
      
 217 
     | 
    
         
            +
                    runner.call(t, f_call_string)
         
     | 
| 
      
 218 
     | 
    
         
            +
                  end
         
     | 
| 
      
 219 
     | 
    
         
            +
                end
         
     | 
| 
      
 220 
     | 
    
         
            +
             
     | 
| 
      
 221 
     | 
    
         
            +
                def setup_cuda_compiler(options, runner)
         
     | 
| 
      
 222 
     | 
    
         
            +
                  cuda_compiler = options[:NVCC]
         
     | 
| 
      
 223 
     | 
    
         
            +
                  cudaflags = options[:NVCCFLAGS]
         
     | 
| 
      
 224 
     | 
    
         
            +
                  cudaflags += " --compiler-options '-fPIC'"
         
     | 
| 
      
 225 
     | 
    
         
            +
             
     | 
| 
      
 226 
     | 
    
         
            +
                  rule '.o' => '.cu' do |t|
         
     | 
| 
      
 227 
     | 
    
         
            +
                    cuda_call_string = "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}"
         
     | 
| 
      
 228 
     | 
    
         
            +
                    runner.call(t, cuda_call_string)
         
     | 
| 
      
 229 
     | 
    
         
            +
                  end
         
     | 
| 
      
 230 
     | 
    
         
            +
                end
         
     | 
| 
      
 231 
     | 
    
         
            +
             
     | 
| 
      
 232 
     | 
    
         
            +
                def setup_linker(options)
         
     | 
| 
      
 233 
     | 
    
         
            +
                  ldflags = options[:LDFLAGS]
         
     | 
| 
      
 234 
     | 
    
         
            +
                  ldflags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]} -lrt"
         
     | 
| 
      
 235 
     | 
    
         
            +
                  ldflags += " -lcudart" if @lang == CUDA
         
     | 
| 
      
 236 
     | 
    
         
            +
                  c_compiler = options[:CC]
         
     | 
| 
      
 237 
     | 
    
         
            +
                  c_compiler = "cc" if not c_compiler
         
     | 
| 
      
 238 
     | 
    
         
            +
                  linker = options[:LD]
         
     | 
| 
      
 239 
     | 
    
         
            +
                  linker = c_compiler if not linker
         
     | 
| 
       186 
240 
     | 
    
         
             
                  if options[:openmp] then
         
     | 
| 
       187 
     | 
    
         
            -
                     
     | 
| 
       188 
     | 
    
         
            -
                     
     | 
| 
       189 
     | 
    
         
            -
             
     | 
| 
       190 
     | 
    
         
            -
                      if not openmp_c_flags then
         
     | 
| 
       191 
     | 
    
         
            -
                        keys = BOAST::get_openmp_flags.keys
         
     | 
| 
       192 
     | 
    
         
            -
                        keys.each { |k|
         
     | 
| 
       193 
     | 
    
         
            -
                          openmp_c_flags = BOAST::get_openmp_flags[k] if c_compiler.match(k)
         
     | 
| 
       194 
     | 
    
         
            -
                        }
         
     | 
| 
       195 
     | 
    
         
            -
                      end
         
     | 
| 
       196 
     | 
    
         
            -
                      raise "unkwown openmp flags for: #{c_compiler}" if not openmp_c_flags
         
     | 
| 
       197 
     | 
    
         
            -
                      cflags += " #{openmp_c_flags}"
         
     | 
| 
       198 
     | 
    
         
            -
                      openmp_cxx_flags = BOAST::get_openmp_flags[cxx_compiler]
         
     | 
| 
       199 
     | 
    
         
            -
                      if not openmp_cxx_flags then
         
     | 
| 
       200 
     | 
    
         
            -
                        keys = BOAST::get_openmp_flags.keys
         
     | 
| 
       201 
     | 
    
         
            -
                        keys.each { |k|
         
     | 
| 
       202 
     | 
    
         
            -
                          openmp_cxx_flags = BOAST::get_openmp_flags[k] if cxx_compiler.match(k)
         
     | 
| 
       203 
     | 
    
         
            -
                        }
         
     | 
| 
       204 
     | 
    
         
            -
                      end
         
     | 
| 
       205 
     | 
    
         
            -
                      raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxx_flags
         
     | 
| 
       206 
     | 
    
         
            -
                      cxxflags += " #{openmp_cxx_flags}"
         
     | 
| 
       207 
     | 
    
         
            -
                    when BOAST::FORTRAN
         
     | 
| 
       208 
     | 
    
         
            -
                      openmp_f_flags = BOAST::get_openmp_flags[f_compiler]
         
     | 
| 
       209 
     | 
    
         
            -
                      if not openmp_f_flags then
         
     | 
| 
       210 
     | 
    
         
            -
                        keys = BOAST::get_openmp_flags.keys
         
     | 
| 
       211 
     | 
    
         
            -
                        keys.each { |k|
         
     | 
| 
       212 
     | 
    
         
            -
                          openmp_f_flags = BOAST::get_openmp_flags[k] if f_compiler.match(k)
         
     | 
| 
       213 
     | 
    
         
            -
                        }
         
     | 
| 
       214 
     | 
    
         
            -
                      end
         
     | 
| 
       215 
     | 
    
         
            -
                      raise "unkwown openmp flags for: #{f_compiler}" if not openmp_f_flags
         
     | 
| 
       216 
     | 
    
         
            -
                      fcflags += " #{openmp_f_flags}"
         
     | 
| 
       217 
     | 
    
         
            -
                    end
         
     | 
| 
      
 241 
     | 
    
         
            +
                    openmp_ldflags = get_openmp_flags(linker)
         
     | 
| 
      
 242 
     | 
    
         
            +
                    raise "unkwown openmp flags for: #{linker}" if not openmp_ldflags
         
     | 
| 
      
 243 
     | 
    
         
            +
                    ldflags += " #{openmp_ldflags}"
         
     | 
| 
       218 
244 
     | 
    
         
             
                  end
         
     | 
| 
       219 
245 
     | 
    
         | 
| 
      
 246 
     | 
    
         
            +
                  return [linker, ldflags]
         
     | 
| 
      
 247 
     | 
    
         
            +
                end
         
     | 
| 
      
 248 
     | 
    
         
            +
             
     | 
| 
      
 249 
     | 
    
         
            +
                def setup_compilers(options = {})
         
     | 
| 
      
 250 
     | 
    
         
            +
                  Rake::Task::clear
         
     | 
| 
      
 251 
     | 
    
         
            +
                  verbose = options[:verbose]
         
     | 
| 
      
 252 
     | 
    
         
            +
                  verbose = get_verbose if not verbose
         
     | 
| 
      
 253 
     | 
    
         
            +
                  Rake::verbose(verbose)
         
     | 
| 
      
 254 
     | 
    
         
            +
                  Rake::FileUtilsExt.verbose_flag=verbose
         
     | 
| 
      
 255 
     | 
    
         
            +
             
     | 
| 
      
 256 
     | 
    
         
            +
                  narray_path = get_narray_path
         
     | 
| 
      
 257 
     | 
    
         
            +
                  includes = get_includes(narray_path)
         
     | 
| 
      
 258 
     | 
    
         
            +
             
     | 
| 
       220 
259 
     | 
    
         
             
                  runner = lambda { |t, call_string|
         
     | 
| 
       221 
260 
     | 
    
         
             
                    if verbose then
         
     | 
| 
       222 
261 
     | 
    
         
             
                      sh call_string
         
     | 
| 
         @@ -230,26 +269,13 @@ module BOAST 
     | 
|
| 
       230 
269 
     | 
    
         
             
                    end
         
     | 
| 
       231 
270 
     | 
    
         
             
                  }
         
     | 
| 
       232 
271 
     | 
    
         | 
| 
       233 
     | 
    
         
            -
                   
     | 
| 
       234 
     | 
    
         
            -
             
     | 
| 
       235 
     | 
    
         
            -
             
     | 
| 
       236 
     | 
    
         
            -
                   
     | 
| 
      
 272 
     | 
    
         
            +
                  setup_c_compiler(options, includes, narray_path, runner)
         
     | 
| 
      
 273 
     | 
    
         
            +
                  setup_cxx_compiler(options, includes, runner)
         
     | 
| 
      
 274 
     | 
    
         
            +
                  setup_fortran_compiler(options, runner)
         
     | 
| 
      
 275 
     | 
    
         
            +
                  setup_cuda_compiler(options, runner)
         
     | 
| 
       237 
276 
     | 
    
         | 
| 
       238 
     | 
    
         
            -
                   
     | 
| 
       239 
     | 
    
         
            -
                    f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
         
     | 
| 
       240 
     | 
    
         
            -
                    runner.call(t, f_call_string)
         
     | 
| 
       241 
     | 
    
         
            -
                  end
         
     | 
| 
       242 
     | 
    
         
            -
             
     | 
| 
       243 
     | 
    
         
            -
                  rule '.o' => '.cpp' do |t|
         
     | 
| 
       244 
     | 
    
         
            -
                    cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
         
     | 
| 
       245 
     | 
    
         
            -
                    runner.call(t, cxx_call_string)
         
     | 
| 
       246 
     | 
    
         
            -
                  end
         
     | 
| 
      
 277 
     | 
    
         
            +
                  return setup_linker(options)
         
     | 
| 
       247 
278 
     | 
    
         | 
| 
       248 
     | 
    
         
            -
                  rule '.o' => '.cu' do |t|
         
     | 
| 
       249 
     | 
    
         
            -
                    cuda_call_string = "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}"
         
     | 
| 
       250 
     | 
    
         
            -
                    runner.call(t, cuda_call_string)
         
     | 
| 
       251 
     | 
    
         
            -
                  end
         
     | 
| 
       252 
     | 
    
         
            -
                  return ld_flags
         
     | 
| 
       253 
279 
     | 
    
         
             
                end
         
     | 
| 
       254 
280 
     | 
    
         | 
| 
       255 
281 
     | 
    
         
             
                def select_cl_platform(options)
         
     | 
| 
         @@ -287,8 +313,32 @@ module BOAST 
     | 
|
| 
       287 
313 
     | 
    
         
             
                  return devices.first
         
     | 
| 
       288 
314 
     | 
    
         
             
                end
         
     | 
| 
       289 
315 
     | 
    
         | 
| 
       290 
     | 
    
         
            -
                def  
     | 
| 
      
 316 
     | 
    
         
            +
                # Fills the class-level lookup tables used to wrap scalar kernel
                # arguments in OpenCL value types:
                #   @@opencl_real_types : byte size => OpenCL floating-point class
                #   @@opencl_int_types  : signed? (true/false) => byte size => OpenCL integer class
                def init_opencl_types
                  @@opencl_real_types = Hash[[2, 4, 8].zip([OpenCL::Half, OpenCL::Float, OpenCL::Double])]

                  int_sizes = [1, 2, 4, 8]
                  signed_classes = [OpenCL::Char, OpenCL::Short, OpenCL::Int, OpenCL::Long]
                  unsigned_classes = [OpenCL::UChar, OpenCL::UShort, OpenCL::UInt, OpenCL::ULong]
                  @@opencl_int_types = {
                    true => Hash[int_sizes.zip(signed_classes)],
                    false => Hash[int_sizes.zip(unsigned_classes)]
                  }
                end
         
     | 
| 
      
 338 
     | 
    
         
            +
             
     | 
| 
      
 339 
     | 
    
         
            +
                def init_opencl(options)
         
     | 
| 
       291 
340 
     | 
    
         
             
                  require 'opencl_ruby_ffi'
         
     | 
| 
      
 341 
     | 
    
         
            +
                  init_opencl_types
         
     | 
| 
       292 
342 
     | 
    
         
             
                  device = select_cl_device(options)
         
     | 
| 
       293 
343 
     | 
    
         
             
                  @context = OpenCL::create_context([device])
         
     | 
| 
       294 
344 
     | 
    
         
             
                  program = @context.create_program_with_source([@code.string])
         
     | 
| 
         @@ -299,81 +349,84 @@ module BOAST 
     | 
|
| 
       299 
349 
     | 
    
         
             
                    puts e.to_s
         
     | 
| 
       300 
350 
     | 
    
         
             
                    puts program.build_status
         
     | 
| 
       301 
351 
     | 
    
         
             
                    puts program.build_log
         
     | 
| 
       302 
     | 
    
         
            -
                    if options[:verbose] or  
     | 
| 
      
 352 
     | 
    
         
            +
                    if options[:verbose] or get_verbose then
         
     | 
| 
       303 
353 
     | 
    
         
             
                      puts @code.string
         
     | 
| 
       304 
354 
     | 
    
         
             
                    end
         
     | 
| 
       305 
355 
     | 
    
         
             
                    raise "OpenCL Failed to build #{@procedure.name}"
         
     | 
| 
       306 
356 
     | 
    
         
             
                  end
         
     | 
| 
       307 
     | 
    
         
            -
                  if options[:verbose] or  
     | 
| 
      
 357 
     | 
    
         
            +
                  if options[:verbose] or get_verbose then
         
     | 
| 
       308 
358 
     | 
    
         
             
                    program.build_log.each {|dev,log|
         
     | 
| 
       309 
359 
     | 
    
         
             
                      puts "#{device.name}: #{log}"
         
     | 
| 
       310 
360 
     | 
    
         
             
                    }
         
     | 
| 
       311 
361 
     | 
    
         
             
                  end
         
     | 
| 
       312 
362 
     | 
    
         
             
                  @queue = @context.create_command_queue(device, :properties => OpenCL::CommandQueue::PROFILING_ENABLE)
         
     | 
| 
       313 
363 
     | 
    
         
             
                  @kernel = program.create_kernel(@procedure.name)
         
     | 
| 
       314 
     | 
    
         
            -
                   
     | 
| 
       315 
     | 
    
         
            -
             
     | 
| 
       316 
     | 
    
         
            -
             
     | 
| 
       317 
     | 
    
         
            -
             
     | 
| 
       318 
     | 
    
         
            -
             
     | 
| 
       319 
     | 
    
         
            -
              opts = args.pop if args.length == #{@procedure.parameters.length+1}
         
     | 
| 
       320 
     | 
    
         
            -
              @procedure.parameters.each_index { |i|
         
     | 
| 
       321 
     | 
    
         
            -
                if @procedure.parameters[i].dimension then
         
     | 
| 
       322 
     | 
    
         
            -
                  if @procedure.parameters[i].direction == :in then
         
     | 
| 
      
 364 
     | 
    
         
            +
                  return self
         
     | 
| 
      
 365 
     | 
    
         
            +
                end
         
     | 
| 
      
 366 
     | 
    
         
            +
             
     | 
| 
      
 367 
     | 
    
         
            +
                def create_opencl_array(arg, parameter)
         
     | 
| 
      
 368 
     | 
    
         
            +
                  if parameter.direction == :in then
         
     | 
| 
       323 
369 
     | 
    
         
             
                    flags = OpenCL::Mem::Flags::READ_ONLY
         
     | 
| 
       324 
     | 
    
         
            -
                  elsif  
     | 
| 
      
 370 
     | 
    
         
            +
                  elsif parameter.direction == :out then
         
     | 
| 
       325 
371 
     | 
    
         
             
                    flags = OpenCL::Mem::Flags::WRITE_ONLY
         
     | 
| 
       326 
372 
     | 
    
         
             
                  else
         
     | 
| 
       327 
373 
     | 
    
         
             
                    flags = OpenCL::Mem::Flags::READ_WRITE
         
     | 
| 
       328 
374 
     | 
    
         
             
                  end
         
     | 
| 
       329 
     | 
    
         
            -
                  if  
     | 
| 
       330 
     | 
    
         
            -
                     
     | 
| 
       331 
     | 
    
         
            -
             
     | 
| 
       332 
     | 
    
         
            -
                      @queue.enqueue_write_image( params[i], args[i], :blocking => true )
         
     | 
| 
       333 
     | 
    
         
            -
            #        end
         
     | 
| 
      
 375 
     | 
    
         
            +
                  if parameter.texture then
         
     | 
| 
      
 376 
     | 
    
         
            +
                    param = @context.create_image_2D( OpenCL::ImageFormat::new( OpenCL::ChannelOrder::R, OpenCL::ChannelType::UNORM_INT8 ), arg.size * arg.element_size, 1, :flags => flags )
         
     | 
| 
      
 377 
     | 
    
         
            +
                    @queue.enqueue_write_image( param, arg, :blocking => true )
         
     | 
| 
       334 
378 
     | 
    
         
             
                  else
         
     | 
| 
       335 
     | 
    
         
            -
                     
     | 
| 
       336 
     | 
    
         
            -
             
     | 
| 
       337 
     | 
    
         
            -
                      @queue.enqueue_write_buffer( params[i], args[i], :blocking => true )
         
     | 
| 
       338 
     | 
    
         
            -
            #        end
         
     | 
| 
      
 379 
     | 
    
         
            +
                    param = @context.create_buffer( arg.size * arg.element_size, :flags => flags )
         
     | 
| 
      
 380 
     | 
    
         
            +
                    @queue.enqueue_write_buffer( param, arg, :blocking => true )
         
     | 
| 
       339 
381 
     | 
    
         
             
                  end
         
     | 
| 
       340 
     | 
    
         
            -
             
     | 
| 
       341 
     | 
    
         
            -
             
     | 
| 
       342 
     | 
    
         
            -
             
     | 
| 
       343 
     | 
    
         
            -
             
     | 
| 
       344 
     | 
    
         
            -
             
     | 
| 
       345 
     | 
    
         
            -
             
     | 
| 
       346 
     | 
    
         
            -
             
     | 
| 
       347 
     | 
    
         
            -
             
     | 
| 
       348 
     | 
    
         
            -
                      params[i] = OpenCL::Short::new(args[i]) if @procedure.parameters[i].type.size == 2
         
     | 
| 
       349 
     | 
    
         
            -
                      params[i] = OpenCL::Int::new(args[i]) if @procedure.parameters[i].type.size == 4
         
     | 
| 
       350 
     | 
    
         
            -
                      params[i] = OpenCL::Long::new(args[i]) if @procedure.parameters[i].type.size == 8
         
     | 
| 
       351 
     | 
    
         
            -
                    else
         
     | 
| 
       352 
     | 
    
         
            -
                      params[i] = OpenCL::UChar::new(args[i]) if @procedure.parameters[i].type.size == 1
         
     | 
| 
       353 
     | 
    
         
            -
                      params[i] = OpenCL::UShort::new(args[i]) if @procedure.parameters[i].type.size == 2
         
     | 
| 
       354 
     | 
    
         
            -
                      params[i] = OpenCL::UInt::new(args[i]) if @procedure.parameters[i].type.size == 4
         
     | 
| 
       355 
     | 
    
         
            -
                      params[i] = OpenCL::ULong::new(args[i]) if @procedure.parameters[i].type.size == 8
         
     | 
| 
       356 
     | 
    
         
            -
                    end
         
     | 
| 
      
 382 
     | 
    
         
            +
                  return param
         
     | 
| 
      
 383 
     | 
    
         
            +
                end
         
     | 
| 
      
 384 
     | 
    
         
            +
             
     | 
| 
      
 385 
     | 
    
         
            +
                # Wraps a scalar argument in the OpenCL value class matching the
                # declared type of +parameter+ (looked up by size, and by signedness
                # for integers). Arguments whose type is neither Real nor Int are
                # returned unchanged.
                def create_opencl_scalar(arg, parameter)
                  case parameter.type
                  when Real
                    return @@opencl_real_types[parameter.type.size]::new(arg)
                  when Int
                    return @@opencl_int_types[parameter.type.signed][parameter.type.size]::new(arg)
                  else
                    return arg
                  end
                end
         
     | 
| 
      
 394 
     | 
    
         
            +
             
     | 
| 
      
 395 
     | 
    
         
            +
                # Converts one host argument into the value handed to the kernel:
                # parameters with a dimension go through create_opencl_array,
                # everything else through create_opencl_scalar.
                def create_opencl_param(arg, parameter)
                  return create_opencl_scalar(arg, parameter) unless parameter.dimension
                  return create_opencl_array(arg, parameter)
                end
         
     | 
| 
      
 402 
     | 
    
         
            +
             
     | 
| 
      
 403 
     | 
    
         
            +
                # Copies a device-side object (+param+) back into the host
                # argument (+arg+) with a blocking read on @queue, using the image
                # read for texture parameters and the buffer read otherwise.
                def read_opencl_param(param, arg, parameter)
                  reader = parameter.texture ? :enqueue_read_image : :enqueue_read_buffer
                  @queue.public_send( reader, param, arg, :blocking => true )
                end
         
     | 
| 
      
 410 
     | 
    
         
            +
             
     | 
| 
      
 411 
     | 
    
         
            +
                def build_opencl(options)
         
     | 
| 
      
 412 
     | 
    
         
            +
                  init_opencl(options)
         
     | 
| 
      
 413 
     | 
    
         
            +
             
     | 
| 
      
 414 
     | 
    
         
            +
                  run_method = <<EOF
         
     | 
| 
      
 415 
     | 
    
         
            +
            def self.run(*args)
         
     | 
| 
      
 416 
     | 
    
         
            +
              raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
         
     | 
| 
      
 417 
     | 
    
         
            +
              params = []
         
     | 
| 
      
 418 
     | 
    
         
            +
              opts = {}
         
     | 
| 
      
 419 
     | 
    
         
            +
              opts = args.pop if args.length == #{@procedure.parameters.length+1}
         
     | 
| 
      
 420 
     | 
    
         
            +
              @procedure.parameters.each_index { |i|
         
     | 
| 
      
 421 
     | 
    
         
            +
                params[i] = create_opencl_param( args[i], @procedure.parameters[i] )
         
     | 
| 
       361 
422 
     | 
    
         
             
              }
         
     | 
| 
       362 
423 
     | 
    
         
             
              params.each_index{ |i|
         
     | 
| 
       363 
424 
     | 
    
         
             
                @kernel.set_arg(i, params[i])
         
     | 
| 
       364 
425 
     | 
    
         
             
              }
         
     | 
| 
       365 
426 
     | 
    
         
             
              event = @queue.enqueue_NDrange_kernel(@kernel, opts[:global_work_size], :local_work_size => opts[:local_work_size])
         
     | 
| 
       366 
427 
     | 
    
         
             
              @procedure.parameters.each_index { |i|
         
     | 
| 
       367 
     | 
    
         
            -
                if @procedure.parameters[i].dimension then
         
     | 
| 
       368 
     | 
    
         
            -
                   
     | 
| 
       369 
     | 
    
         
            -
                    if @procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out then
         
     | 
| 
       370 
     | 
    
         
            -
                      @queue.enqueue_read_image( params[i], args[i], :blocking => true )
         
     | 
| 
       371 
     | 
    
         
            -
                    end
         
     | 
| 
       372 
     | 
    
         
            -
                  else
         
     | 
| 
       373 
     | 
    
         
            -
                    if @procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out then
         
     | 
| 
       374 
     | 
    
         
            -
                      @queue.enqueue_read_buffer( params[i], args[i], :blocking => true )
         
     | 
| 
       375 
     | 
    
         
            -
                    end
         
     | 
| 
       376 
     | 
    
         
            -
                  end
         
     | 
| 
      
 428 
     | 
    
         
            +
                if @procedure.parameters[i].dimension and (@procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out) then
         
     | 
| 
      
 429 
     | 
    
         
            +
                  read_opencl_param( params[i], args[i], @procedure.parameters[i] )
         
     | 
| 
       377 
430 
     | 
    
         
             
                end
         
     | 
| 
       378 
431 
     | 
    
         
             
              }
         
     | 
| 
       379 
432 
     | 
    
         
             
              result = {}
         
     | 
| 
         @@ -387,75 +440,90 @@ EOF 
     | 
|
| 
       387 
440 
     | 
    
         
             
                return self
         
     | 
| 
       388 
441 
     | 
    
         
             
                end
         
     | 
| 
       389 
442 
     | 
    
         | 
| 
       390 
     | 
    
         
            -
                 
     | 
| 
       391 
     | 
    
         
            -
                   
     | 
| 
       392 
     | 
    
         
            -
                   
     | 
| 
       393 
     | 
    
         
            -
                   
     | 
| 
       394 
     | 
    
         
            -
             
     | 
| 
       395 
     | 
    
         
            -
                  extension = ".c" if @lang == BOAST::C
         
     | 
| 
       396 
     | 
    
         
            -
                  extension = ".cu" if @lang == BOAST::CUDA
         
     | 
| 
       397 
     | 
    
         
            -
                  extension = ".f90" if @lang == BOAST::FORTRAN
         
     | 
| 
       398 
     | 
    
         
            -
            #temporary
         
     | 
| 
       399 
     | 
    
         
            -
                  c_compiler = compiler_options[:CC]
         
     | 
| 
       400 
     | 
    
         
            -
                  c_compiler = "cc" if not c_compiler
         
     | 
| 
       401 
     | 
    
         
            -
                  linker = compiler_options[:LD]
         
     | 
| 
       402 
     | 
    
         
            -
                  linker = c_compiler if not linker
         
     | 
| 
       403 
     | 
    
         
            -
            #end temporary
         
     | 
| 
       404 
     | 
    
         
            -
                  if options[:openmp] then
         
     | 
| 
       405 
     | 
    
         
            -
                    openmp_ld_flags = BOAST::get_openmp_flags[linker]
         
     | 
| 
       406 
     | 
    
         
            -
                      if not openmp_ld_flags then
         
     | 
| 
       407 
     | 
    
         
            -
                        keys = BOAST::get_openmp_flags.keys
         
     | 
| 
       408 
     | 
    
         
            -
                        keys.each { |k|
         
     | 
| 
       409 
     | 
    
         
            -
                          openmp_ld_flags = BOAST::get_openmp_flags[k] if linker.match(k)
         
     | 
| 
       410 
     | 
    
         
            -
                        }
         
     | 
| 
       411 
     | 
    
         
            -
                      end
         
     | 
| 
       412 
     | 
    
         
            -
                      raise "unkwown openmp flags for: #{linker}" if not openmp_ld_flags
         
     | 
| 
       413 
     | 
    
         
            -
                      ldflags += " #{openmp_ld_flags}"
         
     | 
| 
       414 
     | 
    
         
            -
                  end
         
     | 
| 
       415 
     | 
    
         
            -
                  source_file = Tempfile::new([@procedure.name,extension])
         
     | 
| 
       416 
     | 
    
         
            -
                  path = source_file.path
         
     | 
| 
       417 
     | 
    
         
            -
                  target = path.chomp(File::extname(path))+".o"
         
     | 
| 
       418 
     | 
    
         
            -
                  fill_code(source_file)
         
     | 
| 
       419 
     | 
    
         
            -
                  source_file.close
         
     | 
| 
      
 443 
     | 
    
         
            +
                # Source-file suffix used for each target language constant.
                @@extensions = { C => ".c", CUDA => ".cu", FORTRAN => ".f90" }
         
     | 
| 
       420 
448 
     | 
    
         | 
| 
       421 
     | 
    
         
            -
             
     | 
| 
       422 
     | 
    
         
            -
                   
     | 
| 
       423 
     | 
    
         
            -
                   
     | 
| 
      
 449 
     | 
    
         
            +
                # Dumps the stored binary of every sub-kernel in @kernels into its
                # own temporary ".o" file.
                #
                # Returns an Array with one closed Tempfile per kernel, in @kernels
                # order. (Previously the method ended on the +each+ call and so
                # returned @kernels itself instead of the files it had created.)
                # The Tempfile objects, not just their paths, are returned because
                # per the Tempfile documentation the file is removed once the object
                # is garbage collected.
                def get_sub_kernels
                  return @kernels.collect { |kernel|
                    kernel_file = Tempfile::new([kernel.procedure.name,".o"])
                    # Object code is binary data: avoid any text-mode translation.
                    kernel_file.binmode
                    kernel.binary.rewind
                    kernel_file.write( kernel.binary.read )
                    kernel_file.close
                    kernel_file
                  }
                end
         
     | 
| 
      
 459 
     | 
    
         
            +
             
     | 
| 
      
 460 
     | 
    
         
            +
                # Generates the C source of the Ruby extension module that wraps the
                # kernel whose source file is +path+.
                #
                # The module file is written next to +path+ and named
                # "Mod_<basename>.c", where <basename> is the file name of +path+
                # without its extension and with "-" replaced by "_".
                #
                # While the module is generated the current language is switched to C
                # and the current output to the module file; both are restored (and
                # the file is closed) even if generation raises.
                #
                # Returns [module_file_name, module_name].
                def create_module_source(path)
                  previous_lang = get_lang
                  previous_output = get_output
                  directory, base_name = File::split(path.chomp(File::extname(path)))
                  module_name = "Mod_" + base_name.gsub("-","_")
                  module_file_name = directory + "/" + module_name + ".c"
                  begin
                    set_lang( C )
                    # Block form guarantees the file is closed on every exit path.
                    File::open(module_file_name,"w+") { |module_file|
                      set_output( module_file )
                      fill_module(module_file, module_name)
                    }
                  ensure
                    set_lang( previous_lang )
                    set_output( previous_output )
                  end
                  return [module_file_name, module_name]
                end
         
     | 
| 
      
 476 
     | 
    
         
            +
             
     | 
| 
      
 477 
     | 
    
         
            +
                # Loads the compiled object file +target+ into memory and stores it
                # in @binary as a StringIO (position left at the end of the data, so
                # readers must rewind first).
                #
                # The file is opened with the block form so the handle is released
                # even if reading fails, and @binary is only replaced once the whole
                # file has been read. Returns nil.
                def save_binary(target)
                  data = File::open(target,"rb") { |f| f.read }
                  @binary = StringIO::new
                  @binary.write( data )
                  return nil
                end
         
     | 
| 
      
 483 
     | 
    
         
            +
             
     | 
| 
      
 484 
     | 
    
         
            +
                # Writes the kernel code to a temporary source file whose suffix is
                # taken from @@extensions for the kernel's language.
                #
                # Returns [source_file, path, target]: the closed Tempfile, its path,
                # and the path of the ".o" object file to be built from it (same
                # location, extension swapped).
                def create_source
                  source_file = Tempfile::new([@procedure.name, @@extensions[@lang]])
                  source_path = source_file.path
                  fill_code(source_file)
                  source_file.close
                  object_path = "#{source_path.chomp(File::extname(source_path))}.o"
                  return [source_file, source_path, object_path]
                end
         
     | 
| 
      
 493 
     | 
    
         
            +
             
     | 
| 
      
 494 
     | 
    
         
            +
                def build(options = {})
         
     | 
| 
      
 495 
     | 
    
         
            +
                  compiler_options = BOAST::get_compiler_options
         
     | 
| 
      
 496 
     | 
    
         
            +
                  compiler_options.update(options)
         
     | 
| 
      
 497 
     | 
    
         
            +
                  return build_opencl(compiler_options) if @lang == CL
         
     | 
| 
      
 498 
     | 
    
         
            +
             
     | 
| 
      
 499 
     | 
    
         
            +
                  linker, ldflags = setup_compilers(compiler_options)
         
     | 
| 
      
 500 
     | 
    
         
            +
             
     | 
| 
      
 501 
     | 
    
         
            +
                  extension = @@extensions[@lang]
         
     | 
| 
      
 502 
     | 
    
         
            +
             
     | 
| 
      
 503 
     | 
    
         
            +
                  source_file, path, target = create_source
         
     | 
| 
      
 504 
     | 
    
         
            +
             
     | 
| 
      
 505 
     | 
    
         
            +
                  module_file_name, module_name = create_module_source(path)
         
     | 
| 
      
 506 
     | 
    
         
            +
             
     | 
| 
       434 
507 
     | 
    
         
             
                  module_target = module_file_name.chomp(File::extname(module_file_name))+".o"
         
     | 
| 
       435 
508 
     | 
    
         
             
                  module_final = module_file_name.chomp(File::extname(module_file_name))+".so"
         
     | 
| 
       436 
     | 
    
         
            -
             
     | 
| 
       437 
     | 
    
         
            -
             
     | 
| 
       438 
     | 
    
         
            -
             
     | 
| 
       439 
     | 
    
         
            -
             
     | 
| 
       440 
     | 
    
         
            -
                    kernel_file.write( kernel.binary.read )
         
     | 
| 
       441 
     | 
    
         
            -
                    kernel_file.close
         
     | 
| 
       442 
     | 
    
         
            -
                    kernel_files.push(kernel_file)
         
     | 
| 
       443 
     | 
    
         
            -
                  }
         
     | 
| 
      
 509 
     | 
    
         
            +
             
     | 
| 
      
 510 
     | 
    
         
            +
             
     | 
| 
      
 511 
     | 
    
         
            +
                  kernel_files = get_sub_kernels
         
     | 
| 
      
 512 
     | 
    
         
            +
             
     | 
| 
       444 
513 
     | 
    
         
             
                  file module_final => [module_target, target] do
         
     | 
| 
       445 
514 
     | 
    
         
             
                    #puts "#{linker} -shared -o #{module_final} #{module_target} #{target} #{kernel_files.join(" ")} -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
         
     | 
| 
       446 
515 
     | 
    
         
             
                    sh "#{linker} -shared -o #{module_final} #{module_target} #{target} #{(kernel_files.collect {|f| f.path}).join(" ")} -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
         
     | 
| 
       447 
516 
     | 
    
         
             
                  end
         
     | 
| 
       448 
517 
     | 
    
         
             
                  Rake::Task[module_final].invoke
         
     | 
| 
      
 518 
     | 
    
         
            +
             
     | 
| 
       449 
519 
     | 
    
         
             
                  require(module_final)
         
     | 
| 
       450 
520 
     | 
    
         
             
                  eval "self.extend(#{module_name})"
         
     | 
| 
       451 
     | 
    
         
            -
             
     | 
| 
       452 
     | 
    
         
            -
                   
     | 
| 
       453 
     | 
    
         
            -
             
     | 
| 
       454 
     | 
    
         
            -
                   
     | 
| 
       455 
     | 
    
         
            -
             
     | 
| 
       456 
     | 
    
         
            -
                   
     | 
| 
       457 
     | 
    
         
            -
                  File.unlink(module_file_name)
         
     | 
| 
       458 
     | 
    
         
            -
                  File.unlink(module_final)
         
     | 
| 
      
 521 
     | 
    
         
            +
             
     | 
| 
      
 522 
     | 
    
         
            +
                  save_binary(target)
         
     | 
| 
      
 523 
     | 
    
         
            +
             
     | 
| 
      
 524 
     | 
    
         
            +
                  [target, module_target, module_file_name, module_final].each { |fn|
         
     | 
| 
      
 525 
     | 
    
         
            +
                    File::unlink(fn)
         
     | 
| 
      
 526 
     | 
    
         
            +
                  }
         
     | 
| 
       459 
527 
     | 
    
         
             
                  kernel_files.each { |f|
         
     | 
| 
       460 
528 
     | 
    
         
             
                    f.unlink
         
     | 
| 
       461 
529 
     | 
    
         
             
                  }
         
     | 
| 
         @@ -464,13 +532,13 @@ EOF 
     | 
|
| 
       464 
532 
     | 
    
         | 
| 
       465 
533 
     | 
    
         
             
                def fill_code(source_file)
         
     | 
| 
       466 
534 
     | 
    
         
             
                  @code.rewind
         
     | 
| 
       467 
     | 
    
         
            -
                  source_file.puts "#include <inttypes.h>" if @lang ==  
     | 
| 
       468 
     | 
    
         
            -
                  source_file.puts "#include <cuda.h>" if @lang ==  
     | 
| 
      
 535 
     | 
    
         
            +
                  source_file.puts "#include <inttypes.h>" if @lang == C or @lang == CUDA
         
     | 
| 
      
 536 
     | 
    
         
            +
                  source_file.puts "#include <cuda.h>" if @lang == CUDA
         
     | 
| 
       469 
537 
     | 
    
         
             
                  source_file.write @code.read
         
     | 
| 
       470 
     | 
    
         
            -
                  if @lang ==  
     | 
| 
      
 538 
     | 
    
         
            +
                  if @lang == CUDA then
         
     | 
| 
       471 
539 
     | 
    
         
             
                    source_file.write <<EOF
         
     | 
| 
       472 
540 
     | 
    
         
             
            extern "C" {
         
     | 
| 
       473 
     | 
    
         
            -
              #{@procedure.boast_header_s( 
     | 
| 
      
 541 
     | 
    
         
            +
              #{@procedure.boast_header_s(CUDA)}{
         
     | 
| 
       474 
542 
     | 
    
         
             
                dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
         
     | 
| 
       475 
543 
     | 
    
         
             
                dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
         
     | 
| 
       476 
544 
     | 
    
         
             
                cudaEvent_t __start, __stop;
         
     | 
| 
         @@ -490,8 +558,8 @@ EOF 
     | 
|
| 
       490 
558 
     | 
    
         
             
                  @code.rewind
         
     | 
| 
       491 
559 
     | 
    
         
             
                end
         
     | 
| 
       492 
560 
     | 
    
         | 
| 
       493 
     | 
    
         
            -
                def  
     | 
| 
       494 
     | 
    
         
            -
                  module_file. 
     | 
| 
      
 561 
     | 
    
         
            +
                def module_header(module_file)
         
     | 
| 
      
 562 
     | 
    
         
            +
                  module_file.print <<EOF
         
     | 
| 
       495 
563 
     | 
    
         
             
            #include "ruby.h"
         
     | 
| 
       496 
564 
     | 
    
         
             
            #include <inttypes.h>
         
     | 
| 
       497 
565 
     | 
    
         
             
            #include <time.h>
         
     | 
| 
         @@ -499,11 +567,13 @@ EOF 
     | 
|
| 
       499 
567 
     | 
    
         
             
            #include "narray.h"
         
     | 
| 
       500 
568 
     | 
    
         
             
            #endif
         
     | 
| 
       501 
569 
     | 
    
         
             
            EOF
         
     | 
| 
       502 
     | 
    
         
            -
                  if( @lang ==  
     | 
| 
      
 570 
     | 
    
         
            +
                  if( @lang == CUDA ) then
         
     | 
| 
       503 
571 
     | 
    
         
             
                    module_file.print "#include <cuda_runtime.h>\n"
         
     | 
| 
       504 
572 
     | 
    
         
             
                  end
         
     | 
| 
       505 
     | 
    
         
            -
             
     | 
| 
       506 
     | 
    
         
            -
             
     | 
| 
      
 573 
     | 
    
         
            +
                end
         
     | 
| 
      
 574 
     | 
    
         
            +
             
     | 
| 
      
 575 
     | 
    
         
            +
                def module_preamble(module_file, module_name)
         
     | 
| 
      
 576 
     | 
    
         
            +
                  module_file.print <<EOF
         
     | 
| 
       507 
577 
     | 
    
         
             
            VALUE #{module_name} = Qnil;
         
     | 
| 
       508 
578 
     | 
    
         
             
            void Init_#{module_name}();
         
     | 
| 
       509 
579 
     | 
    
         
             
            VALUE method_run(int argc, VALUE *argv, VALUE self);
         
     | 
| 
         @@ -511,46 +581,37 @@ void Init_#{module_name}() { 
     | 
|
| 
       511 
581 
     | 
    
         
             
              #{module_name} = rb_define_module("#{module_name}");
         
     | 
| 
       512 
582 
     | 
    
         
             
              rb_define_method(#{module_name}, "run", method_run, -1);
         
     | 
| 
       513 
583 
     | 
    
         
             
            }
         
     | 
| 
       514 
     | 
    
         
            -
            VALUE method_run(int argc, VALUE *argv, VALUE self) {
         
     | 
| 
       515 
584 
     | 
    
         
             
            EOF
         
     | 
| 
       516 
     | 
    
         
            -
             
     | 
| 
       517 
     | 
    
         
            -
             
     | 
| 
      
 585 
     | 
    
         
            +
                end
         
     | 
| 
      
 586 
     | 
    
         
            +
             
     | 
| 
      
 587 
     | 
    
         
            +
                # Emits the C argument-count check that opens the generated method_run.
                #
                # For CUDA the generated check accepts one extra trailing argument
                # (argc may be the parameter count or that count plus one); for every
                # other language argc must equal the parameter count exactly. On a
                # mismatch the generated code raises a Ruby ArgumentError.
                #
                # module_file - IO-like object receiving the generated C text.
                def check_args(module_file)
                  expected = @procedure.parameters.length
                  condition =
                    if @lang == CUDA
                      "argc < #{expected} || argc > #{expected + 1}"
                    else
                      "argc != #{expected}"
                    end
                  guard = "  if( #{condition} )\n"
                  failure = "    rb_raise(rb_eArgError, \"wrong number of arguments for #{@procedure.name} (%d for #{expected})\", argc);\n"
                  module_file.print(guard + failure)
                end
         
     | 
| 
      
 600 
     | 
    
         
            +
             
     | 
| 
      
 601 
     | 
    
         
            +
                def get_params_value(module_file, argv, rb_ptr)
         
     | 
| 
       541 
602 
     | 
    
         
             
                  @procedure.parameters.each_index do |i|
         
     | 
| 
       542 
603 
     | 
    
         
             
                    param = @procedure.parameters[i]
         
     | 
| 
       543 
604 
     | 
    
         
             
                    if not param.dimension then
         
     | 
| 
       544 
605 
     | 
    
         
             
                      case param.type
         
     | 
| 
       545 
     | 
    
         
            -
             
     | 
| 
       546 
     | 
    
         
            -
             
     | 
| 
       547 
     | 
    
         
            -
             
     | 
| 
       548 
     | 
    
         
            -
             
     | 
| 
       549 
     | 
    
         
            -
             
     | 
| 
      
 606 
     | 
    
         
            +
                      when Int 
         
     | 
| 
      
 607 
     | 
    
         
            +
                        (param === FuncCall::new("NUM2INT", argv[i])).pr if param.type.size == 4
         
     | 
| 
      
 608 
     | 
    
         
            +
                        (param === FuncCall::new("NUM2LONG", argv[i])).pr if param.type.size == 8
         
     | 
| 
      
 609 
     | 
    
         
            +
                      when Real
         
     | 
| 
      
 610 
     | 
    
         
            +
                        (param === FuncCall::new("NUM2DBL", argv[i])).pr
         
     | 
| 
       550 
611 
     | 
    
         
             
                      end
         
     | 
| 
       551 
612 
     | 
    
         
             
                    else
         
     | 
| 
       552 
     | 
    
         
            -
                      (rb_ptr === argv[i]). 
     | 
| 
       553 
     | 
    
         
            -
                      if @lang ==  
     | 
| 
      
 613 
     | 
    
         
            +
                      (rb_ptr === argv[i]).pr
         
     | 
| 
      
 614 
     | 
    
         
            +
                      if @lang == CUDA then
         
     | 
| 
       554 
615 
     | 
    
         
             
                        module_file.print <<EOF
         
     | 
| 
       555 
616 
     | 
    
         
             
              if ( IsNArray(rb_ptr) ) {
         
     | 
| 
       556 
617 
     | 
    
         
             
                struct NARRAY *n_ary;
         
     | 
| 
         @@ -558,16 +619,9 @@ EOF 
     | 
|
| 
       558 
619 
     | 
    
         
             
                Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
         
     | 
| 
       559 
620 
     | 
    
         
             
                array_size = n_ary->total * na_sizeof[n_ary->type];
         
     | 
| 
       560 
621 
     | 
    
         
             
                cudaMalloc( (void **) &#{param.name}, array_size);
         
     | 
| 
       561 
     | 
    
         
            -
            EOF
         
     | 
| 
       562 
     | 
    
         
            -
            #            if param.direction == :in then
         
     | 
| 
       563 
     | 
    
         
            -
                        module_file.print <<EOF
         
     | 
| 
       564 
622 
     | 
    
         
             
                cudaMemcpy(#{param.name}, (void *) n_ary->ptr, array_size, cudaMemcpyHostToDevice);
         
     | 
| 
       565 
     | 
    
         
            -
            EOF
         
     | 
| 
       566 
     | 
    
         
            -
            #            end
         
     | 
| 
       567 
     | 
    
         
            -
                        module_file.print <<EOF
         
     | 
| 
       568 
623 
     | 
    
         
             
              } else
         
     | 
| 
       569 
624 
     | 
    
         
             
                rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
         
     | 
| 
       570 
     | 
    
         
            -
              
         
     | 
| 
       571 
625 
     | 
    
         
             
            EOF
         
     | 
| 
       572 
626 
     | 
    
         
             
                      else
         
     | 
| 
       573 
627 
     | 
    
         
             
                        module_file.print <<EOF
         
     | 
| 
         @@ -583,8 +637,26 @@ EOF 
     | 
|
| 
       583 
637 
     | 
    
         
             
                      end
         
     | 
| 
       584 
638 
     | 
    
         
             
                    end
         
     | 
| 
       585 
639 
     | 
    
         
             
                  end
         
     | 
| 
       586 
     | 
    
         
            -
             
     | 
| 
       587 
     | 
    
         
            -
             
     | 
| 
      
 640 
     | 
    
         
            +
                end
         
     | 
| 
      
 641 
     | 
    
         
            +
             
     | 
| 
      
 642 
     | 
    
         
            +
                # Declares the local variables used by the generated method_run wrapper.
                #
                # Each procedure parameter is declared through a copy whose constant
                # and direction attributes are cleared, so the declaration is made
                # without those qualifiers (presumably leaving the procedure's own
                # parameter untouched; depends on what #copy returns). The #decl call
                # presumably writes to the current BOAST output; confirm against
                # Variable#decl.
                #
                # Afterwards the fixed bookkeeping locals are written to module_file:
                # an optional `ret` for the procedure's return value, the `stats`
                # hash, the two timespec values and the `duration` counter.
                #
                # module_file - IO-like object receiving the generated C text.
                def decl_module_params(module_file)
                  @procedure.parameters.each do |param|
                    local = param.copy
                    local.constant = nil
                    local.direction = nil
                    local.decl
                  end

                  returned = @procedure.properties[:return]
                  module_file.print "  #{returned.type.decl} ret;\n" if returned

                  module_file.print(
                    "  VALUE stats = rb_hash_new();\n" +
                    "  struct timespec start, stop;\n" +
                    "  unsigned long long int duration;\n"
                  )
                end
         
     | 
| 
      
 654 
     | 
    
         
            +
             
     | 
| 
      
 655 
     | 
    
         
            +
                def get_cuda_launch_bounds(module_file)
         
     | 
| 
      
 656 
     | 
    
         
            +
                  module_file.print <<EOF
         
     | 
| 
      
 657 
     | 
    
         
            +
              VALUE rb_opts;
         
     | 
| 
      
 658 
     | 
    
         
            +
              size_t block_size[3] = {1,1,1};
         
     | 
| 
      
 659 
     | 
    
         
            +
              size_t block_number[3] = {1,1,1};
         
     | 
| 
       588 
660 
     | 
    
         
             
              if( argc == #{@procedure.parameters.length + 1} ) {
         
     | 
| 
       589 
661 
     | 
    
         
             
                rb_opts = argv[argc -1];
         
     | 
| 
       590 
662 
     | 
    
         
             
                if ( rb_opts != Qnil ) {
         
     | 
| 
         @@ -615,23 +687,20 @@ EOF 
     | 
|
| 
       615 
687 
     | 
    
         
             
                }
         
     | 
| 
       616 
688 
     | 
    
         
             
              }
         
     | 
| 
       617 
689 
     | 
    
         
             
            EOF
         
     | 
| 
       618 
     | 
    
         
            -
             
     | 
| 
       619 
     | 
    
         
            -
             
     | 
| 
       620 
     | 
    
         
            -
             
     | 
| 
       621 
     | 
    
         
            -
                   
     | 
| 
       622 
     | 
    
         
            -
                  module_file.print "  unsigned long long int duration;\n"
         
     | 
| 
       623 
     | 
    
         
            -
                  module_file.print "  clock_gettime(CLOCK_REALTIME, &start);\n"
         
     | 
| 
       624 
     | 
    
         
            -
                  if @lang == BOAST::CUDA then
         
     | 
| 
      
 690 
     | 
    
         
            +
                end
         
     | 
| 
      
 691 
     | 
    
         
            +
             
     | 
| 
      
 692 
     | 
    
         
            +
                def create_procedure_call(module_file)
         
     | 
| 
      
 693 
     | 
    
         
            +
                  if @lang == CUDA then
         
     | 
| 
       625 
694 
     | 
    
         
             
                    module_file.print "  duration = "
         
     | 
| 
       626 
695 
     | 
    
         
             
                  elsif @procedure.properties[:return] then
         
     | 
| 
       627 
696 
     | 
    
         
             
                    module_file.print "  ret = "
         
     | 
| 
       628 
697 
     | 
    
         
             
                  end
         
     | 
| 
       629 
698 
     | 
    
         
             
                  module_file.print "  #{@procedure.name}"
         
     | 
| 
       630 
     | 
    
         
            -
                  module_file.print "_" if @lang ==  
     | 
| 
       631 
     | 
    
         
            -
                  module_file.print "_wrapper" if @lang ==  
     | 
| 
      
 699 
     | 
    
         
            +
                  module_file.print "_" if @lang == FORTRAN
         
     | 
| 
      
 700 
     | 
    
         
            +
                  module_file.print "_wrapper" if @lang == CUDA
         
     | 
| 
       632 
701 
     | 
    
         
             
                  module_file.print "("
         
     | 
| 
       633 
702 
     | 
    
         
             
                  params = []
         
     | 
| 
       634 
     | 
    
         
            -
                  if(@lang ==  
     | 
| 
      
 703 
     | 
    
         
            +
                  if(@lang == FORTRAN) then
         
     | 
| 
       635 
704 
     | 
    
         
             
                    @procedure.parameters.each { |param|
         
     | 
| 
       636 
705 
     | 
    
         
             
                      if param.dimension then
         
     | 
| 
       637 
706 
     | 
    
         
             
                        params.push( param.name )
         
     | 
| 
         @@ -650,18 +719,19 @@ EOF 
     | 
|
| 
       650 
719 
     | 
    
         
             
                      end
         
     | 
| 
       651 
720 
     | 
    
         
             
                    }
         
     | 
| 
       652 
721 
     | 
    
         
             
                  end
         
     | 
| 
       653 
     | 
    
         
            -
                  if @lang ==  
     | 
| 
      
 722 
     | 
    
         
            +
                  if @lang == CUDA then
         
     | 
| 
       654 
723 
     | 
    
         
             
                    params.push( "block_number", "block_size" )
         
     | 
| 
       655 
724 
     | 
    
         
             
                  end
         
     | 
| 
       656 
725 
     | 
    
         
             
                  module_file.print params.join(", ")
         
     | 
| 
       657 
726 
     | 
    
         
             
                  module_file.print "  );\n"
         
     | 
| 
       658 
     | 
    
         
            -
             
     | 
| 
      
 727 
     | 
    
         
            +
                end
         
     | 
| 
       659 
728 
     | 
    
         | 
| 
       660 
     | 
    
         
            -
             
     | 
| 
      
 729 
     | 
    
         
            +
                def get_results(module_file, argv, rb_ptr)
         
     | 
| 
      
 730 
     | 
    
         
            +
                  if @lang == CUDA then
         
     | 
| 
       661 
731 
     | 
    
         
             
                    @procedure.parameters.each_index do |i|
         
     | 
| 
       662 
732 
     | 
    
         
             
                      param = @procedure.parameters[i]
         
     | 
| 
       663 
733 
     | 
    
         
             
                      if param.dimension then
         
     | 
| 
       664 
     | 
    
         
            -
                        (rb_ptr === argv[i]). 
     | 
| 
      
 734 
     | 
    
         
            +
                        (rb_ptr === argv[i]).pr
         
     | 
| 
       665 
735 
     | 
    
         
             
                        module_file.print <<EOF
         
     | 
| 
       666 
736 
     | 
    
         
             
              if ( IsNArray(rb_ptr) ) {
         
     | 
| 
       667 
737 
     | 
    
         
             
            EOF
         
     | 
| 
         @@ -683,7 +753,10 @@ EOF 
     | 
|
| 
       683 
753 
     | 
    
         
             
                      end
         
     | 
| 
       684 
754 
     | 
    
         
             
                    end
         
     | 
| 
       685 
755 
     | 
    
         
             
                  end
         
     | 
| 
       686 
     | 
    
         
            -
             
     | 
| 
      
 756 
     | 
    
         
            +
                end
         
     | 
| 
      
 757 
     | 
    
         
            +
             
     | 
| 
      
 758 
     | 
    
         
            +
                def store_result(module_file)
         
     | 
| 
      
 759 
     | 
    
         
            +
                  if @lang != CUDA then
         
     | 
| 
       687 
760 
     | 
    
         
             
                    module_file.print "  duration = (unsigned long long int)stop.tv_sec * (unsigned long long int)1000000000 + stop.tv_nsec;\n"
         
     | 
| 
       688 
761 
     | 
    
         
             
                    module_file.print "  duration -= (unsigned long long int)start.tv_sec * (unsigned long long int)1000000000 + start.tv_nsec;\n"
         
     | 
| 
       689 
762 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -694,14 +767,49 @@ EOF 
     | 
|
| 
       694 
767 
     | 
    
         
             
                    module_file.print "  rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_int_new((unsigned long long)ret));\n" if type_ret.kind_of?(Int) and not type_ret.signed
         
     | 
| 
       695 
768 
     | 
    
         
             
                    module_file.print "  rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_float_new((double)ret));\n" if type_ret.kind_of?(Real)
         
     | 
| 
       696 
769 
     | 
    
         
             
                  end
         
     | 
| 
      
 770 
     | 
    
         
            +
                end
         
     | 
| 
      
 771 
     | 
    
         
            +
             
     | 
| 
      
 772 
     | 
    
         
            +
                # Generates the complete C source of the Ruby extension module that
                # wraps the kernel, writing it to module_file.
                #
                # The generated file consists of the includes (module_header), the
                # kernel prototype (@procedure.boast_header -- presumably written to
                # the current BOAST output, confirm against Procedure), the module
                # registration code (module_preamble) and the body of `method_run`:
                # argument-count check, local declarations, conversion of the Ruby
                # arguments, optional CUDA launch bounds, the timed kernel call,
                # retrieval of results and storage of the statistics hash that
                # `method_run` returns.
                #
                # module_file - IO-like object receiving the generated C text.
                # module_name - name of the Ruby module defined by the extension.
                def fill_module(module_file, module_name)
                  module_header(module_file)
                  @procedure.boast_header(@lang)
                  module_preamble(module_file, module_name)

                  module_file.puts "VALUE method_run(int argc, VALUE *argv, VALUE self) {"

                  check_args(module_file)

                  # BOAST-side handles for the C `argv` array and the `rb_ptr`
                  # scratch VALUE used by the argument-conversion helpers.
                  argc = @procedure.parameters.length
                  argv = Variable::new("argv", CustomType, :type_name => "VALUE", :dimension => [ Dimension::new(0,argc-1) ] )
                  rb_ptr = Variable::new("rb_ptr", CustomType, :type_name => "VALUE")
                  set_transition("VALUE", "VALUE", :default,  CustomType::new(:type_name => "VALUE"))
                  rb_ptr.decl

                  decl_module_params(module_file)

                  get_params_value(module_file, argv, rb_ptr)

                  # get_cuda_launch_bounds prints to module_file itself. Its return
                  # value (nil, from IO#print) must not be printed again: doing so
                  # is redundant and, on Ruby versions where `print nil` emits
                  # text, would inject a stray "nil" into the generated C source.
                  get_cuda_launch_bounds(module_file) if @lang == CUDA

                  # The kernel call is bracketed by two wall-clock samples.
                  module_file.print "  clock_gettime(CLOCK_REALTIME, &start);\n"

                  create_procedure_call(module_file)

                  module_file.print "  clock_gettime(CLOCK_REALTIME, &stop);\n"

                  get_results(module_file, argv, rb_ptr)

                  store_result(module_file)

                  module_file.print "  return stats;\n"
                  module_file.print "}"
                end
         
     | 
| 
       700 
808 
     | 
    
         | 
| 
       701 
809 
     | 
    
         
             
                def method_missing(meth, *args, &block)
         
     | 
| 
       702 
810 
     | 
    
         
             
                 if meth.to_s == "run" then
         
     | 
| 
       703 
     | 
    
         
            -
                    
     | 
| 
       704 
     | 
    
         
            -
                    
     | 
| 
      
 811 
     | 
    
         
            +
                   build
         
     | 
| 
      
 812 
     | 
    
         
            +
                   run(*args,&block)
         
     | 
| 
       705 
813 
     | 
    
         
             
                 else
         
     | 
| 
       706 
814 
     | 
    
         
             
                   super
         
     | 
| 
       707 
815 
     | 
    
         
             
                 end
         
     | 
| 
         @@ -737,6 +845,104 @@ EOF 
     | 
|
| 
       737 
845 
     | 
    
         
             
                  return res
         
     | 
| 
       738 
846 
     | 
    
         
             
                end
         
     | 
| 
       739 
847 
     | 
    
         | 
| 
      
 848 
     | 
    
         
            +
                # Selects the NArray element type code used to load the reference
                # data of an array parameter.
                #
                # param:: object whose +type+ is inspected; +type.size+ (in bytes)
                #         is read for Real and Int types.
                #
                # Returns one of the NArray type constants. Combinations that have
                # no mapping here fall back to NArray::BYTE (a message is printed on
                # STDERR when debug? is true).
                def get_array_type(param)
                  if param.type.class == Real then
                    case param.type.size
                    when 4
                      NArray::SFLOAT
                    when 8
                      NArray::FLOAT
                    else
                      STDERR::puts "Unsupported Float size for NArray: #{param.type.size}, defaulting to byte" if debug?
                      NArray::BYTE
                    end
                  elsif param.type.class == Int then
                    case param.type.size
                    when 1
                      NArray::BYTE
                    when 2
                      NArray::SINT
                    when 4
                      # NArray::SINT is the 2-byte integer code; 4-byte integers
                      # need NArray::INT, otherwise the data is split into twice
                      # as many 16-bit elements.
                      NArray::INT
                    else
                      STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if debug?
                      NArray::BYTE
                    end
                  else
                    STDERR::puts "Unkown array type for NArray: #{param.type}, defaulting to byte" if debug?
                    NArray::BYTE
                  end
                end
         
     | 
| 
      
 877 
     | 
    
         
            +
             
     | 
| 
      
 878 
     | 
    
         
            +
                # Returns the String#unpack directive used to decode the binary
                # reference value of a scalar parameter.
                #
                # param:: object whose +type+ is inspected; +type.size+ (in bytes)
                #         selects the directive and, for Int types, +type.signed?+
                #         selects the lower-case (signed) variant.
                #
                # Raises a RuntimeError for an unsupported size or an unsupported
                # type class, so the failure is reported here rather than as an
                # obscure TypeError when the nil directive reaches unpack.
                def get_scalar_type(param)
                  kind = param.type
                  if kind.class == Real then
                    case kind.size
                    when 4
                      "f"
                    when 8
                      "d"
                    else
                      raise "Unsupported Real scalar size: #{kind.size}!"
                    end
                  elsif kind.class == Int then
                    directive = case kind.size
                                when 1
                                  "C"
                                when 2
                                  "S"
                                when 4
                                  "L"
                                when 8
                                  "Q"
                                else
                                  raise "Unsupported Int scalar size: #{kind.size}!"
                                end
                    # Upper-case integer directives are unsigned; the lower-case
                    # ones are their signed counterparts. Non-mutating downcase
                    # keeps this working with frozen string literals.
                    kind.signed? ? directive.downcase : directive
                  else
                    raise "Unsupported scalar type: #{kind}!"
                  end
                end
         
     | 
| 
      
 907 
     | 
    
         
            +
             
     | 
| 
      
 908 
     | 
    
         
            +
                # Reads the stored binary value of one parameter.
                #
                # param::     parameter description; +name+, +dimension+,
                #             +direction+ and +constant+ are read.
                # directory:: directory containing the file.
                # suffix::    string appended to the parameter name to form the
                #             file name.
                # intent::    when :out, parameters whose direction is :in or that
                #             are constant are skipped.
                #
                # Returns nil for a skipped parameter, an NArray for a parameter
                # with a dimension (a single-element array when the file is empty),
                # or the first value unpacked from the file for a scalar.
                def read_param(param, directory, suffix, intent)
                  return nil if intent == :out && (param.direction == :in || param.constant)

                  # Block form guarantees the handle is closed even when type
                  # resolution or decoding raises.
                  File.open(directory + "/" + param.name + suffix, "rb") do |f|
                    if param.dimension then
                      type = get_array_type(param)
                      if f.size == 0 then
                        NArray::new(type, 1)
                      else
                        NArray.to_na(f.read, type)
                      end
                    else
                      type = get_scalar_type(param)
                      f.read.unpack(type).first
                    end
                  end
                end
         
     | 
| 
      
 927 
     | 
    
         
            +
             
     | 
| 
      
 928 
     | 
    
         
            +
                # Reads the launch dimensions stored in "<directory>/problem_size".
                #
                # The first two "<...>" groups found in the file are parsed as
                # comma-separated integer lists: the first is treated as the local
                # dimensions, the second as the global ones. Lists shorter than
                # three entries are padded with 1.
                #
                # Returns, when @lang == CL, a hash with :global_work_size (each
                # global entry multiplied by the matching local entry) and
                # :local_work_size; otherwise a hash with :block_number and
                # :block_size.
                #
                # Raises a RuntimeError when the file has fewer than two groups.
                def get_gpu_dim(directory)
                  # File.read closes the handle before any parsing can fail.
                  groups = File.read(directory + "/problem_size").scan(/<(.*?)>/)
                  if groups.length < 2 then
                    raise "Malformed problem_size file in #{directory}: expected two <...> groups!"
                  end

                  local_dim, global_dim = groups.first(2).collect { |group|
                    dims = group.first.split(",").collect { |e| e.to_i }
                    dims.push(1) while dims.length < 3
                    dims
                  }

                  if @lang == CL then
                    local_dim.each_index { |indx| global_dim[indx] *= local_dim[indx] }
                    { :global_work_size => global_dim, :local_work_size => local_dim }
                  else
                    { :block_number => global_dim, :block_size => local_dim }
                  end
                end
         
     | 
| 
      
 945 
     | 
    
         
            +
             
     | 
| 
       740 
946 
     | 
    
         
             
                def load_ref_files(  path = "", suffix = "", intent )
         
     | 
| 
       741 
947 
     | 
    
         
             
                  proc_path = path + "/#{@procedure.name}/"
         
     | 
| 
       742 
948 
     | 
    
         
             
                  res_h = {}
         
     | 
| 
         @@ -749,89 +955,10 @@ EOF 
     | 
|
| 
       749 
955 
     | 
    
         
             
                  dirs.each { |d|
         
     | 
| 
       750 
956 
     | 
    
         
             
                    res = [] 
         
     | 
| 
       751 
957 
     | 
    
         
             
                    @procedure.parameters.collect { |param|
         
     | 
| 
       752 
     | 
    
         
            -
                       
     | 
| 
       753 
     | 
    
         
            -
                        res.push nil
         
     | 
| 
       754 
     | 
    
         
            -
                        next
         
     | 
| 
       755 
     | 
    
         
            -
                      end
         
     | 
| 
       756 
     | 
    
         
            -
                      f = File::new( d+"/"+param.name+suffix, "rb" )
         
     | 
| 
       757 
     | 
    
         
            -
                      if param.dimension then
         
     | 
| 
       758 
     | 
    
         
            -
                        if param.type.class == BOAST::Real then
         
     | 
| 
       759 
     | 
    
         
            -
                          case param.type.size
         
     | 
| 
       760 
     | 
    
         
            -
                          when 4
         
     | 
| 
       761 
     | 
    
         
            -
                            type = NArray::SFLOAT
         
     | 
| 
       762 
     | 
    
         
            -
                          when 8
         
     | 
| 
       763 
     | 
    
         
            -
                            type = NArray::FLOAT
         
     | 
| 
       764 
     | 
    
         
            -
                          else
         
     | 
| 
       765 
     | 
    
         
            -
                            STDERR::puts "Unsupported Float size for NArray: #{param.type.size}, defaulting to byte" if BOAST::debug
         
     | 
| 
       766 
     | 
    
         
            -
                            type = NArray::BYTE
         
     | 
| 
       767 
     | 
    
         
            -
                          end
         
     | 
| 
       768 
     | 
    
         
            -
                        elsif param.type.class == BOAST::Int then
         
     | 
| 
       769 
     | 
    
         
            -
                          case param.type.size
         
     | 
| 
       770 
     | 
    
         
            -
                          when 1
         
     | 
| 
       771 
     | 
    
         
            -
                            type = NArray::BYTE
         
     | 
| 
       772 
     | 
    
         
            -
                          when 2
         
     | 
| 
       773 
     | 
    
         
            -
                            type = NArray::SINT
         
     | 
| 
       774 
     | 
    
         
            -
                          when 4
         
     | 
| 
       775 
     | 
    
         
            -
                            type = NArray::SINT
         
     | 
| 
       776 
     | 
    
         
            -
                          else
         
     | 
| 
       777 
     | 
    
         
            -
                            STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if BOAST::debug
         
     | 
| 
       778 
     | 
    
         
            -
                            type = NArray::BYTE
         
     | 
| 
       779 
     | 
    
         
            -
                          end
         
     | 
| 
       780 
     | 
    
         
            -
                        else
         
     | 
| 
       781 
     | 
    
         
            -
                          STDERR::puts "Unkown array type for NArray: #{param.type}, defaulting to byte" if BOAST::debug
         
     | 
| 
       782 
     | 
    
         
            -
                          type = NArray::BYTE
         
     | 
| 
       783 
     | 
    
         
            -
                        end
         
     | 
| 
       784 
     | 
    
         
            -
                        if f.size == 0 then
         
     | 
| 
       785 
     | 
    
         
            -
                          res.push NArray::new(type, 1)
         
     | 
| 
       786 
     | 
    
         
            -
                        else
         
     | 
| 
       787 
     | 
    
         
            -
                          res.push NArray.to_na(f.read, type)
         
     | 
| 
       788 
     | 
    
         
            -
                        end
         
     | 
| 
       789 
     | 
    
         
            -
                      else
         
     | 
| 
       790 
     | 
    
         
            -
                        if param.type.class == BOAST::Real then
         
     | 
| 
       791 
     | 
    
         
            -
                          case param.type.size
         
     | 
| 
       792 
     | 
    
         
            -
                          when 4
         
     | 
| 
       793 
     | 
    
         
            -
                            type = "f"
         
     | 
| 
       794 
     | 
    
         
            -
                          when 8
         
     | 
| 
       795 
     | 
    
         
            -
                            type = "d"
         
     | 
| 
       796 
     | 
    
         
            -
                          else
         
     | 
| 
       797 
     | 
    
         
            -
                            raise "Unsupported Real scalar size: #{param.type.size}!"
         
     | 
| 
       798 
     | 
    
         
            -
                          end
         
     | 
| 
       799 
     | 
    
         
            -
                        elsif param.type.class == BOAST::Int then
         
     | 
| 
       800 
     | 
    
         
            -
                          case param.type.size
         
     | 
| 
       801 
     | 
    
         
            -
                          when 1
         
     | 
| 
       802 
     | 
    
         
            -
                            type = "C"
         
     | 
| 
       803 
     | 
    
         
            -
                          when 2
         
     | 
| 
       804 
     | 
    
         
            -
                            type = "S"
         
     | 
| 
       805 
     | 
    
         
            -
                          when 4
         
     | 
| 
       806 
     | 
    
         
            -
                            type = "L"
         
     | 
| 
       807 
     | 
    
         
            -
                          when 8
         
     | 
| 
       808 
     | 
    
         
            -
                            type = "Q"
         
     | 
| 
       809 
     | 
    
         
            -
                          else
         
     | 
| 
       810 
     | 
    
         
            -
                            raise "Unsupported Int scalar size: #{param.type.size}!"
         
     | 
| 
       811 
     | 
    
         
            -
                          end
         
     | 
| 
       812 
     | 
    
         
            -
                          if param.type.signed? then
         
     | 
| 
       813 
     | 
    
         
            -
                            type.downcase!
         
     | 
| 
       814 
     | 
    
         
            -
                          end
         
     | 
| 
       815 
     | 
    
         
            -
                        end
         
     | 
| 
       816 
     | 
    
         
            -
                        res.push f.read.unpack(type).first
         
     | 
| 
       817 
     | 
    
         
            -
                      end
         
     | 
| 
       818 
     | 
    
         
            -
                      f.close
         
     | 
| 
      
 958 
     | 
    
         
            +
                      res.push read_param(param, d, suffix, intent)
         
     | 
| 
       819 
959 
     | 
    
         
             
                    }
         
     | 
| 
       820 
     | 
    
         
            -
                    if @lang ==  
     | 
| 
       821 
     | 
    
         
            -
                       
     | 
| 
       822 
     | 
    
         
            -
                      s = f.read
         
     | 
| 
       823 
     | 
    
         
            -
                      local_dim, global_dim = s.scan(/<(.*?)>/)
         
     | 
| 
       824 
     | 
    
         
            -
                      local_dim  = local_dim.pop.split(",").collect!{ |e| e.to_i }
         
     | 
| 
       825 
     | 
    
         
            -
                      global_dim = global_dim.pop.split(",").collect!{ |e| e.to_i }
         
     | 
| 
       826 
     | 
    
         
            -
                      (local_dim.length..2).each{ |i| local_dim[i] = 1 }
         
     | 
| 
       827 
     | 
    
         
            -
                      (global_dim.length..2).each{ |i| global_dim[i] = 1 }
         
     | 
| 
       828 
     | 
    
         
            -
                      if @lang == BOAST::CL then
         
     | 
| 
       829 
     | 
    
         
            -
                        local_dim.each_index { |indx| global_dim[indx] *= local_dim[indx] }
         
     | 
| 
       830 
     | 
    
         
            -
                        res.push( { :global_work_size => global_dim, :local_work_size => local_dim } )
         
     | 
| 
       831 
     | 
    
         
            -
                      else
         
     | 
| 
       832 
     | 
    
         
            -
                        res.push( { :block_number => global_dim, :block_size => local_dim } )
         
     | 
| 
       833 
     | 
    
         
            -
                      end
         
     | 
| 
       834 
     | 
    
         
            -
                      f.close
         
     | 
| 
      
 960 
     | 
    
         
            +
                    if @lang == CUDA or @lang == CL then
         
     | 
| 
      
 961 
     | 
    
         
            +
                      res.push get_gpu_dim(d)
         
     | 
| 
       835 
962 
     | 
    
         
             
                    end
         
     | 
| 
       836 
963 
     | 
    
         
             
                    res_h[d] =  res
         
     | 
| 
       837 
964 
     | 
    
         
             
                  }
         
     |