ikra 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/ast/builder.rb +225 -77
 - data/lib/ast/host_section_builder.rb +38 -0
 - data/lib/ast/interpreter.rb +67 -0
 - data/lib/ast/lexical_variables_enumerator.rb +3 -2
 - data/lib/ast/nodes.rb +521 -31
 - data/lib/ast/printer.rb +116 -18
 - data/lib/ast/ssa_generator.rb +192 -0
 - data/lib/ast/visitor.rb +235 -21
 - data/lib/config/configuration.rb +28 -3
 - data/lib/config/os_configuration.rb +62 -9
 - data/lib/cpu/cpu_implementation.rb +39 -0
 - data/lib/ikra.rb +13 -3
 - data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
 - data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
 - data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
 - data/lib/resources/cuda/ast/assignment.cpp +1 -0
 - data/lib/resources/cuda/block_function_head.cpp +7 -1
 - data/lib/resources/cuda/entry_point.cpp +47 -0
 - data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
 - data/lib/resources/cuda/free_device_memory.cpp +3 -0
 - data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
 - data/lib/resources/cuda/header.cpp +23 -9
 - data/lib/resources/cuda/header_structs.cpp +92 -0
 - data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
 - data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
 - data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
 - data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
 - data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
 - data/lib/resources/cuda/kernel.cpp +9 -2
 - data/lib/resources/cuda/launch_kernel.cpp +5 -0
 - data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
 - data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
 - data/lib/resources/cuda/reduce_body.cpp +88 -0
 - data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
 - data/lib/resources/cuda/stencil_body.cpp +16 -0
 - data/lib/resources/cuda/struct_definition.cpp +4 -0
 - data/lib/ruby_core/array.rb +34 -0
 - data/lib/ruby_core/array_command.rb +313 -0
 - data/lib/ruby_core/core.rb +103 -0
 - data/lib/ruby_core/interpreter.rb +16 -0
 - data/lib/ruby_core/math.rb +32 -0
 - data/lib/ruby_core/ruby_integration.rb +256 -0
 - data/lib/symbolic/host_section.rb +115 -0
 - data/lib/symbolic/input.rb +87 -0
 - data/lib/symbolic/input_visitor.rb +68 -0
 - data/lib/symbolic/symbolic.rb +793 -117
 - data/lib/symbolic/visitor.rb +70 -8
 - data/lib/translator/array_command_struct_builder.rb +163 -0
 - data/lib/translator/ast_translator.rb +572 -0
 - data/lib/translator/block_translator.rb +104 -48
 - data/lib/translator/commands/array_combine_command.rb +41 -0
 - data/lib/translator/commands/array_identity_command.rb +28 -0
 - data/lib/translator/commands/array_index_command.rb +52 -0
 - data/lib/translator/commands/array_reduce_command.rb +135 -0
 - data/lib/translator/commands/array_stencil_command.rb +129 -0
 - data/lib/translator/commands/array_zip_command.rb +30 -0
 - data/lib/translator/commands/command_translator.rb +264 -0
 - data/lib/translator/cuda_errors.rb +32 -0
 - data/lib/translator/environment_builder.rb +263 -0
 - data/lib/translator/host_section/array_host_section_command.rb +150 -0
 - data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
 - data/lib/translator/host_section/ast_translator.rb +14 -0
 - data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
 - data/lib/translator/host_section/program_builder.rb +89 -0
 - data/lib/translator/input_translator.rb +226 -0
 - data/lib/translator/kernel_builder.rb +137 -0
 - data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
 - data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
 - data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
 - data/lib/translator/last_returns_visitor.rb +19 -10
 - data/lib/translator/program_builder.rb +197 -0
 - data/lib/translator/program_launcher.rb +273 -0
 - data/lib/translator/struct_type.rb +55 -0
 - data/lib/translator/translator.rb +34 -11
 - data/lib/translator/variable_classifier_visitor.rb +56 -0
 - data/lib/types/inference/ast_inference.rb +586 -0
 - data/lib/types/inference/clear_types_visitor.rb +11 -0
 - data/lib/types/inference/command_inference.rb +101 -0
 - data/lib/types/inference/input_inference.rb +62 -0
 - data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
 - data/lib/types/inference/ruby_extension.rb +35 -0
 - data/lib/types/inference/symbol_table.rb +131 -0
 - data/lib/types/types.rb +14 -0
 - data/lib/types/types/array_command_type.rb +123 -0
 - data/lib/types/types/array_type.rb +137 -0
 - data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
 - data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
 - data/lib/types/types/ruby_type.rb +88 -0
 - data/lib/types/types/struct_type.rb +179 -0
 - data/lib/types/types/union_type.rb +239 -0
 - metadata +160 -18
 - data/lib/ast/method_definition.rb +0 -37
 - data/lib/ast/translator.rb +0 -264
 - data/lib/resources/cuda/kernel_launcher.cpp +0 -28
 - data/lib/scope.rb +0 -166
 - data/lib/translator/command_translator.rb +0 -421
 - data/lib/translator/local_variables_enumerator.rb +0 -35
 - data/lib/translator/method_translator.rb +0 -24
 - data/lib/types/array_type.rb +0 -51
 - data/lib/types/ruby_extension.rb +0 -67
 - data/lib/types/ruby_type.rb +0 -45
 - data/lib/types/type_inference.rb +0 -382
 - data/lib/types/union_type.rb +0 -155
 
| 
         @@ -0,0 +1,12 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #undef checkErrorReturn
         
     | 
| 
      
 2 
     | 
    
         
            +
            #define checkErrorReturn(result_var, expr) \
         
     | 
| 
      
 3 
     | 
    
         
            +
            if (result_var->last_error = expr) \
         
     | 
| 
      
 4 
     | 
    
         
            +
            {\
         
     | 
| 
      
 5 
     | 
    
         
            +
                cudaError_t error = cudaGetLastError();\
         
     | 
| 
      
 6 
     | 
    
         
            +
                printf("!!! Cuda Failure %s:%d (%i): '%s'\n", __FILE__, __LINE__, expr, cudaGetErrorString(error));\
         
     | 
| 
      
 7 
     | 
    
         
            +
                cudaDeviceReset();\
         
     | 
| 
      
 8 
     | 
    
         
            +
                return /*{result_type}*/::error_return_value;\
         
     | 
| 
      
 9 
     | 
    
         
            +
            }
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            /*{result_type}*/ /*{name}*/(/*{parameters}*/)
         
     | 
| 
      
 12 
     | 
    
         
            +
            /*{body}*/
         
     | 
| 
         @@ -0,0 +1,55 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #undef checkErrorReturn
         
     | 
| 
      
 2 
     | 
    
         
            +
            #define checkErrorReturn(result_var, expr) \
         
     | 
| 
      
 3 
     | 
    
         
            +
            if (result_var->last_error = expr) \
         
     | 
| 
      
 4 
     | 
    
         
            +
            {\
         
     | 
| 
      
 5 
     | 
    
         
            +
                cudaError_t error = cudaGetLastError();\
         
     | 
| 
      
 6 
     | 
    
         
            +
                printf("!!! Cuda Failure %s:%d (%i): '%s'\n", __FILE__, __LINE__, expr, cudaGetErrorString(error));\
         
     | 
| 
      
 7 
     | 
    
         
            +
                cudaDeviceReset();\
         
     | 
| 
      
 8 
     | 
    
         
            +
                return result_var;\
         
     | 
| 
      
 9 
     | 
    
         
            +
            }
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            extern "C" EXPORT result_t *launch_kernel(environment_t */*{host_env_var_name}*/)
         
     | 
| 
      
 12 
     | 
    
         
            +
            {
         
     | 
| 
      
 13 
     | 
    
         
            +
                // CUDA Initialization
         
     | 
| 
      
 14 
     | 
    
         
            +
                program_result = new result_t();
         
     | 
| 
      
 15 
     | 
    
         
            +
                program_result->device_allocations = new vector<void*>();
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                timeStartMeasure();
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                cudaError_t cudaStatus = cudaSetDevice(0);
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
                if (cudaStatus != cudaSuccess) {
         
     | 
| 
      
 22 
     | 
    
         
            +
                    fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?\n");
         
     | 
| 
      
 23 
     | 
    
         
            +
                    program_result->last_error = -1;
         
     | 
| 
      
 24 
     | 
    
         
            +
                    return program_result;
         
     | 
| 
      
 25 
     | 
    
         
            +
                }
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
                checkErrorReturn(program_result, cudaFree(0));
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
                timeReportMeasure(program_result, setup_cuda);
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
                /* Prepare environment */
         
     | 
| 
      
 33 
     | 
    
         
            +
                /*{prepare_environment}*/
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
                /* Copy back memory and set pointer of result */
         
     | 
| 
      
 37 
     | 
    
         
            +
                program_result->result = /*{host_result_array}*/;
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
                /* Free device memory */
         
     | 
| 
      
 40 
     | 
    
         
            +
                timeStartMeasure();
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
                for (
         
     | 
| 
      
 43 
     | 
    
         
            +
                    auto device_ptr = program_result->device_allocations->begin(); 
         
     | 
| 
      
 44 
     | 
    
         
            +
                    device_ptr < program_result->device_allocations->end(); 
         
     | 
| 
      
 45 
     | 
    
         
            +
                    device_ptr++)
         
     | 
| 
      
 46 
     | 
    
         
            +
                {
         
     | 
| 
      
 47 
     | 
    
         
            +
                    checkErrorReturn(program_result, cudaFree(*device_ptr));
         
     | 
| 
      
 48 
     | 
    
         
            +
                }
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
                delete program_result->device_allocations;
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                timeReportMeasure(program_result, free_memory);
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
                return program_result;
         
     | 
| 
      
 55 
     | 
    
         
            +
            }
         
     | 
| 
         @@ -0,0 +1,18 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
                timeStartMeasure();
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
                if (/*{name}*/ != cmd->result) {
         
     | 
| 
      
 4 
     | 
    
         
            +
                    // Don't free memory if it is the result. There is already a similar check in
         
     | 
| 
      
 5 
     | 
    
         
            +
                    // program_builder (free all except for last). However, this check is not sufficient in
         
     | 
| 
      
 6 
     | 
    
         
            +
                    // case the same array is reused!
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
                    checkErrorReturn(program_result, cudaFree(/*{name}*/));
         
     | 
| 
      
 9 
     | 
    
         
            +
                    // Remove from list of allocations
         
     | 
| 
      
 10 
     | 
    
         
            +
                    program_result->device_allocations->erase(
         
     | 
| 
      
 11 
     | 
    
         
            +
                        std::remove(
         
     | 
| 
      
 12 
     | 
    
         
            +
                            program_result->device_allocations->begin(),
         
     | 
| 
      
 13 
     | 
    
         
            +
                            program_result->device_allocations->end(),
         
     | 
| 
      
 14 
     | 
    
         
            +
                            /*{name}*/),
         
     | 
| 
      
 15 
     | 
    
         
            +
                        program_result->device_allocations->end());
         
     | 
| 
      
 16 
     | 
    
         
            +
                }
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
                timeReportMeasure(program_result, free_memory);
         
     | 
| 
         @@ -0,0 +1,14 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ({
         
     | 
| 
      
 2 
     | 
    
         
            +
                // /*{debug_information}*/
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
                /*{array_command_type}*/ cmd = /*{array_command}*/;
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
                if (cmd->result == 0) {
         
     | 
| 
      
 7 
     | 
    
         
            +
                    /*{kernel_invocation}*/
         
     | 
| 
      
 8 
     | 
    
         
            +
                    cmd->result = /*{kernel_result}*/;
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
                    /*{free_memory}*/
         
     | 
| 
      
 11 
     | 
    
         
            +
                }
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
                variable_size_array_t((void *) cmd->result, /*{result_size}*/);
         
     | 
| 
      
 14 
     | 
    
         
            +
            })
         
     | 
| 
         @@ -0,0 +1,10 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
             
     | 
| 
      
 2 
     | 
    
         
            +
                {
         
     | 
| 
      
 3 
     | 
    
         
            +
                    /*{type}*/ * tmp_result = (/*{type}*/ *) malloc(/*{bytes}*/);
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
                    timeStartMeasure();
         
     | 
| 
      
 6 
     | 
    
         
            +
                    checkErrorReturn(program_result, cudaMemcpy(tmp_result, program_result->result, /*{bytes}*/, cudaMemcpyDeviceToHost));
         
     | 
| 
      
 7 
     | 
    
         
            +
                    timeReportMeasure(program_result, transfer_memory);
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                    program_result->result = tmp_result;
         
     | 
| 
      
 10 
     | 
    
         
            +
                }
         
     | 
| 
         @@ -1,8 +1,15 @@ 
     | 
|
| 
       1 
1 
     | 
    
         | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
            __global__ void  
     | 
| 
      
 3 
     | 
    
         
            +
            __global__ void /*{kernel_name}*/(/*{parameters}*/)
         
     | 
| 
       4 
4 
     | 
    
         
             
            {
         
     | 
| 
       5 
     | 
    
         
            -
                 
     | 
| 
      
 5 
     | 
    
         
            +
                int _tid_ = threadIdx.x + blockIdx.x * blockDim.x;
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
                if (_tid_ < /*{num_threads}*/)
         
     | 
| 
      
 8 
     | 
    
         
            +
                {
         
     | 
| 
      
 9 
     | 
    
         
            +
            /*{execution}*/
         
     | 
| 
      
 10 
     | 
    
         
            +
                    
         
     | 
| 
      
 11 
     | 
    
         
            +
                    _result_[_tid_] = /*{block_invocation}*/;
         
     | 
| 
      
 12 
     | 
    
         
            +
                }
         
     | 
| 
       6 
13 
     | 
    
         
             
            }
         
     | 
| 
       7 
14 
     | 
    
         | 
| 
       8 
15 
     | 
    
         | 
| 
         @@ -0,0 +1,10 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ({
         
     | 
| 
      
 2 
     | 
    
         
            +
                variable_size_array_t device_array = /*{device_array}*/;
         
     | 
| 
      
 3 
     | 
    
         
            +
                /*{type}*/ * tmp_result = (/*{type}*/ *) malloc(sizeof(/*{type}*/) * device_array.size);
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
                timeStartMeasure();
         
     | 
| 
      
 6 
     | 
    
         
            +
                checkErrorReturn(program_result, cudaMemcpy(tmp_result, device_array.content, sizeof(/*{type}*/) * device_array.size, cudaMemcpyDeviceToHost));
         
     | 
| 
      
 7 
     | 
    
         
            +
                timeReportMeasure(program_result, transfer_memory);
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                variable_size_array_t((void *) tmp_result, device_array.size);
         
     | 
| 
      
 10 
     | 
    
         
            +
            })
         
     | 
| 
         @@ -0,0 +1,88 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
                    int thread_idx = threadIdx.x;
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
                    // Single result of this block
         
     | 
| 
      
 4 
     | 
    
         
            +
                    /*{type}*/ /*{temp_result}*/;
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
                    int num_args = 2 * /*{block_size}*/;
         
     | 
| 
      
 7 
     | 
    
         
            +
                    if (blockIdx.x == gridDim.x - 1)
         
     | 
| 
      
 8 
     | 
    
         
            +
                    {
         
     | 
| 
      
 9 
     | 
    
         
            +
                        // Processing the last block, which might be odd (number of elements to reduce).
         
     | 
| 
      
 10 
     | 
    
         
            +
                        // Other blocks cannot be "odd", because every block reduces 2*block_size many elements.
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
                        // Number of elements to reduce in the last block
         
     | 
| 
      
 13 
     | 
    
         
            +
                        num_args = ((2 * /*{num_threads}*/ - 1) % (2 * /*{block_size}*/)) + (/*{odd}*/ ? 0 : 1);
         
     | 
| 
      
 14 
     | 
    
         
            +
                    }
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
                    if (num_args == 1)
         
     | 
| 
      
 17 
     | 
    
         
            +
                    {
         
     | 
| 
      
 18 
     | 
    
         
            +
                        /*{temp_result}*/ = /*{previous_result}*/[_tid_];
         
     | 
| 
      
 19 
     | 
    
         
            +
                    }
         
     | 
| 
      
 20 
     | 
    
         
            +
                    else if (num_args == 2)
         
     | 
| 
      
 21 
     | 
    
         
            +
                    {
         
     | 
| 
      
 22 
     | 
    
         
            +
                        /*{temp_result}*/ = /*{block_name}*/(/*{arguments}*/, /*{previous_result}*/[_tid_], /*{previous_result}*/[_tid_ + /*{num_threads}*/]);
         
     | 
| 
      
 23 
     | 
    
         
            +
                    }
         
     | 
| 
      
 24 
     | 
    
         
            +
                    else
         
     | 
| 
      
 25 
     | 
    
         
            +
                    {
         
     | 
| 
      
 26 
     | 
    
         
            +
                        // Allocate block_size many slots to contain the result of up to block_size many reductions, i.e.,
         
     | 
| 
      
 27 
     | 
    
         
            +
                        // this array contains the reduction of (up to) 2*block_size many elements.
         
     | 
| 
      
 28 
     | 
    
         
            +
                        __shared__ /*{type}*/ sdata[/*{block_size}*/];
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
                        /*{odd}*/ = num_args % 2 == 1;
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
                        // --- FIRST REDUCTION ---  Load from global memory
         
     | 
| 
      
 33 
     | 
    
         
            +
                        // Number of elements after the first reduction
         
     | 
| 
      
 34 
     | 
    
         
            +
                        num_args = num_args / 2 + num_args % 2;
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
                        if (thread_idx == num_args - 1 && /*{odd}*/)
         
     | 
| 
      
 37 
     | 
    
         
            +
                        {
         
     | 
| 
      
 38 
     | 
    
         
            +
                            // This is the last thread, and it should reduce only one element.
         
     | 
| 
      
 39 
     | 
    
         
            +
                            sdata[thread_idx] = /*{previous_result}*/[_tid_];
         
     | 
| 
      
 40 
     | 
    
         
            +
                        }
         
     | 
| 
      
 41 
     | 
    
         
            +
                        else
         
     | 
| 
      
 42 
     | 
    
         
            +
                        {
         
     | 
| 
      
 43 
     | 
    
         
            +
                            sdata[thread_idx] = /*{block_name}*/(/*{arguments}*/, /*{previous_result}*/[_tid_], /*{previous_result}*/[_tid_ + /*{num_threads}*/]);
         
     | 
| 
      
 44 
     | 
    
         
            +
                        }
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
                        __syncthreads();
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
                        // --- SUBSEQUENT REDUCTION ---  Read from shared memory only
         
     | 
| 
      
 50 
     | 
    
         
            +
                        /*{odd}*/ = num_args % 2 == 1;
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                        for (
         
     | 
| 
      
 53 
     | 
    
         
            +
                            num_args = num_args / 2 + num_args % 2;             // Number of elements after this reduction
         
     | 
| 
      
 54 
     | 
    
         
            +
                            num_args > 1;                                       // ... as long as there's at least 3 elements left
         
     | 
| 
      
 55 
     | 
    
         
            +
                            num_args = num_args / 2 + num_args % 2) {
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
                            if (thread_idx < num_args) {
         
     | 
| 
      
 58 
     | 
    
         
            +
                                // This thread has work to do...
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
                                if (thread_idx != num_args - 1 || !/*{odd}*/)
         
     | 
| 
      
 61 
     | 
    
         
            +
                                {
         
     | 
| 
      
 62 
     | 
    
         
            +
                                    sdata[thread_idx] = /*{block_name}*/(/*{arguments}*/, sdata[thread_idx], sdata[thread_idx + num_args]);
         
     | 
| 
      
 63 
     | 
    
         
            +
                                }
         
     | 
| 
      
 64 
     | 
    
         
            +
                                else
         
     | 
| 
      
 65 
     | 
    
         
            +
                                {
         
     | 
| 
      
 66 
     | 
    
         
            +
                                    // This is the last element and it is odd, do nothing
         
     | 
| 
      
 67 
     | 
    
         
            +
                                }
         
     | 
| 
      
 68 
     | 
    
         
            +
                            }
         
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
      
 70 
     | 
    
         
            +
                            __syncthreads();
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
                            /*{odd}*/ = num_args % 2 == 1;
         
     | 
| 
      
 73 
     | 
    
         
            +
                        }
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
      
 75 
     | 
    
         
            +
                        if (thread_idx == 0)
         
     | 
| 
      
 76 
     | 
    
         
            +
                        {
         
     | 
| 
      
 77 
     | 
    
         
            +
                            // Thread 0 performs the final reduction step and stores this block's result
         
     | 
| 
      
 78 
     | 
    
         
            +
                            /*{temp_result}*/ = /*{block_name}*/(/*{arguments}*/, sdata[0], sdata[1]);
         
     | 
| 
      
 79 
     | 
    
         
            +
                        }
         
     | 
| 
      
 80 
     | 
    
         
            +
                    }
         
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
      
 82 
     | 
    
         
            +
                    // Write result to different position
         
     | 
| 
      
 83 
     | 
    
         
            +
                    _tid_ = blockIdx.x;
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
                    if (thread_idx != 0) {
         
     | 
| 
      
 86 
     | 
    
         
            +
                        // Only one thread should report the result
         
     | 
| 
      
 87 
     | 
    
         
            +
                        return;
         
     | 
| 
      
 88 
     | 
    
         
            +
                    }
         
     | 
| 
         @@ -0,0 +1,16 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
                /*{result_type}*/ /*{temp_var}*/;
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
                // Indices for all dimensions
         
     | 
| 
      
 4 
     | 
    
         
            +
                /*{compute_indices}*/
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
                if (/*{out_of_bounds_check}*/)
         
     | 
| 
      
 7 
     | 
    
         
            +
                {
         
     | 
| 
      
 8 
     | 
    
         
            +
                    // All value indices within bounds
         
     | 
| 
      
 9 
     | 
    
         
            +
                    /*{execution}*/
         
     | 
| 
      
 10 
     | 
    
         
            +
                    /*{temp_var}*/ = /*{stencil_computation}*/;
         
     | 
| 
      
 11 
     | 
    
         
            +
                }
         
     | 
| 
      
 12 
     | 
    
         
            +
                else
         
     | 
| 
      
 13 
     | 
    
         
            +
                {
         
     | 
| 
      
 14 
     | 
    
         
            +
                    // At least one index is out of bounds
         
     | 
| 
      
 15 
     | 
    
         
            +
                    /*{temp_var}*/ = /*{out_of_bounds_fallback}*/;
         
     | 
| 
      
 16 
     | 
    
         
            +
                }
         
     | 
| 
         @@ -0,0 +1,34 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Ikra
                module RubyIntegration
                    # Receiver type predicate: array types that are not location-aware.
                    ALL_ARRAY_TYPES = proc do |type|
                        next false unless type.is_a?(Types::ArrayType)

                        !type.is_a?(Types::LocationAwareArrayType)
                    end

                    # Receiver type predicate: location-aware array types only.
                    LOCATION_AWARE_ARRAY_TYPE = proc do |type|
                        # TODO: Maybe there should be an automated transfer to host side here if necessary?
                        type.is_a?(Types::LocationAwareArrayType)
                    end

                    # Generates the C++ expression that reads one element of a location-aware
                    # array: the `content` field of the translated receiver is cast to a
                    # pointer to the element type and indexed with the translated first argument.
                    LOCATION_AWARE_ARRAY_ACCESS = proc do |receiver, method_name, args, translator, result_type|
                        array_expr = receiver.accept(translator.expression_translator)
                        element_c_type = receiver.get_type.singleton_type.inner_type.to_c_type
                        index_expr = args[0].accept(translator.expression_translator)

                        "((#{element_c_type} *) #{array_expr}.content)[#{index_expr}]"
                    end

                    # Return type of an element access: the receiver's inner type.
                    INNER_TYPE = proc { |rcvr| rcvr.inner_type }

                    implement(
                        ALL_ARRAY_TYPES,
                        :[],
                        INNER_TYPE,
                        1,
                        "#0[#I1]")

                    implement LOCATION_AWARE_ARRAY_TYPE, :[], INNER_TYPE, 1, LOCATION_AWARE_ARRAY_ACCESS
                end
            end
         
     | 
| 
         @@ -0,0 +1,313 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require_relative "../types/types/array_type.rb"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require_relative "../ast/interpreter.rb"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            module Ikra
         
     | 
| 
      
 5 
     | 
    
         
            +
                module RubyIntegration
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
                    # This visitor traverses the tree of symbolically executed parallel operations. It raises
         
     | 
| 
      
 8 
     | 
    
         
            +
                    # an exception if any array command in that tree was generated by symbolic execution/interpretation of
         
     | 
| 
      
 9 
     | 
    
         
            +
                    # `send_node`.
         
     | 
| 
      
 10 
     | 
    
         
            +
                    class SymbolicCycleFinder < Symbolic::Visitor
                        # Visits `command` with a fresh finder for `send_node`. A
                        # [CycleDetectedError] is raised as soon as a visited array command
                        # has `send_node` as its generator node.
                        def self.raise_on_cycle(command, send_node)
                            command.accept(new(send_node))
                        end

                        # `send_node` is the AST send node to look for.
                        def initialize(send_node)
                            @send_node = send_node
                        end

                        def visit_array_command(node)
                            raise CycleDetectedError.new(node: node) if node.generator_node == @send_node

                            # This command was generated elsewhere; hand over to the base
                            # visitor so that dependent computations are checked as well.
                            super
                        end
                    end
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
                    # Raised when the AST send node that is currently being type-inferred
                    # already generated one of the array commands it depends on.
                    class CycleDetectedError < RuntimeError
                        # The array command at which the cycle was detected.
                        attr_reader :node

                        # `node` is the offending array command. It is kept for callers that
                        # rescue this error and is included in the error message.
                        def initialize(node:)
                            @node = node

                            # Without an explicit message, Ruby would only report the class name.
                            super("Cycle detected during symbolic execution at array command: #{node}")
                        end
                    end
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
                    # Receiver type predicate: true for every symbolic array command.
                    ALL_ARRAY_COMMAND_TYPES = proc { |type| type.is_a?(Symbolic::ArrayCommand) }
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
                    # Return type of `pmap`: symbolically applies `pmap` to the receiver
                    # command and wraps the resulting command in a union type.
                    PMAP_TYPE = proc do |rcvr_type, *args_types, send_node:|
                        # TODO: Handle keyword arguments

                        # If `send_node` already generated one of the commands that `rcvr_type`
                        # depends on, type inference would not terminate: abort with an error.
                        SymbolicCycleFinder.raise_on_cycle(rcvr_type, send_node)

                        # A single argument, if given, must be a Hash node; it is interpreted
                        # and forwarded as additional keyword arguments.
                        extra_kwargs =
                            if send_node.arguments.size != 1
                                {}
                            elsif send_node.arguments.first.is_a?(AST::HashNode)
                                AST::Interpreter.interpret(send_node.arguments.first)
                            else
                                raise ArgumentError.new("If an argument is given, it must be a Hash of kwargs.")
                            end

                        map_command = rcvr_type.pmap(
                            ast: send_node.block_argument,
                            generator_node: send_node,
                            # TODO: Fix binding
                            command_binding: send_node.find_behavior_node.binding,
                            **extra_kwargs)

                        map_command.to_union_type
                    end
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
                    # Return type of `pzip`: one zip command per singleton type of the first
                    # argument, all merged into a single union type.
                    PZIP_TYPE = proc do |rcvr_type, *args_types, send_node:|
                        # TODO: Support multiple arguments for `pzip`
                        zipped_types = args_types.first.map do |sing_type|
                            if sing_type.is_union_type?
                                raise AssertionError.new("Singleton type expected")
                            end

                            zip_command = rcvr_type.pzip(sing_type, generator_node: send_node)
                            zip_command.to_union_type
                        end

                        # Fold all per-argument-type results into one union type
                        zipped_types.reduce(Types::UnionType.new) { |acc, type| acc.expand_return_type(type) }
                    end
         
     | 
| 
      
 78 
     | 
    
         
            +
             
     | 
| 
      
 79 
     | 
    
         
            +
                    # Return type of `pstencil`: interprets the call's arguments as Ruby
                    # values, symbolically applies `pstencil` to the receiver command and
                    # wraps the resulting command in a union type.
                    PSTENCIL_TYPE = proc do |rcvr_type, *args_types, send_node:|
                        # TODO: Handle keyword arguments
                        ruby_args = send_node.arguments.map { |node| AST::Interpreter.interpret(node) }

                        # With three arguments, the last one must be a Hash; it is removed
                        # from the positional arguments and forwarded as keyword arguments.
                        extra_kwargs =
                            if args_types.size != 3
                                {}
                            elsif ruby_args.last.is_a?(Hash)
                                ruby_args.pop
                            else
                                raise ArgumentError.new("If 3 arguments are given, the last one must be a Hash of kwargs.")
                            end

                        SymbolicCycleFinder.raise_on_cycle(rcvr_type, send_node)

                        stencil_command = rcvr_type.pstencil(
                            *ruby_args,
                            ast: send_node.block_argument,
                            generator_node: send_node,
                            # TODO: Fix binding
                            command_binding: send_node.find_behavior_node.binding,
                            **extra_kwargs)

                        stencil_command.to_union_type
                    end
         
     | 
| 
      
 106 
     | 
    
         
            +
             
     | 
| 
      
 107 
     | 
    
         
            +
                    # Return type of `preduce`: symbolically applies `preduce` to the
                    # receiver command and wraps the resulting command in a union type.
                    PREDUCE_TYPE = proc do |rcvr_type, *args_types, send_node:|
                        # TODO: Handle keyword arguments

                        SymbolicCycleFinder.raise_on_cycle(rcvr_type, send_node)

                        reduce_command = rcvr_type.preduce(
                            ast: send_node.block_argument,
                            generator_node: send_node)

                        reduce_command.to_union_type
                    end
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
                    # Generates the host-section code that launches the kernels for the
                    # receiver's array command. Returns the filled-in
                    # `host_section_launch_parallel_section.cpp` template.
                    LAUNCH_KERNEL = proc do |receiver, method_name, arguments, translator, result_type|
                        # The receiver's type is the symbolically executed parallel section,
                        # i.e., an ArrayCommand.
                        array_command = receiver.get_type.singleton_type

                        # Translate the command inside its own kernel launcher scope
                        command_translator = translator.command_translator
                        command_translator.push_kernel_launcher
                        translation_result = array_command.accept(command_translator)
                        kernel_launcher = command_translator.pop_kernel_launcher(translation_result)

                        # The same [ProgramBuilder] is reused for the entire host section
                        program_builder = command_translator.program_builder

                        # Prepare kernel launchers for launch of `array_command`, then
                        # generate launch code for all kernels
                        program_builder.prepare_additional_args_for_launch(array_command)
                        launch_code = program_builder.build_kernel_launchers

                        # Always return a device pointer. Data is transferred to the host
                        # only at the very end.
                        result_expr = kernel_launcher.kernel_result_var_name

                        size_checker = Translator::ArrayCommandStructBuilder::RequireRuntimeSizeChecker
                        result_size =
                            if size_checker.require_size_function?(array_command)
                                # Size is not statically known, take information from receiver.
                                # TODO: Code depends on template. `cmd` is defined in template.
                                "cmd->size()"
                            else
                                # Size is known statically
                                array_command.size.to_s
                            end

                        # Debug information: the command, plus its generator node if present
                        debug_information = array_command.to_s
                        if array_command.generator_node != nil
                            debug_information = debug_information + ": " + array_command.generator_node.to_s
                        end

                        launch_section = Translator.read_file(
                            file_name: "host_section_launch_parallel_section.cpp",
                            replacements: {
                                "debug_information" => debug_information,
                                "array_command" => receiver.accept(translator.expression_translator),
                                "array_command_type" => array_command.to_c_type,
                                "result_size" => result_size,
                                "kernel_invocation" => launch_code,
                                "kernel_result" => result_expr,
                                "free_memory" => program_builder.build_memory_free_except_last})

                        # Forget the launchers built so far; otherwise a later, unrelated
                        # LAUNCH_KERNEL branch might launch them again.
                        program_builder.clear_kernel_launchers

                        # Build all array command structs for this command
                        program_builder.add_array_command_struct(
                            *Translator::ArrayCommandStructBuilder.build_all_structs(array_command))

                        launch_section
                    end
         
     | 
| 
      
 172 
     | 
    
         
            +
             
     | 
| 
      
 173 
     | 
    
         
            +
                    # Return type of executing an array command: a fixed-size array located
                    # on the device, with the command's result type and dimensions.
                    ARRAY_COMMAND_TO_ARRAY_TYPE = proc do |rcvr_type, *args_types, send_node:|
                        device_array_type = Types::LocationAwareFixedSizeArrayType.new(
                            rcvr_type.result_type,
                            rcvr_type.dimensions,
                            location: :device)

                        device_array_type.to_union_type
                    end
         
     | 
| 
      
 179 
     | 
    
         
            +
             
     | 
| 
      
 180 
     | 
    
         
            +
                    # Generates a C++ `new` expression that constructs the array command
                    # struct for a parallel section from its translated inputs.
                    SYMBOLICALLY_EXECUTE_KERNEL = proc do |receiver, method_name, arguments, translator, result_type|
                        raise AssertionError.new("Singleton type expected") unless result_type.is_singleton?

                        # Constructor arguments: the result field (NULL) comes first, followed
                        # by the translated receiver.
                        constructor_args = ["NULL"]
                        constructor_args << receiver.accept(translator.expression_translator)

                        # Of the arguments to the parallel section, only those whose type is
                        # a single ArrayCommand are passed on to the constructor.
                        arguments.each do |arg|
                            arg_type = arg.get_type
                            next unless arg_type.is_singleton? &&
                                arg_type.singleton_type.is_a?(Symbolic::ArrayCommand)

                            constructor_args << arg.accept(translator.expression_translator)
                        end

                        all_args = constructor_args.join(", ")

                        # This is a hack: the C type is a pointer type, so its last two
                        # characters are cut off to obtain the struct name.
                        struct_name = result_type.singleton_type.to_c_type[0...-2]

                        "new #{struct_name}(#{all_args})"
                    end
         
     | 
| 
      
 205 
     | 
    
         
            +
             
     | 
| 
      
 206 
     | 
    
         
            +
                    # Receiver type predicate: true for every location-aware array type.
                    ALL_LOCATION_AWARE_ARRAY_TYPES = proc { |type| type.is_a?(Types::LocationAwareArrayType) }
         
     | 
| 
      
 209 
     | 
    
         
            +
             
     | 
| 
      
 210 
     | 
    
         
            +
                    # Return type of copying a location-aware array to the host: a fixed-size
                    # array with the same inner type and dimensions, located on the host.
                    LOCATION_AWARE_ARRAY_TO_HOST_ARRAY_TYPE = proc do |rcvr_type, *args_types|
                        # TODO: Should also be able to handle variable variant
                        host_array_type = Types::LocationAwareFixedSizeArrayType.new(
                            rcvr_type.inner_type,
                            rcvr_type.dimensions,
                            location: :host)

                        host_array_type.to_union_type
                    end
         
     | 
| 
      
 217 
     | 
    
         
            +
             
     | 
| 
      
 218 
     | 
    
         
            +
                    # Return type of `__call__` on an array: calling it does not do
                    # anything, so the receiver type is returned as a union type.
                    LOCATION_AWARE_ARRAY_CALL_TYPE = proc { |rcvr_type, *args_types| rcvr_type.to_union_type }
         
     | 
| 
      
 222 
     | 
    
         
            +
             
     | 
| 
      
 223 
     | 
    
         
            +
                    # Generates the expression for an array on the host side. An array that
                    # is already located on the host is translated as is; any other array is
                    # wrapped in the `memcpy_device_to_host_expr.cpp` template.
                    COPY_ARRAY_TO_HOST = proc do |receiver, method_name, args, translator, result_type|
                        array_type = receiver.get_type.singleton_type

                        if array_type.location != :host
                            Translator.read_file(
                                file_name: "memcpy_device_to_host_expr.cpp",
                                replacements: {
                                    "type" => array_type.inner_type.to_c_type,
                                    "device_array" => receiver.accept(translator.expression_translator)})
                        else
                            # Nothing to copy
                            receiver.accept(translator.expression_translator)
                        end
                    end
         
     | 
| 
      
 234 
     | 
    
         
            +
             
     | 
| 
      
 235 
     | 
    
         
            +
                    # Type rule: converts the receiver (array) type to its command type
                    # via `to_command` and returns that as a union type. Extra argument
                    # types and the required `send_node:` keyword are accepted but not
                    # used here.
                    ARRAY_TYPE_TO_COMMAND_TYPE = proc do |rcvr_type, *args_types, send_node:|
                        command_type = rcvr_type.to_command
                        command_type.to_union_type
                    end
         
     | 
| 
      
 238 
     | 
    
         
            +
             
     | 
| 
      
 239 
     | 
    
         
            +
                    # Code generator for `free_memory`: reads the
                    # "free_memory_for_command.cpp" template, substituting the receiver's
                    # C type and the translated receiver expression. `method_name`,
                    # `args` and `result_type` are not used here.
                    FREE_MEMORY_FOR_ARRAY_COMMAND = proc do |receiver, method_name, args, translator, result_type|
                        receiver_c_type = receiver.get_type.to_c_type
                        receiver_expr = receiver.accept(translator.expression_translator)

                        Translator.read_file(
                            file_name: "free_memory_for_command.cpp",
                            replacements: {
                                "type" => receiver_c_type,
                                "receiver" => receiver_expr})
                    end
         
     | 
| 
      
 245 
     | 
    
         
            +
             
     | 
| 
      
 246 
     | 
    
         
            +
                    # Registrations below are passed to `implement` as: receiver types,
                    # method name, return type (constant or type rule), argument count
                    # (integer or range), implementation.
                    # NOTE(review): parameter meanings are inferred from usage here;
                    # confirm against the definition of `implement`.

                    # Manually free memory
                    # TODO: Implement escape analysis and try to reuse memory
                    implement(ALL_ARRAY_COMMAND_TYPES, :free_memory, BOOL, 0, FREE_MEMORY_FOR_ARRAY_COMMAND)

                    # Implement all parallel operations
                    implement(ALL_ARRAY_COMMAND_TYPES, :pmap, PMAP_TYPE, 0..1, SYMBOLICALLY_EXECUTE_KERNEL)

                    implement(ALL_LOCATION_AWARE_ARRAY_TYPES, :to_command, ARRAY_TYPE_TO_COMMAND_TYPE, 0, SYMBOLICALLY_EXECUTE_KERNEL)

                    implement(ALL_ARRAY_COMMAND_TYPES, :pzip, PZIP_TYPE, 1, SYMBOLICALLY_EXECUTE_KERNEL, expect_singleton_args: true)

                    # 2..3 arguments: neighborhood and default value, maybe hash
                    implement(ALL_ARRAY_COMMAND_TYPES, :pstencil, PSTENCIL_TYPE, 2..3, SYMBOLICALLY_EXECUTE_KERNEL)

                    implement(ALL_ARRAY_COMMAND_TYPES, :preduce, PREDUCE_TYPE, 0, SYMBOLICALLY_EXECUTE_KERNEL)

                    # `__call__` on an array command is implemented by LAUNCH_KERNEL
                    implement(ALL_ARRAY_COMMAND_TYPES, :__call__, ARRAY_COMMAND_TO_ARRAY_TYPE, 0, LAUNCH_KERNEL)

                    implement(ALL_LOCATION_AWARE_ARRAY_TYPES, :__to_host_array__, LOCATION_AWARE_ARRAY_TO_HOST_ARRAY_TYPE, 0, COPY_ARRAY_TO_HOST)

                    # `__call__` on an array does not do anything; "#0" is presumably a
                    # placeholder for the receiver expression -- TODO confirm.
                    implement(ALL_LOCATION_AWARE_ARRAY_TYPES, :__call__, LOCATION_AWARE_ARRAY_CALL_TYPE, 0, "#0")
         
     | 
| 
      
 312 
     | 
    
         
            +
                end
         
     | 
| 
      
 313 
     | 
    
         
            +
            end
         
     |