lightgbm 0.3.4 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/LICENSE.txt +1 -1
- data/lib/lightgbm/booster.rb +125 -82
- data/lib/lightgbm/dataset.rb +55 -33
- data/lib/lightgbm/ffi.rb +15 -0
- data/lib/lightgbm/inner_predictor.rb +159 -0
- data/lib/lightgbm/model.rb +1 -1
- data/lib/lightgbm/utils.rb +9 -2
- data/lib/lightgbm/version.rb +1 -1
- data/lib/lightgbm.rb +4 -0
- metadata +5 -8
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 4fadfa7ea250cf7c48f076effb5ce8f5db3cf0c8ab87bb04f2033457a502721a
         | 
| 4 | 
            +
              data.tar.gz: 3af4cac369a3c684bdb387036845eca04747c14c8e12c9b38625e5c38130de74
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 21fb7ae25e1f085cd3642bb02ce32f8378f5d6013f6e8504b86586c86dfaf5c29b12d83b10e3bfd747a3dbfc996eb8473c12313ca5e2f4302554b0a6c40261e3
         | 
| 7 | 
            +
              data.tar.gz: 75d7b3cea373adedbe8a6cc7c9f0b47f473ac0e77126779efd2f5ab1899596f4a8a926e1d97b033e5cb6e43b0c0f3706e8e7eb9f280d29354ba18792e2f4078b
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -1,3 +1,14 @@ | |
| 1 | 
            +
            ## 0.4.0 (2025-01-05)
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            - Added support for different prediction types
         | 
| 4 | 
            +
            - Added support for `pandas_categorical` to `predict` method
         | 
| 5 | 
            +
            - Added support for hashes and Rover data frames to `predict` method
         | 
| 6 | 
            +
            - Added support for hashes to `Dataset`
         | 
| 7 | 
            +
            - Added `importance_type` option to `dump_model`, `model_to_string`, and `save_model` methods
         | 
| 8 | 
            +
            - Changed `Dataset` to use column names for feature names with Rover and Daru
         | 
| 9 | 
            +
            - Changed `predict` method to match feature names with Daru
         | 
| 10 | 
            +
            - Dropped support for Ruby < 3.1
         | 
| 11 | 
            +
             | 
| 1 12 | 
             
            ## 0.3.4 (2024-07-28)
         | 
| 2 13 |  | 
| 3 14 | 
             
            - Updated LightGBM to 4.5.0
         | 
    
        data/LICENSE.txt
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            The MIT License (MIT)
         | 
| 2 2 |  | 
| 3 3 | 
             
            Copyright (c) Microsoft Corporation
         | 
| 4 | 
            -
            Copyright (c) 2019- | 
| 4 | 
            +
            Copyright (c) 2019-2025 Andrew Kane
         | 
| 5 5 |  | 
| 6 6 | 
             
            Permission is hereby granted, free of charge, to any person obtaining a copy
         | 
| 7 7 | 
             
            of this software and associated documentation files (the "Software"), to deal
         | 
    
        data/lib/lightgbm/booster.rb
    CHANGED
    
    | @@ -1,20 +1,29 @@ | |
| 1 1 | 
             
            module LightGBM
         | 
| 2 2 | 
             
              class Booster
         | 
| 3 | 
            -
                 | 
| 3 | 
            +
                include Utils
         | 
| 4 | 
            +
             | 
| 5 | 
            +
                attr_accessor :best_iteration, :train_data_name, :params
         | 
| 4 6 |  | 
| 5 7 | 
             
                def initialize(params: nil, train_set: nil, model_file: nil, model_str: nil)
         | 
| 6 | 
            -
                  @handle = ::FFI::MemoryPointer.new(:pointer)
         | 
| 7 8 | 
             
                  if model_str
         | 
| 8 9 | 
             
                    model_from_string(model_str)
         | 
| 9 10 | 
             
                  elsif model_file
         | 
| 10 11 | 
             
                    out_num_iterations = ::FFI::MemoryPointer.new(:int)
         | 
| 11 | 
            -
                     | 
| 12 | 
            +
                    create_handle do |handle|
         | 
| 13 | 
            +
                      safe_call FFI.LGBM_BoosterCreateFromModelfile(model_file, out_num_iterations, handle)
         | 
| 14 | 
            +
                    end
         | 
| 15 | 
            +
                    @pandas_categorical = load_pandas_categorical(file_name: model_file)
         | 
| 16 | 
            +
                    if params
         | 
| 17 | 
            +
                      warn "[lightgbm] Ignoring params argument, using parameters from model file."
         | 
| 18 | 
            +
                    end
         | 
| 19 | 
            +
                    @params = loaded_param
         | 
| 12 20 | 
             
                  else
         | 
| 13 21 | 
             
                    params ||= {}
         | 
| 14 22 | 
             
                    set_verbosity(params)
         | 
| 15 | 
            -
                     | 
| 23 | 
            +
                    create_handle do |handle|
         | 
| 24 | 
            +
                      safe_call FFI.LGBM_BoosterCreate(train_set.handle, params_str(params), handle)
         | 
| 25 | 
            +
                    end
         | 
| 16 26 | 
             
                  end
         | 
| 17 | 
            -
                  ObjectSpace.define_finalizer(@handle, self.class.finalize(handle_pointer.to_i))
         | 
| 18 27 |  | 
| 19 28 | 
             
                  self.best_iteration = -1
         | 
| 20 29 |  | 
| @@ -23,28 +32,28 @@ module LightGBM | |
| 23 32 | 
             
                end
         | 
| 24 33 |  | 
| 25 34 | 
             
                def add_valid(data, name)
         | 
| 26 | 
            -
                   | 
| 35 | 
            +
                  safe_call FFI.LGBM_BoosterAddValidData(@handle, data.handle)
         | 
| 27 36 | 
             
                  @name_valid_sets << name
         | 
| 28 37 | 
             
                  self # consistent with Python API
         | 
| 29 38 | 
             
                end
         | 
| 30 39 |  | 
| 31 40 | 
             
                def current_iteration
         | 
| 32 41 | 
             
                  out = ::FFI::MemoryPointer.new(:int)
         | 
| 33 | 
            -
                   | 
| 42 | 
            +
                  safe_call FFI.LGBM_BoosterGetCurrentIteration(@handle, out)
         | 
| 34 43 | 
             
                  out.read_int
         | 
| 35 44 | 
             
                end
         | 
| 36 45 |  | 
| 37 | 
            -
                def dump_model(num_iteration: nil, start_iteration: 0)
         | 
| 46 | 
            +
                def dump_model(num_iteration: nil, start_iteration: 0, importance_type: "split")
         | 
| 38 47 | 
             
                  num_iteration ||= best_iteration
         | 
| 48 | 
            +
                  importance_type_int = feature_importance_type_mapper(importance_type)
         | 
| 39 49 | 
             
                  buffer_len = 1 << 20
         | 
| 40 50 | 
             
                  out_len = ::FFI::MemoryPointer.new(:int64)
         | 
| 41 51 | 
             
                  out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
         | 
| 42 | 
            -
                   | 
| 43 | 
            -
                   | 
| 44 | 
            -
                  actual_len = read_int64(out_len)
         | 
| 52 | 
            +
                  safe_call FFI.LGBM_BoosterDumpModel(@handle, start_iteration, num_iteration, importance_type_int, buffer_len, out_len, out_str)
         | 
| 53 | 
            +
                  actual_len = out_len.read_int64
         | 
| 45 54 | 
             
                  if actual_len > buffer_len
         | 
| 46 55 | 
             
                    out_str = ::FFI::MemoryPointer.new(:char, actual_len)
         | 
| 47 | 
            -
                     | 
| 56 | 
            +
                    safe_call FFI.LGBM_BoosterDumpModel(@handle, start_iteration, num_iteration, importance_type_int, actual_len, out_len, out_str)
         | 
| 48 57 | 
             
                  end
         | 
| 49 58 | 
             
                  out_str.read_string
         | 
| 50 59 | 
             
                end
         | 
| @@ -60,19 +69,10 @@ module LightGBM | |
| 60 69 |  | 
| 61 70 | 
             
                def feature_importance(iteration: nil, importance_type: "split")
         | 
| 62 71 | 
             
                  iteration ||= best_iteration
         | 
| 63 | 
            -
                   | 
| 64 | 
            -
                    case importance_type
         | 
| 65 | 
            -
                    when "split"
         | 
| 66 | 
            -
                      0
         | 
| 67 | 
            -
                    when "gain"
         | 
| 68 | 
            -
                      1
         | 
| 69 | 
            -
                    else
         | 
| 70 | 
            -
                      -1
         | 
| 71 | 
            -
                    end
         | 
| 72 | 
            -
             | 
| 72 | 
            +
                  importance_type_int = feature_importance_type_mapper(importance_type)
         | 
| 73 73 | 
             
                  num_feature = self.num_feature
         | 
| 74 74 | 
             
                  out_result = ::FFI::MemoryPointer.new(:double, num_feature)
         | 
| 75 | 
            -
                   | 
| 75 | 
            +
                  safe_call FFI.LGBM_BoosterFeatureImportance(@handle, iteration, importance_type_int, out_result)
         | 
| 76 76 | 
             
                  out_result.read_array_of_double(num_feature).map(&:to_i)
         | 
| 77 77 | 
             
                end
         | 
| 78 78 |  | 
| @@ -84,13 +84,13 @@ module LightGBM | |
| 84 84 | 
             
                  out_strs = ::FFI::MemoryPointer.new(:pointer, num_feature)
         | 
| 85 85 | 
             
                  str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
         | 
| 86 86 | 
             
                  out_strs.write_array_of_pointer(str_ptrs)
         | 
| 87 | 
            -
                   | 
| 87 | 
            +
                  safe_call FFI.LGBM_BoosterGetFeatureNames(@handle, len, out_len, buffer_len, out_buffer_len, out_strs)
         | 
| 88 88 |  | 
| 89 89 | 
             
                  actual_len = out_buffer_len.read(:size_t)
         | 
| 90 90 | 
             
                  if actual_len > buffer_len
         | 
| 91 91 | 
             
                    str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, actual_len) }
         | 
| 92 92 | 
             
                    out_strs.write_array_of_pointer(str_ptrs)
         | 
| 93 | 
            -
                     | 
| 93 | 
            +
                    safe_call FFI.LGBM_BoosterGetFeatureNames(@handle, len, out_len, actual_len, out_buffer_len, out_strs)
         | 
| 94 94 | 
             
                  end
         | 
| 95 95 |  | 
| 96 96 | 
             
                  str_ptrs[0, out_len.read(:size_t)].map(&:read_string)
         | 
| @@ -98,130 +98,122 @@ module LightGBM | |
| 98 98 |  | 
| 99 99 | 
             
                def model_from_string(model_str)
         | 
| 100 100 | 
             
                  out_num_iterations = ::FFI::MemoryPointer.new(:int)
         | 
| 101 | 
            -
                   | 
| 101 | 
            +
                  create_handle do |handle|
         | 
| 102 | 
            +
                    safe_call FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, handle)
         | 
| 103 | 
            +
                  end
         | 
| 104 | 
            +
                  @pandas_categorical = load_pandas_categorical(model_str: model_str)
         | 
| 105 | 
            +
                  @params = loaded_param
         | 
| 106 | 
            +
                  @cached_feature_name = nil
         | 
| 102 107 | 
             
                  self
         | 
| 103 108 | 
             
                end
         | 
| 104 109 |  | 
| 105 | 
            -
                def model_to_string(num_iteration: nil, start_iteration: 0)
         | 
| 110 | 
            +
                def model_to_string(num_iteration: nil, start_iteration: 0, importance_type: "split")
         | 
| 106 111 | 
             
                  num_iteration ||= best_iteration
         | 
| 112 | 
            +
                  importance_type_int = feature_importance_type_mapper(importance_type)
         | 
| 107 113 | 
             
                  buffer_len = 1 << 20
         | 
| 108 114 | 
             
                  out_len = ::FFI::MemoryPointer.new(:int64)
         | 
| 109 115 | 
             
                  out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
         | 
| 110 | 
            -
                   | 
| 111 | 
            -
                   | 
| 112 | 
            -
                  actual_len = read_int64(out_len)
         | 
| 116 | 
            +
                  safe_call FFI.LGBM_BoosterSaveModelToString(@handle, start_iteration, num_iteration, importance_type_int, buffer_len, out_len, out_str)
         | 
| 117 | 
            +
                  actual_len = out_len.read_int64
         | 
| 113 118 | 
             
                  if actual_len > buffer_len
         | 
| 114 119 | 
             
                    out_str = ::FFI::MemoryPointer.new(:char, actual_len)
         | 
| 115 | 
            -
                     | 
| 120 | 
            +
                    safe_call FFI.LGBM_BoosterSaveModelToString(@handle, start_iteration, num_iteration, importance_type_int, actual_len, out_len, out_str)
         | 
| 116 121 | 
             
                  end
         | 
| 117 122 | 
             
                  out_str.read_string
         | 
| 118 123 | 
             
                end
         | 
| 119 124 |  | 
| 120 125 | 
             
                def num_feature
         | 
| 121 126 | 
             
                  out = ::FFI::MemoryPointer.new(:int)
         | 
| 122 | 
            -
                   | 
| 127 | 
            +
                  safe_call FFI.LGBM_BoosterGetNumFeature(@handle, out)
         | 
| 123 128 | 
             
                  out.read_int
         | 
| 124 129 | 
             
                end
         | 
| 125 130 | 
             
                alias_method :num_features, :num_feature # legacy typo
         | 
| 126 131 |  | 
| 127 132 | 
             
                def num_model_per_iteration
         | 
| 128 133 | 
             
                  out = ::FFI::MemoryPointer.new(:int)
         | 
| 129 | 
            -
                   | 
| 134 | 
            +
                  safe_call FFI.LGBM_BoosterNumModelPerIteration(@handle, out)
         | 
| 130 135 | 
             
                  out.read_int
         | 
| 131 136 | 
             
                end
         | 
| 132 137 |  | 
| 133 138 | 
             
                def num_trees
         | 
| 134 139 | 
             
                  out = ::FFI::MemoryPointer.new(:int)
         | 
| 135 | 
            -
                   | 
| 140 | 
            +
                  safe_call FFI.LGBM_BoosterNumberOfTotalModel(@handle, out)
         | 
| 136 141 | 
             
                  out.read_int
         | 
| 137 142 | 
             
                end
         | 
| 138 143 |  | 
| 139 | 
            -
                 | 
| 140 | 
            -
             | 
| 141 | 
            -
                   | 
| 142 | 
            -
                    if  | 
| 143 | 
            -
                       | 
| 144 | 
            +
                def predict(data, start_iteration: 0, num_iteration: nil, raw_score: false, pred_leaf: false, pred_contrib: false, **kwargs)
         | 
| 145 | 
            +
                  predictor = InnerPredictor.from_booster(self, kwargs.transform_values(&:dup))
         | 
| 146 | 
            +
                  if num_iteration.nil?
         | 
| 147 | 
            +
                    if start_iteration <= 0
         | 
| 148 | 
            +
                      num_iteration = best_iteration
         | 
| 144 149 | 
             
                    else
         | 
| 145 | 
            -
                       | 
| 150 | 
            +
                      num_iteration = -1
         | 
| 146 151 | 
             
                    end
         | 
| 147 | 
            -
             | 
| 148 | 
            -
                   | 
| 149 | 
            -
             | 
| 150 | 
            -
             | 
| 151 | 
            -
             | 
| 152 | 
            -
             | 
| 153 | 
            -
             | 
| 154 | 
            -
             | 
| 155 | 
            -
                   | 
| 156 | 
            -
                  handle_missing(flat_input)
         | 
| 157 | 
            -
                  data = ::FFI::MemoryPointer.new(:double, input.count * input.first.count)
         | 
| 158 | 
            -
                  data.write_array_of_double(flat_input)
         | 
| 159 | 
            -
             | 
| 160 | 
            -
                  out_len = ::FFI::MemoryPointer.new(:int64)
         | 
| 161 | 
            -
                  out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
         | 
| 162 | 
            -
                  check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 1, input.count, input.first.count, 1, 0, start_iteration, num_iteration, params_str(params), out_len, out_result)
         | 
| 163 | 
            -
                  out = out_result.read_array_of_double(read_int64(out_len))
         | 
| 164 | 
            -
                  out = out.each_slice(num_class).to_a if num_class > 1
         | 
| 165 | 
            -
             | 
| 166 | 
            -
                  singular ? out.first : out
         | 
| 152 | 
            +
                  end
         | 
| 153 | 
            +
                  predictor.predict(
         | 
| 154 | 
            +
                    data,
         | 
| 155 | 
            +
                    start_iteration: start_iteration,
         | 
| 156 | 
            +
                    num_iteration: num_iteration,
         | 
| 157 | 
            +
                    raw_score: raw_score,
         | 
| 158 | 
            +
                    pred_leaf: pred_leaf,
         | 
| 159 | 
            +
                    pred_contrib: pred_contrib
         | 
| 160 | 
            +
                  )
         | 
| 167 161 | 
             
                end
         | 
| 168 162 |  | 
| 169 | 
            -
                def save_model(filename, num_iteration: nil, start_iteration: 0)
         | 
| 163 | 
            +
                def save_model(filename, num_iteration: nil, start_iteration: 0, importance_type: "split")
         | 
| 170 164 | 
             
                  num_iteration ||= best_iteration
         | 
| 171 | 
            -
                   | 
| 172 | 
            -
                   | 
| 165 | 
            +
                  importance_type_int = feature_importance_type_mapper(importance_type)
         | 
| 166 | 
            +
                  safe_call FFI.LGBM_BoosterSaveModel(@handle, start_iteration, num_iteration, importance_type_int, filename)
         | 
| 173 167 | 
             
                  self # consistent with Python API
         | 
| 174 168 | 
             
                end
         | 
| 175 169 |  | 
| 176 170 | 
             
                def update
         | 
| 177 171 | 
             
                  finished = ::FFI::MemoryPointer.new(:int)
         | 
| 178 | 
            -
                   | 
| 172 | 
            +
                  safe_call FFI.LGBM_BoosterUpdateOneIter(@handle, finished)
         | 
| 179 173 | 
             
                  finished.read_int == 1
         | 
| 180 174 | 
             
                end
         | 
| 181 175 |  | 
| 182 | 
            -
                def self.finalize(addr)
         | 
| 183 | 
            -
                  # must use proc instead of stabby lambda
         | 
| 184 | 
            -
                  proc { FFI.LGBM_BoosterFree(::FFI::Pointer.new(:pointer, addr)) }
         | 
| 185 | 
            -
                end
         | 
| 186 | 
            -
             | 
| 187 176 | 
             
                private
         | 
| 188 177 |  | 
| 189 | 
            -
                def  | 
| 190 | 
            -
                   | 
| 178 | 
            +
                def create_handle
         | 
| 179 | 
            +
                  ::FFI::MemoryPointer.new(:pointer) do |handle|
         | 
| 180 | 
            +
                    yield handle
         | 
| 181 | 
            +
                    @handle = ::FFI::AutoPointer.new(handle.read_pointer, FFI.method(:LGBM_BoosterFree))
         | 
| 182 | 
            +
                  end
         | 
| 191 183 | 
             
                end
         | 
| 192 184 |  | 
| 193 185 | 
             
                def eval_counts
         | 
| 194 186 | 
             
                  out = ::FFI::MemoryPointer.new(:int)
         | 
| 195 | 
            -
                   | 
| 187 | 
            +
                  safe_call FFI.LGBM_BoosterGetEvalCounts(@handle, out)
         | 
| 196 188 | 
             
                  out.read_int
         | 
| 197 189 | 
             
                end
         | 
| 198 190 |  | 
| 199 191 | 
             
                def eval_names
         | 
| 200 | 
            -
                  eval_counts  | 
| 192 | 
            +
                  eval_counts = self.eval_counts
         | 
| 201 193 | 
             
                  out_len = ::FFI::MemoryPointer.new(:int)
         | 
| 202 194 | 
             
                  out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
         | 
| 203 195 | 
             
                  out_strs = ::FFI::MemoryPointer.new(:pointer, eval_counts)
         | 
| 204 196 | 
             
                  buffer_len = 255
         | 
| 205 197 | 
             
                  str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
         | 
| 206 198 | 
             
                  out_strs.write_array_of_pointer(str_ptrs)
         | 
| 207 | 
            -
                   | 
| 199 | 
            +
                  safe_call FFI.LGBM_BoosterGetEvalNames(@handle, eval_counts, out_len, buffer_len, out_buffer_len, out_strs)
         | 
| 208 200 |  | 
| 209 201 | 
             
                  actual_len = out_buffer_len.read(:size_t)
         | 
| 210 202 | 
             
                  if actual_len > buffer_len
         | 
| 211 203 | 
             
                    str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:char, actual_len) }
         | 
| 212 204 | 
             
                    out_strs.write_array_of_pointer(str_ptrs)
         | 
| 213 | 
            -
                     | 
| 205 | 
            +
                    safe_call FFI.LGBM_BoosterGetEvalNames(@handle, eval_counts, out_len, actual_len, out_buffer_len, out_strs)
         | 
| 214 206 | 
             
                  end
         | 
| 215 207 |  | 
| 216 208 | 
             
                  str_ptrs.map(&:read_string)
         | 
| 217 209 | 
             
                end
         | 
| 218 210 |  | 
| 219 211 | 
             
                def inner_eval(name, i)
         | 
| 220 | 
            -
                  eval_names  | 
| 212 | 
            +
                  eval_names = self.eval_names
         | 
| 221 213 |  | 
| 222 214 | 
             
                  out_len = ::FFI::MemoryPointer.new(:int)
         | 
| 223 215 | 
             
                  out_results = ::FFI::MemoryPointer.new(:double, eval_names.count)
         | 
| 224 | 
            -
                   | 
| 216 | 
            +
                  safe_call FFI.LGBM_BoosterGetEval(@handle, i, out_len, out_results)
         | 
| 225 217 | 
             
                  vals = out_results.read_array_of_double(out_len.read_int)
         | 
| 226 218 |  | 
| 227 219 | 
             
                  eval_names.zip(vals).map do |eval_name, val|
         | 
| @@ -232,15 +224,66 @@ module LightGBM | |
| 232 224 |  | 
| 233 225 | 
             
                def num_class
         | 
| 234 226 | 
             
                  out = ::FFI::MemoryPointer.new(:int)
         | 
| 235 | 
            -
                   | 
| 227 | 
            +
                  safe_call FFI.LGBM_BoosterGetNumClasses(@handle, out)
         | 
| 236 228 | 
             
                  out.read_int
         | 
| 237 229 | 
             
                end
         | 
| 238 230 |  | 
| 239 | 
            -
                 | 
| 240 | 
            -
             | 
| 241 | 
            -
                  ptr.read_array_of_int64(1).first
         | 
| 231 | 
            +
                def cached_feature_name
         | 
| 232 | 
            +
                  @cached_feature_name ||= feature_name
         | 
| 242 233 | 
             
                end
         | 
| 243 234 |  | 
| 244 | 
            -
                 | 
| 235 | 
            +
                def feature_importance_type_mapper(importance_type)
         | 
| 236 | 
            +
                  case importance_type
         | 
| 237 | 
            +
                  when "split"
         | 
| 238 | 
            +
                    FFI::C_API_FEATURE_IMPORTANCE_SPLIT
         | 
| 239 | 
            +
                  when "gain"
         | 
| 240 | 
            +
                    FFI::C_API_FEATURE_IMPORTANCE_GAIN
         | 
| 241 | 
            +
                  else
         | 
| 242 | 
            +
                    -1
         | 
| 243 | 
            +
                  end
         | 
| 244 | 
            +
                end
         | 
| 245 | 
            +
             | 
| 246 | 
            +
                def load_pandas_categorical(file_name: nil, model_str: nil)
         | 
| 247 | 
            +
                  pandas_key = "pandas_categorical:"
         | 
| 248 | 
            +
                  offset = -pandas_key.length
         | 
| 249 | 
            +
                  if !file_name.nil?
         | 
| 250 | 
            +
                    max_offset = -File.size(file_name)
         | 
| 251 | 
            +
                    lines = []
         | 
| 252 | 
            +
                    File.open(file_name, "rb") do |f|
         | 
| 253 | 
            +
                      loop do
         | 
| 254 | 
            +
                        offset = [offset, max_offset].max
         | 
| 255 | 
            +
                        f.seek(offset, IO::SEEK_END)
         | 
| 256 | 
            +
                        lines = f.readlines
         | 
| 257 | 
            +
                        if lines.length >= 2 || offset == max_offset
         | 
| 258 | 
            +
                          break
         | 
| 259 | 
            +
                        end
         | 
| 260 | 
            +
                        offset *= 2
         | 
| 261 | 
            +
                      end
         | 
| 262 | 
            +
                    end
         | 
| 263 | 
            +
                    last_line = lines[-1].strip
         | 
| 264 | 
            +
                    if !last_line.start_with?(pandas_key)
         | 
| 265 | 
            +
                      last_line = lines[-2].strip
         | 
| 266 | 
            +
                    end
         | 
| 267 | 
            +
                  elsif !model_str.nil?
         | 
| 268 | 
            +
                    idx = model_str[..offset].rindex("\n")
         | 
| 269 | 
            +
                    last_line = model_str[idx..].strip
         | 
| 270 | 
            +
                  end
         | 
| 271 | 
            +
                  if last_line.start_with?(pandas_key)
         | 
| 272 | 
            +
                    JSON.parse(last_line[pandas_key.length..])
         | 
| 273 | 
            +
                  end
         | 
| 274 | 
            +
                end
         | 
| 275 | 
            +
             | 
| 276 | 
            +
                def loaded_param
         | 
| 277 | 
            +
                  buffer_len = 1 << 20
         | 
| 278 | 
            +
                  out_len = ::FFI::MemoryPointer.new(:int64)
         | 
| 279 | 
            +
                  out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
         | 
| 280 | 
            +
                  safe_call FFI.LGBM_BoosterGetLoadedParam(@handle, buffer_len, out_len, out_str)
         | 
| 281 | 
            +
                  actual_len = out_len.read_int64
         | 
| 282 | 
            +
                  if actual_len > buffer_len
         | 
| 283 | 
            +
                    out_str = ::FFI::MemoryPointer.new(:char, actual_len)
         | 
| 284 | 
            +
                    safe_call FFI.LGBM_BoosterGetLoadedParam(@handle, actual_len, out_len, out_str)
         | 
| 285 | 
            +
                  end
         | 
| 286 | 
            +
                  JSON.parse(out_str.read_string)
         | 
| 287 | 
            +
                end
         | 
| 245 288 | 
             
              end
         | 
| 246 289 | 
             
            end
         | 
    
        data/lib/lightgbm/dataset.rb
    CHANGED
    
    | @@ -1,8 +1,10 @@ | |
| 1 1 | 
             
            module LightGBM
         | 
| 2 2 | 
             
              class Dataset
         | 
| 3 | 
            +
                include Utils
         | 
| 4 | 
            +
             | 
| 3 5 | 
             
                attr_reader :data, :params
         | 
| 4 6 |  | 
| 5 | 
            -
                def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
         | 
| 7 | 
            +
                def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_name: nil, feature_names: nil)
         | 
| 6 8 | 
             
                  @data = data
         | 
| 7 9 | 
             
                  @label = label
         | 
| 8 10 | 
             
                  @weight = weight
         | 
| @@ -11,7 +13,7 @@ module LightGBM | |
| 11 13 | 
             
                  @reference = reference
         | 
| 12 14 | 
             
                  @used_indices = used_indices
         | 
| 13 15 | 
             
                  @categorical_feature = categorical_feature
         | 
| 14 | 
            -
                  @ | 
| 16 | 
            +
                  @feature_name = feature_name || feature_names || "auto"
         | 
| 15 17 |  | 
| 16 18 | 
             
                  construct
         | 
| 17 19 | 
             
                end
         | 
| @@ -24,7 +26,7 @@ module LightGBM | |
| 24 26 | 
             
                  field("weight")
         | 
| 25 27 | 
             
                end
         | 
| 26 28 |  | 
| 27 | 
            -
                def  | 
| 29 | 
            +
                def feature_name
         | 
| 28 30 | 
             
                  # must preallocate space
         | 
| 29 31 | 
             
                  num_feature_names = ::FFI::MemoryPointer.new(:int)
         | 
| 30 32 | 
             
                  out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
         | 
| @@ -33,7 +35,7 @@ module LightGBM | |
| 33 35 | 
             
                  buffer_len = 255
         | 
| 34 36 | 
             
                  str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
         | 
| 35 37 | 
             
                  out_strs.write_array_of_pointer(str_ptrs)
         | 
| 36 | 
            -
                   | 
| 38 | 
            +
                  safe_call FFI.LGBM_DatasetGetFeatureNames(@handle, len, num_feature_names, buffer_len, out_buffer_len, out_strs)
         | 
| 37 39 |  | 
| 38 40 | 
             
                  num_features = num_feature_names.read_int
         | 
| 39 41 | 
             
                  actual_len = out_buffer_len.read(:size_t)
         | 
| @@ -41,13 +43,14 @@ module LightGBM | |
| 41 43 | 
             
                    out_strs = ::FFI::MemoryPointer.new(:pointer, num_features) if num_features > len
         | 
| 42 44 | 
             
                    str_ptrs = num_features.times.map { ::FFI::MemoryPointer.new(:char, actual_len) }
         | 
| 43 45 | 
             
                    out_strs.write_array_of_pointer(str_ptrs)
         | 
| 44 | 
            -
                     | 
| 46 | 
            +
                    safe_call FFI.LGBM_DatasetGetFeatureNames(@handle, num_features, num_feature_names, actual_len, out_buffer_len, out_strs)
         | 
| 45 47 | 
             
                  end
         | 
| 46 48 |  | 
| 47 49 | 
             
                  # should be the same, but get number of features
         | 
| 48 50 | 
             
                  # from most recent call (instead of num_features)
         | 
| 49 51 | 
             
                  str_ptrs[0, num_feature_names.read_int].map(&:read_string)
         | 
| 50 52 | 
             
                end
         | 
| 53 | 
            +
                alias_method :feature_names, :feature_name
         | 
| 51 54 |  | 
| 52 55 | 
             
                def label=(label)
         | 
| 53 56 | 
             
                  @label = label
         | 
| @@ -64,12 +67,16 @@ module LightGBM | |
| 64 67 | 
             
                  set_field("group", group, type: :int32)
         | 
| 65 68 | 
             
                end
         | 
| 66 69 |  | 
| 67 | 
            -
                def  | 
| 70 | 
            +
                # Sets the dataset's feature names through LGBM_DatasetSetFeatureNames.
                #
                # feature_names - enumerable of names; each entry is converted with to_s
                #
                # Raises (via safe_call) when the native call reports a non-zero status.
                def feature_name=(feature_names)
                  names = feature_names.map(&:to_s)
                  @feature_names = names

                  # hold the per-name string buffers in a local so they stay referenced
                  # while the pointer array that points at them is handed to the C API
                  name_buffers = names.map { |name| ::FFI::MemoryPointer.from_string(name) }
                  names_ptr = ::FFI::MemoryPointer.new(:pointer, names.size)
                  names_ptr.write_array_of_pointer(name_buffers)

                  safe_call FFI.LGBM_DatasetSetFeatureNames(@handle, names_ptr, names.size)
                end
         | 
| 79 | 
            +
                alias_method :feature_names=, :feature_name=
         | 
| 73 80 |  | 
| 74 81 | 
             
                # TODO only update reference if not in chain
         | 
| 75 82 | 
             
                def reference=(reference)
         | 
| @@ -81,18 +88,18 @@ module LightGBM | |
| 81 88 |  | 
| 82 89 | 
             
                # Returns the integer that LGBM_DatasetGetNumData writes for this dataset.
                # Raises (via safe_call) when the native call reports a non-zero status.
                def num_data
                  count_ptr = ::FFI::MemoryPointer.new(:int)
                  status = FFI.LGBM_DatasetGetNumData(@handle, count_ptr)
                  safe_call(status)
                  count_ptr.read_int
                end
         | 
| 87 94 |  | 
| 88 95 | 
             
                # Returns the integer that LGBM_DatasetGetNumFeature writes for this dataset.
                # Raises (via safe_call) when the native call reports a non-zero status.
                def num_feature
                  count_ptr = ::FFI::MemoryPointer.new(:int)
                  status = FFI.LGBM_DatasetGetNumFeature(@handle, count_ptr)
                  safe_call(status)
                  count_ptr.read_int
                end
         | 
| 93 100 |  | 
| 94 101 | 
             
                # Passes this dataset's handle and +filename+ to LGBM_DatasetSaveBinary.
                # Raises (via safe_call) when the native call reports a non-zero status.
                def save_binary(filename)
                  status = FFI.LGBM_DatasetSaveBinary(@handle, filename)
                  safe_call(status)
                end
         | 
| 97 104 |  | 
| 98 105 | 
             
                def subset(used_indices, params: nil)
         | 
| @@ -105,13 +112,8 @@ module LightGBM | |
| 105 112 | 
             
                  )
         | 
| 106 113 | 
             
                end
         | 
| 107 114 |  | 
| 108 | 
            -
                def  | 
| 109 | 
            -
                  @handle | 
| 110 | 
            -
                end
         | 
| 111 | 
            -
             | 
| 112 | 
            -
                def self.finalize(addr)
         | 
| 113 | 
            -
                  # must use proc instead of stabby lambda
         | 
| 114 | 
            -
                  proc { FFI.LGBM_DatasetFree(::FFI::Pointer.new(:pointer, addr)) }
         | 
| 115 | 
            +
                # Returns the native dataset handle stored in @handle by construct.
                # Another Dataset reads it when this one is passed as its reference.
                def handle
                  @handle
                end
         | 
| 116 118 |  | 
| 117 119 | 
             
                private
         | 
| @@ -127,27 +129,45 @@ module LightGBM | |
| 127 129 | 
             
                  end
         | 
| 128 130 | 
             
                  set_verbosity(params)
         | 
| 129 131 |  | 
| 130 | 
            -
                   | 
| 132 | 
            +
                  handle = ::FFI::MemoryPointer.new(:pointer)
         | 
| 131 133 | 
             
                  parameters = params_str(params)
         | 
| 132 | 
            -
                  reference = @reference. | 
| 134 | 
            +
                  reference = @reference.handle if @reference
         | 
| 133 135 | 
             
                  if used_indices
         | 
| 134 136 | 
             
                    used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
         | 
| 135 137 | 
             
                    used_row_indices.write_array_of_int32(used_indices)
         | 
| 136 | 
            -
                     | 
| 138 | 
            +
                    safe_call FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, handle)
         | 
| 137 139 | 
             
                  elsif data.is_a?(String)
         | 
| 138 | 
            -
                     | 
| 140 | 
            +
                    safe_call FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, handle)
         | 
| 139 141 | 
             
                  else
         | 
| 140 142 | 
             
                    if matrix?(data)
         | 
| 141 143 | 
             
                      nrow = data.row_count
         | 
| 142 144 | 
             
                      ncol = data.column_count
         | 
| 143 145 | 
             
                      flat_data = data.to_a.flatten
         | 
| 144 146 | 
             
                    elsif daru?(data)
         | 
| 147 | 
            +
                      if @feature_name == "auto"
         | 
| 148 | 
            +
                        @feature_name = data.vectors.to_a
         | 
| 149 | 
            +
                      end
         | 
| 145 150 | 
             
                      nrow, ncol = data.shape
         | 
| 146 151 | 
             
                      flat_data = data.map_rows(&:to_a).flatten
         | 
| 147 | 
            -
                    elsif numo?(data) | 
| 148 | 
            -
                       | 
| 152 | 
            +
                    elsif numo?(data)
         | 
| 153 | 
            +
                      nrow, ncol = data.shape
         | 
| 154 | 
            +
                    elsif rover?(data)
         | 
| 155 | 
            +
                      if @feature_name == "auto"
         | 
| 156 | 
            +
                        @feature_name = data.keys
         | 
| 157 | 
            +
                      end
         | 
| 158 | 
            +
                      data = data.to_numo
         | 
| 149 159 | 
             
                      nrow, ncol = data.shape
         | 
| 160 | 
            +
                    elsif data.is_a?(Array) && data.first.is_a?(Hash)
         | 
| 161 | 
            +
                      keys = data.first.keys
         | 
| 162 | 
            +
                      if @feature_name == "auto"
         | 
| 163 | 
            +
                        @feature_name = keys
         | 
| 164 | 
            +
                      end
         | 
| 165 | 
            +
                      nrow = data.count
         | 
| 166 | 
            +
                      ncol = data.first.count
         | 
| 167 | 
            +
                      flat_data = data.flat_map { |v| v.fetch_values(*keys) }
         | 
| 150 168 | 
             
                    else
         | 
| 169 | 
            +
                      data = data.to_a
         | 
| 170 | 
            +
                      check_2d_array(data)
         | 
| 151 171 | 
             
                      nrow = data.count
         | 
| 152 172 | 
             
                      ncol = data.first.count
         | 
| 153 173 | 
             
                      flat_data = data.flatten
         | 
| @@ -161,18 +181,22 @@ module LightGBM | |
| 161 181 | 
             
                      c_data.write_array_of_double(flat_data)
         | 
| 162 182 | 
             
                    end
         | 
| 163 183 |  | 
| 164 | 
            -
                     | 
| 184 | 
            +
                    safe_call FFI.LGBM_DatasetCreateFromMat(c_data, FFI::C_API_DTYPE_FLOAT64, nrow, ncol, 1, parameters, reference, handle)
         | 
| 185 | 
            +
                  end
         | 
| 186 | 
            +
                  if used_indices
         | 
| 187 | 
            +
                    @handle = handle.read_pointer
         | 
| 188 | 
            +
                  else
         | 
| 189 | 
            +
                    @handle = ::FFI::AutoPointer.new(handle.read_pointer, FFI.method(:LGBM_DatasetFree))
         | 
| 165 190 | 
             
                  end
         | 
| 166 | 
            -
                  ObjectSpace.define_finalizer(@handle, self.class.finalize(handle_pointer.to_i)) unless used_indices
         | 
| 167 191 |  | 
| 168 192 | 
             
                  self.label = @label if @label
         | 
| 169 193 | 
             
                  self.weight = @weight if @weight
         | 
| 170 194 | 
             
                  self.group = @group if @group
         | 
| 171 | 
            -
                  self. | 
| 195 | 
            +
                  self.feature_name = @feature_name if @feature_name && @feature_name != "auto"
         | 
| 172 196 | 
             
                end
         | 
| 173 197 |  | 
| 174 198 | 
             
                # Passes this dataset's handle and +filename+ to LGBM_DatasetDumpText.
                # Raises (via safe_call) when the native call reports a non-zero status.
                def dump_text(filename)
                  status = FFI.LGBM_DatasetDumpText(@handle, filename)
                  safe_call(status)
                end
         | 
| 177 201 |  | 
| 178 202 | 
             
                def field(field_name)
         | 
| @@ -180,7 +204,7 @@ module LightGBM | |
| 180 204 | 
             
                  out_len = ::FFI::MemoryPointer.new(:int)
         | 
| 181 205 | 
             
                  out_ptr = ::FFI::MemoryPointer.new(:float, num_data)
         | 
| 182 206 | 
             
                  out_type = ::FFI::MemoryPointer.new(:int)
         | 
| 183 | 
            -
                   | 
| 207 | 
            +
                  safe_call FFI.LGBM_DatasetGetField(@handle, field_name, out_len, out_ptr, out_type)
         | 
| 184 208 | 
             
                  out_ptr.read_pointer.read_array_of_float(num_data)
         | 
| 185 209 | 
             
                end
         | 
| 186 210 |  | 
| @@ -189,14 +213,12 @@ module LightGBM | |
| 189 213 | 
             
                  if type == :int32
         | 
| 190 214 | 
             
                    c_data = ::FFI::MemoryPointer.new(:int32, data.count)
         | 
| 191 215 | 
             
                    c_data.write_array_of_int32(data)
         | 
| 192 | 
            -
                     | 
| 216 | 
            +
                    safe_call FFI.LGBM_DatasetSetField(@handle, field_name, c_data, data.count, 2)
         | 
| 193 217 | 
             
                  else
         | 
| 194 218 | 
             
                    c_data = ::FFI::MemoryPointer.new(:float, data.count)
         | 
| 195 219 | 
             
                    c_data.write_array_of_float(data)
         | 
| 196 | 
            -
                     | 
| 220 | 
            +
                    safe_call FFI.LGBM_DatasetSetField(@handle, field_name, c_data, data.count, 0)
         | 
| 197 221 | 
             
                  end
         | 
| 198 222 | 
             
                end
         | 
| 199 | 
            -
             | 
| 200 | 
            -
                include Utils
         | 
| 201 223 | 
             
              end
         | 
| 202 224 | 
             
            end
         | 
    
        data/lib/lightgbm/ffi.rb
    CHANGED
    
    | @@ -15,6 +15,19 @@ module LightGBM | |
| 15 15 | 
             
                # https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
         | 
| 16 16 | 
             
                # keep same order
         | 
| 17 17 |  | 
| 18 | 
            +
                C_API_DTYPE_FLOAT32 = 0
         | 
| 19 | 
            +
                C_API_DTYPE_FLOAT64 = 1
         | 
| 20 | 
            +
                C_API_DTYPE_INT32 = 2
         | 
| 21 | 
            +
                C_API_DTYPE_INT64 = 3
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                C_API_PREDICT_NORMAL = 0
         | 
| 24 | 
            +
                C_API_PREDICT_RAW_SCORE = 1
         | 
| 25 | 
            +
                C_API_PREDICT_LEAF_INDEX = 2
         | 
| 26 | 
            +
                C_API_PREDICT_CONTRIB = 3
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                C_API_FEATURE_IMPORTANCE_SPLIT = 0
         | 
| 29 | 
            +
                C_API_FEATURE_IMPORTANCE_GAIN = 1
         | 
| 30 | 
            +
             | 
| 18 31 | 
             
                # error
         | 
| 19 32 | 
             
                attach_function :LGBM_GetLastError, %i[], :string
         | 
| 20 33 |  | 
| @@ -36,6 +49,7 @@ module LightGBM | |
| 36 49 | 
             
                attach_function :LGBM_BoosterCreate, %i[pointer string pointer], :int
         | 
| 37 50 | 
             
                attach_function :LGBM_BoosterCreateFromModelfile, %i[string pointer pointer], :int
         | 
| 38 51 | 
             
                attach_function :LGBM_BoosterLoadModelFromString, %i[string pointer pointer], :int
         | 
| 52 | 
            +
                attach_function :LGBM_BoosterGetLoadedParam, %i[pointer int64 pointer pointer], :int
         | 
| 39 53 | 
             
                attach_function :LGBM_BoosterFree, %i[pointer], :int
         | 
| 40 54 | 
             
                attach_function :LGBM_BoosterAddValidData, %i[pointer pointer], :int
         | 
| 41 55 | 
             
                attach_function :LGBM_BoosterGetNumClasses, %i[pointer pointer], :int
         | 
| @@ -48,6 +62,7 @@ module LightGBM | |
| 48 62 | 
             
                attach_function :LGBM_BoosterGetFeatureNames, %i[pointer int pointer size_t pointer pointer], :int
         | 
| 49 63 | 
             
                attach_function :LGBM_BoosterGetNumFeature, %i[pointer pointer], :int
         | 
| 50 64 | 
             
                attach_function :LGBM_BoosterGetEval, %i[pointer int pointer pointer], :int
         | 
| 65 | 
            +
                attach_function :LGBM_BoosterCalcNumPredict, %i[pointer int int int int pointer], :int
         | 
| 51 66 | 
             
                attach_function :LGBM_BoosterPredictForMat, %i[pointer pointer int int32 int32 int int int int string pointer pointer], :int
         | 
| 52 67 | 
             
                attach_function :LGBM_BoosterSaveModel, %i[pointer int int int string], :int
         | 
| 53 68 | 
             
                attach_function :LGBM_BoosterSaveModelToString, %i[pointer int int int int64 pointer pointer], :int
         | 
| @@ -0,0 +1,159 @@ | |
| 1 | 
            +
            module LightGBM
              # Runs predictions for a booster by calling LGBM_BoosterPredictForMat
              # with the booster's native handle.
              class InnerPredictor
                include Utils

                # upper bound on the number of rows accepted by pred_for_array
                MAX_INT32 = (1 << 31) - 1

                # booster        - object whose @handle and @pandas_categorical are reused
                # pred_parameter - prediction parameters, serialized with params_str
                def initialize(booster, pred_parameter)
                  # keep booster for cached_feature_name
                  @booster = booster
                  @handle = booster.instance_variable_get(:@handle)
                  @pandas_categorical = booster.instance_variable_get(:@pandas_categorical)
                  @pred_parameter = params_str(pred_parameter)
                end

                # Alternate constructor; takes the same arguments as new.
                def self.from_booster(booster, pred_parameter)
                  new(booster, pred_parameter)
                end

                # Predicts for +data+.
                #
                # data            - a Hash (one observation), an Array of Hashes, a Daru or
                #                   Rover data frame, or anything whose to_a yields one row
                #                   or an array of rows; a Dataset is rejected
                # start_iteration - forwarded to the C API
                # num_iteration   - forwarded to the C API
                # raw_score, pred_leaf, pred_contrib - choose the prediction type; when
                #                   several are set, pred_contrib wins over pred_leaf,
                #                   which wins over raw_score
                #
                # Returns the result for the single observation when +data+ was a Hash or
                # a flat row, otherwise an array with one entry per row. When the native
                # call yields more values than rows, they are sliced evenly per row.
                #
                # Raises TypeError for a Dataset, and Error when the number of values
                # cannot be divided evenly by the number of rows.
                def predict(data, start_iteration: 0, num_iteration: -1, raw_score: false, pred_leaf: false, pred_contrib: false)
                  raise TypeError, "Cannot use Dataset instance for prediction, please use raw data instead" if data.is_a?(Dataset)

                  predict_type =
                    if pred_contrib
                      FFI::C_API_PREDICT_CONTRIB
                    elsif pred_leaf
                      FFI::C_API_PREDICT_LEAF_INDEX
                    elsif raw_score
                      FFI::C_API_PREDICT_RAW_SCORE
                    else
                      FFI::C_API_PREDICT_NORMAL
                    end

                  rows, singular = rows_and_singular(data)

                  if @pandas_categorical&.any?
                    apply_pandas_categorical(rows, @booster.params["categorical_feature"], @pandas_categorical)
                  end

                  preds, nrow = pred_for_array(rows, start_iteration, num_iteration, predict_type)

                  # leaf indices come back as doubles
                  preds = preds.map(&:to_i) if pred_leaf

                  unless preds.size == nrow
                    unless (preds.size % nrow).zero?
                      raise Error, "Length of predict result (#{preds.size}) cannot be divide nrow (#{nrow})"
                    end
                    preds = preds.each_slice(preds.size / nrow).to_a
                  end

                  singular ? preds.first : preds
                end

                private

                # Converts the supported input shapes into an array of rows.
                # Returns [rows, singular], where singular is true when the caller
                # passed a single observation.
                def rows_and_singular(data)
                  if daru?(data)
                    [data[*cached_feature_name].map_rows(&:to_a), false]
                  elsif data.is_a?(Hash)
                    # sort feature.values to match the order of model.feature_name
                    [[sorted_feature_values(data)], true]
                  elsif data.is_a?(Array) && data.first.is_a?(Hash)
                    # on multiple elems, if 1st is hash, assume they all are
                    [data.map { |row| sorted_feature_values(row) }, false]
                  elsif rover?(data)
                    # TODO improve performance
                    [data[cached_feature_name].to_numo.to_a, false]
                  else
                    rows = data.to_a
                    single = !rows.first.is_a?(Array)
                    rows = [rows] if single
                    check_2d_array(rows)
                    # copy each row so categorical encoding does not modify the caller's arrays
                    rows = rows.map(&:dup) if @pandas_categorical&.any?
                    [rows, single]
                  end
                end

                # Rejects inputs with more than MAX_INT32 rows, then predicts.
                # Returns [predictions, row count].
                def pred_for_array(input, start_iteration, num_iteration, predict_type)
                  raise Error, "Not supported" if input.count > MAX_INT32

                  inner_predict_array(input, start_iteration, num_iteration, predict_type)
                end

                # Flattens +input+ into a buffer of doubles, calls
                # LGBM_BoosterPredictForMat and reads the results back.
                # Returns [predictions, row count]; raises Error when the number of
                # values written differs from what LGBM_BoosterCalcNumPredict announced.
                def inner_predict_array(input, start_iteration, num_iteration, predict_type)
                  nrow = input.count
                  expected = num_preds(start_iteration, num_iteration, nrow, predict_type)

                  values = input.flatten
                  # replaces nil entries with NaN in place
                  handle_missing(values)
                  ncol = input.first.count
                  matrix = ::FFI::MemoryPointer.new(:double, nrow * ncol)
                  matrix.write_array_of_double(values)

                  actual_ptr = ::FFI::MemoryPointer.new(:int64)
                  result_ptr = ::FFI::MemoryPointer.new(:double, expected)
                  safe_call FFI.LGBM_BoosterPredictForMat(@handle, matrix, FFI::C_API_DTYPE_FLOAT64, nrow, ncol, 1, predict_type, start_iteration, num_iteration, @pred_parameter, actual_ptr, result_ptr)

                  actual = actual_ptr.read_int64
                  raise Error, "Wrong length for predict results" unless expected == actual

                  [result_ptr.read_array_of_double(actual), nrow]
                end

                # Returns the number of values LGBM_BoosterCalcNumPredict reports for
                # +nrow+ rows with the given prediction type and iteration range.
                def num_preds(start_iteration, num_iteration, nrow, predict_type)
                  count_ptr = ::FFI::MemoryPointer.new(:int64)
                  safe_call FFI.LGBM_BoosterCalcNumPredict(@handle, nrow, predict_type, start_iteration, num_iteration, count_ptr)
                  count_ptr.read_int64
                end

                # Returns the hash's values ordered by the booster's feature names.
                # Keys are compared as strings; a missing feature raises KeyError.
                def sorted_feature_values(input_hash)
                  by_name = input_hash.transform_keys(&:to_s)
                  by_name.fetch_values(*cached_feature_name)
                end

                # The method is not public on the booster, hence send.
                def cached_feature_name
                  @booster.send(:cached_feature_name)
                end

                # Replaces categorical values in +data+ (in place) with their position
                # in the matching pandas_categorical list. The i-th entry of
                # categorical_feature is the column encoded with pandas_categorical[i].
                # Unknown String categories become nil; an unknown non-String value
                # raises ArgumentError. nil cells are left untouched.
                def apply_pandas_categorical(data, categorical_feature, pandas_categorical)
                  (categorical_feature || []).each_with_index do |column, position|
                    codes = pandas_categorical[position].each_with_index.to_h
                    data.each do |row|
                      value = row[column]
                      next if value.nil?

                      row[column] =
                        codes.fetch(value) do
                          raise ArgumentError, "expected categorical value" unless value.is_a?(String)

                          nil
                        end
                    end
                  end
                end
              end
            end
         | 
    
        data/lib/lightgbm/model.rb
    CHANGED
    
    
    
        data/lib/lightgbm/utils.rb
    CHANGED
    
    | @@ -2,8 +2,8 @@ module LightGBM | |
| 2 2 | 
             
              module Utils
         | 
| 3 3 | 
             
                private
         | 
| 4 4 |  | 
| 5 | 
            -
                def  | 
| 6 | 
            -
                  raise  | 
| 5 | 
            +
                # Checks the status code returned by a LightGBM C API call.
                # Returns nil when +err+ is 0; otherwise raises Error with the message
                # from LGBM_GetLastError.
                def safe_call(err)
                  return if err == 0

                  raise Error, FFI.LGBM_GetLastError
                end
         | 
| 8 8 |  | 
| 9 9 | 
             
                # remove spaces in keys and values to prevent injection
         | 
| @@ -24,6 +24,13 @@ module LightGBM | |
| 24 24 | 
             
                  end
         | 
| 25 25 | 
             
                end
         | 
| 26 26 |  | 
| 27 | 
            +
                # Verifies that every row has the same size as the first row
                # (0 when there are no rows).
                # Returns nil; raises ArgumentError when a row's size differs.
                def check_2d_array(data)
                  expected = data.first&.size || 0
                  return if data.all? { |row| row.size == expected }

                  raise ArgumentError, "Rows have different sizes"
                end
         | 
| 33 | 
            +
             | 
| 27 34 | 
             
                # for categorical, NaN and negative value are the same
         | 
| 28 35 | 
             
                def handle_missing(data)
         | 
| 29 36 | 
             
                  data.map! { |v| v.nil? ? Float::NAN : v }
         | 
    
        data/lib/lightgbm/version.rb
    CHANGED
    
    
    
        data/lib/lightgbm.rb
    CHANGED
    
    | @@ -1,10 +1,14 @@ | |
| 1 1 | 
             
            # dependencies
         | 
| 2 2 | 
             
            require "ffi"
         | 
| 3 3 |  | 
| 4 | 
            +
            # stdlib
         | 
| 5 | 
            +
            require "json"
         | 
| 6 | 
            +
             | 
| 4 7 | 
             
            # modules
         | 
| 5 8 | 
             
            require_relative "lightgbm/utils"
         | 
| 6 9 | 
             
            require_relative "lightgbm/booster"
         | 
| 7 10 | 
             
            require_relative "lightgbm/dataset"
         | 
| 11 | 
            +
            require_relative "lightgbm/inner_predictor"
         | 
| 8 12 | 
             
            require_relative "lightgbm/version"
         | 
| 9 13 |  | 
| 10 14 | 
             
            # scikit-learn API
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,13 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: lightgbm
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.4.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrew Kane
         | 
| 8 | 
            -
            autorequire:
         | 
| 9 8 | 
             
            bindir: bin
         | 
| 10 9 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 10 | 
            +
            date: 2025-01-05 00:00:00.000000000 Z
         | 
| 12 11 | 
             
            dependencies:
         | 
| 13 12 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 13 | 
             
              name: ffi
         | 
| @@ -24,7 +23,6 @@ dependencies: | |
| 24 23 | 
             
                - - ">="
         | 
| 25 24 | 
             
                  - !ruby/object:Gem::Version
         | 
| 26 25 | 
             
                    version: '0'
         | 
| 27 | 
            -
            description:
         | 
| 28 26 | 
             
            email: andrew@ankane.org
         | 
| 29 27 | 
             
            executables: []
         | 
| 30 28 | 
             
            extensions: []
         | 
| @@ -38,6 +36,7 @@ files: | |
| 38 36 | 
             
            - lib/lightgbm/classifier.rb
         | 
| 39 37 | 
             
            - lib/lightgbm/dataset.rb
         | 
| 40 38 | 
             
            - lib/lightgbm/ffi.rb
         | 
| 39 | 
            +
            - lib/lightgbm/inner_predictor.rb
         | 
| 41 40 | 
             
            - lib/lightgbm/model.rb
         | 
| 42 41 | 
             
            - lib/lightgbm/ranker.rb
         | 
| 43 42 | 
             
            - lib/lightgbm/regressor.rb
         | 
| @@ -53,7 +52,6 @@ homepage: https://github.com/ankane/lightgbm-ruby | |
| 53 52 | 
             
            licenses:
         | 
| 54 53 | 
             
            - MIT
         | 
| 55 54 | 
             
            metadata: {}
         | 
| 56 | 
            -
            post_install_message:
         | 
| 57 55 | 
             
            rdoc_options: []
         | 
| 58 56 | 
             
            require_paths:
         | 
| 59 57 | 
             
            - lib
         | 
| @@ -61,15 +59,14 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 61 59 | 
             
              requirements:
         | 
| 62 60 | 
             
              - - ">="
         | 
| 63 61 | 
             
                - !ruby/object:Gem::Version
         | 
| 64 | 
            -
                  version: '3'
         | 
| 62 | 
            +
                  version: '3.1'
         | 
| 65 63 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 66 64 | 
             
              requirements:
         | 
| 67 65 | 
             
              - - ">="
         | 
| 68 66 | 
             
                - !ruby/object:Gem::Version
         | 
| 69 67 | 
             
                  version: '0'
         | 
| 70 68 | 
             
            requirements: []
         | 
| 71 | 
            -
            rubygems_version: 3. | 
| 72 | 
            -
            signing_key:
         | 
| 69 | 
            +
            rubygems_version: 3.6.2
         | 
| 73 70 | 
             
            specification_version: 4
         | 
| 74 71 | 
             
            summary: High performance gradient boosting for Ruby
         | 
| 75 72 | 
             
            test_files: []
         |