cmfrec 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +7 -7
- data/lib/cmfrec/recommender.rb +127 -0
- data/lib/cmfrec/version.rb +1 -1
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 3bd946bc2c7425ba3550a9cd5bf346b0cac6de5597d0d38ff0dfb49db32e754d
         | 
| 4 | 
            +
              data.tar.gz: 20038dc0c401389d75dc3a35415919f6721dab7e32459eeb34c9dcb23569c49a
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 1d0019e89fe0ca946cd83d60c052bff2e3dcc3990e3bf37de92b620870b6bf8a31010aec87bb2e5a352c446441b683f0148da83497b5d44ef89d62396f4b7d3c
         | 
| 7 | 
            +
              data.tar.gz: f4d65f294b9a313c2c86111eaf66e734999924392fedc4715c51919a4ca4de3864ba3eec732f3d707dd06ea62ddb931d9b28b8436a09a807c01e14ac2c542e44
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -82,11 +82,11 @@ Add side information about users, items, or both | |
| 82 82 | 
             
            ```ruby
         | 
| 83 83 | 
             
            user_info = [
         | 
| 84 84 | 
             
              {user_id: 1, cats: 1, dogs: 0},
         | 
| 85 | 
            -
              {user_id: 2, cats: 2, dogs: 1} | 
| 85 | 
            +
              {user_id: 2, cats: 2, dogs: 1}
         | 
| 86 86 | 
             
            ]
         | 
| 87 87 | 
             
            item_info = [
         | 
| 88 88 | 
             
              {item_id: 1, genre_comedy: 1, genre_drama: 0},
         | 
| 89 | 
            -
              {item_id: 2, genre_comedy: 0, genre_drama: 1} | 
| 89 | 
            +
              {item_id: 2, genre_comedy: 0, genre_drama: 1}
         | 
| 90 90 | 
             
            ]
         | 
| 91 91 | 
             
            recommender.fit(ratings, user_info: user_info, item_info: item_info)
         | 
| 92 92 | 
             
            ```
         | 
| @@ -213,17 +213,17 @@ Rover.read_csv("ratings.csv") | |
| 213 213 | 
             
            Store the recommender
         | 
| 214 214 |  | 
| 215 215 | 
             
            ```ruby
         | 
| 216 | 
            -
             | 
| 217 | 
            -
            File. | 
| 216 | 
            +
            json = recommender.to_json
         | 
| 217 | 
            +
            File.write("recommender.json", json)
         | 
| 218 218 | 
             
            ```
         | 
| 219 219 |  | 
| 220 | 
            -
             | 
| 220 | 
            +
            The serialized recommender includes user activity from the training data (to avoid recommending previously rated items), so be sure to protect it. You can save it to a file, database, or any other storage system, or use a tool like [Trove](https://github.com/ankane/trove). Also, user and item IDs should be integers or strings for this.
         | 
| 221 221 |  | 
| 222 222 | 
             
            Load a recommender
         | 
| 223 223 |  | 
| 224 224 | 
             
            ```ruby
         | 
| 225 | 
            -
             | 
| 226 | 
            -
            recommender =  | 
| 225 | 
            +
            json = File.read("recommender.json")
         | 
| 226 | 
            +
            recommender = Cmfrec::Recommender.load_json(json)
         | 
| 227 227 | 
             
            ```
         | 
| 228 228 |  | 
| 229 229 | 
             
            Alternatively, you can store only the factors and use a library like [Neighbor](https://github.com/ankane/neighbor). See the [examples](https://github.com/ankane/neighbor/tree/master/examples) for Disco, which has a similar API. For explicit feedback, you should [disable the bias](#explicit-feedback) with this approach.
         | 
    
        data/lib/cmfrec/recommender.rb
    CHANGED
    
    | @@ -249,6 +249,68 @@ module Cmfrec | |
| 249 249 | 
             
                  similar(user_id, @user_map, user_factors, count, user_index)
         | 
| 250 250 | 
             
                end
         | 
| 251 251 |  | 
| 252 | 
            +
                def to_json
         | 
| 253 | 
            +
                  require "base64"
         | 
| 254 | 
            +
                  require "json"
         | 
| 255 | 
            +
             | 
| 256 | 
            +
                  obj = {
         | 
| 257 | 
            +
                    implicit: @implicit
         | 
| 258 | 
            +
                  }
         | 
| 259 | 
            +
             | 
| 260 | 
            +
                  # options
         | 
| 261 | 
            +
                  obj[:factors] = @k
         | 
| 262 | 
            +
                  obj[:epochs] = @niter
         | 
| 263 | 
            +
                  obj[:verbose] = @verbose
         | 
| 264 | 
            +
             | 
| 265 | 
            +
                  # factors
         | 
| 266 | 
            +
                  obj[:user_ids] = @user_map.keys
         | 
| 267 | 
            +
                  obj[:item_ids] = @item_map.keys
         | 
| 268 | 
            +
                  obj[:rated] = @user_map.map { |_, u| (@rated[u] || {}).keys }
         | 
| 269 | 
            +
                  obj[:user_factors] = json_dump_ptr(@a)
         | 
| 270 | 
            +
                  obj[:item_factors] = json_dump_ptr(@b)
         | 
| 271 | 
            +
             | 
| 272 | 
            +
                  # bias
         | 
| 273 | 
            +
                  obj[:user_bias] = json_dump_ptr(@bias_a)
         | 
| 274 | 
            +
                  obj[:item_bias] = json_dump_ptr(@bias_b)
         | 
| 275 | 
            +
             | 
| 276 | 
            +
                  # mean
         | 
| 277 | 
            +
                  obj[:global_mean] = @global_mean
         | 
| 278 | 
            +
             | 
| 279 | 
            +
                  unless (@user_info_map.keys + @item_info_map.keys).all? { |v| v.is_a?(Symbol) }
         | 
| 280 | 
            +
                    raise "Side info keys must be symbols to save"
         | 
| 281 | 
            +
                  end
         | 
| 282 | 
            +
             | 
| 283 | 
            +
                  # side info
         | 
| 284 | 
            +
                  obj[:user_info_ids] = @user_info_map.keys
         | 
| 285 | 
            +
                  obj[:item_info_ids] = @item_info_map.keys
         | 
| 286 | 
            +
                  obj[:user_info_factors] = json_dump_ptr(@c)
         | 
| 287 | 
            +
                  obj[:item_info_factors] = json_dump_ptr(@d)
         | 
| 288 | 
            +
             | 
| 289 | 
            +
                  # implicit features
         | 
| 290 | 
            +
                  obj[:add_implicit_features] = @add_implicit_features
         | 
| 291 | 
            +
                  obj[:user_factors_implicit] = json_dump_ptr(@ai)
         | 
| 292 | 
            +
                  obj[:item_factors_implicit] = json_dump_ptr(@bi)
         | 
| 293 | 
            +
             | 
| 294 | 
            +
                  unless @implicit
         | 
| 295 | 
            +
                    obj[:min_rating] = @min_rating
         | 
| 296 | 
            +
                    obj[:max_rating] = @max_rating
         | 
| 297 | 
            +
                  end
         | 
| 298 | 
            +
             | 
| 299 | 
            +
                  obj[:user_means] = json_dump_ptr(@u_colmeans)
         | 
| 300 | 
            +
             | 
| 301 | 
            +
                  JSON.generate(obj)
         | 
| 302 | 
            +
                end
         | 
| 303 | 
            +
             | 
| 304 | 
            +
                def self.load_json(json)
         | 
| 305 | 
            +
                  require "json"
         | 
| 306 | 
            +
             | 
| 307 | 
            +
                  obj = JSON.parse(json)
         | 
| 308 | 
            +
             | 
| 309 | 
            +
                  recommender = new
         | 
| 310 | 
            +
                  recommender.send(:json_load, obj)
         | 
| 311 | 
            +
                  recommender
         | 
| 312 | 
            +
                end
         | 
| 313 | 
            +
             | 
| 252 314 | 
             
                private
         | 
| 253 315 |  | 
| 254 316 | 
             
                def user_index
         | 
| @@ -856,5 +918,70 @@ module Cmfrec | |
| 856 918 |  | 
| 857 919 | 
             
                  @fit = @m > 0
         | 
| 858 920 | 
             
                end
         | 
| 921 | 
            +
             | 
| 922 | 
            +
                def json_dump_ptr(ptr)
         | 
| 923 | 
            +
                  Base64.strict_encode64(ptr.to_s(ptr.size)) if ptr
         | 
| 924 | 
            +
                end
         | 
| 925 | 
            +
             | 
| 926 | 
            +
                def json_load_ptr(str)
         | 
| 927 | 
            +
                  Fiddle::Pointer[Base64.strict_decode64(str)] if str
         | 
| 928 | 
            +
                end
         | 
| 929 | 
            +
             | 
| 930 | 
            +
                def json_load(obj)
         | 
| 931 | 
            +
                  require "base64"
         | 
| 932 | 
            +
             | 
| 933 | 
            +
                  @implicit = obj["implicit"]
         | 
| 934 | 
            +
             | 
| 935 | 
            +
                  # options
         | 
| 936 | 
            +
                  set_params(
         | 
| 937 | 
            +
                    k: obj["factors"],
         | 
| 938 | 
            +
                    niter: obj["epochs"],
         | 
| 939 | 
            +
                    verbose: obj["verbose"],
         | 
| 940 | 
            +
                    user_bias: !obj["user_bias"].nil?,
         | 
| 941 | 
            +
                    item_bias: !obj["item_bias"].nil?,
         | 
| 942 | 
            +
                    add_implicit_features: obj["add_implicit_features"]
         | 
| 943 | 
            +
                  )
         | 
| 944 | 
            +
             | 
| 945 | 
            +
                  # factors
         | 
| 946 | 
            +
                  @user_map = obj["user_ids"].map.with_index.to_h
         | 
| 947 | 
            +
                  @item_map = obj["item_ids"].map.with_index.to_h
         | 
| 948 | 
            +
                  @rated = obj["rated"].map.with_index.to_h { |r, i| [i, r.to_h { |v| [v, true] }] }
         | 
| 949 | 
            +
                  @a = json_load_ptr(obj["user_factors"])
         | 
| 950 | 
            +
                  @b = json_load_ptr(obj["item_factors"])
         | 
| 951 | 
            +
             | 
| 952 | 
            +
                  # bias
         | 
| 953 | 
            +
                  @bias_a = json_load_ptr(obj["user_bias"])
         | 
| 954 | 
            +
                  @bias_b = json_load_ptr(obj["item_bias"])
         | 
| 955 | 
            +
             | 
| 956 | 
            +
                  # mean
         | 
| 957 | 
            +
                  @global_mean = obj["global_mean"]
         | 
| 958 | 
            +
             | 
| 959 | 
            +
                  # side info
         | 
| 960 | 
            +
                  @user_info_map = obj["user_info_ids"].map(&:to_sym).map.with_index.to_h
         | 
| 961 | 
            +
                  @item_info_map = obj["item_info_ids"].map(&:to_sym).map.with_index.to_h
         | 
| 962 | 
            +
                  @c = json_load_ptr(obj["user_info_factors"])
         | 
| 963 | 
            +
                  @d = json_load_ptr(obj["item_info_factors"])
         | 
| 964 | 
            +
             | 
| 965 | 
            +
                  # implicit features
         | 
| 966 | 
            +
                  @add_implicit_features = obj["add_implicit_features"]
         | 
| 967 | 
            +
                  @ai = json_load_ptr(obj["user_factors_implicit"])
         | 
| 968 | 
            +
                  @bi = json_load_ptr(obj["item_factors_implicit"])
         | 
| 969 | 
            +
             | 
| 970 | 
            +
                  unless @implicit
         | 
| 971 | 
            +
                    @min_rating = obj["min_rating"]
         | 
| 972 | 
            +
                    @max_rating = obj["max_rating"]
         | 
| 973 | 
            +
                  end
         | 
| 974 | 
            +
             | 
| 975 | 
            +
                  @u_colmeans = json_load_ptr(obj["user_means"])
         | 
| 976 | 
            +
             | 
| 977 | 
            +
                  @m = @user_map.size
         | 
| 978 | 
            +
                  @n = @item_map.size
         | 
| 979 | 
            +
                  @m_u = @user_info_map.size
         | 
| 980 | 
            +
                  @n_i = @item_info_map.size
         | 
| 981 | 
            +
             | 
| 982 | 
            +
                  set_implicit_vars if @implicit
         | 
| 983 | 
            +
             | 
| 984 | 
            +
                  @fit = @m > 0
         | 
| 985 | 
            +
                end
         | 
| 859 986 | 
             
              end
         | 
| 860 987 | 
             
            end
         | 
    
        data/lib/cmfrec/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: cmfrec
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.2.0 | 
| 4 | 
            +
              version: 0.2.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrew Kane
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2022- | 
| 11 | 
            +
            date: 2022-07-11 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies: []
         | 
| 13 13 | 
             
            description:
         | 
| 14 14 | 
             
            email: andrew@ankane.org
         |