libffm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
+ #include <cstring>
2
+ #include <fstream>
3
+ #include <iostream>
4
+ #include <string>
5
+ #include <iomanip>
6
+ #include <memory>
7
+ #include <cmath>
8
+ #include <stdexcept>
9
+ #include <vector>
10
+ #include <cstdlib>
11
+
12
+ #include "ffm.h"
13
+
14
+ using namespace std;
15
+ using namespace ffm;
16
+
17
// Command-line arguments of ffm-predict; every member is a file path.
struct Option {
    std::string test_path;    // text file whose lines are scored
    std::string model_path;   // model file handed to ffm_load_model
    std::string output_path;  // file that receives one prediction per line
};
20
+
21
// Returns the usage line shown when ffm-predict is started without arguments.
std::string predict_help() {
    static const char kUsage[] =
        "usage: ffm-predict test_file model_file output_file\n";
    return std::string(kUsage);
}
25
+
26
+ Option parse_option(int argc, char **argv) {
27
+ vector<string> args;
28
+ for(int i = 0; i < argc; i++)
29
+ args.push_back(string(argv[i]));
30
+
31
+ if(argc == 1)
32
+ throw invalid_argument(predict_help());
33
+
34
+ Option option;
35
+
36
+ if(argc != 4)
37
+ throw invalid_argument("cannot parse argument");
38
+
39
+ option.test_path = string(args[1]);
40
+ option.model_path = string(args[2]);
41
+ option.output_path = string(args[3]);
42
+
43
+ return option;
44
+ }
45
+
46
+ void predict(string test_path, string model_path, string output_path) {
47
+ int const kMaxLineSize = 1000000;
48
+
49
+ FILE *f_in = fopen(test_path.c_str(), "r");
50
+ ofstream f_out(output_path);
51
+ char line[kMaxLineSize];
52
+
53
+ ffm_model model = ffm_load_model(model_path);
54
+
55
+ ffm_double loss = 0;
56
+ vector<ffm_node> x;
57
+ ffm_int i = 0;
58
+
59
+ for(; fgets(line, kMaxLineSize, f_in) != nullptr; i++) {
60
+ x.clear();
61
+ char *y_char = strtok(line, " \t");
62
+ ffm_float y = (atoi(y_char)>0)? 1.0f : -1.0f;
63
+
64
+ while(true) {
65
+ char *field_char = strtok(nullptr,":");
66
+ char *idx_char = strtok(nullptr,":");
67
+ char *value_char = strtok(nullptr," \t");
68
+ if(field_char == nullptr || *field_char == '\n')
69
+ break;
70
+
71
+ ffm_node N;
72
+ N.f = atoi(field_char);
73
+ N.j = atoi(idx_char);
74
+ N.v = atof(value_char);
75
+
76
+ x.push_back(N);
77
+ }
78
+
79
+ ffm_float y_bar = ffm_predict(x.data(), x.data()+x.size(), model);
80
+
81
+ loss -= y==1? log(y_bar) : log(1-y_bar);
82
+
83
+ f_out << y_bar << "\n";
84
+ }
85
+
86
+ loss /= i;
87
+
88
+ cout << "logloss = " << fixed << setprecision(5) << loss << endl;
89
+
90
+ fclose(f_in);
91
+ }
92
+
93
+ int main(int argc, char **argv) {
94
+ Option option;
95
+ try {
96
+ option = parse_option(argc, argv);
97
+ } catch(invalid_argument const &e) {
98
+ cout << e.what() << endl;
99
+ return 1;
100
+ }
101
+
102
+ predict(option.test_path, option.model_path, option.output_path);
103
+
104
+ return 0;
105
+ }
@@ -0,0 +1,173 @@
1
+ #pragma GCC diagnostic ignored "-Wunused-result"
2
+ #include <algorithm>
3
+ #include <cstring>
4
+ #include <iostream>
5
+ #include <stdexcept>
6
+ #include <string>
7
+ #include <vector>
8
+ #include <cstdlib>
9
+
10
+ #include "ffm.h"
11
+
12
+ #if defined USEOMP
13
+ #include <omp.h>
14
+ #endif
15
+
16
+ using namespace std;
17
+ using namespace ffm;
18
+
19
// Returns the usage text of ffm-train, listing every supported option.
std::string train_help() {
    static const char kUsage[] =
        "usage: ffm-train [options] training_set_file [model_file]\n"
        "\n"
        "options:\n"
        "-l <lambda>: set regularization parameter (default 0.00002)\n"
        "-k <factor>: set number of latent factors (default 4)\n"
        "-t <iteration>: set number of iterations (default 15)\n"
        "-r <eta>: set learning rate (default 0.2)\n"
        "-s <nr_threads>: set number of threads (default 1)\n"
        "-p <path>: set path to the validation set\n"
        "--quiet: quiet mode (no output)\n"
        "--no-norm: disable instance-wise normalization\n"
        "--auto-stop: stop at the iteration that achieves the best validation loss (must be used with -p)\n";
    return std::string(kUsage);
}
34
+
35
+ struct Option {
36
+ string tr_path;
37
+ string va_path;
38
+ string model_path;
39
+ ffm_parameter param;
40
+ bool quiet = false;
41
+ ffm_int nr_threads = 1;
42
+ };
43
+
44
// Returns the part of `path` that follows the last '/', or the whole string
// when it contains no '/'. A path ending in '/' yields an empty string.
//
// Uses std::string searching rather than strrchr on &*path.begin(), which
// dereferenced begin() even for an empty string.
std::string basename(std::string path) {
    const std::string::size_type last_slash = path.rfind('/');
    if(last_slash == std::string::npos)
        return path;
    return path.substr(last_slash + 1);
}
52
+
53
+ Option parse_option(int argc, char **argv) {
54
+ vector<string> args;
55
+ for(int i = 0; i < argc; i++)
56
+ args.push_back(string(argv[i]));
57
+
58
+ if(argc == 1)
59
+ throw invalid_argument(train_help());
60
+
61
+ Option opt;
62
+
63
+ ffm_int i = 1;
64
+ for(; i < argc; i++) {
65
+ if(args[i].compare("-t") == 0)
66
+ {
67
+ if(i == argc-1)
68
+ throw invalid_argument("need to specify number of iterations after -t");
69
+ i++;
70
+ opt.param.nr_iters = atoi(args[i].c_str());
71
+ if(opt.param.nr_iters <= 0)
72
+ throw invalid_argument("number of iterations should be greater than zero");
73
+ } else if(args[i].compare("-k") == 0) {
74
+ if(i == argc-1)
75
+ throw invalid_argument("need to specify number of factors after -k");
76
+ i++;
77
+ opt.param.k = atoi(args[i].c_str());
78
+ if(opt.param.k <= 0)
79
+ throw invalid_argument("number of factors should be greater than zero");
80
+ } else if(args[i].compare("-r") == 0) {
81
+ if(i == argc-1)
82
+ throw invalid_argument("need to specify eta after -r");
83
+ i++;
84
+ opt.param.eta = atof(args[i].c_str());
85
+ if(opt.param.eta <= 0)
86
+ throw invalid_argument("learning rate should be greater than zero");
87
+ } else if(args[i].compare("-l") == 0) {
88
+ if(i == argc-1)
89
+ throw invalid_argument("need to specify lambda after -l");
90
+ i++;
91
+ opt.param.lambda = atof(args[i].c_str());
92
+ if(opt.param.lambda < 0)
93
+ throw invalid_argument("regularization cost should not be smaller than zero");
94
+ } else if(args[i].compare("-s") == 0) {
95
+ if(i == argc-1)
96
+ throw invalid_argument("need to specify number of threads after -s");
97
+ i++;
98
+ opt.nr_threads = atoi(args[i].c_str());
99
+ if(opt.nr_threads <= 0)
100
+ throw invalid_argument("number of threads should be greater than zero");
101
+ } else if(args[i].compare("-p") == 0) {
102
+ if(i == argc-1)
103
+ throw invalid_argument("need to specify path after -p");
104
+ i++;
105
+ opt.va_path = args[i];
106
+ } else if(args[i].compare("--no-norm") == 0) {
107
+ opt.param.normalization = false;
108
+ } else if(args[i].compare("--quiet") == 0) {
109
+ opt.quiet = true;
110
+ } else if(args[i].compare("--auto-stop") == 0) {
111
+ opt.param.auto_stop = true;
112
+ } else {
113
+ break;
114
+ }
115
+ }
116
+
117
+ if(i != argc-2 && i != argc-1)
118
+ throw invalid_argument("cannot parse command\n");
119
+
120
+ opt.tr_path = args[i];
121
+ i++;
122
+
123
+ if(i < argc) {
124
+ opt.model_path = string(args[i]);
125
+ } else if(i == argc) {
126
+ opt.model_path = basename(opt.tr_path) + ".model";
127
+ } else {
128
+ throw invalid_argument("cannot parse argument");
129
+ }
130
+
131
+ return opt;
132
+ }
133
+
134
+ int train_on_disk(Option opt) {
135
+ string tr_bin_path = basename(opt.tr_path) + ".bin";
136
+ string va_bin_path = opt.va_path.empty()? "" : basename(opt.va_path) + ".bin";
137
+
138
+ ffm_read_problem_to_disk(opt.tr_path, tr_bin_path);
139
+ if(!opt.va_path.empty())
140
+ ffm_read_problem_to_disk(opt.va_path, va_bin_path);
141
+
142
+ ffm_model model = ffm_train_on_disk(tr_bin_path.c_str(), va_bin_path.c_str(), opt.param);
143
+
144
+ ffm_save_model(model, opt.model_path);
145
+
146
+ return 0;
147
+ }
148
+
149
+ int main(int argc, char **argv) {
150
+ Option opt;
151
+ try {
152
+ opt = parse_option(argc, argv);
153
+ } catch(invalid_argument &e) {
154
+ cout << e.what() << endl;
155
+ return 1;
156
+ }
157
+
158
+ if(opt.quiet)
159
+ cout.setstate(ios_base::badbit);
160
+
161
+ if(opt.param.auto_stop && opt.va_path.empty()) {
162
+ cout << "To use auto-stop, you need to assign a validation set" << endl;
163
+ return 1;
164
+ }
165
+
166
+ #if defined USEOMP
167
+ omp_set_num_threads(opt.nr_threads);
168
+ #endif
169
+
170
+ train_on_disk(opt);
171
+
172
+ return 0;
173
+ }
@@ -0,0 +1,699 @@
1
+ /*
2
+ The following table gives the meaning of some variables used in this code:
3
+
4
+ W: The pointer to the beginning of the model
5
+ w: Dynamic pointer to access values in the model
6
+ m: Number of fields
7
+ k: Number of latent factors
8
+ n: Number of features
9
+ l: Number of data points
10
+ f: Field index (0 to m-1)
11
+ d: Latent factor index (0 to k-1)
12
+ j: Feature index (0 to n-1)
13
+ i: Data point index (0 to l-1)
14
+ nnz: Number of non-zero elements
15
+ X, P: Used to store the problem in a compressed sparse row (CSR) format. len(X) = nnz, len(P) = l + 1
16
+ Y: The label. len(Y) = l
17
+ R: Precomputed scaling factor that makes the 2-norm of each instance equal to 1. len(R) = l
18
+ v: Value of each element in the problem
19
+ */
20
+
21
+ #pragma GCC diagnostic ignored "-Wunused-result"
22
+ #include <algorithm>
23
+ #include <cmath>
24
+ #include <iostream>
25
+ #include <iomanip>
26
+ #include <fstream>
27
+ #include <new>
28
+ #include <memory>
29
+ #include <random>
30
+ #include <stdexcept>
31
+ #include <string>
32
+ #include <cstring>
33
+ #include <vector>
34
+ #include <cassert>
35
+ #include <numeric>
36
+
37
+ #if defined USESSE
38
+ #include <pmmintrin.h>
39
+ #endif
40
+
41
+ #if defined USEOMP
42
+ #include <omp.h>
43
+ #endif
44
+
45
+ #include "ffm.h"
46
+ #include "timer.h"
47
+
48
+ namespace ffm {
49
+
50
+ namespace {
51
+
52
+ using namespace std;
53
+
54
+ #if defined USESSE
55
+ ffm_int const kALIGNByte = 16;
56
+ #else
57
+ ffm_int const kALIGNByte = 4;
58
+ #endif
59
+
60
+ ffm_int const kALIGN = kALIGNByte/sizeof(ffm_float);
61
+ ffm_int const kCHUNK_SIZE = 10000000;
62
+ ffm_int const kMaxLineSize = 100000;
63
+
64
+ inline ffm_int get_k_aligned(ffm_int k) {
65
+ return (ffm_int) ceil((ffm_float)k / kALIGN) * kALIGN;
66
+ }
67
+
68
+ ffm_long get_w_size(ffm_model &model) {
69
+ ffm_int k_aligned = get_k_aligned(model.k);
70
+ return (ffm_long) model.n * model.m * k_aligned * 2;
71
+ }
72
+
73
+ #if defined USESSE
74
+ inline ffm_float wTx(
75
+ ffm_node *begin,
76
+ ffm_node *end,
77
+ ffm_float r,
78
+ ffm_model &model,
79
+ ffm_float kappa=0,
80
+ ffm_float eta=0,
81
+ ffm_float lambda=0,
82
+ bool do_update=false) {
83
+
84
+ ffm_int align0 = 2 * get_k_aligned(model.k);
85
+ ffm_int align1 = model.m * align0;
86
+
87
+ __m128 XMMkappa = _mm_set1_ps(kappa);
88
+ __m128 XMMeta = _mm_set1_ps(eta);
89
+ __m128 XMMlambda = _mm_set1_ps(lambda);
90
+
91
+ __m128 XMMt = _mm_setzero_ps();
92
+
93
+ for(ffm_node *N1 = begin; N1 != end; N1++)
94
+ {
95
+ ffm_int j1 = N1->j;
96
+ ffm_int f1 = N1->f;
97
+ ffm_float v1 = N1->v;
98
+ if(j1 >= model.n || f1 >= model.m)
99
+ continue;
100
+
101
+ for(ffm_node *N2 = N1+1; N2 != end; N2++)
102
+ {
103
+ ffm_int j2 = N2->j;
104
+ ffm_int f2 = N2->f;
105
+ ffm_float v2 = N2->v;
106
+ if(j2 >= model.n || f2 >= model.m)
107
+ continue;
108
+
109
+ ffm_float *w1_base = model.W + (ffm_long)j1*align1 + f2*align0;
110
+ ffm_float *w2_base = model.W + (ffm_long)j2*align1 + f1*align0;
111
+
112
+ __m128 XMMv = _mm_set1_ps(v1*v2*r);
113
+
114
+ if(do_update)
115
+ {
116
+ __m128 XMMkappav = _mm_mul_ps(XMMkappa, XMMv);
117
+
118
+ for(ffm_int d = 0; d < align0; d += kALIGN * 2)
119
+ {
120
+ ffm_float *w1 = w1_base + d;
121
+ ffm_float *w2 = w2_base + d;
122
+
123
+ ffm_float *wg1 = w1 + kALIGN;
124
+ ffm_float *wg2 = w2 + kALIGN;
125
+
126
+ __m128 XMMw1 = _mm_load_ps(w1);
127
+ __m128 XMMw2 = _mm_load_ps(w2);
128
+
129
+ __m128 XMMwg1 = _mm_load_ps(wg1);
130
+ __m128 XMMwg2 = _mm_load_ps(wg2);
131
+
132
+ __m128 XMMg1 = _mm_add_ps(
133
+ _mm_mul_ps(XMMlambda, XMMw1),
134
+ _mm_mul_ps(XMMkappav, XMMw2));
135
+ __m128 XMMg2 = _mm_add_ps(
136
+ _mm_mul_ps(XMMlambda, XMMw2),
137
+ _mm_mul_ps(XMMkappav, XMMw1));
138
+
139
+ XMMwg1 = _mm_add_ps(XMMwg1, _mm_mul_ps(XMMg1, XMMg1));
140
+ XMMwg2 = _mm_add_ps(XMMwg2, _mm_mul_ps(XMMg2, XMMg2));
141
+
142
+ XMMw1 = _mm_sub_ps(XMMw1, _mm_mul_ps(XMMeta,
143
+ _mm_mul_ps(_mm_rsqrt_ps(XMMwg1), XMMg1)));
144
+ XMMw2 = _mm_sub_ps(XMMw2, _mm_mul_ps(XMMeta,
145
+ _mm_mul_ps(_mm_rsqrt_ps(XMMwg2), XMMg2)));
146
+
147
+ _mm_store_ps(w1, XMMw1);
148
+ _mm_store_ps(w2, XMMw2);
149
+
150
+ _mm_store_ps(wg1, XMMwg1);
151
+ _mm_store_ps(wg2, XMMwg2);
152
+ }
153
+ }
154
+ else
155
+ {
156
+ for(ffm_int d = 0; d < align0; d += kALIGN * 2)
157
+ {
158
+ __m128 XMMw1 = _mm_load_ps(w1_base+d);
159
+ __m128 XMMw2 = _mm_load_ps(w2_base+d);
160
+
161
+ XMMt = _mm_add_ps(XMMt,
162
+ _mm_mul_ps(_mm_mul_ps(XMMw1, XMMw2), XMMv));
163
+ }
164
+ }
165
+ }
166
+ }
167
+
168
+ if(do_update)
169
+ return 0;
170
+
171
+ XMMt = _mm_hadd_ps(XMMt, XMMt);
172
+ XMMt = _mm_hadd_ps(XMMt, XMMt);
173
+ ffm_float t;
174
+ _mm_store_ss(&t, XMMt);
175
+
176
+ return t;
177
+ }
178
+
179
+ #else
180
+
181
+ inline ffm_float wTx(
182
+ ffm_node *begin,
183
+ ffm_node *end,
184
+ ffm_float r,
185
+ ffm_model &model,
186
+ ffm_float kappa=0,
187
+ ffm_float eta=0,
188
+ ffm_float lambda=0,
189
+ bool do_update=false) {
190
+
191
+ ffm_int align0 = 2 * get_k_aligned(model.k);
192
+ ffm_int align1 = model.m * align0;
193
+
194
+ ffm_float t = 0;
195
+ for(ffm_node *N1 = begin; N1 != end; N1++) {
196
+ ffm_int j1 = N1->j;
197
+ ffm_int f1 = N1->f;
198
+ ffm_float v1 = N1->v;
199
+ if(j1 >= model.n || f1 >= model.m)
200
+ continue;
201
+
202
+ for(ffm_node *N2 = N1+1; N2 != end; N2++) {
203
+ ffm_int j2 = N2->j;
204
+ ffm_int f2 = N2->f;
205
+ ffm_float v2 = N2->v;
206
+ if(j2 >= model.n || f2 >= model.m)
207
+ continue;
208
+
209
+ ffm_float *w1 = model.W + (ffm_long)j1*align1 + f2*align0;
210
+ ffm_float *w2 = model.W + (ffm_long)j2*align1 + f1*align0;
211
+
212
+ ffm_float v = v1 * v2 * r;
213
+
214
+ if(do_update) {
215
+ ffm_float *wg1 = w1 + kALIGN;
216
+ ffm_float *wg2 = w2 + kALIGN;
217
+ for(ffm_int d = 0; d < align0; d += kALIGN * 2)
218
+ {
219
+ ffm_float g1 = lambda * w1[d] + kappa * w2[d] * v;
220
+ ffm_float g2 = lambda * w2[d] + kappa * w1[d] * v;
221
+
222
+ wg1[d] += g1 * g1;
223
+ wg2[d] += g2 * g2;
224
+
225
+ w1[d] -= eta / sqrt(wg1[d]) * g1;
226
+ w2[d] -= eta / sqrt(wg2[d]) * g2;
227
+ }
228
+ } else {
229
+ for(ffm_int d = 0; d < align0; d += kALIGN * 2)
230
+ t += w1[d] * w2[d] * v;
231
+ }
232
+ }
233
+ }
234
+
235
+ return t;
236
+ }
237
+ #endif
238
+
239
+ ffm_float* malloc_aligned_float(ffm_long size)
240
+ {
241
+ void *ptr;
242
+
243
+ #ifndef USESSE
244
+
245
+ ptr = malloc(size * sizeof(ffm_float));
246
+
247
+ #else
248
+
249
+ #ifdef _WIN32
250
+ ptr = _aligned_malloc(size*sizeof(ffm_float), kALIGNByte);
251
+ if(ptr == nullptr)
252
+ throw bad_alloc();
253
+ #else
254
+ int status = posix_memalign(&ptr, kALIGNByte, size*sizeof(ffm_float));
255
+ if(status != 0)
256
+ throw bad_alloc();
257
+ #endif
258
+
259
+ #endif
260
+
261
+ return (ffm_float*)ptr;
262
+ }
263
+
264
+ ffm_model init_model(ffm_int n, ffm_int m, ffm_parameter param)
265
+ {
266
+ ffm_model model;
267
+ model.n = n;
268
+ model.k = param.k;
269
+ model.m = m;
270
+ model.W = nullptr;
271
+ model.normalization = param.normalization;
272
+
273
+ ffm_int k_aligned = get_k_aligned(model.k);
274
+
275
+ model.W = malloc_aligned_float((ffm_long)n*m*k_aligned*2);
276
+
277
+ ffm_float coef = 1.0f / sqrt(model.k);
278
+ ffm_float *w = model.W;
279
+
280
+ default_random_engine generator;
281
+ uniform_real_distribution<ffm_float> distribution(0.0, 1.0);
282
+
283
+ for(ffm_int j = 0; j < model.n; j++) {
284
+ for(ffm_int f = 0; f < model.m; f++) {
285
+ for(ffm_int d = 0; d < k_aligned;) {
286
+ for(ffm_int s = 0; s < kALIGN; s++, w++, d++) {
287
+ w[0] = (d < model.k)? coef * distribution(generator) : 0.0;
288
+ w[kALIGN] = 1;
289
+ }
290
+ w += kALIGN;
291
+ }
292
+ }
293
+ }
294
+
295
+ return model;
296
+ }
297
+
298
+ struct disk_problem_meta {
299
+ ffm_int n = 0;
300
+ ffm_int m = 0;
301
+ ffm_int l = 0;
302
+ ffm_int num_blocks = 0;
303
+ ffm_long B_pos = 0;
304
+ uint64_t hash1;
305
+ uint64_t hash2;
306
+ };
307
+
308
+ struct problem_on_disk {
309
+ disk_problem_meta meta;
310
+ vector<ffm_float> Y;
311
+ vector<ffm_float> R;
312
+ vector<ffm_long> P;
313
+ vector<ffm_node> X;
314
+ vector<ffm_long> B;
315
+
316
+ problem_on_disk(string path) {
317
+ f.open(path, ios::in | ios::binary);
318
+ if(f.good()) {
319
+ f.read(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
320
+ f.seekg(meta.B_pos);
321
+ B.resize(meta.num_blocks);
322
+ f.read(reinterpret_cast<char*>(B.data()), sizeof(ffm_long) * meta.num_blocks);
323
+ }
324
+ }
325
+
326
+ int load_block(int block_index) {
327
+ if(block_index >= meta.num_blocks)
328
+ assert(false);
329
+
330
+ f.seekg(B[block_index]);
331
+
332
+ ffm_int l;
333
+ f.read(reinterpret_cast<char*>(&l), sizeof(ffm_int));
334
+
335
+ Y.resize(l);
336
+ f.read(reinterpret_cast<char*>(Y.data()), sizeof(ffm_float) * l);
337
+
338
+ R.resize(l);
339
+ f.read(reinterpret_cast<char*>(R.data()), sizeof(ffm_float) * l);
340
+
341
+ P.resize(l+1);
342
+ f.read(reinterpret_cast<char*>(P.data()), sizeof(ffm_long) * (l+1));
343
+
344
+ X.resize(P[l]);
345
+ f.read(reinterpret_cast<char*>(X.data()), sizeof(ffm_node) * P[l]);
346
+
347
+ return l;
348
+ }
349
+
350
+ bool is_empty() {
351
+ return meta.l == 0;
352
+ }
353
+
354
+ private:
355
+ ifstream f;
356
+ };
357
+
358
+ uint64_t hashfile(string txt_path, bool one_block=false)
359
+ {
360
+ ifstream f(txt_path, ios::ate | ios::binary);
361
+ if(f.bad())
362
+ return 0;
363
+
364
+ ffm_long end = (ffm_long) f.tellg();
365
+ f.seekg(0, ios::beg);
366
+ assert(static_cast<int>(f.tellg()) == 0);
367
+
368
+ uint64_t magic = 90359;
369
+ for(ffm_long pos = 0; pos < end; ) {
370
+ ffm_long next_pos = min(pos + kCHUNK_SIZE, end);
371
+ ffm_long size = next_pos - pos;
372
+ vector<char> buffer(kCHUNK_SIZE);
373
+ f.read(buffer.data(), size);
374
+
375
+ ffm_int i = 0;
376
+ while(i < size - 8) {
377
+ uint64_t x = *reinterpret_cast<uint64_t*>(buffer.data() + i);
378
+ magic = ( (magic + x) * (magic + x + 1) >> 1) + x;
379
+ i += 8;
380
+ }
381
+ for(; i < size; i++) {
382
+ char x = buffer[i];
383
+ magic = ( (magic + x) * (magic + x + 1) >> 1) + x;
384
+ }
385
+
386
+ pos = next_pos;
387
+ if(one_block)
388
+ break;
389
+ }
390
+
391
+ return magic;
392
+ }
393
+
394
+ void txt2bin(string txt_path, string bin_path) {
395
+
396
+ FILE *f_txt = fopen(txt_path.c_str(), "r");
397
+ if(f_txt == nullptr)
398
+ throw;
399
+
400
+ ofstream f_bin(bin_path, ios::out | ios::binary);
401
+
402
+ vector<char> line(kMaxLineSize);
403
+
404
+ ffm_long p = 0;
405
+ disk_problem_meta meta;
406
+
407
+ vector<ffm_float> Y;
408
+ vector<ffm_float> R;
409
+ vector<ffm_long> P(1, 0);
410
+ vector<ffm_node> X;
411
+ vector<ffm_long> B;
412
+
413
+ auto write_chunk = [&] () {
414
+ B.push_back(f_bin.tellp());
415
+ ffm_int l = Y.size();
416
+ ffm_long nnz = P[l];
417
+ meta.l += l;
418
+
419
+ f_bin.write(reinterpret_cast<char*>(&l), sizeof(ffm_int));
420
+ f_bin.write(reinterpret_cast<char*>(Y.data()), sizeof(ffm_float) * l);
421
+ f_bin.write(reinterpret_cast<char*>(R.data()), sizeof(ffm_float) * l);
422
+ f_bin.write(reinterpret_cast<char*>(P.data()), sizeof(ffm_long) * (l+1));
423
+ f_bin.write(reinterpret_cast<char*>(X.data()), sizeof(ffm_node) * nnz);
424
+
425
+ Y.clear();
426
+ R.clear();
427
+ P.assign(1, 0);
428
+ X.clear();
429
+ p = 0;
430
+ meta.num_blocks++;
431
+ };
432
+
433
+ f_bin.write(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
434
+
435
+ while(fgets(line.data(), kMaxLineSize, f_txt)) {
436
+ char *y_char = strtok(line.data(), " \t");
437
+
438
+ ffm_float y = (atoi(y_char)>0)? 1.0f : -1.0f;
439
+
440
+ ffm_float scale = 0;
441
+ for(; ; p++) {
442
+ char *field_char = strtok(nullptr,":");
443
+ char *idx_char = strtok(nullptr,":");
444
+ char *value_char = strtok(nullptr," \t");
445
+ if(field_char == nullptr || *field_char == '\n')
446
+ break;
447
+
448
+ ffm_node N;
449
+ N.f = atoi(field_char);
450
+ N.j = atoi(idx_char);
451
+ N.v = atof(value_char);
452
+
453
+ X.push_back(N);
454
+
455
+ meta.m = max(meta.m, N.f+1);
456
+ meta.n = max(meta.n, N.j+1);
457
+
458
+ scale += N.v*N.v;
459
+ }
460
+ scale = 1.0 / scale;
461
+
462
+ Y.push_back(y);
463
+ R.push_back(scale);
464
+ P.push_back(p);
465
+
466
+ if(X.size() > (size_t)kCHUNK_SIZE)
467
+ write_chunk();
468
+ }
469
+ write_chunk();
470
+ write_chunk(); // write a dummy empty chunk in order to know where the EOF is
471
+ assert(meta.num_blocks == (ffm_int)B.size());
472
+ meta.B_pos = f_bin.tellp();
473
+ f_bin.write(reinterpret_cast<char*>(B.data()), sizeof(ffm_long) * B.size());
474
+
475
+ fclose(f_txt);
476
+ meta.hash1 = hashfile(txt_path, true);
477
+ meta.hash2 = hashfile(txt_path, false);
478
+
479
+ f_bin.seekp(0, ios::beg);
480
+ f_bin.write(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
481
+ }
482
+
483
+ bool check_same_txt_bin(string txt_path, string bin_path) {
484
+ ifstream f_bin(bin_path, ios::binary | ios::ate);
485
+ if(f_bin.tellg() < (ffm_long)sizeof(disk_problem_meta))
486
+ return false;
487
+ disk_problem_meta meta;
488
+ f_bin.seekg(0, ios::beg);
489
+ f_bin.read(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
490
+ if(meta.hash1 != hashfile(txt_path, true))
491
+ return false;
492
+ if(meta.hash2 != hashfile(txt_path, false))
493
+ return false;
494
+
495
+ return true;
496
+ }
497
+
498
+ } // unnamed namespace
499
+
500
+ void ffm_model::release() {
501
+ if(W != nullptr) {
502
+ #ifndef USESSE
503
+ free(W);
504
+ #else
505
+ #ifdef _WIN32
506
+ _aligned_free(W);
507
+ #else
508
+ free(W);
509
+ #endif
510
+ #endif
511
+ W = nullptr;
512
+ }
513
+ }
514
+
515
+ void ffm_read_problem_to_disk(string txt_path, string bin_path) {
516
+
517
+ Timer timer;
518
+
519
+ cout << "First check if the text file has already been converted to binary format " << flush;
520
+ bool same_file = check_same_txt_bin(txt_path, bin_path);
521
+ cout << "(" << fixed << setprecision(1) << timer.toc() << " seconds)" << endl;
522
+
523
+ if(same_file) {
524
+ cout << "Binary file found. Skip converting text to binary" << endl;
525
+ } else {
526
+ cout << "Binary file NOT found. Convert text file to binary file " << flush;
527
+ txt2bin(txt_path, bin_path);
528
+ cout << "(" << fixed << setprecision(1) << timer.toc() << " seconds)" << endl;
529
+ }
530
+ }
531
+
532
+ ffm_model ffm_train_on_disk(string tr_path, string va_path, ffm_parameter param) {
533
+
534
+ problem_on_disk tr(tr_path);
535
+ problem_on_disk va(va_path);
536
+
537
+ ffm_model model = init_model(tr.meta.n, tr.meta.m, param);
538
+
539
+ bool auto_stop = param.auto_stop && !va_path.empty();
540
+
541
+ ffm_long w_size = get_w_size(model);
542
+ vector<ffm_float> prev_W(w_size, 0);
543
+ if(auto_stop)
544
+ prev_W.assign(w_size, 0);
545
+ ffm_double best_va_loss = numeric_limits<ffm_double>::max();
546
+
547
+ cout.width(4);
548
+ cout << "iter";
549
+ cout.width(13);
550
+ cout << "tr_logloss";
551
+ if(!va_path.empty())
552
+ {
553
+ cout.width(13);
554
+ cout << "va_logloss";
555
+ }
556
+ cout.width(13);
557
+ cout << "tr_time";
558
+ cout << endl;
559
+
560
+ Timer timer;
561
+
562
+ auto one_epoch = [&] (problem_on_disk &prob, bool do_update) {
563
+
564
+ ffm_double loss = 0;
565
+
566
+ vector<ffm_int> outer_order(prob.meta.num_blocks);
567
+ iota(outer_order.begin(), outer_order.end(), 0);
568
+ random_shuffle(outer_order.begin(), outer_order.end());
569
+ for(auto blk : outer_order) {
570
+ ffm_int l = prob.load_block(blk);
571
+
572
+ vector<ffm_int> inner_order(l);
573
+ iota(inner_order.begin(), inner_order.end(), 0);
574
+ random_shuffle(inner_order.begin(), inner_order.end());
575
+
576
+ #if defined USEOMP
577
+ #pragma omp parallel for schedule(static) reduction(+: loss)
578
+ #endif
579
+ for(ffm_int ii = 0; ii < l; ii++) {
580
+ ffm_int i = inner_order[ii];
581
+
582
+ ffm_float y = prob.Y[i];
583
+
584
+ ffm_node *begin = &prob.X[prob.P[i]];
585
+
586
+ ffm_node *end = &prob.X[prob.P[i+1]];
587
+
588
+ ffm_float r = param.normalization? prob.R[i] : 1;
589
+
590
+ ffm_double t = wTx(begin, end, r, model);
591
+
592
+ ffm_double expnyt = exp(-y*t);
593
+
594
+ loss += log1p(expnyt);
595
+
596
+ if(do_update) {
597
+
598
+ ffm_float kappa = -y*expnyt/(1+expnyt);
599
+
600
+ wTx(begin, end, r, model, kappa, param.eta, param.lambda, true);
601
+ }
602
+ }
603
+ }
604
+
605
+ return loss / prob.meta.l;
606
+ };
607
+
608
+ for(ffm_int iter = 1; iter <= param.nr_iters; iter++) {
609
+ timer.tic();
610
+ ffm_double tr_loss = one_epoch(tr, true);
611
+ timer.toc();
612
+
613
+ cout.width(4);
614
+ cout << iter;
615
+ cout.width(13);
616
+ cout << fixed << setprecision(5) << tr_loss;
617
+
618
+ if(!va.is_empty()) {
619
+ ffm_double va_loss = one_epoch(va, false);
620
+
621
+ cout.width(13);
622
+ cout << fixed << setprecision(5) << va_loss;
623
+
624
+ if(auto_stop) {
625
+ if(va_loss > best_va_loss) {
626
+ memcpy(model.W, prev_W.data(), w_size*sizeof(ffm_float));
627
+ cout << endl << "Auto-stop. Use model at " << iter-1 << "th iteration." << endl;
628
+ break;
629
+ } else {
630
+ memcpy(prev_W.data(), model.W, w_size*sizeof(ffm_float));
631
+ best_va_loss = va_loss;
632
+ }
633
+ }
634
+ }
635
+ cout.width(13);
636
+ cout << fixed << setprecision(1) << timer.get() << endl;
637
+ }
638
+
639
+ return model;
640
+ }
641
+
642
+ void ffm_save_model(ffm_model &model, string path) {
643
+ ofstream f_out(path, ios::out | ios::binary);
644
+ f_out.write(reinterpret_cast<char*>(&model.n), sizeof(ffm_int));
645
+ f_out.write(reinterpret_cast<char*>(&model.m), sizeof(ffm_int));
646
+ f_out.write(reinterpret_cast<char*>(&model.k), sizeof(ffm_int));
647
+ f_out.write(reinterpret_cast<char*>(&model.normalization), sizeof(bool));
648
+
649
+ ffm_long w_size = get_w_size(model);
650
+ // f_out.write(reinterpret_cast<char*>(model.W), sizeof(ffm_float) * w_size);
651
+ // Need to write chunk by chunk because some compiler use int32 and will overflow when w_size * 4 > MAX_INT
652
+
653
+ for(ffm_long offset = 0; offset < w_size; ) {
654
+ ffm_long next_offset = min(w_size, offset + (ffm_long) sizeof(ffm_float) * kCHUNK_SIZE);
655
+ ffm_long size = next_offset - offset;
656
+ f_out.write(reinterpret_cast<char*>(model.W+offset), sizeof(ffm_float) * size);
657
+ offset = next_offset;
658
+ }
659
+ }
660
+
661
+ ffm_model ffm_load_model(string path) {
662
+ ifstream f_in(path, ios::in | ios::binary);
663
+
664
+ ffm_model model;
665
+ f_in.read(reinterpret_cast<char*>(&model.n), sizeof(ffm_int));
666
+ f_in.read(reinterpret_cast<char*>(&model.m), sizeof(ffm_int));
667
+ f_in.read(reinterpret_cast<char*>(&model.k), sizeof(ffm_int));
668
+ f_in.read(reinterpret_cast<char*>(&model.normalization), sizeof(bool));
669
+
670
+ ffm_long w_size = get_w_size(model);
671
+ model.W = malloc_aligned_float(w_size);
672
+ // f_in.read(reinterpret_cast<char*>(model.W), sizeof(ffm_float) * w_size);
673
+ // Need to write chunk by chunk because some compiler use int32 and will overflow when w_size * 4 > MAX_INT
674
+
675
+ for(ffm_long offset = 0; offset < w_size; ) {
676
+ ffm_long next_offset = min(w_size, offset + (ffm_long) sizeof(ffm_float) * kCHUNK_SIZE);
677
+ ffm_long size = next_offset - offset;
678
+ f_in.read(reinterpret_cast<char*>(model.W+offset), sizeof(ffm_float) * size);
679
+ offset = next_offset;
680
+ }
681
+
682
+ return model;
683
+ }
684
+
685
+ ffm_float ffm_predict(ffm_node *begin, ffm_node *end, ffm_model &model) {
686
+ ffm_float r = 1;
687
+ if(model.normalization) {
688
+ r = 0;
689
+ for(ffm_node *N = begin; N != end; N++)
690
+ r += N->v*N->v;
691
+ r = 1/r;
692
+ }
693
+
694
+ ffm_float t = wTx(begin, end, r, model);
695
+
696
+ return 1/(1+exp(-t));
697
+ }
698
+
699
+ } // namespace ffm