libffm 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,105 @@
1
+ #include <cstring>
2
+ #include <fstream>
3
+ #include <iostream>
4
+ #include <string>
5
+ #include <iomanip>
6
+ #include <memory>
7
+ #include <cmath>
8
+ #include <stdexcept>
9
+ #include <vector>
10
+ #include <cstdlib>
11
+
12
+ #include "ffm.h"
13
+
14
+ using namespace std;
15
+ using namespace ffm;
16
+
17
// Command-line options for ffm-predict: the three positional paths.
struct Option {
    std::string test_path;   // test set in libffm text format
    std::string model_path;  // model file produced by ffm-train
    std::string output_path; // destination for per-line predictions
};
20
+
21
+ string predict_help() {
22
+ return string(
23
+ "usage: ffm-predict test_file model_file output_file\n");
24
+ }
25
+
26
+ Option parse_option(int argc, char **argv) {
27
+ vector<string> args;
28
+ for(int i = 0; i < argc; i++)
29
+ args.push_back(string(argv[i]));
30
+
31
+ if(argc == 1)
32
+ throw invalid_argument(predict_help());
33
+
34
+ Option option;
35
+
36
+ if(argc != 4)
37
+ throw invalid_argument("cannot parse argument");
38
+
39
+ option.test_path = string(args[1]);
40
+ option.model_path = string(args[2]);
41
+ option.output_path = string(args[3]);
42
+
43
+ return option;
44
+ }
45
+
46
+ void predict(string test_path, string model_path, string output_path) {
47
+ int const kMaxLineSize = 1000000;
48
+
49
+ FILE *f_in = fopen(test_path.c_str(), "r");
50
+ ofstream f_out(output_path);
51
+ char line[kMaxLineSize];
52
+
53
+ ffm_model model = ffm_load_model(model_path);
54
+
55
+ ffm_double loss = 0;
56
+ vector<ffm_node> x;
57
+ ffm_int i = 0;
58
+
59
+ for(; fgets(line, kMaxLineSize, f_in) != nullptr; i++) {
60
+ x.clear();
61
+ char *y_char = strtok(line, " \t");
62
+ ffm_float y = (atoi(y_char)>0)? 1.0f : -1.0f;
63
+
64
+ while(true) {
65
+ char *field_char = strtok(nullptr,":");
66
+ char *idx_char = strtok(nullptr,":");
67
+ char *value_char = strtok(nullptr," \t");
68
+ if(field_char == nullptr || *field_char == '\n')
69
+ break;
70
+
71
+ ffm_node N;
72
+ N.f = atoi(field_char);
73
+ N.j = atoi(idx_char);
74
+ N.v = atof(value_char);
75
+
76
+ x.push_back(N);
77
+ }
78
+
79
+ ffm_float y_bar = ffm_predict(x.data(), x.data()+x.size(), model);
80
+
81
+ loss -= y==1? log(y_bar) : log(1-y_bar);
82
+
83
+ f_out << y_bar << "\n";
84
+ }
85
+
86
+ loss /= i;
87
+
88
+ cout << "logloss = " << fixed << setprecision(5) << loss << endl;
89
+
90
+ fclose(f_in);
91
+ }
92
+
93
// Entry point: parse the three required paths, then run prediction.
// Prints the usage/error text and exits with status 1 on bad arguments.
int main(int argc, char **argv) {
    Option option;
    try {
        option = parse_option(argc, argv);
    } catch(invalid_argument const &e) {
        // parse_option reports problems (including the help text for an
        // empty command line) via invalid_argument.
        cout << e.what() << endl;
        return 1;
    }

    predict(option.test_path, option.model_path, option.output_path);

    return 0;
}
@@ -0,0 +1,173 @@
1
+ #pragma GCC diagnostic ignored "-Wunused-result"
2
+ #include <algorithm>
3
+ #include <cstring>
4
+ #include <iostream>
5
+ #include <stdexcept>
6
+ #include <string>
7
+ #include <vector>
8
+ #include <cstdlib>
9
+
10
+ #include "ffm.h"
11
+
12
+ #if defined USEOMP
13
+ #include <omp.h>
14
+ #endif
15
+
16
+ using namespace std;
17
+ using namespace ffm;
18
+
19
+ string train_help() {
20
+ return string(
21
+ "usage: ffm-train [options] training_set_file [model_file]\n"
22
+ "\n"
23
+ "options:\n"
24
+ "-l <lambda>: set regularization parameter (default 0.00002)\n"
25
+ "-k <factor>: set number of latent factors (default 4)\n"
26
+ "-t <iteration>: set number of iterations (default 15)\n"
27
+ "-r <eta>: set learning rate (default 0.2)\n"
28
+ "-s <nr_threads>: set number of threads (default 1)\n"
29
+ "-p <path>: set path to the validation set\n"
30
+ "--quiet: quiet mode (no output)\n"
31
+ "--no-norm: disable instance-wise normalization\n"
32
+ "--auto-stop: stop at the iteration that achieves the best validation loss (must be used with -p)\n");
33
+ }
34
+
35
// Parsed command-line options for ffm-train.
struct Option {
    string tr_path;    // training set path (required positional argument)
    string va_path;    // validation set path from -p; empty means none
    string model_path; // output model path; defaults to basename(tr).model
    ffm_parameter param;    // learning hyper-parameters handed to the trainer
    bool quiet = false;     // --quiet: suppress progress output on stdout
    ffm_int nr_threads = 1; // -s: number of OpenMP threads
};
43
+
44
// Return the final component of `path` — everything after the last '/' —
// or `path` itself when it contains no '/'. A trailing '/' yields an
// empty string.
//
// The previous implementation called strrchr(&*path.begin(), '/'), which
// dereferences the end iterator of an empty string (undefined behavior);
// find_last_of is well-defined for every input.
std::string basename(std::string path) {
    std::string::size_type const slash = path.find_last_of('/');
    return slash == std::string::npos ? path : path.substr(slash + 1);
}
52
+
53
// Parse ffm-train's command line into an Option.
//
// Flags are consumed left to right; the first token that is not a
// recognized flag ends the flag loop and is taken as the training-set
// path, optionally followed by a model path. Throws invalid_argument
// (carrying the full usage text when no arguments are given) on any
// malformed input or out-of-range value.
Option parse_option(int argc, char **argv) {
    vector<string> args;
    for(int i = 0; i < argc; i++)
        args.push_back(string(argv[i]));

    if(argc == 1)
        throw invalid_argument(train_help());

    Option opt;

    ffm_int i = 1;
    for(; i < argc; i++) {
        if(args[i].compare("-t") == 0)
        {
            // Each value-taking flag checks that a value follows, then
            // consumes it by advancing i.
            if(i == argc-1)
                throw invalid_argument("need to specify number of iterations after -t");
            i++;
            opt.param.nr_iters = atoi(args[i].c_str());
            if(opt.param.nr_iters <= 0)
                throw invalid_argument("number of iterations should be greater than zero");
        } else if(args[i].compare("-k") == 0) {
            if(i == argc-1)
                throw invalid_argument("need to specify number of factors after -k");
            i++;
            opt.param.k = atoi(args[i].c_str());
            if(opt.param.k <= 0)
                throw invalid_argument("number of factors should be greater than zero");
        } else if(args[i].compare("-r") == 0) {
            if(i == argc-1)
                throw invalid_argument("need to specify eta after -r");
            i++;
            opt.param.eta = atof(args[i].c_str());
            if(opt.param.eta <= 0)
                throw invalid_argument("learning rate should be greater than zero");
        } else if(args[i].compare("-l") == 0) {
            if(i == argc-1)
                throw invalid_argument("need to specify lambda after -l");
            i++;
            opt.param.lambda = atof(args[i].c_str());
            if(opt.param.lambda < 0)
                throw invalid_argument("regularization cost should not be smaller than zero");
        } else if(args[i].compare("-s") == 0) {
            if(i == argc-1)
                throw invalid_argument("need to specify number of threads after -s");
            i++;
            opt.nr_threads = atoi(args[i].c_str());
            if(opt.nr_threads <= 0)
                throw invalid_argument("number of threads should be greater than zero");
        } else if(args[i].compare("-p") == 0) {
            if(i == argc-1)
                throw invalid_argument("need to specify path after -p");
            i++;
            opt.va_path = args[i];
        } else if(args[i].compare("--no-norm") == 0) {
            opt.param.normalization = false;
        } else if(args[i].compare("--quiet") == 0) {
            opt.quiet = true;
        } else if(args[i].compare("--auto-stop") == 0) {
            opt.param.auto_stop = true;
        } else {
            // First non-flag token: start of the positional arguments.
            // NOTE(review): an unknown flag (e.g. "-x") also lands here and
            // is then rejected by the positional-count check below.
            break;
        }
    }

    // After the flags there must be exactly one or two positional args:
    // training_set_file [model_file].
    if(i != argc-2 && i != argc-1)
        throw invalid_argument("cannot parse command\n");

    opt.tr_path = args[i];
    i++;

    if(i < argc) {
        opt.model_path = string(args[i]);
    } else if(i == argc) {
        // No explicit model path: derive one from the training file name.
        opt.model_path = basename(opt.tr_path) + ".model";
    } else {
        throw invalid_argument("cannot parse argument");
    }

    return opt;
}
133
+
134
+ int train_on_disk(Option opt) {
135
+ string tr_bin_path = basename(opt.tr_path) + ".bin";
136
+ string va_bin_path = opt.va_path.empty()? "" : basename(opt.va_path) + ".bin";
137
+
138
+ ffm_read_problem_to_disk(opt.tr_path, tr_bin_path);
139
+ if(!opt.va_path.empty())
140
+ ffm_read_problem_to_disk(opt.va_path, va_bin_path);
141
+
142
+ ffm_model model = ffm_train_on_disk(tr_bin_path.c_str(), va_bin_path.c_str(), opt.param);
143
+
144
+ ffm_save_model(model, opt.model_path);
145
+
146
+ return 0;
147
+ }
148
+
149
// Entry point: parse options, validate option combinations, configure
// threading, and train. Exits with 1 on argument errors.
int main(int argc, char **argv) {
    Option opt;
    try {
        opt = parse_option(argc, argv);
    } catch(invalid_argument &e) {
        cout << e.what() << endl;
        return 1;
    }

    // Quiet mode: poison cout so every subsequent "<<" is silently dropped.
    if(opt.quiet)
        cout.setstate(ios_base::badbit);

    // NOTE(review): because badbit is set first, this error message is
    // also suppressed under --quiet — confirm that is intended.
    if(opt.param.auto_stop && opt.va_path.empty()) {
        cout << "To use auto-stop, you need to assign a validation set" << endl;
        return 1;
    }

#if defined USEOMP
    omp_set_num_threads(opt.nr_threads);
#endif

    train_on_disk(opt);

    return 0;
}
@@ -0,0 +1,699 @@
1
+ /*
2
+ The following table is the meaning of some variables in this code:
3
+
4
+ W: The pointer to the beginning of the model
5
+ w: Dynamic pointer to access values in the model
6
+ m: Number of fields
7
+ k: Number of latent factors
8
+ n: Number of features
9
+ l: Number of data points
10
+ f: Field index (0 to m-1)
11
+ d: Latent factor index (0 to k-1)
12
+ j: Feature index (0 to n-1)
13
+ i: Data point index (0 to l-1)
14
+ nnz: Number of non-zero elements
15
+ X, P: Used to store the problem in a compressed sparse row (CSR) format. len(X) = nnz, len(P) = l + 1
16
+ Y: The label. len(Y) = l
17
+ R: Precomputed scaling factor to make the 2-norm of each instance to be 1. len(R) = l
18
+ v: Value of each element in the problem
19
+ */
20
+
21
+ #pragma GCC diagnostic ignored "-Wunused-result"
22
+ #include <algorithm>
23
+ #include <cmath>
24
+ #include <iostream>
25
+ #include <iomanip>
26
+ #include <fstream>
27
+ #include <new>
28
+ #include <memory>
29
+ #include <random>
30
+ #include <stdexcept>
31
+ #include <string>
32
+ #include <cstring>
33
+ #include <vector>
34
+ #include <cassert>
35
+ #include <numeric>
36
+
37
+ #if defined USESSE
38
+ #include <pmmintrin.h>
39
+ #endif
40
+
41
+ #if defined USEOMP
42
+ #include <omp.h>
43
+ #endif
44
+
45
+ #include "ffm.h"
46
+ #include "timer.h"
47
+
48
+ namespace ffm {
49
+
50
+ namespace {
51
+
52
+ using namespace std;
53
+
54
+ #if defined USESSE
55
+ ffm_int const kALIGNByte = 16;
56
+ #else
57
+ ffm_int const kALIGNByte = 4;
58
+ #endif
59
+
60
+ ffm_int const kALIGN = kALIGNByte/sizeof(ffm_float);
61
+ ffm_int const kCHUNK_SIZE = 10000000;
62
+ ffm_int const kMaxLineSize = 100000;
63
+
64
+ inline ffm_int get_k_aligned(ffm_int k) {
65
+ return (ffm_int) ceil((ffm_float)k / kALIGN) * kALIGN;
66
+ }
67
+
68
+ ffm_long get_w_size(ffm_model &model) {
69
+ ffm_int k_aligned = get_k_aligned(model.k);
70
+ return (ffm_long) model.n * model.m * k_aligned * 2;
71
+ }
72
+
73
#if defined USESSE
// Core FFM kernel, SSE variant.
//
// For the instance [begin, end) this computes the pairwise-interaction
// score: sum over feature pairs (N1, N2) of
//     dot(W[j1][f2], W[j2][f1]) * v1 * v2 * r
// Nodes whose feature or field index falls outside the model (j >= n or
// f >= m) are skipped.
//
// When do_update is true the function instead performs one AdaGrad step
// on every weight pair it visits (gradient g = lambda*w + kappa*v*w_other,
// accumulator wg += g*g, weight w -= eta * g / sqrt(wg)) and returns 0.
// The accumulator for each kALIGN-sized weight group is stored kALIGN
// floats after the group, which is why the loop steps by kALIGN * 2.
//
// NOTE(review): this path uses _mm_rsqrt_ps, a low-precision reciprocal
// square root approximation, so its updates differ slightly from the
// scalar path's eta / sqrt(wg).
inline ffm_float wTx(
    ffm_node *begin,
    ffm_node *end,
    ffm_float r,
    ffm_model &model,
    ffm_float kappa=0,
    ffm_float eta=0,
    ffm_float lambda=0,
    bool do_update=false) {

    // align0: floats per (feature, field) slot (weights + accumulators);
    // align1: floats per feature row.
    ffm_int align0 = 2 * get_k_aligned(model.k);
    ffm_int align1 = model.m * align0;

    __m128 XMMkappa = _mm_set1_ps(kappa);
    __m128 XMMeta = _mm_set1_ps(eta);
    __m128 XMMlambda = _mm_set1_ps(lambda);

    __m128 XMMt = _mm_setzero_ps(); // 4-lane partial sums of the score

    for(ffm_node *N1 = begin; N1 != end; N1++)
    {
        ffm_int j1 = N1->j;
        ffm_int f1 = N1->f;
        ffm_float v1 = N1->v;
        if(j1 >= model.n || f1 >= model.m)
            continue;

        for(ffm_node *N2 = N1+1; N2 != end; N2++)
        {
            ffm_int j2 = N2->j;
            ffm_int f2 = N2->f;
            ffm_float v2 = N2->v;
            if(j2 >= model.n || f2 >= model.m)
                continue;

            // w1: latent vector of feature j1 against field f2;
            // w2: latent vector of feature j2 against field f1.
            ffm_float *w1_base = model.W + (ffm_long)j1*align1 + f2*align0;
            ffm_float *w2_base = model.W + (ffm_long)j2*align1 + f1*align0;

            __m128 XMMv = _mm_set1_ps(v1*v2*r);

            if(do_update)
            {
                __m128 XMMkappav = _mm_mul_ps(XMMkappa, XMMv);

                for(ffm_int d = 0; d < align0; d += kALIGN * 2)
                {
                    ffm_float *w1 = w1_base + d;
                    ffm_float *w2 = w2_base + d;

                    // Per-group AdaGrad accumulators live right after the
                    // weight group.
                    ffm_float *wg1 = w1 + kALIGN;
                    ffm_float *wg2 = w2 + kALIGN;

                    __m128 XMMw1 = _mm_load_ps(w1);
                    __m128 XMMw2 = _mm_load_ps(w2);

                    __m128 XMMwg1 = _mm_load_ps(wg1);
                    __m128 XMMwg2 = _mm_load_ps(wg2);

                    // g = lambda * w + kappa * v * w_other
                    __m128 XMMg1 = _mm_add_ps(
                        _mm_mul_ps(XMMlambda, XMMw1),
                        _mm_mul_ps(XMMkappav, XMMw2));
                    __m128 XMMg2 = _mm_add_ps(
                        _mm_mul_ps(XMMlambda, XMMw2),
                        _mm_mul_ps(XMMkappav, XMMw1));

                    XMMwg1 = _mm_add_ps(XMMwg1, _mm_mul_ps(XMMg1, XMMg1));
                    XMMwg2 = _mm_add_ps(XMMwg2, _mm_mul_ps(XMMg2, XMMg2));

                    // w -= eta * g * rsqrt(wg)
                    XMMw1 = _mm_sub_ps(XMMw1, _mm_mul_ps(XMMeta,
                            _mm_mul_ps(_mm_rsqrt_ps(XMMwg1), XMMg1)));
                    XMMw2 = _mm_sub_ps(XMMw2, _mm_mul_ps(XMMeta,
                            _mm_mul_ps(_mm_rsqrt_ps(XMMwg2), XMMg2)));

                    _mm_store_ps(w1, XMMw1);
                    _mm_store_ps(w2, XMMw2);

                    _mm_store_ps(wg1, XMMwg1);
                    _mm_store_ps(wg2, XMMwg2);
                }
            }
            else
            {
                for(ffm_int d = 0; d < align0; d += kALIGN * 2)
                {
                    __m128 XMMw1 = _mm_load_ps(w1_base+d);
                    __m128 XMMw2 = _mm_load_ps(w2_base+d);

                    XMMt = _mm_add_ps(XMMt,
                           _mm_mul_ps(_mm_mul_ps(XMMw1, XMMw2), XMMv));
                }
            }
        }
    }

    if(do_update)
        return 0;

    // Horizontal sum of the four partial lanes into a scalar.
    XMMt = _mm_hadd_ps(XMMt, XMMt);
    XMMt = _mm_hadd_ps(XMMt, XMMt);
    ffm_float t;
    _mm_store_ss(&t, XMMt);

    return t;
}

#else

// Core FFM kernel, scalar variant. Same contract as the SSE version
// above: returns the pairwise-interaction score, or performs one AdaGrad
// update (and returns 0 via the untouched accumulator t) when do_update
// is true. With kALIGN == 1 here, weights and AdaGrad accumulators
// alternate float by float, hence the stride of kALIGN * 2.
inline ffm_float wTx(
    ffm_node *begin,
    ffm_node *end,
    ffm_float r,
    ffm_model &model,
    ffm_float kappa=0,
    ffm_float eta=0,
    ffm_float lambda=0,
    bool do_update=false) {

    ffm_int align0 = 2 * get_k_aligned(model.k);
    ffm_int align1 = model.m * align0;

    ffm_float t = 0;
    for(ffm_node *N1 = begin; N1 != end; N1++) {
        ffm_int j1 = N1->j;
        ffm_int f1 = N1->f;
        ffm_float v1 = N1->v;
        if(j1 >= model.n || f1 >= model.m)
            continue;

        for(ffm_node *N2 = N1+1; N2 != end; N2++) {
            ffm_int j2 = N2->j;
            ffm_int f2 = N2->f;
            ffm_float v2 = N2->v;
            if(j2 >= model.n || f2 >= model.m)
                continue;

            ffm_float *w1 = model.W + (ffm_long)j1*align1 + f2*align0;
            ffm_float *w2 = model.W + (ffm_long)j2*align1 + f1*align0;

            ffm_float v = v1 * v2 * r;

            if(do_update) {
                ffm_float *wg1 = w1 + kALIGN;
                ffm_float *wg2 = w2 + kALIGN;
                for(ffm_int d = 0; d < align0; d += kALIGN * 2)
                {
                    // g = lambda * w + kappa * v * w_other
                    ffm_float g1 = lambda * w1[d] + kappa * w2[d] * v;
                    ffm_float g2 = lambda * w2[d] + kappa * w1[d] * v;

                    wg1[d] += g1 * g1;
                    wg2[d] += g2 * g2;

                    w1[d] -= eta / sqrt(wg1[d]) * g1;
                    w2[d] -= eta / sqrt(wg2[d]) * g2;
                }
            } else {
                for(ffm_int d = 0; d < align0; d += kALIGN * 2)
                    t += w1[d] * w2[d] * v;
            }
        }
    }

    return t;
}
#endif
238
+
239
+ ffm_float* malloc_aligned_float(ffm_long size)
240
+ {
241
+ void *ptr;
242
+
243
+ #ifndef USESSE
244
+
245
+ ptr = malloc(size * sizeof(ffm_float));
246
+
247
+ #else
248
+
249
+ #ifdef _WIN32
250
+ ptr = _aligned_malloc(size*sizeof(ffm_float), kALIGNByte);
251
+ if(ptr == nullptr)
252
+ throw bad_alloc();
253
+ #else
254
+ int status = posix_memalign(&ptr, kALIGNByte, size*sizeof(ffm_float));
255
+ if(status != 0)
256
+ throw bad_alloc();
257
+ #endif
258
+
259
+ #endif
260
+
261
+ return (ffm_float*)ptr;
262
+ }
263
+
264
// Allocate and randomly initialize a model for n features, m fields and
// param.k latent factors.
//
// W layout: for each feature j and field f there are k_aligned weight
// cells interleaved with k_aligned AdaGrad accumulator cells — each
// kALIGN-sized weight group is immediately followed by its kALIGN
// accumulators, which is why the inner loop writes w[0] and w[kALIGN]
// and then skips an extra kALIGN afterwards.
ffm_model init_model(ffm_int n, ffm_int m, ffm_parameter param)
{
    ffm_model model;
    model.n = n;
    model.k = param.k;
    model.m = m;
    model.W = nullptr;
    model.normalization = param.normalization;

    ffm_int k_aligned = get_k_aligned(model.k);

    // One weight + one accumulator per (feature, field, aligned factor).
    model.W = malloc_aligned_float((ffm_long)n*m*k_aligned*2);

    ffm_float coef = 1.0f / sqrt(model.k);
    ffm_float *w = model.W;

    // NOTE(review): the engine is default-seeded, so initialization is
    // identical on every run — confirm deterministic init is intended.
    default_random_engine generator;
    uniform_real_distribution<ffm_float> distribution(0.0, 1.0);

    for(ffm_int j = 0; j < model.n; j++) {
        for(ffm_int f = 0; f < model.m; f++) {
            for(ffm_int d = 0; d < k_aligned;) {
                for(ffm_int s = 0; s < kALIGN; s++, w++, d++) {
                    // Weights in [0, coef); alignment padding beyond the
                    // true k is zeroed so it never affects dot products.
                    // Each accumulator (kALIGN floats ahead) starts at 1.
                    w[0] = (d < model.k)? coef * distribution(generator) : 0.0;
                    w[kALIGN] = 1;
                }
                w += kALIGN; // jump over the accumulator group just filled
            }
        }
    }

    return model;
}
297
+
298
+ struct disk_problem_meta {
299
+ ffm_int n = 0;
300
+ ffm_int m = 0;
301
+ ffm_int l = 0;
302
+ ffm_int num_blocks = 0;
303
+ ffm_long B_pos = 0;
304
+ uint64_t hash1;
305
+ uint64_t hash2;
306
+ };
307
+
308
// A converted problem streamed from disk one block at a time.
// Construction reads the header and block-offset table; load_block() then
// pulls one chunk's worth of instances into the Y/R/P/X buffers.
struct problem_on_disk {
    disk_problem_meta meta; // header read at construction time
    vector<ffm_float> Y;    // labels of the currently loaded block
    vector<ffm_float> R;    // per-instance normalization factors of the block
    vector<ffm_long> P;     // CSR row offsets into X, length l+1
    vector<ffm_node> X;     // CSR feature nodes of the loaded block
    vector<ffm_long> B;     // file offset of every block

    // Open the binary file and read the header plus block-offset table.
    // On a missing/bad file nothing is read, so meta keeps its defaults
    // (l == 0) and is_empty() reports true.
    problem_on_disk(string path) {
        f.open(path, ios::in | ios::binary);
        if(f.good()) {
            f.read(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
            f.seekg(meta.B_pos);
            B.resize(meta.num_blocks);
            f.read(reinterpret_cast<char*>(B.data()), sizeof(ffm_long) * meta.num_blocks);
        }
    }

    // Read block block_index into Y/R/P/X and return its instance count.
    int load_block(int block_index) {
        if(block_index >= meta.num_blocks)
            assert(false); // out-of-range block index is a programming error

        f.seekg(B[block_index]);

        // Block format mirrors txt2bin's write_chunk: l, Y, R, P, X.
        ffm_int l;
        f.read(reinterpret_cast<char*>(&l), sizeof(ffm_int));

        Y.resize(l);
        f.read(reinterpret_cast<char*>(Y.data()), sizeof(ffm_float) * l);

        R.resize(l);
        f.read(reinterpret_cast<char*>(R.data()), sizeof(ffm_float) * l);

        P.resize(l+1);
        f.read(reinterpret_cast<char*>(P.data()), sizeof(ffm_long) * (l+1));

        // P[l] is the block's non-zero count.
        X.resize(P[l]);
        f.read(reinterpret_cast<char*>(X.data()), sizeof(ffm_node) * P[l]);

        return l;
    }

    // True when the problem holds no instances (including unreadable files).
    bool is_empty() {
        return meta.l == 0;
    }

private:
    ifstream f; // kept open across load_block calls
};
357
+
358
+ uint64_t hashfile(string txt_path, bool one_block=false)
359
+ {
360
+ ifstream f(txt_path, ios::ate | ios::binary);
361
+ if(f.bad())
362
+ return 0;
363
+
364
+ ffm_long end = (ffm_long) f.tellg();
365
+ f.seekg(0, ios::beg);
366
+ assert(static_cast<int>(f.tellg()) == 0);
367
+
368
+ uint64_t magic = 90359;
369
+ for(ffm_long pos = 0; pos < end; ) {
370
+ ffm_long next_pos = min(pos + kCHUNK_SIZE, end);
371
+ ffm_long size = next_pos - pos;
372
+ vector<char> buffer(kCHUNK_SIZE);
373
+ f.read(buffer.data(), size);
374
+
375
+ ffm_int i = 0;
376
+ while(i < size - 8) {
377
+ uint64_t x = *reinterpret_cast<uint64_t*>(buffer.data() + i);
378
+ magic = ( (magic + x) * (magic + x + 1) >> 1) + x;
379
+ i += 8;
380
+ }
381
+ for(; i < size; i++) {
382
+ char x = buffer[i];
383
+ magic = ( (magic + x) * (magic + x + 1) >> 1) + x;
384
+ }
385
+
386
+ pos = next_pos;
387
+ if(one_block)
388
+ break;
389
+ }
390
+
391
+ return magic;
392
+ }
393
+
394
+ void txt2bin(string txt_path, string bin_path) {
395
+
396
+ FILE *f_txt = fopen(txt_path.c_str(), "r");
397
+ if(f_txt == nullptr)
398
+ throw;
399
+
400
+ ofstream f_bin(bin_path, ios::out | ios::binary);
401
+
402
+ vector<char> line(kMaxLineSize);
403
+
404
+ ffm_long p = 0;
405
+ disk_problem_meta meta;
406
+
407
+ vector<ffm_float> Y;
408
+ vector<ffm_float> R;
409
+ vector<ffm_long> P(1, 0);
410
+ vector<ffm_node> X;
411
+ vector<ffm_long> B;
412
+
413
+ auto write_chunk = [&] () {
414
+ B.push_back(f_bin.tellp());
415
+ ffm_int l = Y.size();
416
+ ffm_long nnz = P[l];
417
+ meta.l += l;
418
+
419
+ f_bin.write(reinterpret_cast<char*>(&l), sizeof(ffm_int));
420
+ f_bin.write(reinterpret_cast<char*>(Y.data()), sizeof(ffm_float) * l);
421
+ f_bin.write(reinterpret_cast<char*>(R.data()), sizeof(ffm_float) * l);
422
+ f_bin.write(reinterpret_cast<char*>(P.data()), sizeof(ffm_long) * (l+1));
423
+ f_bin.write(reinterpret_cast<char*>(X.data()), sizeof(ffm_node) * nnz);
424
+
425
+ Y.clear();
426
+ R.clear();
427
+ P.assign(1, 0);
428
+ X.clear();
429
+ p = 0;
430
+ meta.num_blocks++;
431
+ };
432
+
433
+ f_bin.write(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
434
+
435
+ while(fgets(line.data(), kMaxLineSize, f_txt)) {
436
+ char *y_char = strtok(line.data(), " \t");
437
+
438
+ ffm_float y = (atoi(y_char)>0)? 1.0f : -1.0f;
439
+
440
+ ffm_float scale = 0;
441
+ for(; ; p++) {
442
+ char *field_char = strtok(nullptr,":");
443
+ char *idx_char = strtok(nullptr,":");
444
+ char *value_char = strtok(nullptr," \t");
445
+ if(field_char == nullptr || *field_char == '\n')
446
+ break;
447
+
448
+ ffm_node N;
449
+ N.f = atoi(field_char);
450
+ N.j = atoi(idx_char);
451
+ N.v = atof(value_char);
452
+
453
+ X.push_back(N);
454
+
455
+ meta.m = max(meta.m, N.f+1);
456
+ meta.n = max(meta.n, N.j+1);
457
+
458
+ scale += N.v*N.v;
459
+ }
460
+ scale = 1.0 / scale;
461
+
462
+ Y.push_back(y);
463
+ R.push_back(scale);
464
+ P.push_back(p);
465
+
466
+ if(X.size() > (size_t)kCHUNK_SIZE)
467
+ write_chunk();
468
+ }
469
+ write_chunk();
470
+ write_chunk(); // write a dummy empty chunk in order to know where the EOF is
471
+ assert(meta.num_blocks == (ffm_int)B.size());
472
+ meta.B_pos = f_bin.tellp();
473
+ f_bin.write(reinterpret_cast<char*>(B.data()), sizeof(ffm_long) * B.size());
474
+
475
+ fclose(f_txt);
476
+ meta.hash1 = hashfile(txt_path, true);
477
+ meta.hash2 = hashfile(txt_path, false);
478
+
479
+ f_bin.seekp(0, ios::beg);
480
+ f_bin.write(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
481
+ }
482
+
483
+ bool check_same_txt_bin(string txt_path, string bin_path) {
484
+ ifstream f_bin(bin_path, ios::binary | ios::ate);
485
+ if(f_bin.tellg() < (ffm_long)sizeof(disk_problem_meta))
486
+ return false;
487
+ disk_problem_meta meta;
488
+ f_bin.seekg(0, ios::beg);
489
+ f_bin.read(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
490
+ if(meta.hash1 != hashfile(txt_path, true))
491
+ return false;
492
+ if(meta.hash2 != hashfile(txt_path, false))
493
+ return false;
494
+
495
+ return true;
496
+ }
497
+
498
+ } // unnamed namespace
499
+
500
// Free the weight array with the deallocator matching how it was obtained
// in malloc_aligned_float(), then null the pointer so release() is
// idempotent and double-free safe.
void ffm_model::release() {
    if(W != nullptr) {
#ifndef USESSE
        free(W); // plain malloc
#else
#ifdef _WIN32
        _aligned_free(W); // _aligned_malloc memory requires _aligned_free
#else
        free(W); // posix_memalign memory is released with free()
#endif
#endif
        W = nullptr;
    }
}
514
+
515
+ void ffm_read_problem_to_disk(string txt_path, string bin_path) {
516
+
517
+ Timer timer;
518
+
519
+ cout << "First check if the text file has already been converted to binary format " << flush;
520
+ bool same_file = check_same_txt_bin(txt_path, bin_path);
521
+ cout << "(" << fixed << setprecision(1) << timer.toc() << " seconds)" << endl;
522
+
523
+ if(same_file) {
524
+ cout << "Binary file found. Skip converting text to binary" << endl;
525
+ } else {
526
+ cout << "Binary file NOT found. Convert text file to binary file " << flush;
527
+ txt2bin(txt_path, bin_path);
528
+ cout << "(" << fixed << setprecision(1) << timer.toc() << " seconds)" << endl;
529
+ }
530
+ }
531
+
532
// Train an FFM model from the binary problem at tr_path, optionally
// evaluating (and, with param.auto_stop, early-stopping on) the binary
// validation problem at va_path. Prints a per-iteration progress table
// and returns the trained model (caller owns model.W).
ffm_model ffm_train_on_disk(string tr_path, string va_path, ffm_parameter param) {

    problem_on_disk tr(tr_path);
    problem_on_disk va(va_path);

    ffm_model model = init_model(tr.meta.n, tr.meta.m, param);

    bool auto_stop = param.auto_stop && !va_path.empty();

    // Snapshot of the weights from the best validation iteration so far.
    // NOTE(review): prev_W is allocated (w_size floats) even when
    // auto_stop is false, and the assign() below is redundant with the
    // constructor — harmless, but wasteful for large models.
    ffm_long w_size = get_w_size(model);
    vector<ffm_float> prev_W(w_size, 0);
    if(auto_stop)
        prev_W.assign(w_size, 0);
    ffm_double best_va_loss = numeric_limits<ffm_double>::max();

    // Header of the progress table; column widths match the rows below.
    cout.width(4);
    cout << "iter";
    cout.width(13);
    cout << "tr_logloss";
    if(!va_path.empty())
    {
        cout.width(13);
        cout << "va_logloss";
    }
    cout.width(13);
    cout << "tr_time";
    cout << endl;

    Timer timer;

    // One pass over prob: accumulates the mean logloss, and additionally
    // performs SG updates when do_update is true. Blocks and instances
    // within a block are visited in shuffled order.
    auto one_epoch = [&] (problem_on_disk &prob, bool do_update) {

        ffm_double loss = 0;

        // NOTE(review): std::random_shuffle is deprecated in C++14 and
        // removed in C++17 — consider std::shuffle with a seeded engine.
        vector<ffm_int> outer_order(prob.meta.num_blocks);
        iota(outer_order.begin(), outer_order.end(), 0);
        random_shuffle(outer_order.begin(), outer_order.end());
        for(auto blk : outer_order) {
            ffm_int l = prob.load_block(blk);

            vector<ffm_int> inner_order(l);
            iota(inner_order.begin(), inner_order.end(), 0);
            random_shuffle(inner_order.begin(), inner_order.end());

#if defined USEOMP
#pragma omp parallel for schedule(static) reduction(+: loss)
#endif
            for(ffm_int ii = 0; ii < l; ii++) {
                ffm_int i = inner_order[ii];

                ffm_float y = prob.Y[i];

                // CSR slice [P[i], P[i+1]) holds instance i's nodes.
                ffm_node *begin = &prob.X[prob.P[i]];

                ffm_node *end = &prob.X[prob.P[i+1]];

                ffm_float r = param.normalization? prob.R[i] : 1;

                ffm_double t = wTx(begin, end, r, model);

                // Logistic loss: log(1 + exp(-y * t)).
                ffm_double expnyt = exp(-y*t);

                loss += log1p(expnyt);

                if(do_update) {

                    // d(loss)/d(t), passed to wTx as the gradient scale.
                    ffm_float kappa = -y*expnyt/(1+expnyt);

                    wTx(begin, end, r, model, kappa, param.eta, param.lambda, true);
                }
            }
        }

        return loss / prob.meta.l;
    };

    for(ffm_int iter = 1; iter <= param.nr_iters; iter++) {
        timer.tic();
        ffm_double tr_loss = one_epoch(tr, true);
        timer.toc();

        cout.width(4);
        cout << iter;
        cout.width(13);
        cout << fixed << setprecision(5) << tr_loss;

        if(!va.is_empty()) {
            ffm_double va_loss = one_epoch(va, false);

            cout.width(13);
            cout << fixed << setprecision(5) << va_loss;

            if(auto_stop) {
                if(va_loss > best_va_loss) {
                    // Validation loss worsened: restore the previous
                    // iteration's weights and stop.
                    memcpy(model.W, prev_W.data(), w_size*sizeof(ffm_float));
                    cout << endl << "Auto-stop. Use model at " << iter-1 << "th iteration." << endl;
                    break;
                } else {
                    memcpy(prev_W.data(), model.W, w_size*sizeof(ffm_float));
                    best_va_loss = va_loss;
                }
            }
        }
        cout.width(13);
        cout << fixed << setprecision(1) << timer.get() << endl;
    }

    return model;
}
641
+
642
+ void ffm_save_model(ffm_model &model, string path) {
643
+ ofstream f_out(path, ios::out | ios::binary);
644
+ f_out.write(reinterpret_cast<char*>(&model.n), sizeof(ffm_int));
645
+ f_out.write(reinterpret_cast<char*>(&model.m), sizeof(ffm_int));
646
+ f_out.write(reinterpret_cast<char*>(&model.k), sizeof(ffm_int));
647
+ f_out.write(reinterpret_cast<char*>(&model.normalization), sizeof(bool));
648
+
649
+ ffm_long w_size = get_w_size(model);
650
+ // f_out.write(reinterpret_cast<char*>(model.W), sizeof(ffm_float) * w_size);
651
+ // Need to write chunk by chunk because some compiler use int32 and will overflow when w_size * 4 > MAX_INT
652
+
653
+ for(ffm_long offset = 0; offset < w_size; ) {
654
+ ffm_long next_offset = min(w_size, offset + (ffm_long) sizeof(ffm_float) * kCHUNK_SIZE);
655
+ ffm_long size = next_offset - offset;
656
+ f_out.write(reinterpret_cast<char*>(model.W+offset), sizeof(ffm_float) * size);
657
+ offset = next_offset;
658
+ }
659
+ }
660
+
661
+ ffm_model ffm_load_model(string path) {
662
+ ifstream f_in(path, ios::in | ios::binary);
663
+
664
+ ffm_model model;
665
+ f_in.read(reinterpret_cast<char*>(&model.n), sizeof(ffm_int));
666
+ f_in.read(reinterpret_cast<char*>(&model.m), sizeof(ffm_int));
667
+ f_in.read(reinterpret_cast<char*>(&model.k), sizeof(ffm_int));
668
+ f_in.read(reinterpret_cast<char*>(&model.normalization), sizeof(bool));
669
+
670
+ ffm_long w_size = get_w_size(model);
671
+ model.W = malloc_aligned_float(w_size);
672
+ // f_in.read(reinterpret_cast<char*>(model.W), sizeof(ffm_float) * w_size);
673
+ // Need to write chunk by chunk because some compiler use int32 and will overflow when w_size * 4 > MAX_INT
674
+
675
+ for(ffm_long offset = 0; offset < w_size; ) {
676
+ ffm_long next_offset = min(w_size, offset + (ffm_long) sizeof(ffm_float) * kCHUNK_SIZE);
677
+ ffm_long size = next_offset - offset;
678
+ f_in.read(reinterpret_cast<char*>(model.W+offset), sizeof(ffm_float) * size);
679
+ offset = next_offset;
680
+ }
681
+
682
+ return model;
683
+ }
684
+
685
+ ffm_float ffm_predict(ffm_node *begin, ffm_node *end, ffm_model &model) {
686
+ ffm_float r = 1;
687
+ if(model.normalization) {
688
+ r = 0;
689
+ for(ffm_node *N = begin; N != end; N++)
690
+ r += N->v*N->v;
691
+ r = 1/r;
692
+ }
693
+
694
+ ffm_float t = wTx(begin, end, r, model);
695
+
696
+ return 1/(1+exp(-t));
697
+ }
698
+
699
+ } // namespace ffm