libffm 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +32 -0
- data/README.md +91 -0
- data/ext/libffm/ext.cpp +110 -0
- data/ext/libffm/extconf.rb +21 -0
- data/lib/libffm.rb +13 -0
- data/lib/libffm/model.rb +45 -0
- data/lib/libffm/version.rb +3 -0
- data/vendor/libffm/COPYRIGHT +31 -0
- data/vendor/libffm/Makefile +26 -0
- data/vendor/libffm/Makefile.win +26 -0
- data/vendor/libffm/README +294 -0
- data/vendor/libffm/ffm-predict.cpp +105 -0
- data/vendor/libffm/ffm-train.cpp +173 -0
- data/vendor/libffm/ffm.cpp +699 -0
- data/vendor/libffm/ffm.h +51 -0
- data/vendor/libffm/timer.cpp +31 -0
- data/vendor/libffm/timer.h +14 -0
- metadata +74 -0
@@ -0,0 +1,105 @@
|
|
1
|
+
#include <cstring>
|
2
|
+
#include <fstream>
|
3
|
+
#include <iostream>
|
4
|
+
#include <string>
|
5
|
+
#include <iomanip>
|
6
|
+
#include <memory>
|
7
|
+
#include <cmath>
|
8
|
+
#include <stdexcept>
|
9
|
+
#include <vector>
|
10
|
+
#include <cstdlib>
|
11
|
+
|
12
|
+
#include "ffm.h"
|
13
|
+
|
14
|
+
using namespace std;
|
15
|
+
using namespace ffm;
|
16
|
+
|
17
|
+
// Command-line arguments of ffm-predict, in the order they are given.
struct Option {
    std::string test_path;    // text file with the instances to score
    std::string model_path;   // model file handed to ffm_load_model
    std::string output_path;  // file that receives one prediction per line
};
|
20
|
+
|
21
|
+
// Returns the usage line reported when ffm-predict is started without arguments.
std::string predict_help() {
    static const char kUsage[] =
        "usage: ffm-predict test_file model_file output_file\n";
    return kUsage;
}
|
25
|
+
|
26
|
+
// Turns the ffm-predict command line into an Option.
//
// Exactly three positional arguments are expected (test file, model file,
// output file). Throws invalid_argument carrying the usage text when no
// argument is given, and "cannot parse argument" for any other count.
Option parse_option(int argc, char **argv) {
    if(argc == 1)
        throw invalid_argument(predict_help());
    if(argc != 4)
        throw invalid_argument("cannot parse argument");

    Option parsed;
    parsed.test_path = argv[1];
    parsed.model_path = argv[2];
    parsed.output_path = argv[3];
    return parsed;
}
|
45
|
+
|
46
|
+
// Scores every instance of `test_path` with the model stored at `model_path`,
// writes one predicted probability per line to `output_path`, and prints the
// average log loss to stdout.
//
// Each input line has the form "<label> <field>:<index>:<value> ...". A label
// whose integer value is greater than zero counts as positive, anything else
// as negative.
//
// Throws runtime_error when the test file or the output file cannot be opened,
// or when a feature token is missing its index or its value.
void predict(string test_path, string model_path, string output_path) {
    int const kMaxLineSize = 1000000;

    // Hold the C stream in a unique_ptr so it is closed on every exit path.
    unique_ptr<FILE, int (*)(FILE *)> f_in(fopen(test_path.c_str(), "r"), fclose);
    if(!f_in)
        throw runtime_error("cannot open " + test_path);

    ofstream f_out(output_path);
    if(!f_out)
        throw runtime_error("cannot open " + output_path);

    // The line buffer is about 1 MB; keep it on the heap instead of the stack.
    vector<char> line(kMaxLineSize);

    ffm_model model = ffm_load_model(model_path);

    ffm_double loss = 0;
    vector<ffm_node> x;
    ffm_int i = 0;

    for(; fgets(line.data(), kMaxLineSize, f_in.get()) != nullptr; i++) {
        x.clear();
        char *y_char = strtok(line.data(), " \t");
        // strtok returns nullptr for a line made only of delimiters; such a
        // line is handled like a line whose label is not positive.
        ffm_float y = (y_char != nullptr && atoi(y_char)>0)? 1.0f : -1.0f;

        while(true) {
            char *field_char = strtok(nullptr,":");
            char *idx_char = strtok(nullptr,":");
            char *value_char = strtok(nullptr," \t");
            if(field_char == nullptr || *field_char == '\n')
                break;
            // A token such as "3" or "3:7" has no index/value part.
            if(idx_char == nullptr || value_char == nullptr)
                throw runtime_error("malformed feature in " + test_path +
                                    " at line " + to_string(i + 1));

            ffm_node N;
            N.f = atoi(field_char);
            N.j = atoi(idx_char);
            N.v = atof(value_char);

            x.push_back(N);
        }

        ffm_float y_bar = ffm_predict(x.data(), x.data()+x.size(), model);

        loss -= y==1? log(y_bar) : log(1-y_bar);

        f_out << y_bar << "\n";
    }

    // Average over the instances read; skip the division for an empty file so
    // the reported value is 0 rather than NaN.
    if(i > 0)
        loss /= i;

    cout << "logloss = " << fixed << setprecision(5) << loss << endl;
}
|
92
|
+
|
93
|
+
int main(int argc, char **argv) {
|
94
|
+
Option option;
|
95
|
+
try {
|
96
|
+
option = parse_option(argc, argv);
|
97
|
+
} catch(invalid_argument const &e) {
|
98
|
+
cout << e.what() << endl;
|
99
|
+
return 1;
|
100
|
+
}
|
101
|
+
|
102
|
+
predict(option.test_path, option.model_path, option.output_path);
|
103
|
+
|
104
|
+
return 0;
|
105
|
+
}
|
@@ -0,0 +1,173 @@
|
|
1
|
+
#pragma GCC diagnostic ignored "-Wunused-result"
|
2
|
+
#include <algorithm>
|
3
|
+
#include <cstring>
|
4
|
+
#include <iostream>
|
5
|
+
#include <stdexcept>
|
6
|
+
#include <string>
|
7
|
+
#include <vector>
|
8
|
+
#include <cstdlib>
|
9
|
+
|
10
|
+
#include "ffm.h"
|
11
|
+
|
12
|
+
#if defined USEOMP
|
13
|
+
#include <omp.h>
|
14
|
+
#endif
|
15
|
+
|
16
|
+
using namespace std;
|
17
|
+
using namespace ffm;
|
18
|
+
|
19
|
+
// Builds the usage text of ffm-train: synopsis followed by one line per option.
std::string train_help() {
    std::string text;
    text += "usage: ffm-train [options] training_set_file [model_file]\n";
    text += "\n";
    text += "options:\n";
    text += "-l <lambda>: set regularization parameter (default 0.00002)\n";
    text += "-k <factor>: set number of latent factors (default 4)\n";
    text += "-t <iteration>: set number of iterations (default 15)\n";
    text += "-r <eta>: set learning rate (default 0.2)\n";
    text += "-s <nr_threads>: set number of threads (default 1)\n";
    text += "-p <path>: set path to the validation set\n";
    text += "--quiet: quiet mode (no output)\n";
    text += "--no-norm: disable instance-wise normalization\n";
    text += "--auto-stop: stop at the iteration that achieves the best validation loss (must be used with -p)\n";
    return text;
}
|
34
|
+
|
35
|
+
struct Option {
|
36
|
+
string tr_path;
|
37
|
+
string va_path;
|
38
|
+
string model_path;
|
39
|
+
ffm_parameter param;
|
40
|
+
bool quiet = false;
|
41
|
+
ffm_int nr_threads = 1;
|
42
|
+
};
|
43
|
+
|
44
|
+
// Returns the part of `path` that follows its last '/'.
//
// A path without '/' is returned unchanged; a path ending in '/' (and the
// empty path) yields an empty string. Only '/' is treated as a separator.
std::string basename(std::string path) {
    // rfind works on the string itself, so the empty path needs no special
    // handling and no iterator is dereferenced.
    const std::string::size_type last_slash = path.rfind('/');
    if(last_slash == std::string::npos)
        return path;
    return path.substr(last_slash + 1);
}
|
52
|
+
|
53
|
+
// Turns the ffm-train command line into an Option.
//
// Options are read from the left until the first argument that is not a known
// flag. That argument is the training set; one further argument, if present,
// is the model path. Without it the model path defaults to the base name of
// the training set plus ".model".
//
// Throws invalid_argument when no argument is given (the message is the usage
// text), when a flag lacks its value, when a value is out of range, or when
// the number of positional arguments is not one or two.
Option parse_option(int argc, char **argv) {
    vector<string> args(argv, argv + argc);

    if(argc == 1)
        throw invalid_argument(train_help());

    Option opt;
    ffm_int pos = 1;

    // Moves `pos` onto the value that follows the current flag and returns it;
    // throws with `missing_msg` when the flag is the last argument.
    auto take_value = [&](char const *missing_msg) -> string const & {
        if(pos == argc-1)
            throw invalid_argument(missing_msg);
        return args[++pos];
    };

    for(; pos < argc; pos++) {
        string const &arg = args[pos];

        if(arg == "-t") {
            opt.param.nr_iters = atoi(take_value("need to specify number of iterations after -t").c_str());
            if(opt.param.nr_iters <= 0)
                throw invalid_argument("number of iterations should be greater than zero");
        } else if(arg == "-k") {
            opt.param.k = atoi(take_value("need to specify number of factors after -k").c_str());
            if(opt.param.k <= 0)
                throw invalid_argument("number of factors should be greater than zero");
        } else if(arg == "-r") {
            opt.param.eta = atof(take_value("need to specify eta after -r").c_str());
            if(opt.param.eta <= 0)
                throw invalid_argument("learning rate should be greater than zero");
        } else if(arg == "-l") {
            opt.param.lambda = atof(take_value("need to specify lambda after -l").c_str());
            if(opt.param.lambda < 0)
                throw invalid_argument("regularization cost should not be smaller than zero");
        } else if(arg == "-s") {
            opt.nr_threads = atoi(take_value("need to specify number of threads after -s").c_str());
            if(opt.nr_threads <= 0)
                throw invalid_argument("number of threads should be greater than zero");
        } else if(arg == "-p") {
            opt.va_path = take_value("need to specify path after -p");
        } else if(arg == "--no-norm") {
            opt.param.normalization = false;
        } else if(arg == "--quiet") {
            opt.quiet = true;
        } else if(arg == "--auto-stop") {
            opt.param.auto_stop = true;
        } else {
            // First argument that is not a flag: the positional part starts here.
            break;
        }
    }

    // One or two positional arguments must remain.
    ffm_int const remaining = argc - pos;
    if(remaining != 1 && remaining != 2)
        throw invalid_argument("cannot parse command\n");

    opt.tr_path = args[pos];
    if(remaining == 2)
        opt.model_path = string(args[pos+1]);
    else
        opt.model_path = basename(opt.tr_path) + ".model";

    return opt;
}
|
133
|
+
|
134
|
+
// Converts the training set (and the validation set, when one is given) to
// binary files named after their base names plus ".bin", trains a model on
// them and saves it to opt.model_path. Always returns 0.
int train_on_disk(Option opt) {
    bool const has_validation = !opt.va_path.empty();

    string const tr_bin_path = basename(opt.tr_path) + ".bin";
    string va_bin_path;   // stays empty when there is no validation set
    if(has_validation)
        va_bin_path = basename(opt.va_path) + ".bin";

    ffm_read_problem_to_disk(opt.tr_path, tr_bin_path);
    if(has_validation)
        ffm_read_problem_to_disk(opt.va_path, va_bin_path);

    ffm_model model = ffm_train_on_disk(tr_bin_path.c_str(), va_bin_path.c_str(), opt.param);
    ffm_save_model(model, opt.model_path);

    return 0;
}
|
148
|
+
|
149
|
+
int main(int argc, char **argv) {
|
150
|
+
Option opt;
|
151
|
+
try {
|
152
|
+
opt = parse_option(argc, argv);
|
153
|
+
} catch(invalid_argument &e) {
|
154
|
+
cout << e.what() << endl;
|
155
|
+
return 1;
|
156
|
+
}
|
157
|
+
|
158
|
+
if(opt.quiet)
|
159
|
+
cout.setstate(ios_base::badbit);
|
160
|
+
|
161
|
+
if(opt.param.auto_stop && opt.va_path.empty()) {
|
162
|
+
cout << "To use auto-stop, you need to assign a validation set" << endl;
|
163
|
+
return 1;
|
164
|
+
}
|
165
|
+
|
166
|
+
#if defined USEOMP
|
167
|
+
omp_set_num_threads(opt.nr_threads);
|
168
|
+
#endif
|
169
|
+
|
170
|
+
train_on_disk(opt);
|
171
|
+
|
172
|
+
return 0;
|
173
|
+
}
|
@@ -0,0 +1,699 @@
|
|
1
|
+
/*
|
2
|
+
The following table gives the meaning of some variables used in this code:
|
3
|
+
|
4
|
+
W: The pointer to the beginning of the model
|
5
|
+
w: Dynamic pointer to access values in the model
|
6
|
+
m: Number of fields
|
7
|
+
k: Number of latent factors
|
8
|
+
n: Number of features
|
9
|
+
l: Number of data points
|
10
|
+
f: Field index (0 to m-1)
|
11
|
+
d: Latent factor index (0 to k-1)
|
12
|
+
j: Feature index (0 to n-1)
|
13
|
+
i: Data point index (0 to l-1)
|
14
|
+
nnz: Number of non-zero elements
|
15
|
+
X, P: Used to store the problem in a compressed sparse row (CSR) format. len(X) = nnz, len(P) = l + 1
|
16
|
+
Y: The label. len(Y) = l
|
17
|
+
R: Precomputed scaling factor used to normalize each instance to unit 2-norm. len(R) = l
|
18
|
+
v: Value of each element in the problem
|
19
|
+
*/
|
20
|
+
|
21
|
+
#pragma GCC diagnostic ignored "-Wunused-result"
|
22
|
+
#include <algorithm>
|
23
|
+
#include <cmath>
|
24
|
+
#include <iostream>
|
25
|
+
#include <iomanip>
|
26
|
+
#include <fstream>
|
27
|
+
#include <new>
|
28
|
+
#include <memory>
|
29
|
+
#include <random>
|
30
|
+
#include <stdexcept>
|
31
|
+
#include <string>
|
32
|
+
#include <cstring>
|
33
|
+
#include <vector>
|
34
|
+
#include <cassert>
|
35
|
+
#include <numeric>
|
36
|
+
|
37
|
+
#if defined USESSE
|
38
|
+
#include <pmmintrin.h>
|
39
|
+
#endif
|
40
|
+
|
41
|
+
#if defined USEOMP
|
42
|
+
#include <omp.h>
|
43
|
+
#endif
|
44
|
+
|
45
|
+
#include "ffm.h"
|
46
|
+
#include "timer.h"
|
47
|
+
|
48
|
+
namespace ffm {
|
49
|
+
|
50
|
+
namespace {
|
51
|
+
|
52
|
+
using namespace std;
|
53
|
+
|
54
|
+
#if defined USESSE
|
55
|
+
ffm_int const kALIGNByte = 16;
|
56
|
+
#else
|
57
|
+
ffm_int const kALIGNByte = 4;
|
58
|
+
#endif
|
59
|
+
|
60
|
+
ffm_int const kALIGN = kALIGNByte/sizeof(ffm_float);
|
61
|
+
ffm_int const kCHUNK_SIZE = 10000000;
|
62
|
+
ffm_int const kMaxLineSize = 100000;
|
63
|
+
|
64
|
+
// Rounds the number of latent factors `k` up to the next multiple of kALIGN.
//
// Pure integer arithmetic: the result is exact for every non-negative k,
// whereas a float round trip stops being exact once k exceeds 2^24.
inline ffm_int get_k_aligned(ffm_int k) {
    return (k + kALIGN - 1) / kALIGN * kALIGN;
}
|
67
|
+
|
68
|
+
// Number of ffm_float values stored in model.W: for every (feature, field)
// pair there are k_aligned weights plus k_aligned accumulated squared
// gradients.
ffm_long get_w_size(ffm_model &model) {
    ffm_long const floats_per_pair = (ffm_long) get_k_aligned(model.k) * 2;
    return (ffm_long) model.n * model.m * floats_per_pair;
}
|
72
|
+
|
73
|
+
#if defined USESSE
|
74
|
+
// SSE implementation of the pairwise FFM term over the nodes [begin, end).
//
// For every pair of nodes (N1, N2) the weights of feature j1 for field f2 and
// of feature j2 for field f1 are combined:
//   - do_update == false: returns the sum over all pairs of
//     v1 * v2 * r * <w1, w2>;
//   - do_update == true: for each weight computes g = lambda * w + kappa * v *
//     (other weight), adds g^2 to the accumulator stored kALIGN floats after
//     the weight, subtracts eta * rsqrt(accumulator) * g from the weight, and
//     returns 0.
//
// Nodes whose feature or field index lies outside [0, model.n) or
// [0, model.m) are skipped, so malformed negative indices cannot address
// memory in front of model.W.
inline ffm_float wTx(
    ffm_node *begin,
    ffm_node *end,
    ffm_float r,
    ffm_model &model,
    ffm_float kappa=0,
    ffm_float eta=0,
    ffm_float lambda=0,
    bool do_update=false) {

    // align0: floats per (feature, field) pair; align1: floats per feature.
    ffm_int align0 = 2 * get_k_aligned(model.k);
    ffm_int align1 = model.m * align0;

    __m128 XMMkappa = _mm_set1_ps(kappa);
    __m128 XMMeta = _mm_set1_ps(eta);
    __m128 XMMlambda = _mm_set1_ps(lambda);

    __m128 XMMt = _mm_setzero_ps();

    for(ffm_node *N1 = begin; N1 != end; N1++)
    {
        ffm_int j1 = N1->j;
        ffm_int f1 = N1->f;
        ffm_float v1 = N1->v;
        if(j1 < 0 || j1 >= model.n || f1 < 0 || f1 >= model.m)
            continue;

        for(ffm_node *N2 = N1+1; N2 != end; N2++)
        {
            ffm_int j2 = N2->j;
            ffm_int f2 = N2->f;
            ffm_float v2 = N2->v;
            if(j2 < 0 || j2 >= model.n || f2 < 0 || f2 >= model.m)
                continue;

            ffm_float *w1_base = model.W + (ffm_long)j1*align1 + f2*align0;
            ffm_float *w2_base = model.W + (ffm_long)j2*align1 + f1*align0;

            __m128 XMMv = _mm_set1_ps(v1*v2*r);

            if(do_update)
            {
                __m128 XMMkappav = _mm_mul_ps(XMMkappa, XMMv);

                // Each step covers kALIGN weights followed by their kALIGN
                // squared-gradient accumulators.
                for(ffm_int d = 0; d < align0; d += kALIGN * 2)
                {
                    ffm_float *w1 = w1_base + d;
                    ffm_float *w2 = w2_base + d;

                    ffm_float *wg1 = w1 + kALIGN;
                    ffm_float *wg2 = w2 + kALIGN;

                    __m128 XMMw1 = _mm_load_ps(w1);
                    __m128 XMMw2 = _mm_load_ps(w2);

                    __m128 XMMwg1 = _mm_load_ps(wg1);
                    __m128 XMMwg2 = _mm_load_ps(wg2);

                    __m128 XMMg1 = _mm_add_ps(
                                   _mm_mul_ps(XMMlambda, XMMw1),
                                   _mm_mul_ps(XMMkappav, XMMw2));
                    __m128 XMMg2 = _mm_add_ps(
                                   _mm_mul_ps(XMMlambda, XMMw2),
                                   _mm_mul_ps(XMMkappav, XMMw1));

                    XMMwg1 = _mm_add_ps(XMMwg1, _mm_mul_ps(XMMg1, XMMg1));
                    XMMwg2 = _mm_add_ps(XMMwg2, _mm_mul_ps(XMMg2, XMMg2));

                    XMMw1 = _mm_sub_ps(XMMw1, _mm_mul_ps(XMMeta,
                            _mm_mul_ps(_mm_rsqrt_ps(XMMwg1), XMMg1)));
                    XMMw2 = _mm_sub_ps(XMMw2, _mm_mul_ps(XMMeta,
                            _mm_mul_ps(_mm_rsqrt_ps(XMMwg2), XMMg2)));

                    _mm_store_ps(w1, XMMw1);
                    _mm_store_ps(w2, XMMw2);

                    _mm_store_ps(wg1, XMMwg1);
                    _mm_store_ps(wg2, XMMwg2);
                }
            }
            else
            {
                for(ffm_int d = 0; d < align0; d += kALIGN * 2)
                {
                    __m128 XMMw1 = _mm_load_ps(w1_base+d);
                    __m128 XMMw2 = _mm_load_ps(w2_base+d);

                    XMMt = _mm_add_ps(XMMt,
                           _mm_mul_ps(_mm_mul_ps(XMMw1, XMMw2), XMMv));
                }
            }
        }
    }

    if(do_update)
        return 0;

    // Two horizontal adds leave the sum of the four lanes in lane 0.
    XMMt = _mm_hadd_ps(XMMt, XMMt);
    XMMt = _mm_hadd_ps(XMMt, XMMt);
    ffm_float t;
    _mm_store_ss(&t, XMMt);

    return t;
}
|
178
|
+
|
179
|
+
#else
|
180
|
+
|
181
|
+
// Scalar implementation of the pairwise FFM term over the nodes [begin, end).
//
// For every pair of nodes (N1, N2) the weights of feature j1 for field f2 and
// of feature j2 for field f1 are combined:
//   - do_update == false: returns the sum over all pairs of
//     v1 * v2 * r * <w1, w2>;
//   - do_update == true: for each weight computes g = lambda * w + kappa * v *
//     (other weight), adds g^2 to the accumulator stored kALIGN floats after
//     the weight and subtracts eta / sqrt(accumulator) * g from the weight;
//     the returned value is then 0 because nothing is accumulated into it.
//
// Nodes whose feature or field index lies outside [0, model.n) or
// [0, model.m) are skipped, so malformed negative indices cannot address
// memory in front of model.W.
inline ffm_float wTx(
    ffm_node *begin,
    ffm_node *end,
    ffm_float r,
    ffm_model &model,
    ffm_float kappa=0,
    ffm_float eta=0,
    ffm_float lambda=0,
    bool do_update=false) {

    // align0: floats per (feature, field) pair; align1: floats per feature.
    ffm_int align0 = 2 * get_k_aligned(model.k);
    ffm_int align1 = model.m * align0;

    ffm_float t = 0;
    for(ffm_node *N1 = begin; N1 != end; N1++) {
        ffm_int j1 = N1->j;
        ffm_int f1 = N1->f;
        ffm_float v1 = N1->v;
        if(j1 < 0 || j1 >= model.n || f1 < 0 || f1 >= model.m)
            continue;

        for(ffm_node *N2 = N1+1; N2 != end; N2++) {
            ffm_int j2 = N2->j;
            ffm_int f2 = N2->f;
            ffm_float v2 = N2->v;
            if(j2 < 0 || j2 >= model.n || f2 < 0 || f2 >= model.m)
                continue;

            ffm_float *w1 = model.W + (ffm_long)j1*align1 + f2*align0;
            ffm_float *w2 = model.W + (ffm_long)j2*align1 + f1*align0;

            ffm_float v = v1 * v2 * r;

            if(do_update) {
                // The accumulator of each weight sits kALIGN floats after it.
                ffm_float *wg1 = w1 + kALIGN;
                ffm_float *wg2 = w2 + kALIGN;
                for(ffm_int d = 0; d < align0; d += kALIGN * 2)
                {
                    ffm_float g1 = lambda * w1[d] + kappa * w2[d] * v;
                    ffm_float g2 = lambda * w2[d] + kappa * w1[d] * v;

                    wg1[d] += g1 * g1;
                    wg2[d] += g2 * g2;

                    w1[d] -= eta / sqrt(wg1[d]) * g1;
                    w2[d] -= eta / sqrt(wg2[d]) * g2;
                }
            } else {
                for(ffm_int d = 0; d < align0; d += kALIGN * 2)
                    t += w1[d] * w2[d] * v;
            }
        }
    }

    return t;
}
|
237
|
+
#endif
|
238
|
+
|
239
|
+
// Allocates storage for `size` ffm_float values.
//
// With USESSE the block is aligned to kALIGNByte bytes (via _aligned_malloc on
// Windows, posix_memalign elsewhere); without it plain malloc is used.
// Throws bad_alloc when the allocation fails, in every configuration.
ffm_float* malloc_aligned_float(ffm_long size)
{
    void *ptr = nullptr;

#ifndef USESSE

    ptr = malloc(size * sizeof(ffm_float));
    // malloc may legitimately return nullptr for a zero-sized request, so only
    // a failed non-empty allocation is an error.
    if(ptr == nullptr && size > 0)
        throw bad_alloc();

#else

    #ifdef _WIN32
        ptr = _aligned_malloc(size*sizeof(ffm_float), kALIGNByte);
        if(ptr == nullptr)
            throw bad_alloc();
    #else
        int status = posix_memalign(&ptr, kALIGNByte, size*sizeof(ffm_float));
        if(status != 0)
            throw bad_alloc();
    #endif

#endif

    return (ffm_float*)ptr;
}
|
263
|
+
|
264
|
+
// Creates a model for `n` features and `m` fields with the latent factor
// count and normalization flag taken from `param`, and fills its weights.
//
// Every (feature, field) pair owns k_aligned weights stored in groups of
// kALIGN, each group directly followed by kALIGN squared-gradient
// accumulators. The first k weights are drawn uniformly from [0, 1) and
// scaled by 1/sqrt(k); the padding weights are 0; every accumulator starts
// at 1. The random engine is default-constructed.
ffm_model init_model(ffm_int n, ffm_int m, ffm_parameter param)
{
    ffm_model model;
    model.n = n;
    model.k = param.k;
    model.m = m;
    model.W = nullptr;
    model.normalization = param.normalization;

    ffm_int const k_aligned = get_k_aligned(model.k);
    model.W = malloc_aligned_float((ffm_long)n*m*k_aligned*2);

    ffm_float const coef = 1.0f / sqrt(model.k);

    default_random_engine generator;
    uniform_real_distribution<ffm_float> distribution(0.0, 1.0);

    ffm_float *cursor = model.W;
    for(ffm_int j = 0; j < model.n; j++) {
        for(ffm_int f = 0; f < model.m; f++) {
            for(ffm_int group = 0; group < k_aligned; group += kALIGN) {
                ffm_float *weights = cursor;
                ffm_float *grad_sums = cursor + kALIGN;
                for(ffm_int s = 0; s < kALIGN; s++) {
                    ffm_int const d = group + s;
                    weights[s] = (d < model.k)? coef * distribution(generator) : 0.0;
                    grad_sums[s] = 1;
                }
                // Skip the weights just written and their accumulators.
                cursor += 2 * kALIGN;
            }
        }
    }

    return model;
}
|
297
|
+
|
298
|
+
struct disk_problem_meta {
|
299
|
+
ffm_int n = 0;
|
300
|
+
ffm_int m = 0;
|
301
|
+
ffm_int l = 0;
|
302
|
+
ffm_int num_blocks = 0;
|
303
|
+
ffm_long B_pos = 0;
|
304
|
+
uint64_t hash1;
|
305
|
+
uint64_t hash2;
|
306
|
+
};
|
307
|
+
|
308
|
+
struct problem_on_disk {
|
309
|
+
disk_problem_meta meta;
|
310
|
+
vector<ffm_float> Y;
|
311
|
+
vector<ffm_float> R;
|
312
|
+
vector<ffm_long> P;
|
313
|
+
vector<ffm_node> X;
|
314
|
+
vector<ffm_long> B;
|
315
|
+
|
316
|
+
problem_on_disk(string path) {
|
317
|
+
f.open(path, ios::in | ios::binary);
|
318
|
+
if(f.good()) {
|
319
|
+
f.read(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
|
320
|
+
f.seekg(meta.B_pos);
|
321
|
+
B.resize(meta.num_blocks);
|
322
|
+
f.read(reinterpret_cast<char*>(B.data()), sizeof(ffm_long) * meta.num_blocks);
|
323
|
+
}
|
324
|
+
}
|
325
|
+
|
326
|
+
int load_block(int block_index) {
|
327
|
+
if(block_index >= meta.num_blocks)
|
328
|
+
assert(false);
|
329
|
+
|
330
|
+
f.seekg(B[block_index]);
|
331
|
+
|
332
|
+
ffm_int l;
|
333
|
+
f.read(reinterpret_cast<char*>(&l), sizeof(ffm_int));
|
334
|
+
|
335
|
+
Y.resize(l);
|
336
|
+
f.read(reinterpret_cast<char*>(Y.data()), sizeof(ffm_float) * l);
|
337
|
+
|
338
|
+
R.resize(l);
|
339
|
+
f.read(reinterpret_cast<char*>(R.data()), sizeof(ffm_float) * l);
|
340
|
+
|
341
|
+
P.resize(l+1);
|
342
|
+
f.read(reinterpret_cast<char*>(P.data()), sizeof(ffm_long) * (l+1));
|
343
|
+
|
344
|
+
X.resize(P[l]);
|
345
|
+
f.read(reinterpret_cast<char*>(X.data()), sizeof(ffm_node) * P[l]);
|
346
|
+
|
347
|
+
return l;
|
348
|
+
}
|
349
|
+
|
350
|
+
bool is_empty() {
|
351
|
+
return meta.l == 0;
|
352
|
+
}
|
353
|
+
|
354
|
+
private:
|
355
|
+
ifstream f;
|
356
|
+
};
|
357
|
+
|
358
|
+
// Computes a 64-bit checksum of the file at `txt_path`.
//
// The file is consumed in chunks of kCHUNK_SIZE bytes. Within a chunk the data
// is folded into the checksum eight bytes at a time while more than eight
// bytes remain, then byte by byte. With `one_block` set only the first chunk
// is hashed. Returns 0 when the file cannot be opened.
uint64_t hashfile(string txt_path, bool one_block=false)
{
    ifstream f(txt_path, ios::ate | ios::binary);
    // operator! also reports a failed open; bad() alone does not.
    if(!f)
        return 0;

    ffm_long end = (ffm_long) f.tellg();
    f.seekg(0, ios::beg);
    assert(static_cast<int>(f.tellg()) == 0);

    uint64_t magic = 90359;
    // One buffer serves all chunks; only the first `size` bytes of it are read
    // back after each f.read.
    vector<char> buffer(kCHUNK_SIZE);
    for(ffm_long pos = 0; pos < end; ) {
        ffm_long next_pos = min(pos + kCHUNK_SIZE, end);
        ffm_long size = next_pos - pos;
        f.read(buffer.data(), size);

        ffm_int i = 0;
        while(i < size - 8) {
            // memcpy reads the word without type punning through a cast.
            uint64_t x;
            memcpy(&x, buffer.data() + i, sizeof(x));
            magic = ( (magic + x) * (magic + x + 1) >> 1) + x;
            i += 8;
        }
        for(; i < size; i++) {
            // Kept as plain char so the value is widened exactly as before.
            char x = buffer[i];
            magic = ( (magic + x) * (magic + x + 1) >> 1) + x;
        }

        pos = next_pos;
        if(one_block)
            break;
    }

    return magic;
}
|
393
|
+
|
394
|
+
// Converts the text problem at `txt_path` into the binary format read by
// problem_on_disk and writes it to `bin_path`.
//
// File layout: a disk_problem_meta header, then a sequence of blocks (instance
// count, labels, scaling factors, row offsets, nodes), then the table of block
// offsets. A block is flushed once it holds more than kCHUNK_SIZE nodes, and
// one empty block is appended at the end. The header is written first as a
// placeholder and rewritten once the totals and the hashes of the text file
// are known.
//
// Each text line has the form "<label> <field>:<index>:<value> ...". A label
// whose integer value is greater than zero is stored as 1, anything else
// as -1.
//
// Throws runtime_error when either file cannot be opened or when a feature
// token is missing its index or its value.
void txt2bin(string txt_path, string bin_path) {

    // Hold the C stream in a unique_ptr so it is closed on every exit path.
    unique_ptr<FILE, int (*)(FILE *)> f_txt(fopen(txt_path.c_str(), "r"), fclose);
    if(!f_txt)
        throw runtime_error("cannot open " + txt_path);

    ofstream f_bin(bin_path, ios::out | ios::binary);
    if(!f_bin)
        throw runtime_error("cannot open " + bin_path + " for writing");

    vector<char> line(kMaxLineSize);

    ffm_long p = 0;
    disk_problem_meta meta;
    // The placeholder header written below must not contain indeterminate
    // hash values.
    meta.hash1 = 0;
    meta.hash2 = 0;

    vector<ffm_float> Y;
    vector<ffm_float> R;
    vector<ffm_long> P(1, 0);
    vector<ffm_node> X;
    vector<ffm_long> B;

    // Appends the instances collected so far as one block and resets the
    // per-block buffers.
    auto write_chunk = [&] () {
        B.push_back(f_bin.tellp());
        ffm_int l = Y.size();
        ffm_long nnz = P[l];
        meta.l += l;

        f_bin.write(reinterpret_cast<char*>(&l), sizeof(ffm_int));
        f_bin.write(reinterpret_cast<char*>(Y.data()), sizeof(ffm_float) * l);
        f_bin.write(reinterpret_cast<char*>(R.data()), sizeof(ffm_float) * l);
        f_bin.write(reinterpret_cast<char*>(P.data()), sizeof(ffm_long) * (l+1));
        f_bin.write(reinterpret_cast<char*>(X.data()), sizeof(ffm_node) * nnz);

        Y.clear();
        R.clear();
        P.assign(1, 0);
        X.clear();
        p = 0;
        meta.num_blocks++;
    };

    // Reserve room for the header; it is rewritten at the end.
    f_bin.write(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));

    ffm_long line_no = 0;
    while(fgets(line.data(), kMaxLineSize, f_txt.get())) {
        line_no++;
        char *y_char = strtok(line.data(), " \t");

        // strtok returns nullptr for a line made only of delimiters; such a
        // line is handled like a line whose label is not positive.
        ffm_float y = (y_char != nullptr && atoi(y_char)>0)? 1.0f : -1.0f;

        ffm_float scale = 0;
        for(; ; p++) {
            char *field_char = strtok(nullptr,":");
            char *idx_char = strtok(nullptr,":");
            char *value_char = strtok(nullptr," \t");
            if(field_char == nullptr || *field_char == '\n')
                break;
            // A token such as "3" or "3:7" has no index/value part.
            if(idx_char == nullptr || value_char == nullptr)
                throw runtime_error("malformed feature in " + txt_path +
                                    " at line " + to_string(line_no));

            ffm_node N;
            N.f = atoi(field_char);
            N.j = atoi(idx_char);
            N.v = atof(value_char);

            X.push_back(N);

            meta.m = max(meta.m, N.f+1);
            meta.n = max(meta.n, N.j+1);

            scale += N.v*N.v;
        }
        // Reciprocal of the squared 2-norm of the instance.
        scale = 1.0 / scale;

        Y.push_back(y);
        R.push_back(scale);
        P.push_back(p);

        if(X.size() > (size_t)kCHUNK_SIZE)
            write_chunk();
    }
    write_chunk();
    write_chunk(); // write a dummy empty chunk in order to know where the EOF is
    assert(meta.num_blocks == (ffm_int)B.size());
    meta.B_pos = f_bin.tellp();
    f_bin.write(reinterpret_cast<char*>(B.data()), sizeof(ffm_long) * B.size());

    // Close the text file before hashfile opens it again.
    f_txt.reset();
    meta.hash1 = hashfile(txt_path, true);
    meta.hash2 = hashfile(txt_path, false);

    f_bin.seekp(0, ios::beg);
    f_bin.write(reinterpret_cast<char*>(&meta), sizeof(disk_problem_meta));
}
|
482
|
+
|
483
|
+
// Tells whether `bin_path` holds a conversion of the current `txt_path`.
//
// Returns false when the binary file is smaller than its header (which is
// also the case when it does not exist) or when either hash stored in the
// header differs from the hash of the text file. The whole-file hash is only
// computed when the first-chunk hash already matches.
bool check_same_txt_bin(string txt_path, string bin_path) {
    ifstream f_bin(bin_path, ios::binary | ios::ate);
    if(f_bin.tellg() < (ffm_long)sizeof(disk_problem_meta))
        return false;

    disk_problem_meta stored;
    f_bin.seekg(0, ios::beg);
    f_bin.read(reinterpret_cast<char*>(&stored), sizeof(disk_problem_meta));

    return stored.hash1 == hashfile(txt_path, true)
        && stored.hash2 == hashfile(txt_path, false);
}
|
497
|
+
|
498
|
+
} // unnamed namespace
|
499
|
+
|
500
|
+
void ffm_model::release() {
|
501
|
+
if(W != nullptr) {
|
502
|
+
#ifndef USESSE
|
503
|
+
free(W);
|
504
|
+
#else
|
505
|
+
#ifdef _WIN32
|
506
|
+
_aligned_free(W);
|
507
|
+
#else
|
508
|
+
free(W);
|
509
|
+
#endif
|
510
|
+
#endif
|
511
|
+
W = nullptr;
|
512
|
+
}
|
513
|
+
}
|
514
|
+
|
515
|
+
// Makes sure `bin_path` contains the binary form of `txt_path`: the text file
// is converted unless the existing binary file already matches it. Progress
// and timings are printed to stdout.
void ffm_read_problem_to_disk(string txt_path, string bin_path) {

    Timer timer;

    cout << "First check if the text file has already been converted to binary format " << flush;
    bool const up_to_date = check_same_txt_bin(txt_path, bin_path);
    cout << "(" << fixed << setprecision(1) << timer.toc() << " seconds)" << endl;

    if(!up_to_date) {
        cout << "Binary file NOT found. Convert text file to binary file " << flush;
        txt2bin(txt_path, bin_path);
        cout << "(" << fixed << setprecision(1) << timer.toc() << " seconds)" << endl;
        return;
    }

    cout << "Binary file found. Skip converting text to binary" << endl;
}
|
531
|
+
|
532
|
+
// Trains a model on the binary problem at `tr_path` and returns it.
//
// `va_path` names an optional binary validation problem; pass an empty string
// when there is none. For each of param.nr_iters iterations the training loss,
// the validation loss (when validation data is present) and the elapsed
// training time are printed to stdout. With param.auto_stop and a validation
// set, training stops at the first iteration whose validation loss is worse
// than the best seen so far, and the weights of the previous iteration are
// restored.
ffm_model ffm_train_on_disk(string tr_path, string va_path, ffm_parameter param) {

    problem_on_disk tr(tr_path);
    problem_on_disk va(va_path);

    ffm_model model = init_model(tr.meta.n, tr.meta.m, param);

    bool auto_stop = param.auto_stop && !va_path.empty();

    ffm_long w_size = get_w_size(model);
    // The snapshot of the previous weights is only used by auto-stop, so the
    // second copy of the model is allocated only in that case.
    vector<ffm_float> prev_W;
    if(auto_stop)
        prev_W.assign(w_size, 0);
    ffm_double best_va_loss = numeric_limits<ffm_double>::max();

    cout.width(4);
    cout << "iter";
    cout.width(13);
    cout << "tr_logloss";
    if(!va_path.empty())
    {
        cout.width(13);
        cout << "va_logloss";
    }
    cout.width(13);
    cout << "tr_time";
    cout << endl;

    Timer timer;

    // Runs one pass over `prob`, visiting blocks and the instances inside each
    // block in shuffled order, and returns the summed logistic loss divided by
    // prob.meta.l. The weights are updated only when `do_update` is true.
    auto one_epoch = [&] (problem_on_disk &prob, bool do_update) {

        ffm_double loss = 0;

        vector<ffm_int> outer_order(prob.meta.num_blocks);
        iota(outer_order.begin(), outer_order.end(), 0);
        random_shuffle(outer_order.begin(), outer_order.end());
        for(auto blk : outer_order) {
            ffm_int l = prob.load_block(blk);

            vector<ffm_int> inner_order(l);
            iota(inner_order.begin(), inner_order.end(), 0);
            random_shuffle(inner_order.begin(), inner_order.end());

#if defined USEOMP
#pragma omp parallel for schedule(static) reduction(+: loss)
#endif
            for(ffm_int ii = 0; ii < l; ii++) {
                ffm_int i = inner_order[ii];

                ffm_float y = prob.Y[i];

                ffm_node *begin = &prob.X[prob.P[i]];

                ffm_node *end = &prob.X[prob.P[i+1]];

                ffm_float r = param.normalization? prob.R[i] : 1;

                ffm_double t = wTx(begin, end, r, model);

                ffm_double expnyt = exp(-y*t);

                loss += log1p(expnyt);

                if(do_update) {

                    // Derivative of the logistic loss with respect to t.
                    ffm_float kappa = -y*expnyt/(1+expnyt);

                    wTx(begin, end, r, model, kappa, param.eta, param.lambda, true);
                }
            }
        }

        return loss / prob.meta.l;
    };

    for(ffm_int iter = 1; iter <= param.nr_iters; iter++) {
        timer.tic();
        ffm_double tr_loss = one_epoch(tr, true);
        timer.toc();

        cout.width(4);
        cout << iter;
        cout.width(13);
        cout << fixed << setprecision(5) << tr_loss;

        if(!va.is_empty()) {
            ffm_double va_loss = one_epoch(va, false);

            cout.width(13);
            cout << fixed << setprecision(5) << va_loss;

            if(auto_stop) {
                if(va_loss > best_va_loss) {
                    // Validation got worse: roll back to the last snapshot.
                    memcpy(model.W, prev_W.data(), w_size*sizeof(ffm_float));
                    cout << endl << "Auto-stop. Use model at " << iter-1 << "th iteration." << endl;
                    break;
                } else {
                    memcpy(prev_W.data(), model.W, w_size*sizeof(ffm_float));
                    best_va_loss = va_loss;
                }
            }
        }
        cout.width(13);
        cout << fixed << setprecision(1) << timer.get() << endl;
    }

    return model;
}
|
641
|
+
|
642
|
+
// Writes `model` to `path` in binary form: n, m, k, the normalization flag,
// then all get_w_size(model) weights.
//
// Throws runtime_error when the file cannot be opened or when writing fails,
// so a model is never lost without notice.
void ffm_save_model(ffm_model &model, string path) {
    ofstream f_out(path, ios::out | ios::binary);
    if(!f_out)
        throw runtime_error("cannot open " + path + " for writing");

    f_out.write(reinterpret_cast<char*>(&model.n), sizeof(ffm_int));
    f_out.write(reinterpret_cast<char*>(&model.m), sizeof(ffm_int));
    f_out.write(reinterpret_cast<char*>(&model.k), sizeof(ffm_int));
    f_out.write(reinterpret_cast<char*>(&model.normalization), sizeof(bool));

    ffm_long w_size = get_w_size(model);
    // The weights are written chunk by chunk because some compilers use a
    // 32-bit size here, which overflows once w_size * 4 exceeds INT_MAX.

    for(ffm_long offset = 0; offset < w_size; ) {
        ffm_long next_offset = min(w_size, offset + (ffm_long) sizeof(ffm_float) * kCHUNK_SIZE);
        ffm_long size = next_offset - offset;
        f_out.write(reinterpret_cast<char*>(model.W+offset), sizeof(ffm_float) * size);
        offset = next_offset;
    }

    // Flush so that errors which only surface on the final write are seen.
    f_out.flush();
    if(!f_out)
        throw runtime_error("failed to write model to " + path);
}
|
660
|
+
|
661
|
+
// Reads a model written by ffm_save_model from `path`.
//
// Throws runtime_error when the file cannot be opened, when the header is
// incomplete or holds a negative dimension, or when the file ends before all
// weights have been read. Allocation failures surface as bad_alloc from
// malloc_aligned_float.
ffm_model ffm_load_model(string path) {
    ifstream f_in(path, ios::in | ios::binary);
    if(!f_in)
        throw runtime_error("cannot open model file " + path);

    ffm_model model;
    model.W = nullptr;
    f_in.read(reinterpret_cast<char*>(&model.n), sizeof(ffm_int));
    f_in.read(reinterpret_cast<char*>(&model.m), sizeof(ffm_int));
    f_in.read(reinterpret_cast<char*>(&model.k), sizeof(ffm_int));
    f_in.read(reinterpret_cast<char*>(&model.normalization), sizeof(bool));

    // The dimensions size the allocation below, so they must have been read
    // and must be sane before they are used.
    if(!f_in || model.n < 0 || model.m < 0 || model.k < 0)
        throw runtime_error("invalid model header in " + path);

    ffm_long w_size = get_w_size(model);
    model.W = malloc_aligned_float(w_size);
    // The weights are read chunk by chunk because some compilers use a 32-bit
    // size here, which overflows once w_size * 4 exceeds INT_MAX.

    for(ffm_long offset = 0; offset < w_size; ) {
        ffm_long next_offset = min(w_size, offset + (ffm_long) sizeof(ffm_float) * kCHUNK_SIZE);
        ffm_long size = next_offset - offset;
        f_in.read(reinterpret_cast<char*>(model.W+offset), sizeof(ffm_float) * size);
        offset = next_offset;
    }

    if(!f_in) {
        // Do not hand out a model whose weights are only partly filled in.
        model.release();
        throw runtime_error("truncated model file " + path);
    }

    return model;
}
|
684
|
+
|
685
|
+
// Returns the predicted probability for the instance made of the nodes
// [begin, end): the logistic function applied to the pairwise FFM term.
// When the model uses normalization the term is scaled by the reciprocal of
// the sum of squared node values.
ffm_float ffm_predict(ffm_node *begin, ffm_node *end, ffm_model &model) {
    ffm_float r = 1;
    if(model.normalization) {
        ffm_float squared_norm = 0;
        for(ffm_node *node = begin; node != end; ++node)
            squared_norm += node->v*node->v;
        r = 1/squared_norm;
    }

    ffm_float const t = wTx(begin, end, r, model);

    return 1/(1+exp(-t));
}
|
698
|
+
|
699
|
+
} // namespace ffm
|