word2vec-rb 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.travis.yml +6 -0
- data/CHANGELOG +16 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +38 -0
- data/README.md +55 -0
- data/Rakefile +12 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/data/minimal.bin +0 -0
- data/data/readme.md +1 -0
- data/ext/word2vec/common.c +117 -0
- data/ext/word2vec/common.h +29 -0
- data/ext/word2vec/extconf.rb +46 -0
- data/ext/word2vec/word2vec.c +93 -0
- data/lib/word2vec.rb +23 -0
- data/lib/word2vec/version.rb +3 -0
- data/word2vec-rb.gemspec +34 -0
- metadata +125 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 99c301d5325561032f1d8b71b39387708b957b4e656ed449b943f0debb3c015b
|
4
|
+
data.tar.gz: b4dfc40380ed5b7c505aef4c403b6234202732ffc89e6593d0d1f274846addae
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 40a8267dff46b2db00b8734f3a4c6bb2c245955047de3971253af6222b2a41ed1dfde25eb436707ba409a67bdb6e4e580f641277d34cfc5b9be8c6d649a8485d
|
7
|
+
data.tar.gz: 64306bf16b1e6b2f60fa7560e96e370e912d7d01498c6b464c90220bc0b534760ffc5002d8ac3c73ccbaa1a6ce0de67b9a1c4b3afb12b019b0433541e9964276
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/CHANGELOG
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Changelog
|
2
|
+
All notable changes to this project will be documented in this file.
|
3
|
+
|
4
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
5
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
6
|
+
|
7
|
+
## [Unreleased]
|
8
|
+
|
9
|
+
## [0.1.0] - 2021-04-25
|
10
|
+
### Added
|
11
|
+
- Load word2vec model from bin file.
|
12
|
+
- Find the nearest words to a given one.
|
13
|
+
|
14
|
+
### Changed
|
15
|
+
### Fixed
|
16
|
+
### Removed
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
word2vec-rb (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.4.4)
|
10
|
+
rake (12.3.3)
|
11
|
+
rake-compiler (1.1.1)
|
12
|
+
rake
|
13
|
+
rspec (3.10.0)
|
14
|
+
rspec-core (~> 3.10.0)
|
15
|
+
rspec-expectations (~> 3.10.0)
|
16
|
+
rspec-mocks (~> 3.10.0)
|
17
|
+
rspec-core (3.10.1)
|
18
|
+
rspec-support (~> 3.10.0)
|
19
|
+
rspec-expectations (3.10.1)
|
20
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
21
|
+
rspec-support (~> 3.10.0)
|
22
|
+
rspec-mocks (3.10.2)
|
23
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
24
|
+
rspec-support (~> 3.10.0)
|
25
|
+
rspec-support (3.10.2)
|
26
|
+
|
27
|
+
PLATFORMS
|
28
|
+
ruby
|
29
|
+
|
30
|
+
DEPENDENCIES
|
31
|
+
bundler (~> 2.1.0)
|
32
|
+
rake (~> 12.0)
|
33
|
+
rake-compiler (~> 1.0)
|
34
|
+
rspec (~> 3.0)
|
35
|
+
word2vec-rb!
|
36
|
+
|
37
|
+
BUNDLED WITH
|
38
|
+
2.1.4
|
data/README.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# word2vec-rb
|
2
|
+
|
3
|
+
Gem using word2vec functionality from https://code.google.com/archive/p/word2vec/
|
4
|
+
|
5
|
+
This gem was developed using the `.c` files of Google's word2vec as a base, mostly by copy-and-paste.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'word2vec-rb'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle install
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install word2vec-rb
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
### To find the nearest words, try:
|
26
|
+
|
27
|
+
require 'word2vec'
|
28
|
+
|
29
|
+
model = Word2vec::Model.load("./data/minimal.bin")
|
30
|
+
words = model.distance("from")
|
31
|
+
words.each do |w|
|
32
|
+
puts "#{w.first} #{w.last}"
|
33
|
+
end
|
34
|
+
|
35
|
+
## Development
|
36
|
+
|
37
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
38
|
+
|
39
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
40
|
+
|
41
|
+
### Build gem package
|
42
|
+
|
43
|
+
$ rake build
|
44
|
+
|
45
|
+
### Launch tests
|
46
|
+
|
47
|
+
$ rake spec
|
48
|
+
|
49
|
+
### Build extension
|
50
|
+
|
51
|
+
$ rake compile
|
52
|
+
|
53
|
+
## Contributing
|
54
|
+
|
55
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/madcato/word2vec-rb.
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "word2vec"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/data/minimal.bin
ADDED
Binary file
|
data/data/readme.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
File `minimal.bin` is only for testing purposes.
|
@@ -0,0 +1,117 @@
|
|
1
|
+
#include "common.h"
|
2
|
+
|
3
|
+
// max length of strings
|
4
|
+
const long long max_size = 2000;
|
5
|
+
// number of closest words that will be shown
|
6
|
+
const long long N = 40;
|
7
|
+
// max length of vocabulary entries
|
8
|
+
const long long max_w = 50;
|
9
|
+
|
10
|
+
// Load the binary vector file generated by word2vec project
|
11
|
+
void word2vec_model_load(word2vec_model* model, char* file_name) {
|
12
|
+
FILE *f;
|
13
|
+
f = fopen(file_name, "rb");
|
14
|
+
if (f == NULL) {
|
15
|
+
rb_raise(rb_eArgError, "file not found");
|
16
|
+
return;
|
17
|
+
}
|
18
|
+
|
19
|
+
long long words, size;
|
20
|
+
fscanf(f, "%lld", &words);
|
21
|
+
fscanf(f, "%lld", &size);
|
22
|
+
model->word_count = words;
|
23
|
+
model->vector_dim = size;
|
24
|
+
|
25
|
+
model->vocabulary = ZALLOC_N(char, (long long)words * max_w);
|
26
|
+
model->vectors = ALLOC_N(float, model->word_count * model->vector_dim);
|
27
|
+
if (model->vectors == NULL) {
|
28
|
+
rb_raise(rb_eNoMemError, "Cannot allocate memory: %lld MB %lld %lld\n", (long long)words * size * sizeof(float) / 1048576, words, size);
|
29
|
+
return;
|
30
|
+
}
|
31
|
+
|
32
|
+
for (long long b = 0; b < words; b++) {
|
33
|
+
long long a = 0;
|
34
|
+
while (true) {
|
35
|
+
model->vocabulary[b * max_w + a] = fgetc(f);
|
36
|
+
if (feof(f) || (model->vocabulary[b * max_w + a] == ' ')) break;
|
37
|
+
if ((a < max_w) && (model->vocabulary[b * max_w + a] != '\n')) a++;
|
38
|
+
}
|
39
|
+
model->vocabulary[b * max_w + a] = 0;
|
40
|
+
for (a = 0; a < size; a++) fread(&model->vectors[a + b * size], sizeof(float), 1, f);
|
41
|
+
float len = 0;
|
42
|
+
for (a = 0; a < size; a++) len += model->vectors[a + b * size] * model->vectors[a + b * size];
|
43
|
+
len = sqrt(len);
|
44
|
+
for (a = 0; a < size; a++) model->vectors[a + b * size] /= len;
|
45
|
+
}
|
46
|
+
fclose(f);
|
47
|
+
}
|
48
|
+
|
49
|
+
// Find nearest words in the model
|
50
|
+
size_t word2vec_model_distance(word2vec_model* model, char* word, WordSimilarity word_list[]) {
|
51
|
+
if (strlen(word) >= max_size) {
|
52
|
+
rb_raise(rb_eArgError, "word must be %lld character max size", max_size);
|
53
|
+
return 0;
|
54
|
+
}
|
55
|
+
|
56
|
+
long long size = model->vector_dim;
|
57
|
+
long long a;
|
58
|
+
char *bestw[N];
|
59
|
+
float bestd[N];
|
60
|
+
size_t besti[N];
|
61
|
+
for (a = 0; a < N; a++) bestw[a] = (char *)malloc(max_size * sizeof(char));
|
62
|
+
a = 0;
|
63
|
+
|
64
|
+
long long b = 0;
|
65
|
+
long long c = 0;
|
66
|
+
|
67
|
+
long long words = model->word_count;
|
68
|
+
for (b = 0; b < words; b++) {
|
69
|
+
if (!strcmp(&model->vocabulary[b * max_w], word)) break;
|
70
|
+
}
|
71
|
+
if (b == words) b = -1;
|
72
|
+
long long bi = b;
|
73
|
+
if (b == -1) {
|
74
|
+
rb_raise(rb_eArgError, "Out of dictionary word!");
|
75
|
+
return 0;
|
76
|
+
}
|
77
|
+
|
78
|
+
float vec[max_size];
|
79
|
+
float dist;
|
80
|
+
long long d;
|
81
|
+
for (a = 0; a < size; a++) vec[a] = 0;
|
82
|
+
for (a = 0; a < size; a++) vec[a] += model->vectors[a + bi * size];
|
83
|
+
float len = 0;
|
84
|
+
for (a = 0; a < size; a++) len += vec[a] * vec[a];
|
85
|
+
len = sqrt(len);
|
86
|
+
for (a = 0; a < size; a++) vec[a] /= len;
|
87
|
+
for (a = 0; a < N; a++) bestd[a] = -1;
|
88
|
+
for (a = 0; a < N; a++) bestw[a][0] = 0;
|
89
|
+
for (c = 0; c < words; c++) {
|
90
|
+
a = 0;
|
91
|
+
if (bi == c) continue;
|
92
|
+
dist = 0;
|
93
|
+
for (a = 0; a < size; a++) dist += vec[a] * model->vectors[a + c * size];
|
94
|
+
for (a = 0; a < N; a++) {
|
95
|
+
if (dist > bestd[a]) {
|
96
|
+
for (d = N - 1; d > a; d--) {
|
97
|
+
bestd[d] = bestd[d - 1];
|
98
|
+
strcpy(bestw[d], bestw[d - 1]);
|
99
|
+
besti[d] = d - 1;
|
100
|
+
}
|
101
|
+
bestd[a] = dist;
|
102
|
+
strcpy(bestw[a], &model->vocabulary[c * max_w]);
|
103
|
+
besti[a] = c * max_w;
|
104
|
+
break;
|
105
|
+
}
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
for (a = 0; a < N; a++) {
|
110
|
+
word_list[a].index = besti[a];
|
111
|
+
word_list[a].score = bestd[a];
|
112
|
+
}
|
113
|
+
|
114
|
+
for (a = 0; a < N; a++) free(bestw[a]);
|
115
|
+
|
116
|
+
return N;
|
117
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#ifndef _WORD2VEC_COMMON_H
|
2
|
+
#define _WORD2VEC_COMMON_H
|
3
|
+
|
4
|
+
#include <ruby.h>
|
5
|
+
#include <ruby/io.h>
|
6
|
+
#include <math.h>
|
7
|
+
#include <stdbool.h>
|
8
|
+
#include <stdio.h>
|
9
|
+
#include <stdlib.h>
|
10
|
+
#include <sys/types.h>
|
11
|
+
|
12
|
+
extern const long long N; // number of closest words that will be shown
|
13
|
+
|
14
|
+
typedef struct word2vec_model_s {
|
15
|
+
long long word_count;
|
16
|
+
char *vocabulary; // char *[word_count]
|
17
|
+
long long vector_dim;
|
18
|
+
float *vectors; // float[word_count][vector_dim]
|
19
|
+
} word2vec_model;
|
20
|
+
|
21
|
+
typedef struct WordSimilarity_s {
|
22
|
+
size_t index;
|
23
|
+
float score;
|
24
|
+
} WordSimilarity;
|
25
|
+
|
26
|
+
void word2vec_model_load(word2vec_model* model, char* file_name);
|
27
|
+
size_t word2vec_model_distance(word2vec_model* model, char* word, WordSimilarity word_list[]);
|
28
|
+
|
29
|
+
#endif /* _WORD2VEC_COMMON_H */
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require "mkmf"
|
2
|
+
|
3
|
+
additional_prefixed_cflags = %w(-std=gnu99)
|
4
|
+
additional_suffixed_cflags = %w(-Wno-declaration-after-statement)
|
5
|
+
additional_prefixed_ldflags = %w()
|
6
|
+
|
7
|
+
#
|
8
|
+
# Add some additional development-oriented warning flags. Enable by compiling with:
|
9
|
+
#
|
10
|
+
# rake compile -- --enable-development
|
11
|
+
#
|
12
|
+
# or, if using with the actual gem (for whatever reason):
|
13
|
+
#
|
14
|
+
# gem install word2vec -- --enable-development
|
15
|
+
#
|
16
|
+
if enable_config("development")
|
17
|
+
additional_prefixed_cflags = [*additional_prefixed_cflags, *%w(-Wall -Wextra -Werror)]
|
18
|
+
end
|
19
|
+
|
20
|
+
#
|
21
|
+
# Use `clang`'s [AddressSanitizer](http://clang.llvm.org/docs/AddressSanitizer.html). Enable by compiling with:
|
22
|
+
#
|
23
|
+
# rake compile -- --enable-address-sanitizer
|
24
|
+
#
|
25
|
+
if enable_config("address-sanitizer")
|
26
|
+
additional_prefixed_cflags = [*additional_prefixed_cflags, "-fsanitize=address"]
|
27
|
+
additional_prefixed_ldflags = [*additional_prefixed_ldflags, "-fsanitize=address"]
|
28
|
+
end
|
29
|
+
|
30
|
+
unless (new_prefixed_cflags = additional_prefixed_cflags - $CFLAGS.split(/\s+/)).empty?
|
31
|
+
$CFLAGS.prepend(new_prefixed_cflags.join(" ") << " ")
|
32
|
+
end
|
33
|
+
|
34
|
+
unless (new_suffixed_cflags = additional_suffixed_cflags - $CFLAGS.split(/\s+/)).empty?
|
35
|
+
$CFLAGS << " " << new_suffixed_cflags.join(" ")
|
36
|
+
end
|
37
|
+
|
38
|
+
unless (new_prefixed_ldflags = additional_prefixed_ldflags - $LDFLAGS.split(/\s+/)).empty?
|
39
|
+
$LDFLAGS.prepend(new_prefixed_ldflags.join(" ") << " ")
|
40
|
+
end
|
41
|
+
|
42
|
+
## Check existence of functions before build
|
43
|
+
# Check for the POSIX.1-2008 [`getdelim`](http://pubs.opengroup.org/onlinepubs/9699919799/functions/getdelim.html) function.
|
44
|
+
abort "missing getdelim()" unless have_func("getdelim")
|
45
|
+
|
46
|
+
create_makefile "word2vec/word2vec"
|
@@ -0,0 +1,93 @@
|
|
1
|
+
#include "common.h"
|
2
|
+
|
3
|
+
/*
|
4
|
+
* model_deallocate
|
5
|
+
* clean model memory
|
6
|
+
*/
|
7
|
+
static void model_deallocate(word2vec_model *model) {
|
8
|
+
if (model != NULL) {
|
9
|
+
if (model->vocabulary != NULL) {
|
10
|
+
xfree(model->vocabulary);
|
11
|
+
}
|
12
|
+
if (model->vectors != NULL) {
|
13
|
+
xfree(model->vectors);
|
14
|
+
}
|
15
|
+
xfree(model);
|
16
|
+
}
|
17
|
+
}
|
18
|
+
|
19
|
+
/*
|
20
|
+
* model_load
|
21
|
+
* load the vectors.bin file from disc
|
22
|
+
* @param [String] rb_filename
|
23
|
+
*/
|
24
|
+
static VALUE model_load(VALUE mod, VALUE rb_filename) {
|
25
|
+
word2vec_model* model = ZALLOC(word2vec_model);
|
26
|
+
|
27
|
+
char* filename = StringValueCStr(rb_filename);
|
28
|
+
|
29
|
+
word2vec_model_load(model, filename);
|
30
|
+
|
31
|
+
return Data_Wrap_Struct(mod, NULL, model_deallocate, model);
|
32
|
+
}
|
33
|
+
|
34
|
+
/*
|
35
|
+
* model vocabulary length
|
36
|
+
* @return [Integer]
|
37
|
+
*/
|
38
|
+
static VALUE model_word_count(VALUE mod) {
|
39
|
+
word2vec_model *model;
|
40
|
+
|
41
|
+
Data_Get_Struct(mod, word2vec_model, model);
|
42
|
+
|
43
|
+
return SIZET2NUM(model->word_count);
|
44
|
+
}
|
45
|
+
|
46
|
+
/*
|
47
|
+
* model vector dimensionality
|
48
|
+
* @return [Integer]
|
49
|
+
*/
|
50
|
+
static VALUE model_vector_dim(VALUE mod) {
|
51
|
+
word2vec_model *model;
|
52
|
+
|
53
|
+
Data_Get_Struct(mod, word2vec_model, model);
|
54
|
+
|
55
|
+
return SIZET2NUM(model->vector_dim);
|
56
|
+
}
|
57
|
+
|
58
|
+
/*
|
59
|
+
* model find the nearest distance words
|
60
|
+
* @param [String] rb_word
|
61
|
+
* @return [Hash<String, Float>]
|
62
|
+
*/
|
63
|
+
static VALUE model_distance(VALUE mod, VALUE rb_word) {
|
64
|
+
word2vec_model *model;
|
65
|
+
Data_Get_Struct(mod, word2vec_model, model);
|
66
|
+
char* word = StringValueCStr(rb_word);
|
67
|
+
|
68
|
+
WordSimilarity word_list[N];
|
69
|
+
|
70
|
+
size_t word_count = word2vec_model_distance(model, word, word_list);
|
71
|
+
|
72
|
+
VALUE rb_ret = rb_hash_new();
|
73
|
+
|
74
|
+
for (size_t i = 0 ; i < word_count ; i++) {
|
75
|
+
size_t index = word_list[i].index;
|
76
|
+
if (index >= 0) {
|
77
|
+
VALUE rb_word = rb_str_freeze(rb_utf8_str_new_cstr(&model->vocabulary[index]));
|
78
|
+
VALUE rb_score = DBL2NUM(word_list[i].score);
|
79
|
+
rb_hash_aset(rb_ret, rb_word, rb_score);
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
return rb_ret;
|
84
|
+
}
|
85
|
+
|
86
|
+
void Init_word2vec(void) {
|
87
|
+
VALUE mWord2vec = rb_define_module("Word2vec");
|
88
|
+
VALUE mWord2vecModel = rb_define_class_under(mWord2vec, "Model", rb_cObject);
|
89
|
+
rb_define_singleton_method(mWord2vecModel, "load", model_load, 1);
|
90
|
+
rb_define_method(mWord2vecModel, "word_count", model_word_count, 0);
|
91
|
+
rb_define_method(mWord2vecModel, "vector_dim", model_vector_dim, 0);
|
92
|
+
rb_define_method(mWord2vecModel, "distance", model_distance, 1);
|
93
|
+
}
|
data/lib/word2vec.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require "word2vec/version"
|
2
|
+
|
3
|
+
module Word2vec
|
4
|
+
class Error < StandardError; end
|
5
|
+
# Your code goes here...
|
6
|
+
|
7
|
+
class Hola
|
8
|
+
# Say hi to the world!
|
9
|
+
#
|
10
|
+
# Example:
|
11
|
+
# >> Hola.hi("spanish")
|
12
|
+
# => hola mundo
|
13
|
+
#
|
14
|
+
# Arguments:
|
15
|
+
# language: (String)
|
16
|
+
def self.hi(language = "english")
|
17
|
+
translator = Translator.new(language)
|
18
|
+
puts translator.hi
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
require "word2vec/word2vec"
|
data/word2vec-rb.gemspec
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require_relative 'lib/word2vec/version'
|
2
|
+
|
3
|
+
Gem::Specification.new do |spec|
|
4
|
+
spec.name = "word2vec-rb"
|
5
|
+
spec.version = Word2vec::VERSION
|
6
|
+
spec.authors = ["Dani Vela"]
|
7
|
+
spec.email = ["veladan@me.com"]
|
8
|
+
|
9
|
+
spec.summary = %q{Ruby interface to use word2vec arithmetic.}
|
10
|
+
spec.description = %q{To use this gem is required the file`vectors.bin` where is stored the output of the Google algorithm called `word2vec`. This gem doesn't produce this file. Once produced, this can can load it and use it to calculate some arithmetic operations like distance between words or to calculate the relations between them.'}
|
11
|
+
spec.homepage = "https://github.com/madcato/word2vec-rb"
|
12
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
|
13
|
+
|
14
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
15
|
+
spec.metadata["source_code_uri"] = "https://github.com/madcato/word2vec-rb"
|
16
|
+
spec.metadata["changelog_uri"] = "http://github.com/macato/word2vec-rb/CHANGELOG"
|
17
|
+
|
18
|
+
# Specify which files should be added to the gem when it is released.
|
19
|
+
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
20
|
+
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
21
|
+
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
22
|
+
end
|
23
|
+
spec.bindir = "exe"
|
24
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
25
|
+
spec.require_paths = ["lib"]
|
26
|
+
spec.license = 'MIT'
|
27
|
+
|
28
|
+
spec.extensions = %w[ext/word2vec/extconf.rb]
|
29
|
+
|
30
|
+
spec.add_development_dependency "bundler", "~> 2.1.0"
|
31
|
+
spec.add_development_dependency "rake", "~> 12.0"
|
32
|
+
spec.add_development_dependency "rake-compiler", "~> 1.0"
|
33
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
34
|
+
end
|
metadata
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: word2vec-rb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Dani Vela
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-04-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.1.0
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.1.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '12.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '12.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
description: To use this gem is required the file`vectors.bin` where is stored the
|
70
|
+
output of the Google algorithm called `word2vec`. This gem doesn't produce this
|
71
|
+
file. Once produced, this can can load it and use it to calculate some arithmetic
|
72
|
+
operations like distance between words or to calculate the relations between them.'
|
73
|
+
email:
|
74
|
+
- veladan@me.com
|
75
|
+
executables: []
|
76
|
+
extensions:
|
77
|
+
- ext/word2vec/extconf.rb
|
78
|
+
extra_rdoc_files: []
|
79
|
+
files:
|
80
|
+
- ".gitignore"
|
81
|
+
- ".rspec"
|
82
|
+
- ".travis.yml"
|
83
|
+
- CHANGELOG
|
84
|
+
- Gemfile
|
85
|
+
- Gemfile.lock
|
86
|
+
- README.md
|
87
|
+
- Rakefile
|
88
|
+
- bin/console
|
89
|
+
- bin/setup
|
90
|
+
- data/minimal.bin
|
91
|
+
- data/readme.md
|
92
|
+
- ext/word2vec/common.c
|
93
|
+
- ext/word2vec/common.h
|
94
|
+
- ext/word2vec/extconf.rb
|
95
|
+
- ext/word2vec/word2vec.c
|
96
|
+
- lib/word2vec.rb
|
97
|
+
- lib/word2vec/version.rb
|
98
|
+
- word2vec-rb.gemspec
|
99
|
+
homepage: https://github.com/madcato/word2vec-rb
|
100
|
+
licenses:
|
101
|
+
- MIT
|
102
|
+
metadata:
|
103
|
+
homepage_uri: https://github.com/madcato/word2vec-rb
|
104
|
+
source_code_uri: https://github.com/madcato/word2vec-rb
|
105
|
+
changelog_uri: http://github.com/macato/word2vec-rb/CHANGELOG
|
106
|
+
post_install_message:
|
107
|
+
rdoc_options: []
|
108
|
+
require_paths:
|
109
|
+
- lib
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: 2.3.0
|
115
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
requirements: []
|
121
|
+
rubygems_version: 3.1.2
|
122
|
+
signing_key:
|
123
|
+
specification_version: 4
|
124
|
+
summary: Ruby interface to use word2vec arithmetic.
|
125
|
+
test_files: []
|