text_rank 1.2.5 → 1.2.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/.travis.yml +1 -0
- data/Rakefile +5 -0
- data/ext/text_rank/extconf.rb +3 -0
- data/ext/text_rank/page_rank_sparse_native.c +296 -0
- data/ext/text_rank/page_rank_sparse_native.h +93 -0
- data/ext/text_rank/text_rank.c +5 -0
- data/lib/page_rank.rb +5 -4
- data/lib/page_rank/base.rb +3 -1
- data/lib/page_rank/dense.rb +1 -1
- data/lib/page_rank/sparse.rb +1 -1
- data/lib/page_rank/sparse_native.rb +21 -0
- data/lib/text_rank.rb +2 -0
- data/lib/text_rank/version.rb +1 -1
- data/text_rank.gemspec +2 -0
- metadata +23 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea53630f7c00d0731b2a190c8c8775b7f433557611b85b17eae8c0989bfac108
|
4
|
+
data.tar.gz: 184389405a3ddbf216290f2a9bc806c5c70484dea4b677a89f089bfb0054f161
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4cce8fa0323a59e52bb1b0f220f93c8dfdb68b795f7027e1f871c563d331083dbe4803b3176b14f15d2de5dc7c9d533af8104e125149ff05af397b88f02df6bb
|
7
|
+
data.tar.gz: 07efa700bbc61c56ca440a0d241a576a531f4caa4f26c48b7e8a938f19cdadf5d59d1d6bcf46d3cfc43ca4c5a4ada865ef2dbb4e6d376abefed283c6a410c719
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -9,6 +9,7 @@ before_script:
|
|
9
9
|
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
|
10
10
|
- chmod +x ./cc-test-reporter
|
11
11
|
- ./cc-test-reporter before-build
|
12
|
+
- bundle exec rake compile
|
12
13
|
script:
|
13
14
|
- bundle exec rspec
|
14
15
|
after_script:
|
data/Rakefile
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
+
require "rake/extensiontask"
|
2
3
|
require "rspec/core/rake_task"
|
3
4
|
|
4
5
|
RSpec::Core::RakeTask.new(:spec)
|
@@ -10,3 +11,7 @@ RDoc::Task.new do |rdoc|
|
|
10
11
|
rdoc.main = "README.md"
|
11
12
|
rdoc.rdoc_files.include("README.md", "lib/**/*.rb")
|
12
13
|
end
|
14
|
+
|
15
|
+
Rake::ExtensionTask.new('text_rank') do |ext|
|
16
|
+
ext.lib_dir = 'lib/text_rank'
|
17
|
+
end
|
@@ -0,0 +1,296 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
#include <math.h>
|
3
|
+
#include <page_rank_sparse_native.h>
|
4
|
+
|
5
|
+
const size_t NODE_LIST_SIZE = sizeof(NodeListStruct);
|
6
|
+
const size_t EDGE_LIST_SIZE = sizeof(EdgeListStruct);
|
7
|
+
const size_t NODE_SIZE = sizeof(NodeStruct);
|
8
|
+
const size_t EDGE_SIZE = sizeof(EdgeStruct);
|
9
|
+
const size_t GRAPH_SIZE = sizeof(GraphStruct);
|
10
|
+
|
11
|
+
static const rb_data_type_t graph_typed_data = {
|
12
|
+
"PageRank/SparseNative/Graph",
|
13
|
+
{ 0, free_graph, },
|
14
|
+
0, 0,
|
15
|
+
RUBY_TYPED_FREE_IMMEDIATELY,
|
16
|
+
};
|
17
|
+
|
18
|
+
|
19
|
+
//////////////////////////////////////////////////////////////////////////////////////
|
20
|
+
|
21
|
+
void Init_sparse_native() {
|
22
|
+
VALUE PageRankModule, SparseNativeClass;
|
23
|
+
|
24
|
+
PageRankModule = rb_const_get(rb_cObject, rb_intern("PageRank"));
|
25
|
+
SparseNativeClass = rb_const_get(PageRankModule, rb_intern("SparseNative"));
|
26
|
+
|
27
|
+
rb_define_alloc_func(SparseNativeClass, sparse_native_allocate);
|
28
|
+
rb_define_private_method(SparseNativeClass, "_add_edge", sparse_native_add_edge, 3);
|
29
|
+
rb_define_private_method(SparseNativeClass, "_calculate", sparse_native_calculate, 3);
|
30
|
+
}
|
31
|
+
|
32
|
+
// Allocator for PageRank::SparseNative: creates an empty graph and wraps it
// in a typed-data object of the given class.
// Raises NoMemoryError (via rb_memerror) if the graph cannot be allocated.
VALUE sparse_native_allocate(VALUE self) {
  // Borrow the hash type used by Ruby's own Hash so node labels are looked up
  // with the same hashing/equality as Hash keys. This is done before malloc so
  // that a failure while creating the temporary hash cannot leak the graph.
  const struct st_hash_type *objhash = rb_hash_tbl(rb_hash_new())->type;
  Graph g = malloc(GRAPH_SIZE);
  VALUE obj;

  if (g == NULL) {
    rb_memerror();
  }

  g->node_count = 0;
  g->nodes = NULL;
  g->dangling_nodes = NULL;
  g->node_lookup = NULL;

  // Wrap first: if creating the lookup table fails, the graph is already
  // owned by a Ruby object and will be released by its free callback.
  obj = TypedData_Wrap_Struct(self, &graph_typed_data, g);
  g->node_lookup = st_init_table_with_size(objhash, 0);

  return obj;
}
|
45
|
+
|
46
|
+
// Ruby-facing `_add_edge(source, dest, weight)`: converts the weight to a
// double and records the edge in the wrapped graph. Always returns nil.
VALUE sparse_native_add_edge(VALUE self, VALUE source, VALUE dest, VALUE weight) {
  Graph graph;
  double edge_weight;

  TypedData_Get_Struct(self, GraphStruct, &graph_typed_data, graph);
  edge_weight = NUM2DBL(weight);
  add_edge_with_labels(graph, source, dest, edge_weight);

  return Qnil;
}
|
53
|
+
|
54
|
+
// Ruby-facing `_calculate(max_iterations, damping, tolerance)`: runs the
// PageRank iteration on the wrapped graph and returns a new Hash mapping each
// node label to its normalized rank, inserted in decreasing rank order.
VALUE sparse_native_calculate(VALUE self, VALUE max_iterations, VALUE damping, VALUE tolerance) {
  Graph g;
  VALUE ranks;
  // NUM2INT validates type and range (raising TypeError/RangeError), whereas
  // FIX2INT silently reinterprets any non-Fixnum VALUE.
  int iteration_limit = NUM2INT(max_iterations);
  double damping_factor = NUM2DBL(damping);
  double tolerance_value = NUM2DBL(tolerance);

  TypedData_Get_Struct(self, GraphStruct, &graph_typed_data, g);
  calculate(g, iteration_limit, damping_factor, tolerance_value);

  ranks = rb_hash_new();
  sort_and_normalize_ranks(g, rb_hash_dset, ranks);
  return ranks;
}
|
65
|
+
|
66
|
+
// Stores a C double into a Ruby Hash under `key`, boxing it as a Ruby Float.
void rb_hash_dset(VALUE hash, VALUE key, double value) {
  VALUE boxed = DBL2NUM(value);

  rb_hash_aset(hash, key, boxed);
}
|
69
|
+
|
70
|
+
//////////////////////////////////////////////////////////////////////////////////////
|
71
|
+
|
72
|
+
void free_graph(void *data) {
|
73
|
+
Graph g = (Graph)data;
|
74
|
+
free_node_list(g->nodes, free_node);
|
75
|
+
free_node_list(g->dangling_nodes, NULL);
|
76
|
+
free(g->node_lookup);
|
77
|
+
free(g);
|
78
|
+
}
|
79
|
+
|
80
|
+
// Releases a node together with its list of inbound edges.
void free_node(Node n) {
  EdgeList inbound = n->source_edges;

  free_edge_list(inbound, free_edge);
  free(n);
}
|
84
|
+
|
85
|
+
// Frees every cell of a node list. When `free_item` is non-NULL it is called
// on each cell's node first; pass NULL for lists that do not own their nodes.
void free_node_list(NodeList nodes, void (*free_item)(Node)) {
  NodeList cell = nodes;

  while (cell != NULL) {
    // Capture the successor before the cell is released
    NodeList following = cell->next;

    if (free_item != NULL) {
      free_item(cell->node);
    }
    free(cell);
    cell = following;
  }
}
|
96
|
+
|
97
|
+
// Releases a single edge. The source node it points at is not touched here;
// nodes are owned and freed through the graph's node list.
void free_edge(Edge e) {
  free(e);
}
|
101
|
+
|
102
|
+
// Frees every cell of an edge list. When `free_item` is non-NULL it is called
// on each cell's edge before the cell itself is released.
void free_edge_list(EdgeList edges, void (*free_item)(Edge)) {
  EdgeList cell = edges;

  while (cell != NULL) {
    // Capture the successor before the cell is released
    EdgeList following = cell->next;

    if (free_item != NULL) {
      free_item(cell->edge);
    }
    free(cell);
    cell = following;
  }
}
|
113
|
+
|
114
|
+
//////////////////////////////////////////////////////////////////////////////////////
|
115
|
+
|
116
|
+
// Creates a new node for `label`, prepends it to the graph's node list and
// bumps the node count. Returns the new node (owned by the graph).
// Raises NoMemoryError (via rb_memerror) if allocation fails.
Node add_node(Graph g, VALUE label) {
  NodeList cell = malloc(NODE_LIST_SIZE);
  Node n = malloc(NODE_SIZE);

  if (cell == NULL || n == NULL) {
    // Release whichever half succeeded before raising
    free(cell);
    free(n);
    rb_memerror();
  }

  n->label = label;
  n->source_edges = NULL;
  n->rank = 0.0;
  n->prev_rank = 0.0;
  n->outbound_weight_total = 0.0;

  cell->node = n;
  cell->next = g->nodes;
  g->nodes = cell;
  g->node_count += 1;

  return n;
}
|
132
|
+
|
133
|
+
// Prepends `n` to the graph's dangling-node list. The list only references
// the node; ownership stays with the graph's main node list. Returns `n`.
// Raises NoMemoryError (via rb_memerror) if allocation fails.
Node add_dangling_node(Graph g, Node n) {
  NodeList cell = malloc(NODE_LIST_SIZE);

  if (cell == NULL) {
    rb_memerror();
  }

  cell->node = n;
  cell->next = g->dangling_nodes;
  g->dangling_nodes = cell;

  return n;
}
|
142
|
+
|
143
|
+
// Creates an edge from `source` to `destination` with the given weight.
// The edge is stored on the destination's list of inbound edges and the
// weight is added to the source's outbound total. Returns the new edge.
// Raises NoMemoryError (via rb_memerror) if allocation fails.
Edge add_edge(Node source, Node destination, double weight) {
  EdgeList cell = malloc(EDGE_LIST_SIZE);
  Edge e = malloc(EDGE_SIZE);

  if (cell == NULL || e == NULL) {
    // Release whichever half succeeded before raising
    free(cell);
    free(e);
    rb_memerror();
  }

  e->source = source;
  e->weight = weight;

  cell->edge = e;
  cell->next = destination->source_edges;
  destination->source_edges = cell;
  source->outbound_weight_total += weight;

  return e;
}
|
156
|
+
|
157
|
+
// Adds a weighted edge between the nodes identified by two labels, creating
// either node on first use. The source is resolved before the destination.
Edge add_edge_with_labels(Graph g, VALUE source_label, VALUE dest_label, double weight) {
  Node from = lookup_node(g, source_label);
  Node to = lookup_node(g, dest_label);

  return add_edge(from, to, weight);
}
|
165
|
+
|
166
|
+
// Returns the node registered for `label`, creating and registering a new
// node in the graph's lookup table when the label has not been seen before.
Node lookup_node(Graph g, VALUE label) {
  st_data_t found;
  Node created;

  if (st_lookup(g->node_lookup, (st_data_t)label, &found)) {
    return (Node)found;
  }

  created = add_node(g, label);
  st_add_direct(g->node_lookup, (st_data_t)label, (st_data_t)created);
  return created;
}
|
175
|
+
|
176
|
+
//////////////////////////////////////////////////////////////////////////////////////
|
177
|
+
|
178
|
+
// Prepares the graph for a PageRank run: rebuilds the list of dangling nodes
// and gives every node the uniform initial rank 1 / node_count.
//
// Edge weights are left untouched here and normalized on the fly in
// calculate_step. Normalizing them in place would divide already-normalized
// weights again on a second run, and would mix raw and normalized weights if
// edges were added between runs.
void calculate_start(Graph g) {
  NodeList nodes;
  Node n;

  // Start from an empty dangling list so repeated runs do not accumulate
  // duplicate entries (which would count a dangling node's rank twice).
  free_node_list(g->dangling_nodes, NULL);
  g->dangling_nodes = NULL;

  for (nodes = g->nodes; nodes != NULL; nodes = nodes->next) {
    n = nodes->node;

    // If there is no outbound weight, this is a "dangling" node
    if (n->outbound_weight_total == 0.0) {
      add_dangling_node(g, n);
    }

    // Set the initial rank
    n->prev_rank = 0;
    n->rank = 1.0 / g->node_count;
  }
}

// Performs one PageRank iteration: every node's new rank is computed from the
// previous ranks of its inbound neighbours, weighted by each edge's share of
// its source's total outbound weight, plus an equal share of the rank held by
// dangling nodes, combined with the damping factor.
void calculate_step(Graph g, double damping) {
  NodeList nodes, dangling_nodes;
  Node source, destination;
  EdgeList edges;
  Edge e;
  double sum;
  double dangling_share = 0.0;

  // Set prev rank to rank for all nodes
  for (nodes = g->nodes; nodes != NULL; nodes = nodes->next) {
    destination = nodes->node;
    destination->prev_rank = destination->rank;
  }

  // The dangling contribution is identical for every destination, so it is
  // computed once here instead of once per node.
  for (dangling_nodes = g->dangling_nodes; dangling_nodes != NULL; dangling_nodes = dangling_nodes->next) {
    source = dangling_nodes->node;
    dangling_share += source->prev_rank / g->node_count;
  }

  // Redistribute the rankings according to weight
  for (nodes = g->nodes; nodes != NULL; nodes = nodes->next) {
    destination = nodes->node;
    sum = 0.0;
    for (edges = destination->source_edges; edges != NULL; edges = edges->next) {
      e = edges->edge;
      source = e->source;
      // A source whose outbound total is zero is handled as a dangling node;
      // skipping it here avoids a 0/0 division.
      if (source->outbound_weight_total != 0.0) {
        sum += source->prev_rank * (e->weight / source->outbound_weight_total);
      }
    }
    destination->rank = damping * (sum + dangling_share) + (1 - damping) / g->node_count;
  }
}
|
234
|
+
|
235
|
+
// Calculate the Euclidean distance from prev_rank to rank across all nodes
|
236
|
+
double prev_distance(Graph g) {
|
237
|
+
NodeList nodes;
|
238
|
+
Node n;
|
239
|
+
double rank_diff, sum_squares = 0.0;
|
240
|
+
|
241
|
+
for (nodes = g->nodes; nodes != NULL; nodes = nodes->next) {
|
242
|
+
n = nodes->node;
|
243
|
+
rank_diff = n->prev_rank - n->rank;
|
244
|
+
sum_squares += rank_diff * rank_diff;
|
245
|
+
}
|
246
|
+
|
247
|
+
return sqrt(sum_squares);
|
248
|
+
}
|
249
|
+
|
250
|
+
// Runs PageRank on the graph: initializes ranks, then iterates until the rank
// vector moves by less than `tolerance` or `max_iterations` steps have run.
// A negative `max_iterations` (e.g. -1) means no iteration limit.
void calculate(Graph g, int max_iterations, double damping, double tolerance) {
  int remaining;

  calculate_start(g);

  for (remaining = max_iterations; remaining != 0; remaining--) {
    calculate_step(g, damping);
    if (prev_distance(g) < tolerance) {
      break;
    }
  }
}
|
261
|
+
|
262
|
+
int node_compare(const void *v1, const void *v2) {
|
263
|
+
double rank1, rank2, cmp;
|
264
|
+
|
265
|
+
rank1 = (*(Node *)v1)->rank;
|
266
|
+
rank2 = (*(Node *)v2)->rank;
|
267
|
+
cmp = rank2 - rank1; // Decreasing order
|
268
|
+
if (cmp < 0) return -1;
|
269
|
+
if (cmp > 0) return 1;
|
270
|
+
return 0;
|
271
|
+
}
|
272
|
+
|
273
|
+
// Reports every node's rank in decreasing rank order.
// For each node, calls `callback(callback_arg, label, rank / sum)`, where
// `sum` is the total of all ranks, so the reported values add up to 1.
// Does nothing for an empty graph.
// Raises NoMemoryError (via rb_memerror) if the scratch array cannot be
// allocated.
// NOTE(review): if `callback` raises a Ruby exception, the scratch array is
// not released.
void sort_and_normalize_ranks(Graph g, void (*callback)(VALUE, VALUE, double), VALUE callback_arg) {
  NodeList nodes;
  Node *sorted;
  double sum = 0.0;
  unsigned long count = g->node_count;
  unsigned long i;

  if (count == 0) {
    return;
  }

  sorted = malloc(count * sizeof(Node));
  if (sorted == NULL) {
    rb_memerror();
  }

  // Fill the array back to front while accumulating the rank total
  i = count;
  for (nodes = g->nodes; nodes != NULL; nodes = nodes->next) {
    sorted[--i] = nodes->node;
    sum += nodes->node->rank;
  }

  qsort(sorted, count, sizeof(Node), node_compare);

  for (i = 0; i < count; i++) {
    callback(callback_arg, sorted[i]->label, sorted[i]->rank / sum);
  }

  free(sorted);
}
|
@@ -0,0 +1,93 @@
|
|
1
|
+
#ifndef PAGE_RANK_SPARSE_NATIVE_H
#define PAGE_RANK_SPARSE_NATIVE_H

#include <ruby.h>

// Singly linked list cell holding one node
typedef struct NodeListStruct* NodeList;

typedef struct NodeListStruct {
  struct NodeStruct *node;
  struct NodeListStruct *next;
} NodeListStruct;

//////////////////////////////////////////////////////////////////////////////////////

// Singly linked list cell holding one edge
typedef struct EdgeListStruct* EdgeList;

typedef struct EdgeListStruct {
  struct EdgeStruct *edge;
  struct EdgeListStruct *next;
} EdgeListStruct;

//////////////////////////////////////////////////////////////////////////////////////

// A graph node
typedef struct NodeStruct* Node;

typedef struct NodeStruct {
  EdgeList source_edges;        // Inbound edges (edges whose destination is this node)
  VALUE label;                  // Ruby object identifying the node
  double prev_rank;             // Rank before the most recent iteration
  double rank;                  // Current rank
  double outbound_weight_total; // Sum of the weights of edges leaving this node
} NodeStruct;

//////////////////////////////////////////////////////////////////////////////////////

// A weighted edge, stored on its destination node
typedef struct EdgeStruct* Edge;

typedef struct EdgeStruct {
  Node source;   // Node the edge originates from
  double weight; // Weight of the edge
} EdgeStruct;

//////////////////////////////////////////////////////////////////////////////////////

// The graph wrapped by a PageRank::SparseNative instance
typedef struct GraphStruct* Graph;

typedef struct GraphStruct {
  unsigned long node_count; // Number of nodes in `nodes`
  NodeList nodes;           // All nodes
  NodeList dangling_nodes;  // Nodes with no outbound weight
  st_table *node_lookup;    // Maps a label to its node
} GraphStruct;

//////////////////////////////////////////////////////////////////////////////////////

// Memory management
void free_graph(void *data);
void free_node(Node n);
void free_node_list(NodeList nodes, void (*free_item)(Node));
void free_edge(Edge e);
void free_edge_list(EdgeList edges, void (*free_item)(Edge));

//////////////////////////////////////////////////////////////////////////////////////

// Graph construction
Node add_node(Graph g, VALUE label);
Node add_dangling_node(Graph g, Node n);
Edge add_edge(Node source, Node destination, double weight);
Edge add_edge_with_labels(Graph g, VALUE source_label, VALUE dest_label, double weight);
Node lookup_node(Graph g, VALUE label);

//////////////////////////////////////////////////////////////////////////////////////

// PageRank calculation
void calculate_start(Graph g);
void calculate_step(Graph g, double damping);
double prev_distance(Graph g);
void calculate(Graph g, int max_iterations, double damping, double tolerance);
int node_compare(const void *v1, const void *v2);
void sort_and_normalize_ranks(Graph g, void (*callback)(VALUE, VALUE, double), VALUE callback_arg);

//////////////////////////////////////////////////////////////////////////////////////

// Ruby bindings
void Init_sparse_native(void);
VALUE sparse_native_allocate(VALUE self);
VALUE sparse_native_add_edge(VALUE self, VALUE source, VALUE dest, VALUE weight);
VALUE sparse_native_calculate(VALUE self, VALUE max_iterations, VALUE damping, VALUE tolerance);
// NOTE(review): no definition of sorted_and_normalized_ranks is visible in
// page_rank_sparse_native.c; this looks like a stale prototype. Confirm and remove.
VALUE sorted_and_normalized_ranks(Graph g);
void rb_hash_dset(VALUE hash, VALUE key, double value);

#endif
|
data/lib/page_rank.rb
CHANGED
@@ -17,16 +17,17 @@ require 'set'
|
|
17
17
|
##
|
18
18
|
module PageRank
|
19
19
|
|
20
|
-
autoload :Base,
|
21
|
-
autoload :Dense,
|
22
|
-
autoload :Sparse,
|
20
|
+
autoload :Base, 'page_rank/base'
|
21
|
+
autoload :Dense, 'page_rank/dense'
|
22
|
+
autoload :Sparse, 'page_rank/sparse'
|
23
|
+
autoload :SparseNative, 'page_rank/sparse_native'
|
23
24
|
|
24
25
|
# @option options [Symbol] :strategy PageRank strategy to use (one of :sparse, :dense, or :sparse_native)
|
25
26
|
# @option options [Float] :damping The probability of following the graph vs. randomly choosing a new node
|
26
27
|
# @option options [Float] :tolerance The desired accuracy of the results
|
27
28
|
# @return [PageRank::Base]
|
28
29
|
def self.new(strategy: :sparse, **options)
|
29
|
-
const_get(strategy.to_s.capitalize).new(**options)
|
30
|
+
const_get(strategy.to_s.split('_').map(&:capitalize).join).new(**options)
|
30
31
|
end
|
31
32
|
|
32
33
|
# Convenience method to quickly calculate PageRank. In the calling block, graph edges can be added.
|
data/lib/page_rank/base.rb
CHANGED
@@ -7,6 +7,8 @@ module PageRank
|
|
7
7
|
##
|
8
8
|
class Base
|
9
9
|
|
10
|
+
attr_reader :damping, :tolerance
|
11
|
+
|
10
12
|
# @param (see #damping=)
|
11
13
|
# @param (see #tolerance=)
|
12
14
|
def initialize(damping: nil, tolerance: nil, **_)
|
@@ -48,7 +50,7 @@ module PageRank
|
|
48
50
|
|
49
51
|
prev_ranks = ranks
|
50
52
|
ranks = calculate_step(ranks)
|
51
|
-
break if distance(ranks, prev_ranks) <
|
53
|
+
break if distance(ranks, prev_ranks) < tolerance
|
52
54
|
|
53
55
|
max_iterations -= 1
|
54
56
|
end
|
data/lib/page_rank/dense.rb
CHANGED
@@ -79,7 +79,7 @@ module PageRank
|
|
79
79
|
total = total_out_weights[source_idx]
|
80
80
|
if total
|
81
81
|
w = @out_links[source_idx][dest_idx] || 0.0
|
82
|
-
|
82
|
+
damping * w / total + (1 - damping) / node_count.to_f
|
83
83
|
else
|
84
84
|
1.0 / node_count.to_f
|
85
85
|
end
|
data/lib/page_rank/sparse.rb
CHANGED
@@ -0,0 +1,21 @@
|
|
1
|
+
module PageRank
  ##
  # PageRank strategy backed by the native sparse graph implementation.
  # The private +_add_edge+ and +_calculate+ methods used below are defined
  # on this class by the C extension.
  ##
  class SparseNative < Base

    # @param (see Base#add)
    # @param weight [Float] Optional weight for the graph edge
    # @return (see Base#add)
    def add(source, dest, weight: 1.0)
      # Self-referencing edges are ignored
      return if source == dest

      _add_edge(source, dest, weight)
    end

    # Perform the PageRank calculation
    # @param max_iterations [Fixnum] Maximum number of PageRank iterations to perform (or -1 for no max)
    # @return [Hash<Object, Float>] of nodes with rank
    def calculate(max_iterations: -1, **_)
      _calculate(max_iterations, damping, tolerance)
    end

  end
end
|
data/lib/text_rank.rb
CHANGED
data/lib/text_rank/version.rb
CHANGED
data/text_rank.gemspec
CHANGED
@@ -16,10 +16,12 @@ Gem::Specification.new do |spec|
|
|
16
16
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
17
|
spec.bindir = 'exe'
|
18
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
|
+
spec.extensions = ['ext/text_rank/extconf.rb']
|
19
20
|
spec.require_paths = ['lib']
|
20
21
|
|
21
22
|
spec.add_development_dependency 'bundler'
|
22
23
|
spec.add_development_dependency 'rake'
|
24
|
+
spec.add_development_dependency 'rake-compiler'
|
23
25
|
spec.add_development_dependency 'rspec'
|
24
26
|
spec.add_development_dependency 'rubocop'
|
25
27
|
spec.add_development_dependency 'simplecov', '~> 0.17.0' # 0.18 not supported by code climate
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_rank
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
4
|
+
version: 1.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David McCullars
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-02-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rspec
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -127,7 +141,8 @@ description: Implementation of TextRank solution to ranked keyword extraction.
|
|
127
141
|
email:
|
128
142
|
- david.mccullars@gmail.com
|
129
143
|
executables: []
|
130
|
-
extensions:
|
144
|
+
extensions:
|
145
|
+
- ext/text_rank/extconf.rb
|
131
146
|
extra_rdoc_files: []
|
132
147
|
files:
|
133
148
|
- ".codeclimate.yml"
|
@@ -143,10 +158,15 @@ files:
|
|
143
158
|
- Rakefile
|
144
159
|
- bin/console
|
145
160
|
- bin/setup
|
161
|
+
- ext/text_rank/extconf.rb
|
162
|
+
- ext/text_rank/page_rank_sparse_native.c
|
163
|
+
- ext/text_rank/page_rank_sparse_native.h
|
164
|
+
- ext/text_rank/text_rank.c
|
146
165
|
- lib/page_rank.rb
|
147
166
|
- lib/page_rank/base.rb
|
148
167
|
- lib/page_rank/dense.rb
|
149
168
|
- lib/page_rank/sparse.rb
|
169
|
+
- lib/page_rank/sparse_native.rb
|
150
170
|
- lib/text_rank.rb
|
151
171
|
- lib/text_rank/char_filter.rb
|
152
172
|
- lib/text_rank/char_filter/ascii_folding.rb
|