word_scoop 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/Gemfile +4 -0
- data/README.md +66 -0
- data/Rakefile +17 -24
- data/benchmark/article.txt +43 -0
- data/benchmark/keywords.txt +56720 -0
- data/benchmark/measure.rb +37 -0
- data/ext/{extconf.rb → word_scoop/extconf.rb} +0 -0
- data/ext/{word_scoop.c → word_scoop/word_scoop.c} +36 -4
- data/ext/{word_scoop.h → word_scoop/word_scoop.h} +4 -1
- data/lib/word_scoop/version.rb +3 -0
- data/lib/word_scoop.rb +2 -3
- data/spec/spec_helper.rb +2 -0
- data/spec/unit/word_scoop_spec.rb +28 -0
- data/word_scoop.gemspec +26 -0
- metadata +99 -82
- data/README.rdoc +0 -35
@@ -0,0 +1,37 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'benchmark'
|
3
|
+
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
4
|
+
require 'word_scoop'
|
5
|
+
|
6
|
+
# Times how long WordScoop takes to register a keyword list and to search
# an article, using the keywords.txt and article.txt fixtures that sit next
# to this file.
class WordScoopBenchmark
  # Loads both fixtures and prints their sizes.
  #
  # Keywords are read one per line, stripped of surrounding whitespace and
  # de-duplicated; the article is kept as one string.
  def initialize
    keywords_path = File.expand_path("../keywords.txt", __FILE__)
    article_path = File.expand_path("../article.txt", __FILE__)

    @keywords = File.foreach(keywords_path).map(&:strip).uniq
    @article = File.read(article_path)

    puts "keywords size\t#{@keywords.size}"
    puts "article size\t#{@article.size}\n\n"
  end

  # Runs the two measurements (building the tree, then 1000 searches of the
  # article) and prints the per-word and per-search averages after the
  # standard Benchmark report lines.
  def run
    word_count = @keywords.size

    Benchmark.bm(7, "avg") do |bench|
      scoop = nil
      register_time = bench.report("register") { scoop = WordScoop.new(@keywords) }
      search_time = bench.report("search") do
        1000.times { scoop.search(@article) }
      end

      # Tms#real is in seconds: scale to microseconds, then spread over words.
      per_word_us = register_time.real * 1000 * 1000 / word_count
      puts "1 word regist avg\t#{format('%.03f', per_word_us)} µs"
      # 1000 searches were timed, so the total in seconds is numerically the
      # average per search in milliseconds.
      puts "search avg\t\t#{format('%.03f', search_time.real)} ms"
    end
  end
end
|
36
|
+
|
37
|
+
# Start a benchmark run only when the running script's basename matches this
# file's basename, i.e. when this file is executed directly.
WordScoopBenchmark.new.run if File.basename($PROGRAM_NAME) == File.basename(__FILE__)
|
File without changes
|
@@ -10,6 +10,7 @@
|
|
10
10
|
#include <stdlib.h>
|
11
11
|
#include <string.h>
|
12
12
|
#include <ruby.h>
|
13
|
+
#include <ruby/encoding.h>
|
13
14
|
#include "word_scoop.h"
|
14
15
|
|
15
16
|
|
@@ -86,6 +87,13 @@ void destroy_node(node n)
|
|
86
87
|
free(n);
|
87
88
|
}
|
88
89
|
|
90
|
+
// add encoding info
|
91
|
+
static VALUE add_encode(VALUE str, rb_encoding *enc)
|
92
|
+
{
|
93
|
+
rb_enc_associate(str, enc);
|
94
|
+
return str;
|
95
|
+
}
|
96
|
+
|
89
97
|
//-----------------------------------------------------------
|
90
98
|
// Ruby Methods
|
91
99
|
// ----------------------------------------------------------
|
@@ -154,8 +162,10 @@ static VALUE t_search(VALUE self, VALUE str)
|
|
154
162
|
char *text;
|
155
163
|
int i, head_i, tail_i, total_len;
|
156
164
|
VALUE array;
|
165
|
+
rb_encoding *enc;
|
157
166
|
|
158
167
|
array = rb_ary_new();
|
168
|
+
enc = rb_enc_get(str);
|
159
169
|
text = StringValuePtr(str);
|
160
170
|
|
161
171
|
Data_Get_Struct(self, struct _node, root);
|
@@ -180,7 +190,12 @@ static VALUE t_search(VALUE self, VALUE str)
|
|
180
190
|
} else {
|
181
191
|
if (head_i != -1) {
|
182
192
|
if (tail_i != -1) {
|
183
|
-
rb_funcall(
|
193
|
+
rb_funcall(
|
194
|
+
array,
|
195
|
+
rb_intern("push"),
|
196
|
+
1,
|
197
|
+
add_encode(rb_str_new(&text[head_i], (tail_i - head_i + 1)), enc)
|
198
|
+
);
|
184
199
|
i = tail_i;
|
185
200
|
tail_i = -1;
|
186
201
|
} else {
|
@@ -205,8 +220,10 @@ static VALUE t_filter_hrml(VALUE self, VALUE str)
|
|
205
220
|
char *text, *inner_tag;
|
206
221
|
int i, head_i, tail_i, copy_head_i, total_len;
|
207
222
|
VALUE change_str, url_base, word;
|
223
|
+
rb_encoding *enc;
|
208
224
|
|
209
225
|
change_str = rb_str_new2(EMPTY_STRING);
|
226
|
+
enc = rb_enc_get(str);
|
210
227
|
text = StringValuePtr(str);
|
211
228
|
|
212
229
|
Data_Get_Struct(self, struct _node, root);
|
@@ -271,11 +288,21 @@ static VALUE t_filter_hrml(VALUE self, VALUE str)
|
|
271
288
|
if (head_i != -1) {
|
272
289
|
if (tail_i != -1) {
|
273
290
|
if (copy_head_i < head_i) {
|
274
|
-
rb_funcall(
|
291
|
+
rb_funcall(
|
292
|
+
change_str,
|
293
|
+
rb_intern("concat"),
|
294
|
+
1,
|
295
|
+
add_encode(rb_str_new(&text[copy_head_i], (head_i - copy_head_i)), enc)
|
296
|
+
);
|
275
297
|
}
|
276
298
|
|
277
299
|
word = rb_str_new(&text[head_i], (tail_i - head_i + 1));
|
278
|
-
rb_funcall(
|
300
|
+
rb_funcall(
|
301
|
+
change_str,
|
302
|
+
rb_intern("concat"),
|
303
|
+
1,
|
304
|
+
add_encode(rb_funcall(url_base, rb_intern("%"), 1, rb_assoc_new(word, word)), enc)
|
305
|
+
);
|
279
306
|
i = tail_i;
|
280
307
|
copy_head_i = tail_i + 1;
|
281
308
|
tail_i = -1;
|
@@ -291,7 +318,12 @@ static VALUE t_filter_hrml(VALUE self, VALUE str)
|
|
291
318
|
if (copy_head_i == 0) {
|
292
319
|
return str;
|
293
320
|
} else {
|
294
|
-
rb_funcall(
|
321
|
+
rb_funcall(
|
322
|
+
change_str,
|
323
|
+
rb_intern("concat"),
|
324
|
+
1,
|
325
|
+
add_encode(rb_str_new(&text[copy_head_i], (total_len - copy_head_i)), enc)
|
326
|
+
);
|
295
327
|
return change_str;
|
296
328
|
}
|
297
329
|
}
|
@@ -29,7 +29,7 @@ typedef char bool;
|
|
29
29
|
|
30
30
|
#define EMPTY_STRING ""
|
31
31
|
#define LINK_URL_VARIABLE "@link_url"
|
32
|
-
#define DEAULT_LINK_URL "<a href
|
32
|
+
#define DEAULT_LINK_URL "<a href='http://ja.wikipedia.org/wiki/%s'>%s</a>"
|
33
33
|
|
34
34
|
// node is 1 byte character
|
35
35
|
typedef struct _node {
|
@@ -57,6 +57,9 @@ node search_child_or_create(node, char);
|
|
57
57
|
// free memory all child and self
|
58
58
|
void destroy_node(node);
|
59
59
|
|
60
|
+
// add encoding info
|
61
|
+
static VALUE add_encode(VALUE, rb_encoding *);
|
62
|
+
|
60
63
|
//-----------------------------------------------------------
|
61
64
|
// Ruby Methods
|
62
65
|
// ----------------------------------------------------------
|
data/lib/word_scoop.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)) unless
|
2
2
|
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
3
|
|
4
|
-
require 'word_scoop.
|
5
|
-
class WordScoop
|
6
|
-
VERSION = '2.0.0'
|
4
|
+
# Load the compiled extension without naming a platform-specific suffix.
# `require` finds the right shared-library file (.bundle, .so, .dll) itself,
# whereas a hard-coded ".bundle" only resolves on macOS.
require 'word_scoop/word_scoop'
|
7
5
|
|
6
|
+
# Ruby-side additions to the WordScoop class.
class WordScoop
  # Reader and writer for the @link_url instance variable.
  # NOTE(review): presumably the "%s" link template that filter_html wraps
  # around matched keywords — confirm against the C extension.
  attr_accessor :link_url
end
|
10
9
|
|
data/spec/unit/word_scoop_spec.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Behavioural spec for WordScoop: keyword pickup and HTML link filtering,
# exercised with both an ASCII and a multibyte (Japanese) keyword.
describe WordScoop do
  # Sample sentence containing each registered keyword; "ninja" appears twice.
  let(:text) { "I am a ninja. 私は忍者です。Are you a ninja?" }

  before(:each) do
    keywords = %w|ninja 忍者|
    @tree = WordScoop.new(keywords)
  end

  context "#search" do
    it "pickup keywords" do
      pickup = @tree.search(text)
      # Matches are expected in order of appearance, repeats included.
      expect(pickup).to eq(%w|ninja 忍者 ninja|)
    end
  end

  context "#filter_html" do
    it "add link to keywords" do
      html = @tree.filter_html(text)
      # Build the expected markup by wrapping every keyword occurrence.
      expect(html).to eq(
        text.gsub(/ninja|忍者/) do |keyword|
          "<a href='http://ja.wikipedia.org/wiki/#{keyword}'>#{keyword}</a>"
        end
      )
    end
  end
end
|
data/word_scoop.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'word_scoop/version'
|
5
|
+
|
6
|
+
# Gem packaging manifest: identity, file list (taken from git), the native
# extension to build at install time, and development-only dependencies.
Gem::Specification.new do |gem|
  # Identity
  gem.name     = "word_scoop"
  gem.version  = WordScoop::VERSION
  gem.authors  = ["Tsukasa OISHI"]
  gem.email    = ["tsukasa.oishi@gmail.com"]
  gem.summary  = "WordScoop will pick up keywords that have been pre-registered from the text."
  gem.description = "WordScoop will pick up keywords that have been pre-registered from the text."
  gem.homepage = "https://github.com/tsukasaoishi/word_scoop"
  gem.license  = "MIT"

  # Packaged files: everything tracked by git (NUL-separated listing).
  gem.files       = `git ls-files -z`.split("\x0")
  gem.executables = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files  = gem.files.grep(%r{^(test|spec|features)/})

  # Load paths and the native extension built on install.
  gem.require_paths = ["lib", "ext"]
  gem.extensions    = ["ext/word_scoop/extconf.rb"]

  # Development-only tooling, declared in the listed order.
  {
    "bundler"       => "~> 1.6",
    "rake"          => "~> 10.0",
    "rspec"         => "~> 2.14",
    "rake-compiler" => "~> 0.9"
  }.each do |name, requirement|
    gem.add_development_dependency name, requirement
  end
end
|
metadata
CHANGED
@@ -1,105 +1,122 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: word_scoop
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
segments:
|
6
|
-
- 2
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
version: 2.0.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.1.0
|
10
5
|
platform: ruby
|
11
|
-
authors:
|
6
|
+
authors:
|
12
7
|
- Tsukasa OISHI
|
13
8
|
autorequire:
|
14
9
|
bindir: bin
|
15
10
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
11
|
+
date: 2014-06-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
22
35
|
prerelease: false
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.14'
|
33
48
|
type: :development
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: hoe
|
37
49
|
prerelease: false
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.14'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake-compiler
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.9'
|
48
62
|
type: :development
|
49
|
-
|
50
|
-
|
51
|
-
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.9'
|
69
|
+
description: WordScoop will pick up keywords that have been pre-registered from the
|
70
|
+
text.
|
71
|
+
email:
|
52
72
|
- tsukasa.oishi@gmail.com
|
53
73
|
executables: []
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
-
|
59
|
-
-
|
60
|
-
- README.rdoc
|
61
|
-
files:
|
74
|
+
extensions:
|
75
|
+
- ext/word_scoop/extconf.rb
|
76
|
+
extra_rdoc_files: []
|
77
|
+
files:
|
78
|
+
- ".gitignore"
|
79
|
+
- Gemfile
|
62
80
|
- History.txt
|
63
81
|
- Manifest.txt
|
64
|
-
- README.rdoc
|
82
|
+
- README.md
|
65
83
|
- Rakefile
|
84
|
+
- benchmark/article.txt
|
85
|
+
- benchmark/keywords.txt
|
86
|
+
- benchmark/measure.rb
|
87
|
+
- ext/word_scoop/extconf.rb
|
88
|
+
- ext/word_scoop/word_scoop.c
|
89
|
+
- ext/word_scoop/word_scoop.h
|
66
90
|
- lib/word_scoop.rb
|
67
|
-
-
|
68
|
-
-
|
69
|
-
-
|
70
|
-
|
71
|
-
homepage:
|
72
|
-
licenses:
|
73
|
-
|
91
|
+
- lib/word_scoop/version.rb
|
92
|
+
- spec/spec_helper.rb
|
93
|
+
- spec/unit/word_scoop_spec.rb
|
94
|
+
- word_scoop.gemspec
|
95
|
+
homepage: https://github.com/tsukasaoishi/word_scoop
|
96
|
+
licenses:
|
97
|
+
- MIT
|
98
|
+
metadata: {}
|
74
99
|
post_install_message:
|
75
|
-
rdoc_options:
|
76
|
-
|
77
|
-
- README.rdoc
|
78
|
-
require_paths:
|
100
|
+
rdoc_options: []
|
101
|
+
require_paths:
|
79
102
|
- lib
|
80
103
|
- ext
|
81
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
-
|
83
|
-
requirements:
|
104
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
105
|
+
requirements:
|
84
106
|
- - ">="
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
-
none: false
|
91
|
-
requirements:
|
107
|
+
- !ruby/object:Gem::Version
|
108
|
+
version: '0'
|
109
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
92
111
|
- - ">="
|
93
|
-
- !ruby/object:Gem::Version
|
94
|
-
|
95
|
-
- 0
|
96
|
-
version: "0"
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
97
114
|
requirements: []
|
98
|
-
|
99
|
-
|
100
|
-
rubygems_version: 1.3.7
|
115
|
+
rubyforge_project:
|
116
|
+
rubygems_version: 2.2.2
|
101
117
|
signing_key:
|
102
|
-
specification_version:
|
103
|
-
summary: WordScoop
|
104
|
-
test_files:
|
105
|
-
|
118
|
+
specification_version: 4
|
119
|
+
summary: WordScoop will pick up keywords that have been pre-registered from the text.
|
120
|
+
test_files:
|
121
|
+
- spec/spec_helper.rb
|
122
|
+
- spec/unit/word_scoop_spec.rb
|
data/README.rdoc
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
= WordScoop
|
2
|
-
|
3
|
-
= Description
|
4
|
-
WordScoop is a library that searches for keywords in text.
|
5
|
-
|
6
|
-
= How to
|
7
|
-
|
8
|
-
== Register keywords
|
9
|
-
keywords = WordScoop.new(["Ruby", "Rails"])
|
10
|
-
|
11
|
-
== Add keyword
|
12
|
-
keywords << "Tsukasa"
|
13
|
-
|
14
|
-
== Keyword in the text is picked up
|
15
|
-
keywords.search("I Love Ruby") #=> ["Ruby"]
|
16
|
-
|
17
|
-
== HTML text support
|
18
|
-
|
19
|
-
=== URL is registered
|
20
|
-
|
21
|
-
keywords.link_url = %Q|<a href="http://ja.wikipedia.org/wiki/%s">%s</a>|
|
22
|
-
(Default is %Q|<a href="http://www.kaeruspoon.net/keywords/%s">%s</a>|)
|
23
|
-
|
24
|
-
=== Keyword in the text is enclosed with HTML 'a' tag
|
25
|
-
|
26
|
-
keywords.filter_html("I Love Ruby") #=> %Q|I Love <a href="http://ja.wikipedia.org/wiki/Ruby">Ruby</a>|
|
27
|
-
|
28
|
-
|
29
|
-
== INSTALL:
|
30
|
-
|
31
|
-
sudo gem install word_scoop
|
32
|
-
|
33
|
-
== LICENSE:
|
34
|
-
|
35
|
-
WordScoop is released under the MIT license.
|