word_scoop 2.0.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,37 @@
1
+ # coding: utf-8
2
+ require 'benchmark'
3
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
4
+ require 'word_scoop'
5
+
6
# Micro-benchmark for WordScoop: measures how long it takes to register a
# keyword list and how long it takes to search an article for those keywords.
#
# Fixtures are read from keywords.txt (one keyword per line) and article.txt,
# both expected next to this file.
class WordScoopBenchmark
  # Number of times the article is searched during the "search" measurement.
  SEARCH_ITERATIONS = 1000

  # Loads both fixtures and prints their sizes.
  def initialize
    keywords_path = File.expand_path("../keywords.txt", __FILE__)

    # Strip surrounding whitespace/newlines, skip blank lines (an empty string
    # is not a meaningful keyword) and drop duplicates.
    @keywords = File.foreach(keywords_path).map(&:strip).reject(&:empty?).uniq
    @article = File.read(File.expand_path("../article.txt", __FILE__))

    puts "keywords size\t#{@keywords.size}"
    puts "article size\t#{@article.size}\n\n"
  end

  # Runs both measurements. Prints the Benchmark report lines for "register"
  # and "search", followed by the average cost of registering one keyword and
  # of one search over the article.
  def run
    keywords_size = @keywords.size

    # Label width 8 fits the longest label ("register").
    Benchmark.bm(8) do |x|
      tree = nil
      register_tms = x.report("register") { tree = WordScoop.new(@keywords) }
      search_tms = x.report("search") { SEARCH_ITERATIONS.times { tree.search(@article) } }

      # Seconds -> microseconds, averaged over the registered keywords.
      register_avg = register_tms.real * 1000 * 1000 / keywords_size
      # Seconds -> milliseconds, averaged over the search iterations.
      search_avg = search_tms.real * 1000 / SEARCH_ITERATIONS

      puts "1 word regist avg\t#{"%.03f" % register_avg} µs"
      puts "search avg\t\t#{"%.03f" % search_avg} ms"
    end
  end
end
36
+
37
+ WordScoopBenchmark.new.run if File.basename($PROGRAM_NAME) == File.basename(__FILE__)
File without changes
@@ -10,6 +10,7 @@
10
10
  #include <stdlib.h>
11
11
  #include <string.h>
12
12
  #include <ruby.h>
13
+ #include <ruby/encoding.h>
13
14
  #include "word_scoop.h"
14
15
 
15
16
 
@@ -86,6 +87,13 @@ void destroy_node(node n)
86
87
  free(n);
87
88
  }
88
89
 
90
+ // add encoding info
91
+ static VALUE add_encode(VALUE str, rb_encoding *enc)
92
+ {
93
+ rb_enc_associate(str, enc);
94
+ return str;
95
+ }
96
+
89
97
  //-----------------------------------------------------------
90
98
  // Ruby Methods
91
99
  // ----------------------------------------------------------
@@ -154,8 +162,10 @@ static VALUE t_search(VALUE self, VALUE str)
154
162
  char *text;
155
163
  int i, head_i, tail_i, total_len;
156
164
  VALUE array;
165
+ rb_encoding *enc;
157
166
 
158
167
  array = rb_ary_new();
168
+ enc = rb_enc_get(str);
159
169
  text = StringValuePtr(str);
160
170
 
161
171
  Data_Get_Struct(self, struct _node, root);
@@ -180,7 +190,12 @@ static VALUE t_search(VALUE self, VALUE str)
180
190
  } else {
181
191
  if (head_i != -1) {
182
192
  if (tail_i != -1) {
183
- rb_funcall(array, rb_intern("push"), 1, rb_str_new(&text[head_i], (tail_i - head_i + 1)));
193
+ rb_funcall(
194
+ array,
195
+ rb_intern("push"),
196
+ 1,
197
+ add_encode(rb_str_new(&text[head_i], (tail_i - head_i + 1)), enc)
198
+ );
184
199
  i = tail_i;
185
200
  tail_i = -1;
186
201
  } else {
@@ -205,8 +220,10 @@ static VALUE t_filter_hrml(VALUE self, VALUE str)
205
220
  char *text, *inner_tag;
206
221
  int i, head_i, tail_i, copy_head_i, total_len;
207
222
  VALUE change_str, url_base, word;
223
+ rb_encoding *enc;
208
224
 
209
225
  change_str = rb_str_new2(EMPTY_STRING);
226
+ enc = rb_enc_get(str);
210
227
  text = StringValuePtr(str);
211
228
 
212
229
  Data_Get_Struct(self, struct _node, root);
@@ -271,11 +288,21 @@ static VALUE t_filter_hrml(VALUE self, VALUE str)
271
288
  if (head_i != -1) {
272
289
  if (tail_i != -1) {
273
290
  if (copy_head_i < head_i) {
274
- rb_funcall(change_str, rb_intern("concat"), 1, rb_str_new(&text[copy_head_i], (head_i - copy_head_i)));
291
+ rb_funcall(
292
+ change_str,
293
+ rb_intern("concat"),
294
+ 1,
295
+ add_encode(rb_str_new(&text[copy_head_i], (head_i - copy_head_i)), enc)
296
+ );
275
297
  }
276
298
 
277
299
  word = rb_str_new(&text[head_i], (tail_i - head_i + 1));
278
- rb_funcall(change_str, rb_intern("concat"), 1, rb_funcall(url_base, rb_intern("%"), 1, rb_assoc_new(word, word)));
300
+ rb_funcall(
301
+ change_str,
302
+ rb_intern("concat"),
303
+ 1,
304
+ add_encode(rb_funcall(url_base, rb_intern("%"), 1, rb_assoc_new(word, word)), enc)
305
+ );
279
306
  i = tail_i;
280
307
  copy_head_i = tail_i + 1;
281
308
  tail_i = -1;
@@ -291,7 +318,12 @@ static VALUE t_filter_hrml(VALUE self, VALUE str)
291
318
  if (copy_head_i == 0) {
292
319
  return str;
293
320
  } else {
294
- rb_funcall(change_str, rb_intern("concat"), 1, rb_str_new(&text[copy_head_i], (total_len - copy_head_i)));
321
+ rb_funcall(
322
+ change_str,
323
+ rb_intern("concat"),
324
+ 1,
325
+ add_encode(rb_str_new(&text[copy_head_i], (total_len - copy_head_i)), enc)
326
+ );
295
327
  return change_str;
296
328
  }
297
329
  }
@@ -29,7 +29,7 @@ typedef char bool;
29
29
 
30
30
  #define EMPTY_STRING ""
31
31
  #define LINK_URL_VARIABLE "@link_url"
32
- #define DEAULT_LINK_URL "<a href=\"http://www.kaeruspoon.net/keywords/%s\">%s</a>"
32
+ #define DEAULT_LINK_URL "<a href='http://ja.wikipedia.org/wiki/%s'>%s</a>"
33
33
 
34
34
  // node is 1 byte character
35
35
  typedef struct _node {
@@ -57,6 +57,9 @@ node search_child_or_create(node, char);
57
57
  // free memory all child and self
58
58
  void destroy_node(node);
59
59
 
60
+ // add encoding info
61
+ static VALUE add_encode(VALUE, rb_encoding *);
62
+
60
63
  //-----------------------------------------------------------
61
64
  // Ruby Methods
62
65
  // ----------------------------------------------------------
@@ -0,0 +1,3 @@
1
# Reopens WordScoop to record the gem's release number.
class WordScoop
  # Release version of the gem; frozen so it cannot be mutated in place.
  VERSION = '2.1.0'.freeze
end
data/lib/word_scoop.rb CHANGED
@@ -1,10 +1,9 @@
1
1
  $:.unshift(File.dirname(__FILE__)) unless
2
2
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
3
 
4
- require 'word_scoop.so'
5
- class WordScoop
6
- VERSION = '2.0.0'
4
+ require 'word_scoop/word_scoop.bundle'
7
5
 
6
+ class WordScoop
8
7
  attr_accessor :link_url
9
8
  end
10
9
 
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'word_scoop'
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
# Behavioural spec for WordScoop covering one ASCII and one multibyte keyword.
describe WordScoop do
  # Sample sentence shared by every example; contains "ninja" twice and
  # "忍者" once.
  let(:text) { "I am a ninja. 私は忍者です。Are you a ninja?" }

  before(:each) do
    @tree = WordScoop.new(%w|ninja 忍者|)
  end

  context "#search" do
    # Expected result lists each hit in the order it occurs, repeats included.
    it "pickup keywords" do
      expect(@tree.search(text)).to eq(%w|ninja 忍者 ninja|)
    end
  end

  context "#filter_html" do
    # Every keyword occurrence is expected to be wrapped in the default
    # Wikipedia link template.
    it "add link to keywords" do
      linked = text.gsub(/ninja|忍者/) do |keyword|
        "<a href='http://ja.wikipedia.org/wiki/#{keyword}'>#{keyword}</a>"
      end

      expect(@tree.filter_html(text)).to eq(linked)
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'word_scoop/version'
5
+
6
# Packaging metadata for the word_scoop gem, including its native extension.
Gem::Specification.new do |spec|
  tagline = %q{WordScoop will pick up keywords that have been pre-registered from the text.}

  spec.name     = "word_scoop"
  spec.version  = WordScoop::VERSION
  spec.authors  = ["Tsukasa OISHI"]
  spec.email    = ["tsukasa.oishi@gmail.com"]
  spec.homepage = "https://github.com/tsukasaoishi/word_scoop"
  spec.license  = "MIT"

  # Summary and description intentionally carry the same sentence.
  spec.summary     = tagline
  spec.description = tagline

  # Package every file tracked by git (NUL-separated listing).
  spec.files       = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files  = spec.files.grep(%r{^(test|spec|features)/})

  # The C extension is built from ext/ at install time.
  spec.require_paths = ["lib", "ext"]
  spec.extensions    = ["ext/word_scoop/extconf.rb"]

  # Development-only tooling, declared in the same order as before.
  {
    "bundler"       => "~> 1.6",
    "rake"          => '~> 10.0',
    "rspec"         => '~> 2.14',
    "rake-compiler" => '~> 0.9'
  }.each do |gem_name, constraint|
    spec.add_development_dependency gem_name, constraint
  end
end
metadata CHANGED
@@ -1,105 +1,122 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: word_scoop
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 2
7
- - 0
8
- - 1
9
- version: 2.0.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.1.0
10
5
  platform: ruby
11
- authors:
6
+ authors:
12
7
  - Tsukasa OISHI
13
8
  autorequire:
14
9
  bindir: bin
15
10
  cert_chain: []
16
-
17
- date: 2011-01-23 00:00:00 +09:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: newgem
11
+ date: 2014-06-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
22
35
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 1
30
- - 2
31
- - 3
32
- version: 1.2.3
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.14'
33
48
  type: :development
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: hoe
37
49
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- segments:
44
- - 1
45
- - 8
46
- - 0
47
- version: 1.8.0
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.14'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake-compiler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.9'
48
62
  type: :development
49
- version_requirements: *id002
50
- description: WordScoop is a library that searching keyword in text.
51
- email:
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.9'
69
+ description: WordScoop will pick up keywords that have been pre-registered from the
70
+ text.
71
+ email:
52
72
  - tsukasa.oishi@gmail.com
53
73
  executables: []
54
-
55
- extensions:
56
- - ext/extconf.rb
57
- extra_rdoc_files:
58
- - History.txt
59
- - Manifest.txt
60
- - README.rdoc
61
- files:
74
+ extensions:
75
+ - ext/word_scoop/extconf.rb
76
+ extra_rdoc_files: []
77
+ files:
78
+ - ".gitignore"
79
+ - Gemfile
62
80
  - History.txt
63
81
  - Manifest.txt
64
- - README.rdoc
82
+ - README.md
65
83
  - Rakefile
84
+ - benchmark/article.txt
85
+ - benchmark/keywords.txt
86
+ - benchmark/measure.rb
87
+ - ext/word_scoop/extconf.rb
88
+ - ext/word_scoop/word_scoop.c
89
+ - ext/word_scoop/word_scoop.h
66
90
  - lib/word_scoop.rb
67
- - ext/extconf.rb
68
- - ext/word_scoop.c
69
- - ext/word_scoop.h
70
- has_rdoc: true
71
- homepage: http://www.kaeruspoon.net/
72
- licenses: []
73
-
91
+ - lib/word_scoop/version.rb
92
+ - spec/spec_helper.rb
93
+ - spec/unit/word_scoop_spec.rb
94
+ - word_scoop.gemspec
95
+ homepage: https://github.com/tsukasaoishi/word_scoop
96
+ licenses:
97
+ - MIT
98
+ metadata: {}
74
99
  post_install_message:
75
- rdoc_options:
76
- - --main
77
- - README.rdoc
78
- require_paths:
100
+ rdoc_options: []
101
+ require_paths:
79
102
  - lib
80
103
  - ext
81
- required_ruby_version: !ruby/object:Gem::Requirement
82
- none: false
83
- requirements:
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
84
106
  - - ">="
85
- - !ruby/object:Gem::Version
86
- segments:
87
- - 0
88
- version: "0"
89
- required_rubygems_version: !ruby/object:Gem::Requirement
90
- none: false
91
- requirements:
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
92
111
  - - ">="
93
- - !ruby/object:Gem::Version
94
- segments:
95
- - 0
96
- version: "0"
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
97
114
  requirements: []
98
-
99
- rubyforge_project: word_scoop
100
- rubygems_version: 1.3.7
115
+ rubyforge_project:
116
+ rubygems_version: 2.2.2
101
117
  signing_key:
102
- specification_version: 2
103
- summary: WordScoop is a library that searching keyword in text.
104
- test_files: []
105
-
118
+ specification_version: 4
119
+ summary: WordScoop will pick up keywords that have been pre-registered from the text.
120
+ test_files:
121
+ - spec/spec_helper.rb
122
+ - spec/unit/word_scoop_spec.rb
data/README.rdoc DELETED
@@ -1,35 +0,0 @@
1
- = WordScoop
2
-
3
- = Description
4
- WordScoop is a library that searching keyword in text.
5
-
6
- = How to
7
-
8
- == Register keywords
9
- keywords = WordScoop.new(["Ruby", "Rails"])
10
-
11
- == Add keyword
12
- keywords << "Tsukasa"
13
-
14
- == Keyword in the text is picked up
15
- keywords.search("I Love Ruby") #=> ["Ruby"]
16
-
17
- == HTML text support
18
-
19
- === URL is registered
20
-
21
- keyword.link_url = %Q|<a href="http://ja.wikipedia.org/wiki/%s">%s</a>|
22
- (Default is %Q|http://www.kaeruspoon.net/keywords/%s>%s</a>|)
23
-
24
- === Keyword in the text is enclosed with HTML 'a' tag
25
-
26
- keywords.filter_html("I Love Ruby") #=> %Q|I Love <a href="http://ja.wikipedia.org/wiki/Ruby">Ruby</a>|
27
-
28
-
29
- == INSTALL:
30
-
31
- sudo gem install word_scoop
32
-
33
- == LICENSE:
34
-
35
- WordScoop is released under the MIT license.