word_scoop 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ # coding: utf-8
2
+ require 'benchmark'
3
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
4
+ require 'word_scoop'
5
+
6
+ class WordScoopBenchmark
7
+ def initialize
8
+ keywords = []
9
+ File.open(File.expand_path("../keywords.txt", __FILE__), "r") do |f|
10
+ f.each do |line|
11
+ keywords << line.strip
12
+ end
13
+ end
14
+
15
+ @keywords = keywords.uniq
16
+ @article = File.read(File.expand_path("../article.txt", __FILE__))
17
+
18
+ puts "keywords size\t#{@keywords.size}"
19
+ puts "article size\t#{@article.size}\n\n"
20
+ end
21
+
22
+ def run
23
+ keywords_size = @keywords.size
24
+
25
+ Benchmark.bm(7, "avg") do |x|
26
+ tree = nil
27
+ regist_ms = x.report("register") { tree = WordScoop.new(@keywords) }
28
+ searcg_ms = x.report("search") { 1000.times{ tree.search(@article) }}
29
+
30
+ regist_avg = regist_ms.real * 1000 * 1000 / keywords_size
31
+ puts "1 word regist avg\t#{"%.03f" % regist_avg} µs"
32
+ puts "search avg\t\t#{"%.03f" % searcg_ms.real} ms"
33
+ end
34
+ end
35
+ end
36
+
37
+ WordScoopBenchmark.new.run if File.basename($PROGRAM_NAME) == File.basename(__FILE__)
File without changes
@@ -10,6 +10,7 @@
10
10
  #include <stdlib.h>
11
11
  #include <string.h>
12
12
  #include <ruby.h>
13
+ #include <ruby/encoding.h>
13
14
  #include "word_scoop.h"
14
15
 
15
16
 
@@ -86,6 +87,13 @@ void destroy_node(node n)
86
87
  free(n);
87
88
  }
88
89
 
90
+ // associate the given encoding with the Ruby string and return the string
91
+ static VALUE add_encode(VALUE str, rb_encoding *enc)
92
+ {
93
+ rb_enc_associate(str, enc);
94
+ return str;
95
+ }
96
+
89
97
  //-----------------------------------------------------------
90
98
  // Ruby Methods
91
99
  // ----------------------------------------------------------
@@ -154,8 +162,10 @@ static VALUE t_search(VALUE self, VALUE str)
154
162
  char *text;
155
163
  int i, head_i, tail_i, total_len;
156
164
  VALUE array;
165
+ rb_encoding *enc;
157
166
 
158
167
  array = rb_ary_new();
168
+ enc = rb_enc_get(str);
159
169
  text = StringValuePtr(str);
160
170
 
161
171
  Data_Get_Struct(self, struct _node, root);
@@ -180,7 +190,12 @@ static VALUE t_search(VALUE self, VALUE str)
180
190
  } else {
181
191
  if (head_i != -1) {
182
192
  if (tail_i != -1) {
183
- rb_funcall(array, rb_intern("push"), 1, rb_str_new(&text[head_i], (tail_i - head_i + 1)));
193
+ rb_funcall(
194
+ array,
195
+ rb_intern("push"),
196
+ 1,
197
+ add_encode(rb_str_new(&text[head_i], (tail_i - head_i + 1)), enc)
198
+ );
184
199
  i = tail_i;
185
200
  tail_i = -1;
186
201
  } else {
@@ -205,8 +220,10 @@ static VALUE t_filter_hrml(VALUE self, VALUE str)
205
220
  char *text, *inner_tag;
206
221
  int i, head_i, tail_i, copy_head_i, total_len;
207
222
  VALUE change_str, url_base, word;
223
+ rb_encoding *enc;
208
224
 
209
225
  change_str = rb_str_new2(EMPTY_STRING);
226
+ enc = rb_enc_get(str);
210
227
  text = StringValuePtr(str);
211
228
 
212
229
  Data_Get_Struct(self, struct _node, root);
@@ -271,11 +288,21 @@ static VALUE t_filter_hrml(VALUE self, VALUE str)
271
288
  if (head_i != -1) {
272
289
  if (tail_i != -1) {
273
290
  if (copy_head_i < head_i) {
274
- rb_funcall(change_str, rb_intern("concat"), 1, rb_str_new(&text[copy_head_i], (head_i - copy_head_i)));
291
+ rb_funcall(
292
+ change_str,
293
+ rb_intern("concat"),
294
+ 1,
295
+ add_encode(rb_str_new(&text[copy_head_i], (head_i - copy_head_i)), enc)
296
+ );
275
297
  }
276
298
 
277
299
  word = rb_str_new(&text[head_i], (tail_i - head_i + 1));
278
- rb_funcall(change_str, rb_intern("concat"), 1, rb_funcall(url_base, rb_intern("%"), 1, rb_assoc_new(word, word)));
300
+ rb_funcall(
301
+ change_str,
302
+ rb_intern("concat"),
303
+ 1,
304
+ add_encode(rb_funcall(url_base, rb_intern("%"), 1, rb_assoc_new(word, word)), enc)
305
+ );
279
306
  i = tail_i;
280
307
  copy_head_i = tail_i + 1;
281
308
  tail_i = -1;
@@ -291,7 +318,12 @@ static VALUE t_filter_hrml(VALUE self, VALUE str)
291
318
  if (copy_head_i == 0) {
292
319
  return str;
293
320
  } else {
294
- rb_funcall(change_str, rb_intern("concat"), 1, rb_str_new(&text[copy_head_i], (total_len - copy_head_i)));
321
+ rb_funcall(
322
+ change_str,
323
+ rb_intern("concat"),
324
+ 1,
325
+ add_encode(rb_str_new(&text[copy_head_i], (total_len - copy_head_i)), enc)
326
+ );
295
327
  return change_str;
296
328
  }
297
329
  }
@@ -29,7 +29,7 @@ typedef char bool;
29
29
 
30
30
  #define EMPTY_STRING ""
31
31
  #define LINK_URL_VARIABLE "@link_url"
32
- #define DEAULT_LINK_URL "<a href=\"http://www.kaeruspoon.net/keywords/%s\">%s</a>"
32
+ #define DEAULT_LINK_URL "<a href='http://ja.wikipedia.org/wiki/%s'>%s</a>"
33
33
 
34
34
  // node is 1 byte character
35
35
  typedef struct _node {
@@ -57,6 +57,9 @@ node search_child_or_create(node, char);
57
57
  // free memory all child and self
58
58
  void destroy_node(node);
59
59
 
60
+ // associate the given encoding with a Ruby string and return the string
61
+ static VALUE add_encode(VALUE, rb_encoding *);
62
+
60
63
  //-----------------------------------------------------------
61
64
  // Ruby Methods
62
65
  // ----------------------------------------------------------
@@ -0,0 +1,3 @@
1
+ class WordScoop
2
+ VERSION = '2.1.0'
3
+ end
data/lib/word_scoop.rb CHANGED
@@ -1,10 +1,9 @@
1
1
  $:.unshift(File.dirname(__FILE__)) unless
2
2
  $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
3
 
4
- require 'word_scoop.so'
5
- class WordScoop
6
- VERSION = '2.0.0'
4
+ require 'word_scoop/word_scoop.bundle'
7
5
 
6
+ class WordScoop
8
7
  attr_accessor :link_url
9
8
  end
10
9
 
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'word_scoop'
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe WordScoop do
5
+ before(:each) do
6
+ keywords = %w|ninja 忍者|
7
+ @tree = WordScoop.new(keywords)
8
+ end
9
+
10
+ context "#serch" do
11
+ it "pickup keywords" do
12
+ pickup = @tree.search("I am a ninja. 私は忍者です。Are you a ninja?")
13
+ expect(pickup).to eq(%w|ninja 忍者 ninja|)
14
+ end
15
+ end
16
+
17
+ context "#filter_html" do
18
+ it "add link to keywords" do
19
+ text = "I am a ninja. 私は忍者です。Are you a ninja?"
20
+ html = @tree.filter_html(text)
21
+ expect(html).to eq(
22
+ text.gsub(/ninja|忍者/) do |keyword|
23
+ "<a href='http://ja.wikipedia.org/wiki/#{keyword}'>#{keyword}</a>"
24
+ end
25
+ )
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'word_scoop/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "word_scoop"
8
+ spec.version = WordScoop::VERSION
9
+ spec.authors = ["Tsukasa OISHI"]
10
+ spec.email = ["tsukasa.oishi@gmail.com"]
11
+ spec.summary = %q{WordScoop will pick up keywords that have been pre-registered from the text.}
12
+ spec.description = %q{WordScoop will pick up keywords that have been pre-registered from the text.}
13
+ spec.homepage = "https://github.com/tsukasaoishi/word_scoop"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib", "ext"]
20
+ spec.extensions = ["ext/word_scoop/extconf.rb"]
21
+
22
+ spec.add_development_dependency "bundler", "~> 1.6"
23
+ spec.add_development_dependency "rake", '~> 10.0'
24
+ spec.add_development_dependency "rspec", '~> 2.14'
25
+ spec.add_development_dependency "rake-compiler", '~> 0.9'
26
+ end
metadata CHANGED
@@ -1,105 +1,122 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: word_scoop
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 2
7
- - 0
8
- - 1
9
- version: 2.0.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.1.0
10
5
  platform: ruby
11
- authors:
6
+ authors:
12
7
  - Tsukasa OISHI
13
8
  autorequire:
14
9
  bindir: bin
15
10
  cert_chain: []
16
-
17
- date: 2011-01-23 00:00:00 +09:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: newgem
11
+ date: 2014-06-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
22
35
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 1
30
- - 2
31
- - 3
32
- version: 1.2.3
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.14'
33
48
  type: :development
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: hoe
37
49
  prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- segments:
44
- - 1
45
- - 8
46
- - 0
47
- version: 1.8.0
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.14'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake-compiler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.9'
48
62
  type: :development
49
- version_requirements: *id002
50
- description: WordScoop is a library that searching keyword in text.
51
- email:
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.9'
69
+ description: WordScoop will pick up keywords that have been pre-registered from the
70
+ text.
71
+ email:
52
72
  - tsukasa.oishi@gmail.com
53
73
  executables: []
54
-
55
- extensions:
56
- - ext/extconf.rb
57
- extra_rdoc_files:
58
- - History.txt
59
- - Manifest.txt
60
- - README.rdoc
61
- files:
74
+ extensions:
75
+ - ext/word_scoop/extconf.rb
76
+ extra_rdoc_files: []
77
+ files:
78
+ - ".gitignore"
79
+ - Gemfile
62
80
  - History.txt
63
81
  - Manifest.txt
64
- - README.rdoc
82
+ - README.md
65
83
  - Rakefile
84
+ - benchmark/article.txt
85
+ - benchmark/keywords.txt
86
+ - benchmark/measure.rb
87
+ - ext/word_scoop/extconf.rb
88
+ - ext/word_scoop/word_scoop.c
89
+ - ext/word_scoop/word_scoop.h
66
90
  - lib/word_scoop.rb
67
- - ext/extconf.rb
68
- - ext/word_scoop.c
69
- - ext/word_scoop.h
70
- has_rdoc: true
71
- homepage: http://www.kaeruspoon.net/
72
- licenses: []
73
-
91
+ - lib/word_scoop/version.rb
92
+ - spec/spec_helper.rb
93
+ - spec/unit/word_scoop_spec.rb
94
+ - word_scoop.gemspec
95
+ homepage: https://github.com/tsukasaoishi/word_scoop
96
+ licenses:
97
+ - MIT
98
+ metadata: {}
74
99
  post_install_message:
75
- rdoc_options:
76
- - --main
77
- - README.rdoc
78
- require_paths:
100
+ rdoc_options: []
101
+ require_paths:
79
102
  - lib
80
103
  - ext
81
- required_ruby_version: !ruby/object:Gem::Requirement
82
- none: false
83
- requirements:
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
84
106
  - - ">="
85
- - !ruby/object:Gem::Version
86
- segments:
87
- - 0
88
- version: "0"
89
- required_rubygems_version: !ruby/object:Gem::Requirement
90
- none: false
91
- requirements:
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
92
111
  - - ">="
93
- - !ruby/object:Gem::Version
94
- segments:
95
- - 0
96
- version: "0"
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
97
114
  requirements: []
98
-
99
- rubyforge_project: word_scoop
100
- rubygems_version: 1.3.7
115
+ rubyforge_project:
116
+ rubygems_version: 2.2.2
101
117
  signing_key:
102
- specification_version: 2
103
- summary: WordScoop is a library that searching keyword in text.
104
- test_files: []
105
-
118
+ specification_version: 4
119
+ summary: WordScoop will pick up keywords that have been pre-registered from the text.
120
+ test_files:
121
+ - spec/spec_helper.rb
122
+ - spec/unit/word_scoop_spec.rb
data/README.rdoc DELETED
@@ -1,35 +0,0 @@
1
- = WordScoop
2
-
3
- = Description
4
- WordScoop is a library that searches for keywords in text.
5
-
6
- = How to
7
-
8
- == Register keywords
9
- keywords = WordScoop.new(["Ruby", "Rails"])
10
-
11
- == Add keyword
12
- keywords << "Tsukasa"
13
-
14
- == Keyword in the text is picked up
15
- keywords.search("I Love Ruby") #=> ["Ruby"]
16
-
17
- == HTML text support
18
-
19
- === URL is registered
20
-
21
- keywords.link_url = %Q|<a href="http://ja.wikipedia.org/wiki/%s">%s</a>|
22
- (Default is %Q|<a href="http://www.kaeruspoon.net/keywords/%s">%s</a>|)
23
-
24
- === Keyword in the text is enclosed with HTML 'a' tag
25
-
26
- keywords.filter_html("I Love Ruby") #=> %Q|I Love <a href="http://ja.wikipedia.org/wiki/Ruby">Ruby</a>|
27
-
28
-
29
- == INSTALL:
30
-
31
- sudo gem install word_scoop
32
-
33
- == LICENSE:
34
-
35
- WordScoop is released under the MIT license.