word_scoop 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +22 -0
- data/Manifest.txt +8 -0
- data/README.rdoc +199 -0
- data/Rakefile +28 -0
- data/ext/extconf.rb +2 -0
- data/ext/word_scoop.c +311 -0
- data/ext/word_scoop.h +79 -0
- data/lib/word_scoop.rb +10 -0
- metadata +86 -0
data/History.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
== 0.0.1 2008-08-30
|
2
|
+
|
3
|
+
* 1 major enhancement:
|
4
|
+
* Initial release
|
5
|
+
|
6
|
+
== 1.0.1 2009-03-07
|
7
|
+
|
8
|
+
* 1 minor enhancement:
|
9
|
+
* move to GitHub from Rubyforge
|
10
|
+
|
11
|
+
== 1.0.2 2009-09-27
|
12
|
+
* 1 minor enhancement:
|
13
|
+
* bug fix.
|
14
|
+
|
15
|
+
== 1.1 2009-10-03
|
16
|
+
* 1 major enhancement:
|
17
|
+
* Rewritten entirely in C
|
18
|
+
|
19
|
+
== 2.0 2009-11-21
|
20
|
+
* 1 major enhancement:
|
21
|
+
* The library name is changed from Kaerukeyword to WordScoop.
|
22
|
+
* filter_html method: encloses matched words in an 'a' tag.
|
data/Manifest.txt
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,199 @@
|
|
1
|
+
= WordScoop
|
2
|
+
|
3
|
+
= Description
|
4
|
+
WordScoop is a library that searches for keywords in text.
|
5
|
+
|
6
|
+
= How to
|
7
|
+
|
8
|
+
== Register keywords
|
9
|
+
keywords = WordScoop.new(["Ruby", "Rails"])
|
10
|
+
|
11
|
+
== Add keyword
|
12
|
+
keywords << "Tsukasa"
|
13
|
+
|
14
|
+
== Keyword in the text is picked up
|
15
|
+
keywords.search("I Love Ruby") #=> ["Ruby"]
|
16
|
+
|
17
|
+
== HTML text support
|
18
|
+
|
19
|
+
=== URL is registered
|
20
|
+
|
21
|
+
keywords.link_url = %Q|<a href="http://ja.wikipedia.org/wiki/%s">%s</a>|
|
22
|
+
(Default is %Q|<a href="http://www.kaeruspoon.net/keywords/%s">%s</a>|)
|
23
|
+
|
24
|
+
=== Keyword in the text is enclosed with HTML 'a' tag
|
25
|
+
|
26
|
+
keywords.filter_html("I Love Ruby") #=> %Q|I Love <a href="http://ja.wikipedia.org/wiki/Ruby">Ruby</a>|
|
27
|
+
|
28
|
+
|
29
|
+
== INSTALL:
|
30
|
+
|
31
|
+
sudo gem install word_scoop
|
32
|
+
|
33
|
+
== LICENSE:
|
34
|
+
|
35
|
+
GNU LESSER GENERAL PUBLIC LICENSE
|
36
|
+
Version 3, 29 June 2007
|
37
|
+
|
38
|
+
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
39
|
+
Everyone is permitted to copy and distribute verbatim copies
|
40
|
+
of this license document, but changing it is not allowed.
|
41
|
+
|
42
|
+
|
43
|
+
This version of the GNU Lesser General Public License incorporates
|
44
|
+
the terms and conditions of version 3 of the GNU General Public
|
45
|
+
License, supplemented by the additional permissions listed below.
|
46
|
+
|
47
|
+
0. Additional Definitions.
|
48
|
+
|
49
|
+
As used herein, "this License" refers to version 3 of the GNU Lesser
|
50
|
+
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
51
|
+
General Public License.
|
52
|
+
|
53
|
+
"The Library" refers to a covered work governed by this License,
|
54
|
+
other than an Application or a Combined Work as defined below.
|
55
|
+
|
56
|
+
An "Application" is any work that makes use of an interface provided
|
57
|
+
by the Library, but which is not otherwise based on the Library.
|
58
|
+
Defining a subclass of a class defined by the Library is deemed a mode
|
59
|
+
of using an interface provided by the Library.
|
60
|
+
|
61
|
+
A "Combined Work" is a work produced by combining or linking an
|
62
|
+
Application with the Library. The particular version of the Library
|
63
|
+
with which the Combined Work was made is also called the "Linked
|
64
|
+
Version".
|
65
|
+
|
66
|
+
The "Minimal Corresponding Source" for a Combined Work means the
|
67
|
+
Corresponding Source for the Combined Work, excluding any source code
|
68
|
+
for portions of the Combined Work that, considered in isolation, are
|
69
|
+
based on the Application, and not on the Linked Version.
|
70
|
+
|
71
|
+
The "Corresponding Application Code" for a Combined Work means the
|
72
|
+
object code and/or source code for the Application, including any data
|
73
|
+
and utility programs needed for reproducing the Combined Work from the
|
74
|
+
Application, but excluding the System Libraries of the Combined Work.
|
75
|
+
|
76
|
+
1. Exception to Section 3 of the GNU GPL.
|
77
|
+
|
78
|
+
You may convey a covered work under sections 3 and 4 of this License
|
79
|
+
without being bound by section 3 of the GNU GPL.
|
80
|
+
|
81
|
+
2. Conveying Modified Versions.
|
82
|
+
|
83
|
+
If you modify a copy of the Library, and, in your modifications, a
|
84
|
+
facility refers to a function or data to be supplied by an Application
|
85
|
+
that uses the facility (other than as an argument passed when the
|
86
|
+
facility is invoked), then you may convey a copy of the modified
|
87
|
+
version:
|
88
|
+
|
89
|
+
a) under this License, provided that you make a good faith effort to
|
90
|
+
ensure that, in the event an Application does not supply the
|
91
|
+
function or data, the facility still operates, and performs
|
92
|
+
whatever part of its purpose remains meaningful, or
|
93
|
+
|
94
|
+
b) under the GNU GPL, with none of the additional permissions of
|
95
|
+
this License applicable to that copy.
|
96
|
+
|
97
|
+
3. Object Code Incorporating Material from Library Header Files.
|
98
|
+
|
99
|
+
The object code form of an Application may incorporate material from
|
100
|
+
a header file that is part of the Library. You may convey such object
|
101
|
+
code under terms of your choice, provided that, if the incorporated
|
102
|
+
material is not limited to numerical parameters, data structure
|
103
|
+
layouts and accessors, or small macros, inline functions and templates
|
104
|
+
(ten or fewer lines in length), you do both of the following:
|
105
|
+
|
106
|
+
a) Give prominent notice with each copy of the object code that the
|
107
|
+
Library is used in it and that the Library and its use are
|
108
|
+
covered by this License.
|
109
|
+
|
110
|
+
b) Accompany the object code with a copy of the GNU GPL and this license
|
111
|
+
document.
|
112
|
+
|
113
|
+
4. Combined Works.
|
114
|
+
|
115
|
+
You may convey a Combined Work under terms of your choice that,
|
116
|
+
taken together, effectively do not restrict modification of the
|
117
|
+
portions of the Library contained in the Combined Work and reverse
|
118
|
+
engineering for debugging such modifications, if you also do each of
|
119
|
+
the following:
|
120
|
+
|
121
|
+
a) Give prominent notice with each copy of the Combined Work that
|
122
|
+
the Library is used in it and that the Library and its use are
|
123
|
+
covered by this License.
|
124
|
+
|
125
|
+
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
126
|
+
document.
|
127
|
+
|
128
|
+
c) For a Combined Work that displays copyright notices during
|
129
|
+
execution, include the copyright notice for the Library among
|
130
|
+
these notices, as well as a reference directing the user to the
|
131
|
+
copies of the GNU GPL and this license document.
|
132
|
+
|
133
|
+
d) Do one of the following:
|
134
|
+
|
135
|
+
0) Convey the Minimal Corresponding Source under the terms of this
|
136
|
+
License, and the Corresponding Application Code in a form
|
137
|
+
suitable for, and under terms that permit, the user to
|
138
|
+
recombine or relink the Application with a modified version of
|
139
|
+
the Linked Version to produce a modified Combined Work, in the
|
140
|
+
manner specified by section 6 of the GNU GPL for conveying
|
141
|
+
Corresponding Source.
|
142
|
+
|
143
|
+
1) Use a suitable shared library mechanism for linking with the
|
144
|
+
Library. A suitable mechanism is one that (a) uses at run time
|
145
|
+
a copy of the Library already present on the user's computer
|
146
|
+
system, and (b) will operate properly with a modified version
|
147
|
+
of the Library that is interface-compatible with the Linked
|
148
|
+
Version.
|
149
|
+
|
150
|
+
e) Provide Installation Information, but only if you would otherwise
|
151
|
+
be required to provide such information under section 6 of the
|
152
|
+
GNU GPL, and only to the extent that such information is
|
153
|
+
necessary to install and execute a modified version of the
|
154
|
+
Combined Work produced by recombining or relinking the
|
155
|
+
Application with a modified version of the Linked Version. (If
|
156
|
+
you use option 4d0, the Installation Information must accompany
|
157
|
+
the Minimal Corresponding Source and Corresponding Application
|
158
|
+
Code. If you use option 4d1, you must provide the Installation
|
159
|
+
Information in the manner specified by section 6 of the GNU GPL
|
160
|
+
for conveying Corresponding Source.)
|
161
|
+
|
162
|
+
5. Combined Libraries.
|
163
|
+
|
164
|
+
You may place library facilities that are a work based on the
|
165
|
+
Library side by side in a single library together with other library
|
166
|
+
facilities that are not Applications and are not covered by this
|
167
|
+
License, and convey such a combined library under terms of your
|
168
|
+
choice, if you do both of the following:
|
169
|
+
|
170
|
+
a) Accompany the combined library with a copy of the same work based
|
171
|
+
on the Library, uncombined with any other library facilities,
|
172
|
+
conveyed under the terms of this License.
|
173
|
+
|
174
|
+
b) Give prominent notice with the combined library that part of it
|
175
|
+
is a work based on the Library, and explaining where to find the
|
176
|
+
accompanying uncombined form of the same work.
|
177
|
+
|
178
|
+
6. Revised Versions of the GNU Lesser General Public License.
|
179
|
+
|
180
|
+
The Free Software Foundation may publish revised and/or new versions
|
181
|
+
of the GNU Lesser General Public License from time to time. Such new
|
182
|
+
versions will be similar in spirit to the present version, but may
|
183
|
+
differ in detail to address new problems or concerns.
|
184
|
+
|
185
|
+
Each version is given a distinguishing version number. If the
|
186
|
+
Library as you received it specifies that a certain numbered version
|
187
|
+
of the GNU Lesser General Public License "or any later version"
|
188
|
+
applies to it, you have the option of following the terms and
|
189
|
+
conditions either of that published version or of any later version
|
190
|
+
published by the Free Software Foundation. If the Library as you
|
191
|
+
received it does not specify a version number of the GNU Lesser
|
192
|
+
General Public License, you may choose any version of the GNU Lesser
|
193
|
+
General Public License ever published by the Free Software Foundation.
|
194
|
+
|
195
|
+
If the Library as you received it specifies that a proxy can decide
|
196
|
+
whether future versions of the GNU Lesser General Public License shall
|
197
|
+
apply, that proxy's public statement of acceptance of any version is
|
198
|
+
permanent authorization for you to choose that version for the
|
199
|
+
Library.
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
%w[rubygems rake rake/clean fileutils newgem rubigen].each { |f| require f }
# The library was renamed from Kaerukeyword to WordScoop; lib/kaerukeyword is
# not shipped with this gem, and WordScoop::VERSION is read below.
# NOTE(review): presumably lib/word_scoop.rb defines WordScoop::VERSION - confirm.
require File.dirname(__FILE__) + '/lib/word_scoop'

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
$hoe = Hoe.new('word_scoop', WordScoop::VERSION) do |p|
  p.developer('Tsukasa OISHI', 'tsukasa.oishi@gmail.com')
  p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
  p.rubyforge_name = p.name # TODO this is default value
  p.extra_dev_deps = [
    ['newgem', ">= #{::Newgem::VERSION}"]
  ]

  # Must be assigned on the Hoe object: a bare local variable is discarded and
  # the C extension would not be registered in the generated gemspec.
  p.spec_extras = {
    :extensions => ['ext/extconf.rb'],
  }

  p.clean_globs |= %w[**/.DS_Store tmp *.log]
  # Real interpolation here; the escaped form produced the literal text
  # "#{p.rubyforge_name}/#{p.name}" instead of the project path.
  path = (p.rubyforge_name == p.name) ? p.rubyforge_name : "#{p.rubyforge_name}/#{p.name}"
  p.remote_rdoc_dir = File.join(path.gsub(/^#{p.rubyforge_name}\/?/,''), 'rdoc')
  p.rsync_args = '-av --delete --ignore-errors'
end

require 'newgem/tasks' # load /tasks/*.rake
Dir['tasks/**/*.rake'].each { |t| load t }

# TODO - want other tests/tasks run by default? Add them to the list
# task :default => [:spec, :features]
|
data/ext/extconf.rb
ADDED
data/ext/word_scoop.c
ADDED
@@ -0,0 +1,311 @@
|
|
1
|
+
//************************************
|
2
|
+
// word_scoop.c
|
3
|
+
//
|
4
|
+
// Tsukasa OISHI
|
5
|
+
//
|
6
|
+
// 2009/11/21
|
7
|
+
//************************************
|
8
|
+
|
9
|
+
#include <stdio.h>
|
10
|
+
#include <stdlib.h>
|
11
|
+
#include <string.h>
|
12
|
+
#include <ruby.h>
|
13
|
+
#include "word_scoop.h"
|
14
|
+
|
15
|
+
|
16
|
+
// initialize node
|
17
|
+
node initialize_node(char moji)
|
18
|
+
{
|
19
|
+
node work = (node)malloc(sizeof(struct _node));
|
20
|
+
if (!work) {
|
21
|
+
// except
|
22
|
+
rb_raise(rb_eStandardError, "memory is not enough");
|
23
|
+
}
|
24
|
+
|
25
|
+
work->moji = moji;
|
26
|
+
work->end_flag = false;
|
27
|
+
work->child_head = NULL;
|
28
|
+
work->next = NULL;
|
29
|
+
|
30
|
+
return work;
|
31
|
+
}
|
32
|
+
|
33
|
+
// add child node
|
34
|
+
void add_child(node parent, node child)
|
35
|
+
{
|
36
|
+
if (parent->child_head) {
|
37
|
+
child->next = parent->child_head;
|
38
|
+
}
|
39
|
+
parent->child_head = child;
|
40
|
+
}
|
41
|
+
|
42
|
+
// search node by use character
|
43
|
+
node search_child(node n, char moji)
|
44
|
+
{
|
45
|
+
node child;
|
46
|
+
|
47
|
+
child = n->child_head;
|
48
|
+
while(child) {
|
49
|
+
if (child->moji == moji) {
|
50
|
+
break;
|
51
|
+
}
|
52
|
+
child = child->next;
|
53
|
+
}
|
54
|
+
|
55
|
+
return child;
|
56
|
+
}
|
57
|
+
|
58
|
+
// search node by use character.
|
59
|
+
// if nothing, create new node
|
60
|
+
node search_child_or_create(node n, char moji)
|
61
|
+
{
|
62
|
+
node child;
|
63
|
+
|
64
|
+
child = search_child(n, moji);
|
65
|
+
if(!child) {
|
66
|
+
child = initialize_node(moji);
|
67
|
+
add_child(n, child);
|
68
|
+
}
|
69
|
+
|
70
|
+
return child;
|
71
|
+
}
|
72
|
+
|
73
|
+
// free memory all child and self
|
74
|
+
void destroy_node(node n)
|
75
|
+
{
|
76
|
+
int i;
|
77
|
+
node now, next;
|
78
|
+
|
79
|
+
now = n->child_head;
|
80
|
+
while(now) {
|
81
|
+
next = now->next;
|
82
|
+
destroy_node(now);
|
83
|
+
now = next;
|
84
|
+
}
|
85
|
+
|
86
|
+
free(n);
|
87
|
+
}
|
88
|
+
|
89
|
+
//-----------------------------------------------------------
|
90
|
+
// Ruby Methods
|
91
|
+
// ----------------------------------------------------------
|
92
|
+
|
93
|
+
/**
|
94
|
+
* new
|
95
|
+
**/
|
96
|
+
static VALUE t_new(int argc, VALUE *argv, VALUE klass)
|
97
|
+
{
|
98
|
+
node root;
|
99
|
+
VALUE obj, array, string;
|
100
|
+
|
101
|
+
root = initialize_node(NULL_CHAR);
|
102
|
+
|
103
|
+
obj = Data_Make_Struct(klass, struct _node, NULL, destroy_node, root);
|
104
|
+
|
105
|
+
if (argc == 1) {
|
106
|
+
array = argv[0];
|
107
|
+
while((string = rb_ary_shift(argv[0])) != Qnil) {
|
108
|
+
t_add(obj, string);
|
109
|
+
}
|
110
|
+
}
|
111
|
+
|
112
|
+
return obj;
|
113
|
+
}
|
114
|
+
|
115
|
+
/**
|
116
|
+
* add
|
117
|
+
**/
|
118
|
+
static VALUE t_add(VALUE self, VALUE str)
|
119
|
+
{
|
120
|
+
node root, now;
|
121
|
+
char *keyword;
|
122
|
+
int i, len;
|
123
|
+
|
124
|
+
keyword = STR2CSTR(str);
|
125
|
+
|
126
|
+
len = strlen(keyword);
|
127
|
+
while(keyword[len - 1] == CR || keyword[len - 1] == LF ||
|
128
|
+
keyword[len - 1] == TAB || keyword[len - 1] == SPACE) {
|
129
|
+
len--;
|
130
|
+
}
|
131
|
+
|
132
|
+
if (len < 1) {
|
133
|
+
return Qfalse;
|
134
|
+
}
|
135
|
+
|
136
|
+
Data_Get_Struct(self, struct _node, root);
|
137
|
+
now = root;
|
138
|
+
|
139
|
+
for(i = 0; i < len; i++) {
|
140
|
+
now = search_child_or_create(now, keyword[i]);
|
141
|
+
}
|
142
|
+
|
143
|
+
now->end_flag = true;
|
144
|
+
|
145
|
+
return str;
|
146
|
+
}
|
147
|
+
|
148
|
+
/**
|
149
|
+
* search
|
150
|
+
**/
|
151
|
+
static VALUE t_search(VALUE self, VALUE str)
|
152
|
+
{
|
153
|
+
node root, now, ret;
|
154
|
+
char *text;
|
155
|
+
int i, head_i, tail_i, total_len;
|
156
|
+
VALUE array;
|
157
|
+
|
158
|
+
array = rb_ary_new();
|
159
|
+
text = STR2CSTR(str);
|
160
|
+
|
161
|
+
Data_Get_Struct(self, struct _node, root);
|
162
|
+
|
163
|
+
now = root;
|
164
|
+
total_len = strlen(text);
|
165
|
+
head_i = -1;
|
166
|
+
tail_i = -1;
|
167
|
+
|
168
|
+
for(i = 0; i <= total_len; i++) {
|
169
|
+
ret = search_child(now, text[i]);
|
170
|
+
|
171
|
+
if (ret && i != total_len) {
|
172
|
+
if (head_i == -1) {
|
173
|
+
head_i = i;
|
174
|
+
}
|
175
|
+
|
176
|
+
if (ret->end_flag) {
|
177
|
+
tail_i = i;
|
178
|
+
}
|
179
|
+
now = ret;
|
180
|
+
} else {
|
181
|
+
if (head_i != -1) {
|
182
|
+
if (tail_i != -1) {
|
183
|
+
rb_funcall(array, rb_intern("push"), 1, rb_str_new(&text[head_i], (tail_i - head_i + 1)));
|
184
|
+
i = tail_i;
|
185
|
+
tail_i = -1;
|
186
|
+
} else {
|
187
|
+
i = head_i;
|
188
|
+
}
|
189
|
+
head_i = -1;
|
190
|
+
}
|
191
|
+
now = root;
|
192
|
+
}
|
193
|
+
}
|
194
|
+
|
195
|
+
return array;
|
196
|
+
}
|
197
|
+
|
198
|
+
/**
|
199
|
+
* filter_html
|
200
|
+
**/
|
201
|
+
static VALUE t_filter_hrml(VALUE self, VALUE str)
|
202
|
+
{
|
203
|
+
node root, now, ret;
|
204
|
+
bool in_tag;
|
205
|
+
char *text, *inner_tag;
|
206
|
+
int i, head_i, tail_i, copy_head_i, total_len;
|
207
|
+
VALUE change_str, url_base, word;
|
208
|
+
|
209
|
+
change_str = rb_str_new2(EMPTY_STRING);
|
210
|
+
text = STR2CSTR(str);
|
211
|
+
|
212
|
+
Data_Get_Struct(self, struct _node, root);
|
213
|
+
url_base = rb_iv_get(self, LINK_URL_VARIABLE);
|
214
|
+
if (url_base == Qnil) {
|
215
|
+
url_base = rb_str_new2(DEAULT_LINK_URL);
|
216
|
+
}
|
217
|
+
|
218
|
+
now = root;
|
219
|
+
total_len = strlen(text);
|
220
|
+
head_i = -1;
|
221
|
+
tail_i = -1;
|
222
|
+
copy_head_i = 0;
|
223
|
+
in_tag = false;
|
224
|
+
inner_tag = NULL;
|
225
|
+
|
226
|
+
for(i = 0; i <= total_len; i++) {
|
227
|
+
if (!in_tag && text[i] == BEGIN_TAG) {
|
228
|
+
in_tag = true;
|
229
|
+
if (strncasecmp(&text[i + 1], A_TAG, strlen(A_TAG)) == 0) {
|
230
|
+
inner_tag = A_TAG;
|
231
|
+
} else if (strncasecmp(&text[i + 1], SCRIPT_TAG, strlen(SCRIPT_TAG)) == 0) {
|
232
|
+
inner_tag = SCRIPT_TAG;
|
233
|
+
} else if (strncasecmp(&text[i + 1], PRE_TAG, strlen(PRE_TAG)) == 0) {
|
234
|
+
inner_tag = PRE_TAG;
|
235
|
+
} else if (strncasecmp(&text[i + 1], IFRAME_TAG, strlen(IFRAME_TAG)) == 0) {
|
236
|
+
inner_tag = IFRAME_TAG;
|
237
|
+
} else if (strncasecmp(&text[i + 1], OBJECT_TAG, strlen(OBJECT_TAG)) == 0) {
|
238
|
+
inner_tag = OBJECT_TAG;
|
239
|
+
}
|
240
|
+
continue;
|
241
|
+
}
|
242
|
+
|
243
|
+
if (in_tag && !inner_tag && text[i] == END_TAG) {
|
244
|
+
in_tag = false;
|
245
|
+
continue;
|
246
|
+
}
|
247
|
+
|
248
|
+
if (inner_tag && text[i] == BEGIN_TAG) {
|
249
|
+
if (strncasecmp(&text[i + 2], inner_tag, strlen(inner_tag)) == 0) {
|
250
|
+
inner_tag = NULL;
|
251
|
+
continue;
|
252
|
+
}
|
253
|
+
}
|
254
|
+
|
255
|
+
if (in_tag) {
|
256
|
+
continue;
|
257
|
+
}
|
258
|
+
|
259
|
+
ret = search_child(now, text[i]);
|
260
|
+
|
261
|
+
if (ret && i != total_len) {
|
262
|
+
if (head_i == -1) {
|
263
|
+
head_i = i;
|
264
|
+
}
|
265
|
+
|
266
|
+
if (ret->end_flag) {
|
267
|
+
tail_i = i;
|
268
|
+
}
|
269
|
+
now = ret;
|
270
|
+
} else {
|
271
|
+
if (head_i != -1) {
|
272
|
+
if (tail_i != -1) {
|
273
|
+
if (copy_head_i < head_i) {
|
274
|
+
rb_funcall(change_str, rb_intern("concat"), 1, rb_str_new(&text[copy_head_i], (head_i - copy_head_i)));
|
275
|
+
}
|
276
|
+
|
277
|
+
word = rb_str_new(&text[head_i], (tail_i - head_i + 1));
|
278
|
+
rb_funcall(change_str, rb_intern("concat"), 1, rb_funcall(url_base, rb_intern("%"), 1, rb_assoc_new(word, word)));
|
279
|
+
i = tail_i;
|
280
|
+
copy_head_i = tail_i + 1;
|
281
|
+
tail_i = -1;
|
282
|
+
} else {
|
283
|
+
i = head_i;
|
284
|
+
}
|
285
|
+
head_i = -1;
|
286
|
+
}
|
287
|
+
now = root;
|
288
|
+
}
|
289
|
+
}
|
290
|
+
|
291
|
+
if (copy_head_i == 0) {
|
292
|
+
return str;
|
293
|
+
} else {
|
294
|
+
rb_funcall(change_str, rb_intern("concat"), 1, rb_str_new(&text[copy_head_i], (total_len - copy_head_i)));
|
295
|
+
return change_str;
|
296
|
+
}
|
297
|
+
}
|
298
|
+
|
299
|
+
/**
|
300
|
+
* define class
|
301
|
+
**/
|
302
|
+
void Init_word_scoop() {
|
303
|
+
VALUE cWordScoop;
|
304
|
+
|
305
|
+
cWordScoop = rb_define_class("WordScoop", rb_cObject);
|
306
|
+
rb_define_singleton_method(cWordScoop, "new", t_new, -1);
|
307
|
+
rb_define_method(cWordScoop, "add", t_add, 1);
|
308
|
+
rb_define_method(cWordScoop, "search", t_search, 1);
|
309
|
+
rb_define_method(cWordScoop, "filter_html", t_filter_hrml, 1);
|
310
|
+
rb_define_alias(cWordScoop, "<<", "add");
|
311
|
+
}
|
data/ext/word_scoop.h
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
//************************************
|
2
|
+
// word_scoop.h
|
3
|
+
//
|
4
|
+
// Tsukasa OISHI
|
5
|
+
//
|
6
|
+
// 2009/11/21
|
7
|
+
//************************************
|
8
|
+
|
9
|
+
|
10
|
+
// bool type
|
11
|
+
#define true 1
|
12
|
+
#define false 0
|
13
|
+
typedef char bool;
|
14
|
+
|
15
|
+
#define CR '\r'
|
16
|
+
#define LF '\n'
|
17
|
+
#define TAB '\t'
|
18
|
+
#define SPACE ' '
|
19
|
+
#define NULL_CHAR '\0'
|
20
|
+
|
21
|
+
#define BEGIN_TAG '<'
|
22
|
+
#define END_TAG '>'
|
23
|
+
|
24
|
+
#define A_TAG "a"
|
25
|
+
#define SCRIPT_TAG "script"
|
26
|
+
#define PRE_TAG "pre"
|
27
|
+
#define IFRAME_TAG "iframe"
|
28
|
+
#define OBJECT_TAG "object"
|
29
|
+
|
30
|
+
#define EMPTY_STRING ""
|
31
|
+
#define LINK_URL_VARIABLE "@link_url"
|
32
|
+
#define DEAULT_LINK_URL "<a href=\"http://www.kaeruspoon.net/keywords/%s\">%s</a>"
|
33
|
+
|
34
|
+
// node is 1 byte character
|
35
|
+
typedef struct _node {
|
36
|
+
char moji; // character
|
37
|
+
bool end_flag; // true if end of word
|
38
|
+
|
39
|
+
struct _node *child_head;// head of child list
|
40
|
+
struct _node *next; // pointer of sibling node
|
41
|
+
} *node;
|
42
|
+
|
43
|
+
|
44
|
+
// initialize node
|
45
|
+
node initialize_node(char);
|
46
|
+
|
47
|
+
// add child node
|
48
|
+
void add_child(node, node);
|
49
|
+
|
50
|
+
// search node by use character
|
51
|
+
node search_child(node, char);
|
52
|
+
|
53
|
+
// search node by use character.
|
54
|
+
// if nothing, create new node
|
55
|
+
node search_child_or_create(node, char);
|
56
|
+
|
57
|
+
// free memory all child and self
|
58
|
+
void destroy_node(node);
|
59
|
+
|
60
|
+
//-----------------------------------------------------------
|
61
|
+
// Ruby Methods
|
62
|
+
// ----------------------------------------------------------
|
63
|
+
|
64
|
+
// new
|
65
|
+
static VALUE t_new(int, VALUE *, VALUE);
|
66
|
+
|
67
|
+
// add
|
68
|
+
static VALUE t_add(VALUE, VALUE);
|
69
|
+
|
70
|
+
// search
|
71
|
+
static VALUE t_search(VALUE, VALUE);
|
72
|
+
|
73
|
+
//filter_html
|
74
|
+
static VALUE t_filter_html(VALUE, VALUE);
|
75
|
+
|
76
|
+
|
77
|
+
// defined class
|
78
|
+
void Init_word_scoop();
|
79
|
+
|
data/lib/word_scoop.rb
ADDED
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: word_scoop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 2.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tsukasa OISHI
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-21 00:00:00 +09:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: newgem
|
17
|
+
type: :development
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.2.3
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: hoe
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.8.0
|
34
|
+
version:
|
35
|
+
description: WordScoop is a library that searching keyword in text.
|
36
|
+
email:
|
37
|
+
- tsukasa.oishi@gmail.com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions:
|
41
|
+
- ext/extconf.rb
|
42
|
+
extra_rdoc_files:
|
43
|
+
- History.txt
|
44
|
+
- Manifest.txt
|
45
|
+
- README.rdoc
|
46
|
+
files:
|
47
|
+
- History.txt
|
48
|
+
- Manifest.txt
|
49
|
+
- README.rdoc
|
50
|
+
- Rakefile
|
51
|
+
- lib/word_scoop.rb
|
52
|
+
- ext/extconf.rb
|
53
|
+
- ext/word_scoop.c
|
54
|
+
- ext/word_scoop.h
|
55
|
+
has_rdoc: true
|
56
|
+
homepage: http://www.kaeruspoon.net/
|
57
|
+
licenses: []
|
58
|
+
|
59
|
+
post_install_message:
|
60
|
+
rdoc_options:
|
61
|
+
- --main
|
62
|
+
- README.rdoc
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
- ext
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
version:
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project: word_scoop
|
81
|
+
rubygems_version: 1.3.5
|
82
|
+
signing_key:
|
83
|
+
specification_version: 2
|
84
|
+
summary: WordScoop is a library that searching keyword in text.
|
85
|
+
test_files: []
|
86
|
+
|