hobix 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/hobix +90 -0
- data/lib/hobix/api.rb +91 -0
- data/lib/hobix/article.rb +22 -0
- data/lib/hobix/base.rb +477 -0
- data/lib/hobix/bixwik.rb +200 -0
- data/lib/hobix/commandline.rb +661 -0
- data/lib/hobix/comments.rb +99 -0
- data/lib/hobix/config.rb +39 -0
- data/lib/hobix/datamarsh.rb +110 -0
- data/lib/hobix/entry.rb +83 -0
- data/lib/hobix/facets/comments.rb +74 -0
- data/lib/hobix/facets/publisher.rb +314 -0
- data/lib/hobix/facets/trackbacks.rb +80 -0
- data/lib/hobix/linklist.rb +76 -0
- data/lib/hobix/out/atom.rb +92 -0
- data/lib/hobix/out/erb.rb +64 -0
- data/lib/hobix/out/okaynews.rb +55 -0
- data/lib/hobix/out/quick.rb +312 -0
- data/lib/hobix/out/rdf.rb +97 -0
- data/lib/hobix/out/redrum.rb +26 -0
- data/lib/hobix/out/rss.rb +115 -0
- data/lib/hobix/plugin/bloglines.rb +73 -0
- data/lib/hobix/plugin/calendar.rb +220 -0
- data/lib/hobix/plugin/flickr.rb +110 -0
- data/lib/hobix/plugin/recent_comments.rb +82 -0
- data/lib/hobix/plugin/sections.rb +91 -0
- data/lib/hobix/plugin/tags.rb +60 -0
- data/lib/hobix/publish/ping.rb +53 -0
- data/lib/hobix/publish/replicate.rb +283 -0
- data/lib/hobix/publisher.rb +18 -0
- data/lib/hobix/search/dictionary.rb +141 -0
- data/lib/hobix/search/porter_stemmer.rb +203 -0
- data/lib/hobix/search/simple.rb +209 -0
- data/lib/hobix/search/vector.rb +100 -0
- data/lib/hobix/storage/filesys.rb +398 -0
- data/lib/hobix/trackbacks.rb +94 -0
- data/lib/hobix/util/objedit.rb +193 -0
- data/lib/hobix/util/patcher.rb +155 -0
- data/lib/hobix/webapp/cli.rb +195 -0
- data/lib/hobix/webapp/htmlform.rb +107 -0
- data/lib/hobix/webapp/message.rb +177 -0
- data/lib/hobix/webapp/urigen.rb +141 -0
- data/lib/hobix/webapp/webrick-servlet.rb +90 -0
- data/lib/hobix/webapp.rb +723 -0
- data/lib/hobix/weblog.rb +860 -0
- data/lib/hobix.rb +223 -0
- metadata +87 -0
@@ -0,0 +1,283 @@
|
|
1
|
+
# Content replication: will replicate and/or update the target with
|
2
|
+
# the files generated by Hobix
|
3
|
+
# Can handle (currently) copy via FTP and on local filesystem. Plug of sftp should
|
4
|
+
# not be hard.
|
5
|
+
#
|
6
|
+
# == How to use
|
7
|
+
# Add the following to your hobix.yaml file:
|
8
|
+
#
|
9
|
+
# - replicate:
|
10
|
+
# target: ftp://user:pass@isp.com/foo/bar/
|
11
|
+
# production_link: http://www.myisp.com/~me/blog
|
12
|
+
#
|
13
|
+
# production_link is optional and has been stolen from Sebastian Kanthak
|
14
|
+
# (http://www.kanthak.net/explorations/blog/hobix/staging_hobix.html)
|
15
|
+
#
|
16
|
+
# It allows to change the link to the weblog based on the HOBIX_ENV variable
|
17
|
+
# which means that you can have a local test blog (link set to /home/me/foo/blog/htdocs)
|
18
|
+
# for example, and a "production" blog (link is http://www.myisp.com/~me/blog) and it should
|
19
|
+
# be uploaded to isp.com/foo/bar/ using given user/pass
|
20
|
+
#
|
21
|
+
# Thus to sum-up:
|
22
|
+
#
|
23
|
+
# * If the HOBIX_ENV variable is set to "production", then the link is changed
|
24
|
+
# to the value of production_link (if available) and the pages are uploaded to target
|
25
|
+
# * If HOBIX_ENV is not set to "production", then the link is not changed and the pages go to htdocs
|
26
|
+
#
|
27
|
+
#
|
28
|
+
# == TODO
|
29
|
+
# - Beautify/simplify code
|
30
|
+
# - Potentially add some other "non-generated" files (for example css files..)
|
31
|
+
#
|
32
|
+
# Copyright - Frederick Ros
|
33
|
+
# License: same as Ruby's one
|
34
|
+
#
|
35
|
+
# $Id$
|
36
|
+
#
|
37
|
+
require 'hobix/base'
|
38
|
+
require 'net/ftp'
|
39
|
+
require 'fileutils'
|
40
|
+
|
41
|
+
module Publish

  # Describes where replicated files go: a path plus, for FTP targets,
  # the host and the credentials used to log in.
  Target = Struct.new( :path, :host, :user, :passwd )

  # Publisher plugin that pushes the pages Hobix just regenerated to the
  # configured target, but only when HOBIX_ENV is "production".
  class PublishReplication < Hobix::BasePublish
    attr_reader :production, :replicator, :weblog

    # blog::     the weblog being published.
    # hash_opt:: plugin options; 'target' is required, 'production_link' is
    #            optional. The 'items' and 'source' keys are overwritten here
    #            before the hash is handed to the replicator.
    def initialize( blog, hash_opt )
      @weblog = blog
      hash_opt['items']  = nil
      hash_opt['source'] = weblog.output_path

      @production = (ENV['HOBIX_ENV'] == "production")
      if @production
        # Swap in the production link when one is configured.
        blog.link = hash_opt['production_link'] || blog.link
      end

      @replicator =
        if hash_opt['target'] =~ /^ftp:\/\/([^:]+):([^@]+)@([^\/]+)(\/.*)$/
          # ftp://user:passwd@host/path
          user, passwd, host, path = Regexp.last_match.captures
          ReplicateFtp.new(hash_opt, Target.new(path, host, user, passwd))
        else
          # Anything else is treated as a local filesystem path.
          ReplicateFS.new(hash_opt, Target.new(hash_opt['target']))
        end
    end

    # Page names whose publication triggers this plugin.
    def watch
      ['index']
    end

    # Replicates every page recorded in weblog.updated_pages, printing one
    # line per replicated file. Does nothing outside production.
    def publish( page_name )
      return unless production

      replicator.items = weblog.updated_pages.map { |page| page.link }
      replicator.copy do |_count, _item, src, _dest|
        puts "## Replicating #{src}"
      end
    end

  end
end
|
91
|
+
|
92
|
+
module Hobix
  # Reopens Weblog so that every object passed to p_publish is remembered
  # in updated_pages before the original implementation runs.
  class Weblog
    # Objects handed to p_publish so far (nil until the first call).
    attr_reader :updated_pages

    # Keep a handle on the stock implementation so the wrapper can delegate.
    alias_method :p_publish_orig, :p_publish

    # Records +obj+ and then delegates to the original p_publish,
    # returning whatever it returns.
    def p_publish( obj )
      (@updated_pages ||= []) << obj
      p_publish_orig( obj )
    end

  end
end
|
106
|
+
|
107
|
+
# Transport-independent replication logic: copies the entries listed in
# +items+ (paths relative to +source+) underneath +target+.
#
# Subclasses supply the transport by implementing <tt>directory?(dir)</tt>,
# <tt>mkdir_p(dir)</tt> and <tt>cp(src, tgt)</tt>. They may also define
# +login+ and +logout+; when present, #copy calls them around the transfer.
class Replicate

  attr_accessor :items, :target, :source

  # hash_src:: responds to [] with 'items' (relative paths, may be nil) and
  #            'source' (base path the items are read from).
  # hash_tgt:: responds to [] with 'path' (base path the items are written to).
  def initialize(hash_src, hash_tgt)
    @items  = hash_src['items']
    @source = hash_src['source']
    @target = hash_tgt['path']
  end

  # Splits an item into its directory part (up to and including the last
  # slash, nil when there is none) and its file part.
  DIRFILE = /^(.*\/)?([^\/]*)$/

  # Returns the non-blank directory part of every item, in item order.
  # Duplicates are kept. A nil +items+ is treated as an empty list.
  def get_dirs
    dirs = []
    (items || []).each do |itm|
      dir, = DIRFILE.match(itm).captures
      dirs.push(dir) if dir && !dir.strip.empty?
    end
    dirs
  end

  # Returns the items that have a non-blank file part (items ending in a
  # slash are skipped). A nil +items+ is treated as an empty list.
  def get_files
    (items || []).select do |itm|
      file = DIRFILE.match(itm).captures.last
      file && !file.strip.empty?
    end
  end

  # Creates, under +target+, every directory needed by the items that does
  # not exist yet. Each distinct directory is checked only once.
  def check_and_make_dirs
    get_dirs.uniq.each do |dir|
      full_dir = File.join(target, dir)
      mkdir_p(full_dir) unless directory?(full_dir)
    end
  end

  # Copies every file item from +source+ to +target+. When a block is given
  # it is called before each copy with
  # (number of files, item, source path, target path).
  def copy_files(&block)
    files = get_files
    nb_files = files.size

    files.each do |file|
      src_f = File.join(source, file)
      tgt_f = File.join(target, file)

      yield nb_files, file, src_f, tgt_f if block_given?

      cp(src_f, tgt_f)
    end
  end

  # Runs a full replication: optional +login+, directory creation, file
  # copies, optional +logout+. The block, if any, is forwarded to #copy_files.
  #
  # +logout+ runs even when creating a directory or copying a file raises,
  # so a session opened by +login+ is not left dangling; the error is still
  # propagated. Returns the value of +logout+ (nil when it is not defined).
  def copy(&block)
    login if respond_to?(:login)

    result = nil
    begin
      check_and_make_dirs
      copy_files(&block)
    ensure
      result = logout if respond_to?(:logout)
    end
    result
  end

end
|
197
|
+
|
198
|
+
|
199
|
+
# Replicator that uploads over FTP using Net::FTP.
class ReplicateFtp < Replicate

  attr_accessor :ftp, :passwd, :user, :host

  # hash_src:: see Replicate#initialize.
  # hash_tgt:: additionally responds to [] with 'user', 'passwd' and 'host'.
  def initialize(hash_src, hash_tgt)
    super(hash_src, hash_tgt)

    @user   = hash_tgt['user']
    @passwd = hash_tgt['passwd']
    @host   = hash_tgt['host']
  end

  # Opens the connection to +host+ and authenticates. If authentication
  # fails the connection is closed before the error is re-raised.
  def login
    @ftp = Net::FTP.open(host)
    begin
      ftp.login user, passwd
    rescue StandardError
      ftp.close
      raise
    end
  end

  # Closes the FTP connection.
  def logout
    ftp.close
  end

  # Returns true when the server lets us chdir into +d+, false when it
  # answers 550; any other permission error is re-raised unchanged.
  # The working directory is restored after a successful probe.
  def directory?(d)
    old_dir = ftp.pwd

    begin
      ftp.chdir d
      # The chdir worked, so +d+ exists: go back where we came from.
      ftp.chdir(old_dir)
      true
    rescue Net::FTPPermError => e
      # 550 : No such file or directory
      return false if e.to_s[0, 3] == "550"
      raise
    end
  end

  # Creates +tgt+ and any missing parent directories by walking the path one
  # segment at a time, creating each segment the server reports as missing.
  #
  # An absolute +tgt+ is walked from the root, so it names the same
  # directory that #directory? and #cp address with the unsplit path. The
  # original working directory is restored afterwards, also on failure.
  def mkdir_p(tgt)
    old_dir = ftp.pwd

    begin
      ftp.chdir('/') if tgt[0, 1] == '/'

      tgt.split(/\/+/).each do |dir|
        next if dir.empty?

        begin
          # Let's try to go down
          ftp.chdir(dir)
        rescue Net::FTPPermError => e
          raise unless e.to_s[0, 3] == "550"
          # 550 : No such file or directory : create it, then enter it once.
          # A second failure is propagated instead of retried.
          ftp.mkdir(dir)
          ftp.chdir(dir)
        end
      end
    ensure
      ftp.chdir(old_dir)
    end
  end

  # Uploads the local file +src+ to the remote path +tgt+ in binary mode.
  def cp(src, tgt)
    ftp.putbinaryfile src, tgt
  end
end
|
268
|
+
|
269
|
+
# Replicator for targets on the local filesystem; every operation is a
# direct call into File / FileUtils.
class ReplicateFS < Replicate

  # True when +d+ is an existing directory.
  def directory?(d)
    File.directory?(d)
  end

  # Creates +tgt+ together with any missing parent directories.
  def mkdir_p(tgt)
    FileUtils.mkdir_p(tgt)
  end

  # Copies the file +src+ to +tgt+.
  def cp(src, tgt)
    FileUtils.cp(src, tgt)
  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#
|
2
|
+
# = hobix/publisher.rb
|
3
|
+
#
|
4
|
+
# Hobix command-line weblog system, web-based publishing interface.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2003-2004 why the lucky stiff
|
7
|
+
#
|
8
|
+
# Written & maintained by why the lucky stiff <why@ruby-lang.org>
|
9
|
+
#
|
10
|
+
# This program is free software, released under a BSD license.
|
11
|
+
# See COPYING for details.
|
12
|
+
#
|
13
|
+
#--
|
14
|
+
# $Id$
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'hobix/facets/publisher'
|
18
|
+
|
@@ -0,0 +1,141 @@
|
|
1
|
+
# Maintain a dictionary mapping words to consecutive integers (the
|
2
|
+
# first unique word is 0, the second is 1 and so on)
|
3
|
+
|
4
|
+
require 'hobix/search/porter_stemmer'
|
5
|
+
module Hobix
module Search
module Simple
  # Assigns each distinct stemmed word a consecutive integer position
  # (0 for the first new word, 1 for the second, ...) and keeps per-
  # classification occurrence counts.
  class Dictionary
    # Stemmed words that are never indexed. Every entry maps to 1.
    STOP_WORDS = Hash[%w[
      a again all along also an and arialhelvetica as at
      but by came can cant couldnt did didn didnt do
      doesnt dont entrytitledetail ever first fontvariant from have her here
      him how i if in into is isnt it itll
      just last least like most my new no not now
      of on or should sidebartitl sinc so some textdecoration th
      than that the their then those to told too true
      try until url us were when whether while with within
      yes you youll
    ].map { |stop| [stop, 1] }]

    attr_reader :total, :clsf, :words

    def initialize
      @total = 0   # sum of every count adjustment made through add_word
      @clsf  = {}  # classification => { stemmed word => count }
      @words = {}  # stemmed word => { :pos => position, :clsf => {} }
    end

    # Stems +word+ and registers it if it is new. For each entry of
    # +classifications+ the word's count, and the overall total, are
    # adjusted by +mod+. Returns the word's position, or nil for a stop word.
    def add_word(word, classifications = [], mod = 1)
      word = Stemmable::stem_porter(word)
      return nil if STOP_WORDS[word]

      entry = (@words[word] ||= {:pos => @words.size, :clsf => {}})
      classifications.each do |c|
        counts = (@clsf[c] ||= {})
        counts[word] = (counts[word] || 0) + mod
        @total += mod
      end
      entry[:pos]
    end

    # Same as add_word with an adjustment of -1.
    def remove_word(word, classifications = [])
      add_word(word, classifications, -1)
    end

    # Returns the position of the stemmed +word+, or nil when it is unknown
    # or a stop word.
    def find(word)
      word = Stemmable::stem_porter(word)
      entry = @words[word]
      entry[:pos] if entry && !STOP_WORDS[word]
    end

    # Number of distinct words registered.
    def size
      @words.size
    end

    # Prints the registered words in sorted order.
    def dump
      puts @words.keys.sort
    end

  end
end
end
end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
#! /local/ruby/bin/ruby
|
2
|
+
#
|
3
|
+
# $Id$
|
4
|
+
#
|
5
|
+
# Lifted from SimpleSearch by Chad Fowler / Dave Thomas / Allen Condit / perhaps other unseeable folks in the distance ...
|
6
|
+
#
|
7
|
+
# See example usage at the end of this file.
|
8
|
+
#
|
9
|
+
|
10
|
+
module Hobix
module Stemmable

  # Cache of already computed stems: original word => stem.
  STEMMED = {}

  # Step 2 suffix => replacement.
  STEP_2_LIST = {
    'ational'=>'ate', 'tional'=>'tion', 'enci'=>'ence', 'anci'=>'ance',
    'izer'=>'ize', 'bli'=>'ble',
    'alli'=>'al', 'entli'=>'ent', 'eli'=>'e', 'ousli'=>'ous',
    'ization'=>'ize', 'ation'=>'ate',
    'ator'=>'ate', 'alism'=>'al', 'iveness'=>'ive', 'fulness'=>'ful',
    'ousness'=>'ous', 'aliti'=>'al',
    'iviti'=>'ive', 'biliti'=>'ble', 'logi'=>'log'
  }

  # Step 3 suffix => replacement.
  STEP_3_LIST = {
    'icate'=>'ic', 'ative'=>'', 'alize'=>'al', 'iciti'=>'ic',
    'ical'=>'ic', 'ful'=>'', 'ness'=>''
  }

  # Suffixes handled by step 2 (the keys of STEP_2_LIST).
  SUFFIX_1_REGEXP = /(
                    ational  |
                    tional   |
                    enci     |
                    anci     |
                    izer     |
                    bli      |
                    alli     |
                    entli    |
                    eli      |
                    ousli    |
                    ization  |
                    ation    |
                    ator     |
                    alism    |
                    iveness  |
                    fulness  |
                    ousness  |
                    aliti    |
                    iviti    |
                    biliti   |
                    logi)$/x

  # Suffixes removed by step 4.
  SUFFIX_2_REGEXP = /(
                      al       |
                      ance     |
                      ence     |
                      er       |
                      ic       |
                      able     |
                      ible     |
                      ant      |
                      ement    |
                      ment     |
                      ent      |
                      ou       |
                      ism      |
                      ate      |
                      iti      |
                      ous      |
                      ive      |
                      ize)$/x

  C = "[^aeiou]"                 # consonant
  V = "[aeiouy]"                 # vowel
  CC = "#{C}(?>[^aeiouy]*)"      # consonant sequence
  VV = "#{V}(?>[aeiou]*)"        # vowel sequence

  MGR0 = /^(#{CC})?#{VV}#{CC}/o                # [cc]vvcc... is m>0
  MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o       # [cc]vvcc[vv] is m=1
  MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o      # [cc]vvccvvcc... is m>1
  VOWEL_IN_STEM = /^(#{CC})?#{V}/o             # vowel in stem

  #
  # Porter stemmer in Ruby.
  #
  # This is the Porter stemming algorithm, ported to Ruby from the
  # version coded up in Perl.  It's easy to follow against the rules
  # in the original paper in:
  #
  #   Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
  #   no. 3, pp 130-137,
  #
  # See also http://www.tartarus.org/~martin/PorterStemmer
  #
  # Send comments to raypereda@hotmail.com
  #
  # w:: the word to stem; defaults to a copy of the receiver's to_str.
  #
  # Returns the stem. Words shorter than three characters are returned
  # as they are. Results are cached in STEMMED under the word as it was
  # passed in. The argument itself is never modified, so frozen strings
  # are accepted.
  #
  def stem_porter(w = self.to_str.dup)

    original_word = w

    return w if w.length < 3

    result = STEMMED[w]
    return result if result

    # Work on a private copy: the steps below edit the string in place
    # (w[0]=, chop!, <<) and must not leak into the caller's object or
    # into the cache key.
    w = w.dup

    # now map initial y to Y so that the patterns never treat it as vowel
    w[0] = 'Y' if w[0, 1] == 'y'

    # Step 1a
    if w =~ /(ss|i)es$/
      w = $` + $1
    elsif w =~ /([^s])s$/
      w = $` + $1
    end

    # Step 1b
    if w =~ /eed$/
      w.chop! if $` =~ MGR0
    elsif w =~ /(ed|ing)$/
      stem = $`
      if stem =~ VOWEL_IN_STEM
        w = stem
        case w
        when /(at|bl|iz)$/             then w << "e"
        when /([^aeiouylsz])\1$/       then w.chop!
        when /^#{CC}#{V}[^aeiouwxy]$/o then w << "e"
        end
      end
    end

    # Step 1c: terminal y becomes i when the stem contains a vowel
    if w =~ /y$/
      stem = $`
      w = stem + "i" if stem =~ VOWEL_IN_STEM
    end

    # Step 2
    if w =~ SUFFIX_1_REGEXP
      stem = $`
      suffix = $1
      if stem =~ MGR0
        w = stem + STEP_2_LIST[suffix]
      end
    end

    # Step 3
    if w =~ /(icate|ative|alize|iciti|ical|ful|ness)$/
      stem = $`
      suffix = $1
      if stem =~ MGR0
        w = stem + STEP_3_LIST[suffix]
      end
    end

    # Step 4
    if w =~ SUFFIX_2_REGEXP
      stem = $`
      if stem =~ MGR1
        w = stem
      end
    elsif w =~ /(s|t)(ion)$/
      stem = $` + $1
      if stem =~ MGR1
        w = stem
      end
    end

    #  Step 5
    if w =~ /e$/
      stem = $`
      if (stem =~ MGR1) ||
          (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
        w = stem
      end
    end

    if w =~ /ll$/ && w =~ MGR1
      w.chop!
    end

    # and turn initial Y back to y
    w[0] = 'y' if w[0, 1] == 'Y'

    STEMMED[original_word] = w

    w
  end


  module_function :stem_porter
  #
  # make the stem_porter the default stem method, just in case we
  # feel like having multiple stemmers available later.
  #
  alias stem stem_porter

end
end
|