hobix 0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/hobix +90 -0
- data/lib/hobix/api.rb +91 -0
- data/lib/hobix/article.rb +22 -0
- data/lib/hobix/base.rb +477 -0
- data/lib/hobix/bixwik.rb +200 -0
- data/lib/hobix/commandline.rb +661 -0
- data/lib/hobix/comments.rb +99 -0
- data/lib/hobix/config.rb +39 -0
- data/lib/hobix/datamarsh.rb +110 -0
- data/lib/hobix/entry.rb +83 -0
- data/lib/hobix/facets/comments.rb +74 -0
- data/lib/hobix/facets/publisher.rb +314 -0
- data/lib/hobix/facets/trackbacks.rb +80 -0
- data/lib/hobix/linklist.rb +76 -0
- data/lib/hobix/out/atom.rb +92 -0
- data/lib/hobix/out/erb.rb +64 -0
- data/lib/hobix/out/okaynews.rb +55 -0
- data/lib/hobix/out/quick.rb +312 -0
- data/lib/hobix/out/rdf.rb +97 -0
- data/lib/hobix/out/redrum.rb +26 -0
- data/lib/hobix/out/rss.rb +115 -0
- data/lib/hobix/plugin/bloglines.rb +73 -0
- data/lib/hobix/plugin/calendar.rb +220 -0
- data/lib/hobix/plugin/flickr.rb +110 -0
- data/lib/hobix/plugin/recent_comments.rb +82 -0
- data/lib/hobix/plugin/sections.rb +91 -0
- data/lib/hobix/plugin/tags.rb +60 -0
- data/lib/hobix/publish/ping.rb +53 -0
- data/lib/hobix/publish/replicate.rb +283 -0
- data/lib/hobix/publisher.rb +18 -0
- data/lib/hobix/search/dictionary.rb +141 -0
- data/lib/hobix/search/porter_stemmer.rb +203 -0
- data/lib/hobix/search/simple.rb +209 -0
- data/lib/hobix/search/vector.rb +100 -0
- data/lib/hobix/storage/filesys.rb +398 -0
- data/lib/hobix/trackbacks.rb +94 -0
- data/lib/hobix/util/objedit.rb +193 -0
- data/lib/hobix/util/patcher.rb +155 -0
- data/lib/hobix/webapp/cli.rb +195 -0
- data/lib/hobix/webapp/htmlform.rb +107 -0
- data/lib/hobix/webapp/message.rb +177 -0
- data/lib/hobix/webapp/urigen.rb +141 -0
- data/lib/hobix/webapp/webrick-servlet.rb +90 -0
- data/lib/hobix/webapp.rb +723 -0
- data/lib/hobix/weblog.rb +860 -0
- data/lib/hobix.rb +223 -0
- metadata +87 -0
@@ -0,0 +1,283 @@
|
|
1
|
+
# Content replication: will replicate and/or update the target with
|
2
|
+
# the files generated by Hobix
|
3
|
+
# Currently handles copying via FTP and on the local filesystem. Plugging in sftp should
|
4
|
+
# not be hard.
|
5
|
+
#
|
6
|
+
# == How to use
|
7
|
+
# Add the following to your hobix.yaml file:
|
8
|
+
#
|
9
|
+
# - replicate:
|
10
|
+
# target: ftp://user:pass@isp.com/foo/bar/
|
11
|
+
# production_link: http://www.myisp.com/~me/blog
|
12
|
+
#
|
13
|
+
# production_link is optional and has been stolen from Sebastian Kanthak
|
14
|
+
# (http://www.kanthak.net/explorations/blog/hobix/staging_hobix.html)
|
15
|
+
#
|
16
|
+
# It allows changing the link to the weblog based on the HOBIX_ENV variable
|
17
|
+
# which means that you can have a local test blog (link set to /home/me/foo/blog/htdocs)
|
18
|
+
# for example, and a "production" blog (link is http://www.myisp.com/~me/blog) and it should
|
19
|
+
# be uploaded to isp.com/foo/bar/ using given user/pass
|
20
|
+
#
|
21
|
+
# Thus to sum-up:
|
22
|
+
#
|
23
|
+
# * If the HOBIX_ENV variable is set to "production", then the link is changed
|
24
|
+
# to the value of production_link (if available) and the pages are uploaded to target
|
25
|
+
# * If HOBIX_ENV is not set, then the link is not changed and the pages go to htdocs
|
26
|
+
#
|
27
|
+
#
|
28
|
+
# == TODO
|
29
|
+
# - Beautify/simplify code
|
30
|
+
# - Potentially add some other "non-generated" files (for example css files..)
|
31
|
+
#
|
32
|
+
# Copyright - Frederick Ros
|
33
|
+
# License: same as Ruby's one
|
34
|
+
#
|
35
|
+
# $Id$
|
36
|
+
#
|
37
|
+
require 'hobix/base'
|
38
|
+
require 'net/ftp'
|
39
|
+
require 'fileutils'
|
40
|
+
|
41
|
+
module Publish

  # Describes where replicated files go. Only +path+ is set for filesystem
  # targets; FTP targets also carry +host+, +user+ and +passwd+.
  Target = Struct.new( :path, :host, :user, :passwd )

  # Publisher plugin that copies the pages generated by Hobix to a target
  # (FTP server or local directory) once the 'index' page has been published.
  # Replication only happens when the HOBIX_ENV environment variable equals
  # "production".
  class PublishReplication < Hobix::BasePublish
    attr_reader :production, :replicator, :weblog

    # Matches ftp://user:pass@host/path and captures user, pass, host, path.
    FTP_TARGET = /^ftp:\/\/([^:]+):([^@]+)@([^\/]+)(\/.*)$/

    # blog::     the weblog being published; its +link+ is replaced by
    #            hash_opt['production_link'] (when given) in production mode.
    # hash_opt:: plugin options; 'target' and 'production_link' are read here.
    def initialize( blog, hash_opt )
      @weblog = blog

      # Work on a copy so the caller's option hash is not polluted with the
      # internal 'items' / 'source' entries.
      opts = hash_opt.dup
      opts['items'] = nil
      opts['source'] = weblog.output_path

      @production = ( ENV['HOBIX_ENV'] == "production" )

      # Change link if a production one is given
      blog.link = opts['production_link'] || blog.link if @production

      ftp = FTP_TARGET.match( opts['target'] )
      if ftp
        user, passwd, host, path = ftp.captures
        @replicator = ReplicateFtp.new( opts, Target.new( path, host, user, passwd ) )
      else
        # File replication
        @replicator = ReplicateFS.new( opts, Target.new( opts['target'] ) )
      end
    end

    # Names of the pages whose publication triggers #publish.
    def watch
      ['index']
    end

    # Replicates every page the weblog reports as updated. Does nothing
    # outside production mode. +page_name+ is not used.
    def publish( page_name )
      return unless production

      # updated_pages may still be nil when no page has been published yet.
      replicator.items = Array( weblog.updated_pages ).map { |page| page.link }
      replicator.copy do |_count, _file, src, _tgt|
        puts "## Replicating #{src}"
      end
    end

  end
end
|
91
|
+
|
92
|
+
module Hobix
  # Extension of Weblog that records every object handed to p_publish so the
  # replication publisher knows which pages have to be copied.
  class Weblog

    # Objects passed to p_publish so far. Returns an empty array (never nil)
    # when nothing has been published yet.
    def updated_pages
      @updated_pages || []
    end

    # Guard the alias chain: loading this file a second time would otherwise
    # alias the wrapper to itself and make p_publish recurse forever.
    unless method_defined?( :p_publish_orig ) || private_method_defined?( :p_publish_orig )
      alias p_publish_orig p_publish

      # Records +obj+ and then delegates to the original p_publish.
      def p_publish( obj )
        @updated_pages ||= []
        @updated_pages << obj
        p_publish_orig( obj )
      end
    end

  end
end
|
106
|
+
|
107
|
+
# Base class for replicators. It knows which items to copy and in what
# order to do things; subclasses supply the transport by implementing
# directory?(path), mkdir_p(path) and cp(src, tgt), and may optionally
# define login / logout to open and close a session.
class Replicate

  attr_accessor :items, :target, :source

  # Splits an item into an optional directory part (with trailing slash)
  # and a file name.
  DIRFILE = /^(.*\/)?([^\/]*)$/

  # hash_src:: responds to ['items'] (paths relative to the source) and
  #            ['source'] (base path the items are read from).
  # hash_tgt:: responds to ['path'] (base path the items are copied to).
  def initialize(hash_src, hash_tgt)
    @items  = hash_src['items']
    @source = hash_src['source']
    @target = hash_tgt['path']
  end

  # Directory part of every item that has one, in item order (duplicates
  # are kept). Returns an empty array when no items are set.
  def get_dirs
    dirs = []
    Array(items).each do |itm|
      dir, _file = DIRFILE.match(itm).captures
      dirs.push(dir) if dir && dir.strip.size != 0
    end
    dirs
  end

  # Items that name a file, i.e. that do not end with a slash.
  def get_files
    files = []
    Array(items).each do |itm|
      _dir, file = DIRFILE.match(itm).captures
      files.push(itm) if file && file.strip.size != 0
    end
    files
  end

  # Makes sure every directory needed below the target exists.
  def check_and_make_dirs
    # uniq: several items usually share a directory; check each one once.
    get_dirs.uniq.each do |dir|
      path = File.join(target, dir)
      mkdir_p(path) unless directory?(path)
    end
  end

  # Copies every file item from source to target. When a block is given it
  # is called before each copy with: number of files, item, source path,
  # target path.
  def copy_files(&block)
    files = get_files
    nb_files = files.size

    files.each do |file|
      src_f = File.join(source, file)
      tgt_f = File.join(target, file)

      block.call(nb_files, file, src_f, tgt_f) if block

      cp(src_f, tgt_f)
    end
  end

  # Runs a full replication: login (if defined), create directories, copy
  # files, logout (if defined). The block is forwarded to #copy_files.
  def copy(&block)
    login if respond_to?(:login)

    begin
      check_and_make_dirs
      copy_files(&block)
    ensure
      # Always close the session, even when a directory or copy step failed.
      logout if respond_to?(:logout)
    end
  end

end
|
197
|
+
|
198
|
+
|
199
|
+
# Replicator that copies files to an FTP server through Net::FTP.
class ReplicateFtp < Replicate

  attr_accessor :ftp, :passwd, :user, :host

  # hash_tgt must additionally respond to ['user'], ['passwd'] and ['host'].
  def initialize(hash_src, hash_tgt)
    super(hash_src, hash_tgt)

    @user   = hash_tgt['user']
    @passwd = hash_tgt['passwd']
    @host   = hash_tgt['host']
  end

  # Opens the connection and authenticates. The connection is closed again
  # when authentication fails so the socket does not leak.
  def login
    @ftp = Net::FTP.open(host)
    begin
      ftp.login(user, passwd)
    rescue StandardError
      ftp.close
      raise
    end
  end

  # Closes the FTP connection.
  def logout
    ftp.close
  end

  # True when +d+ can be entered on the server. A "550" reply is treated as
  # "does not exist"; any other permission error is re-raised unchanged.
  def directory?(d)
    old_dir = ftp.pwd

    begin
      ftp.chdir(d)
    rescue Net::FTPPermError => e
      return false if missing_path?(e)
      raise
    end

    # We successfully changed to d, so go back to where we were.
    ftp.chdir(old_dir)
    true
  end

  # Creates +tgt+ and any missing parent directory on the server, then
  # returns to the directory that was current before the call.
  def mkdir_p(tgt)
    old_dir = ftp.pwd

    begin
      # directory? and cp use the same path as given, so an absolute path
      # must be walked from the server root, not from the current directory.
      ftp.chdir('/') if tgt.start_with?('/')

      tgt.split(/\/+/).each do |dir|
        next if dir.size == 0

        begin
          # Let's try to go down
          ftp.chdir(dir)
        rescue Net::FTPPermError => e
          raise unless missing_path?(e)
          # 550 : No such file or directory : create it, then enter it
          ftp.mkdir(dir)
          ftp.chdir(dir)
        end
      end
    ensure
      ftp.chdir(old_dir)
    end
  end

  # Uploads local file +src+ to remote path +tgt+ in binary mode.
  def cp(src, tgt)
    ftp.putbinaryfile(src, tgt)
  end

  private

  # True when the server reply carried by +error+ starts with code 550.
  def missing_path?(error)
    error.to_s[0, 3] == "550"
  end

end
|
268
|
+
|
269
|
+
# Replicator for targets on the local filesystem; every operation is
# delegated to File / FileUtils.
class ReplicateFS < Replicate

  # True when +d+ is an existing directory.
  def directory?(d)
    File.directory?(d)
  end

  # Creates +tgt+ together with any missing parent directory.
  def mkdir_p(tgt)
    FileUtils.mkdir_p(tgt)
  end

  # Copies the file +src+ to +tgt+.
  def cp(src, tgt)
    FileUtils.cp(src, tgt)
  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#
|
2
|
+
# = hobix/publisher.rb
|
3
|
+
#
|
4
|
+
# Hobix command-line weblog system, web-based publishing interface.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2003-2004 why the lucky stiff
|
7
|
+
#
|
8
|
+
# Written & maintained by why the lucky stiff <why@ruby-lang.org>
|
9
|
+
#
|
10
|
+
# This program is free software, released under a BSD license.
|
11
|
+
# See COPYING for details.
|
12
|
+
#
|
13
|
+
#--
|
14
|
+
# $Id$
|
15
|
+
#++
|
16
|
+
|
17
|
+
require 'hobix/facets/publisher'
|
18
|
+
|
@@ -0,0 +1,141 @@
|
|
1
|
+
# Maintain a dictionary mapping words to consecutive integers (the
|
2
|
+
# first unique word is 0, the second is 1 and so on)
|
3
|
+
|
4
|
+
require 'hobix/search/porter_stemmer'
|
5
|
+
module Hobix
module Search
module Simple
  # Maps stemmed words to consecutive integer positions (first new word is
  # 0, the next 1, ...) and keeps per-classification occurrence counts.
  class Dictionary
    # Words that are never indexed. Lookups are made with the *stemmed*
    # word, which is why some entries are stems ("sinc", "sidebartitl").
    # Frozen so the shared table cannot be altered by accident.
    STOP_WORDS = {
      "a" => 1, "again" => 1, "all" => 1, "along" => 1, "also" => 1,
      "an" => 1, "and" => 1, "arialhelvetica" => 1, "as" => 1, "at" => 1,
      "but" => 1, "by" => 1, "came" => 1, "can" => 1, "cant" => 1,
      "couldnt" => 1, "did" => 1, "didn" => 1, "didnt" => 1, "do" => 1,
      "doesnt" => 1, "dont" => 1, "entrytitledetail" => 1, "ever" => 1,
      "first" => 1, "fontvariant" => 1, "from" => 1, "have" => 1,
      "her" => 1, "here" => 1, "him" => 1, "how" => 1, "i" => 1,
      "if" => 1, "in" => 1, "into" => 1, "is" => 1, "isnt" => 1,
      "it" => 1, "itll" => 1, "just" => 1, "last" => 1, "least" => 1,
      "like" => 1, "most" => 1, "my" => 1, "new" => 1, "no" => 1,
      "not" => 1, "now" => 1, "of" => 1, "on" => 1, "or" => 1,
      "should" => 1, "sidebartitl" => 1, "sinc" => 1, "so" => 1,
      "some" => 1, "textdecoration" => 1, "th" => 1, "than" => 1,
      "that" => 1, "the" => 1, "their" => 1, "then" => 1, "those" => 1,
      "to" => 1, "told" => 1, "too" => 1, "true" => 1, "try" => 1,
      "until" => 1, "url" => 1, "us" => 1, "were" => 1, "when" => 1,
      "whether" => 1, "while" => 1, "with" => 1, "within" => 1,
      "yes" => 1, "you" => 1, "youll" => 1,
    }.freeze

    # total:: sum of all count changes applied through add_word/remove_word
    # clsf::  classification => { stemmed word => count }
    # words:: stemmed word => { :pos => position, :clsf => {} }
    attr_reader :total, :clsf, :words

    def initialize
      @total = 0
      @clsf = {}
      @words = {}
    end

    # Stems +word+ and registers it. For every entry of +classifications+
    # the word's count (and the overall total) is changed by +mod+.
    # Returns the word's position, or nil when the stem is a stop word.
    def add_word(word, classifications = [], mod = 1)
      word = Stemmable::stem_porter(word)
      return nil if STOP_WORDS[word]

      # @words.size is read before the insertion, so positions start at 0.
      @words[word] ||= {:pos => @words.size, :clsf => {}}
      classifications.each do |c|
        counts = (@clsf[c] ||= {})
        counts[word] = (counts[word] || 0) + mod
        @total += mod
      end
      @words[word][:pos]
    end

    # Same as add_word with a count change of -1. The word keeps its
    # position; only the counts are decremented.
    def remove_word(word, classifications = [])
      add_word(word, classifications, -1)
    end

    # Position of +word+ (after stemming), or nil when it is unknown or a
    # stop word.
    def find(word)
      word = Stemmable::stem_porter(word)
      return nil if STOP_WORDS[word]

      entry = @words[word]
      entry && entry[:pos]
    end

    # Number of distinct stemmed words registered.
    def size
      @words.size
    end

    # Prints every registered stem, sorted, one per line.
    def dump
      puts @words.keys.sort
    end

  end
end
end
end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
#! /local/ruby/bin/ruby
|
2
|
+
#
|
3
|
+
# $Id$
|
4
|
+
#
|
5
|
+
# Lifted from SimpleSearch by Chad Fowler / Dave Thomas / Allen Condit / perhaps other unseeable folks in the distance ...
|
6
|
+
#
|
7
|
+
# See example usage at the end of this file.
|
8
|
+
#
|
9
|
+
|
10
|
+
module Hobix
module Stemmable

  # Cache of words already stemmed: word => frozen stem.
  STEMMED = {}

  # Step 2 suffix replacements.
  STEP_2_LIST = {
    'ational'=>'ate', 'tional'=>'tion', 'enci'=>'ence', 'anci'=>'ance',
    'izer'=>'ize', 'bli'=>'ble',
    'alli'=>'al', 'entli'=>'ent', 'eli'=>'e', 'ousli'=>'ous',
    'ization'=>'ize', 'ation'=>'ate',
    'ator'=>'ate', 'alism'=>'al', 'iveness'=>'ive', 'fulness'=>'ful',
    'ousness'=>'ous', 'aliti'=>'al',
    'iviti'=>'ive', 'biliti'=>'ble', 'logi'=>'log'
  }.freeze

  # Step 3 suffix replacements.
  STEP_3_LIST = {
    'icate'=>'ic', 'ative'=>'', 'alize'=>'al', 'iciti'=>'ic',
    'ical'=>'ic', 'ful'=>'', 'ness'=>''
  }.freeze


  SUFFIX_1_REGEXP = /(
                    ational  |
                    tional   |
                    enci     |
                    anci     |
                    izer     |
                    bli      |
                    alli     |
                    entli    |
                    eli      |
                    ousli    |
                    ization  |
                    ation    |
                    ator     |
                    alism    |
                    iveness  |
                    fulness  |
                    ousness  |
                    aliti    |
                    iviti    |
                    biliti   |
                    logi)$/x


  SUFFIX_2_REGEXP = /(
                      al       |
                      ance     |
                      ence     |
                      er       |
                      ic       |
                      able     |
                      ible     |
                      ant      |
                      ement    |
                      ment     |
                      ent      |
                      ou       |
                      ism      |
                      ate      |
                      iti      |
                      ous      |
                      ive      |
                      ize)$/x


  C = "[^aeiou]"         # consonant
  V = "[aeiouy]"         # vowel
  CC = "#{C}(?>[^aeiouy]*)"  # consonant sequence
  VV = "#{V}(?>[aeiou]*)"    # vowel sequence

  MGR0 = /^(#{CC})?#{VV}#{CC}/o                # [cc]vvcc... is m>0
  MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o       # [cc]vvcc[vv] is m=1
  MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o      # [cc]vvccvvcc... is m>1
  VOWEL_IN_STEM   = /^(#{CC})?#{V}/o           # vowel in stem

  #
  # Porter stemmer in Ruby.
  #
  # This is the Porter stemming algorithm, ported to Ruby from the
  # version coded up in Perl.  It's easy to follow against the rules
  # in the original paper in:
  #
  #   Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
  #   no. 3, pp 130-137,
  #
  # See also http://www.tartarus.org/~martin/PorterStemmer
  #
  # Send comments to raypereda@hotmail.com
  #
  # Returns the stem of +w+. Words shorter than three characters are
  # returned as they are. The argument is never modified (so frozen strings
  # are accepted), and results are remembered in STEMMED.
  #

  def stem_porter(w = self.to_str.dup)

    return w if w.length < 3

    # Hand out a copy so callers cannot alter the cached stem.
    cached = STEMMED[w]
    return cached.dup if cached

    # Several steps below edit the string in place (w[0]=, chop!, <<), so
    # work on a private copy and keep the untouched word as the cache key.
    original_word = w
    w = w.dup

    # now map initial y to Y so that the patterns never treat it as vowel
    w[0] = 'Y' if w[0] == ?y

    # Step 1a
    if w =~ /(ss|i)es$/
      w = $` + $1
    elsif w =~ /([^s])s$/
      w = $` + $1
    end

    # Step 1b
    if w =~ /eed$/
      w.chop! if $` =~ MGR0
    elsif w =~ /(ed|ing)$/
      stem = $`
      if stem =~ VOWEL_IN_STEM
        w = stem
        case w
        when /(at|bl|iz)$/             then w << "e"
        when /([^aeiouylsz])\1$/       then w.chop!
        when /^#{CC}#{V}[^aeiouwxy]$/o then w << "e"
        end
      end
    end

    # Step 1c
    if w =~ /y$/
      stem = $`
      w = stem + "i" if stem =~ VOWEL_IN_STEM
    end

    # Step 2
    if w =~ SUFFIX_1_REGEXP
      stem = $`
      suffix = $1
      if stem =~ MGR0
        w = stem + STEP_2_LIST[suffix]
      end
    end

    # Step 3
    if w =~ /(icate|ative|alize|iciti|ical|ful|ness)$/
      stem = $`
      suffix = $1
      if stem =~ MGR0
        w = stem + STEP_3_LIST[suffix]
      end
    end

    # Step 4
    if w =~ SUFFIX_2_REGEXP
      stem = $`
      if stem =~ MGR1
        w = stem
      end
    elsif w =~ /(s|t)(ion)$/
      stem = $` + $1
      if stem =~ MGR1
        w = stem
      end
    end

    # Step 5
    if w =~ /e$/
      stem = $`
      if (stem =~ MGR1) ||
          (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
        w = stem
      end
    end

    if w =~ /ll$/ && w =~ MGR1
      w.chop!
    end

    # and turn initial Y back to y
    w[0] = 'y' if w[0] == ?Y

    STEMMED[original_word] = w.dup.freeze

    w
  end


  module_function :stem_porter
  #
  # make the stem_porter the default stem method, just in case we
  # feel like having multiple stemmers available later.
  #
  alias stem stem_porter

end
end
|