thomaspeklak-OfflineSearch 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +100 -0
- data/bin/OfflineSearch +3 -0
- data/lib/action_controller.rb +70 -0
- data/lib/config.yaml +31 -0
- data/lib/config_default.yaml +32 -0
- data/lib/crawler.rb +237 -0
- data/lib/entity_converter.rb +33 -0
- data/lib/generate_default_config.rb +9 -0
- data/lib/generate_default_stopwords.rb +18 -0
- data/lib/generate_default_template.rb +38 -0
- data/lib/log_init.rb +16 -0
- data/lib/offline_search.rb +11 -0
- data/lib/option_parser.rb +61 -0
- data/lib/option_validator.rb +53 -0
- data/lib/search_generator.rb +112 -0
- data/lib/stop_words.rb +7 -0
- data/lib/stoplist/english/stopwords.txt +317 -0
- data/lib/stoplist/german/stopwords.txt +662 -0
- data/lib/temporary_storage.rb +257 -0
- data/templates/base+double_metaphone/jQueryDoubleMetaphone.js +290 -0
- data/templates/base+double_metaphone/jQueryDoubleMetaphone.packed.js +1 -0
- data/templates/base+double_metaphone/jquery-1.2.2.min.js +31 -0
- data/templates/base+double_metaphone/search.css +10 -0
- data/templates/base+double_metaphone/search.html +19 -0
- data/templates/base+double_metaphone/search.js +178 -0
- data/templates/base/jquery-1.2.2.min.js +31 -0
- data/templates/base/search.css +10 -0
- data/templates/base/search.html +18 -0
- data/templates/base/search.js +99 -0
- data/tests/notestsyet.rb +0 -0
- metadata +122 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
# extends the string class to convert html entities
|
2
|
+
# use carefully: entities cannot be converted back, because some entities are simply skipped as they are not useful for the search generation process
|
3
|
+
#
|
4
|
+
# * $Author$
|
5
|
+
# * $Rev$
|
6
|
+
# * $LastChangedDate$
|
7
|
+
|
8
|
+
class String
  # Converts the HTML entities of the German umlauts and the sharp s to their
  # UTF-8 characters. Every other named entity of 4 to 6 letters (for example
  # "&nbsp;") is replaced by a single space, so the conversion is lossy and
  # cannot be reversed with encode_html_entities.
  #
  # Returns a new String; the receiver is not modified.
  def decode_html_entities
    mgsub([[/&auml;/, 'ä'], [/&Auml;/, 'Ä'],
           [/&ouml;/, 'ö'], [/&Ouml;/, 'Ö'],
           [/&uuml;/, 'ü'], [/&Uuml;/, 'Ü'],
           [/&szlig;/, 'ß'],
           # catch-all must stay last: mgsub picks the first pair that matches
           [/&[a-zA-Z]{4,6};/, ' ']])
  end

  # Encodes the German umlauts and the sharp s as HTML entities.
  #
  # Returns a new String; the receiver is not modified.
  def encode_html_entities
    mgsub([[/ä/, '&auml;'], [/Ä/, '&Auml;'],
           [/ö/, '&ouml;'], [/Ö/, '&Ouml;'],
           [/ü/, '&uuml;'], [/Ü/, '&Uuml;'],
           [/ß/, '&szlig;']])
  end

  # Converts uppercase umlauts to their lowercase form.
  #
  # Returns a new String; the receiver is not modified.
  def umlaut_to_downcase
    mgsub([[/Ä/, 'ä'], [/Ö/, 'ö'], [/Ü/, 'ü']])
  end

  private

  # Substitutes several patterns in a single pass. [Author: Ruby Cookbook]
  #
  # key_value_pairs - Array of [Regexp, replacement String] pairs. When more
  #                   than one pattern matches a hit, the first pair wins.
  #
  # Returns a new String with every match replaced.
  def mgsub(key_value_pairs = [].freeze)
    combined = Regexp.union(*key_value_pairs.map { |pattern, _| pattern })
    gsub(combined) do |match|
      key_value_pairs.find { |pattern, _| pattern =~ match }[1]
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# generates the default stopword list in the current directory
|
2
|
+
# the language is taken from the language switch
|
3
|
+
#
|
4
|
+
# * $Author$
|
5
|
+
# * $Rev$
|
6
|
+
# * $LastChangedDate$
|
7
|
+
#
|
8
|
+
require 'fileutils'
|
9
|
+
include FileUtils
|
10
|
+
|
11
|
+
# Languages for which a bundled stop-word list is looked up under stoplist/.
supported_languages = %w[german english]

# Without a configured, supported language there is no list to copy:
# report the problem and stop.
language_ok = defined?($config) && supported_languages.include?($config['language'])
if !language_ok
  $logger.error('language must be english or german')
  exit
end

# Copy the bundled list for the chosen language into the current directory.
bundled_list = File.dirname(__FILE__) + "/stoplist/#{$config['language']}/stopwords.txt"
cp(bundled_list, './')
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# generates template files
|
2
|
+
# currently only one template is supported
|
3
|
+
#
|
4
|
+
# * $Author$
|
5
|
+
# * $Rev$
|
6
|
+
# * $LastChangedDate$
|
7
|
+
#
|
8
|
+
|
9
|
+
# Copies the files of one bundled search template into the current directory.
class TemplateGenerator
  # template - name of a directory below ../templates (relative to this file).
  #
  # Collecting and copying the files happens immediately on construction.
  def initialize(template)
    @template = template
    find_files
    copy_files_to_current_path
  end

  private

  # Walks the template directory recursively and remembers every regular file.
  #
  # Returns the Array of file paths (also stored in @files).
  def find_files()
    require 'find'
    template_root = File.dirname(__FILE__) + '/../templates/' + @template
    @files = []
    Find.find(template_root) { |entry| @files << entry if File.file?(entry) }
    @files
  end

  # Copies every collected file into the current working directory.
  def copy_files_to_current_path()
    require 'fileutils'
    @files.each { |source| FileUtils.cp(source, './') }
  end
end
|
data/lib/log_init.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# configures the ruby logger
|
2
|
+
#
|
3
|
+
# * $Author$
|
4
|
+
# * $Rev$
|
5
|
+
# * $LastChangedDate$
|
6
|
+
#
|
7
|
+
|
8
|
+
require 'logger'
|
9
|
+
# Builds the global $logger from the optional 'logger' section of $config.
#
# Without that section the logger writes to STDOUT at INFO level. With it,
# 'file' selects the target (the literal string 'STDOUT' or a file path) and
# 'level' names a Logger severity (debug, info, warn, error, fatal, unknown).
if $config.has_key?('logger')
  log_target = $config['logger']['file']
  $logger = log_target == 'STDOUT' ? Logger.new(STDOUT) : Logger.new(log_target)
  # Resolve the severity by constant lookup instead of eval, so a value from
  # the config file can never be executed as Ruby code. The lookup is limited
  # to Logger::Severity itself (inherit = false); an unknown name raises
  # NameError.
  $logger.level = Logger::Severity.const_get($config['logger']['level'].upcase, false)
else
  $logger = Logger.new(STDOUT)
  $logger.level = Logger::INFO
end
|
16
|
+
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#parses command line options and merges them into the config file
|
2
|
+
#
|
3
|
+
# * $Author$
|
4
|
+
# * $Rev$
|
5
|
+
# * $LastChangedDate$
|
6
|
+
#
|
7
|
+
# 'yaml' must be lowercase: the mixed-case name only resolves on
# case-insensitive file systems.
require 'yaml'

require 'optparse'

# Global configuration hash; replaced by the file contents when -c is given.
$config = Hash.new

# Values given on the command line, as [key path, value] pairs. They are merged
# into $config only after parsing has finished, so they override the config
# file regardless of the order in which the flags appear.
cli_overrides = []

OptionParser.new do |opts|
  opts.banner = "Usage: OfflineSearch [options]"
  opts.on('-c', '--config=CONFIG_FILE', String, 'configuration file for the offline search') do |c|
    unless File.exist?(c)
      # NOTE(review): $logger appears to be created after option parsing
      # (log_init reads $config) - fall back to stderr when it is not set yet.
      if $logger
        $logger.error('config file not found')
      else
        warn('config file not found')
      end
      exit
    end
    # An empty YAML file loads as false/nil; keep $config a Hash in that case.
    $config = YAML.load_file(c) || {}
  end
  opts.separator ""
  opts.separator "Generators"
  opts.on('-g', '--generate-default-config', 'creates a default config file in the current directory') do
    $action = 'generate_default_config'
  end
  opts.on('-w', '--generate-default-stopwords', 'creates a default stopword list in the current directory. Language flag is required.') do
    $action = 'generate_default_stopwords'
  end
  opts.on('-t', '--generate-template=TEMPLATE', 'creates search template files in the current directory. Possible values: base, base+double_metaphone') do |t|
    $action = 'generate_template'
    cli_overrides << [['template'], t]
  end
  opts.on('-o', '--generate-search-data', 'crawler the documents in the given docpath and generates the search data file') do
    $action = 'generate_search'
  end
  opts.separator ""
  opts.separator "Optional arguments"
  opts.separator "can also be specified in the config file"
  opts.separator "command line arguments will overwrite any given value in the config file"
  opts.on('-d', '--docpath=DOCPATH', String, 'path of the documents') do |d|
    cli_overrides << [['crawler', 'docpath'], d]
  end
  opts.on('-f', '--search-data-file=SEARCH_DATA_FILE', String, 'path and name of the search data file') do |f|
    cli_overrides << [['search_generator', 'search_data_file'], f]
  end
  opts.on('-s', '--stopword-list=STOPWORD_LIST', String, 'stopword list, if none is specified the default stop word list is used') do |s|
    cli_overrides << [['crawler', 'stopwords'], s]
  end
  opts.on('-l', '--language=LANGUAGE', String, 'required if you want to generate a default stopword list') do |l|
    cli_overrides << [['language'], l]
  end
  opts.separator ""
  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end
  # Called without any argument: show the usage text instead of doing nothing.
  if opts.default_argv.size == 0
    puts opts
    exit
  end
end.parse!

# Apply the command line values on top of whatever the config file provided,
# creating missing sections ('crawler', 'search_generator') on the way.
cli_overrides.each do |key_path, value|
  *sections, key = key_path
  target = sections.inject($config) { |node, name| node[name] ||= {} }
  target[key] = value
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# checks if all required arguments are specified and if their values are correct
|
2
|
+
#
|
3
|
+
# * $Author$
|
4
|
+
# * $Rev$
|
5
|
+
# * $LastChangedDate$
|
6
|
+
#
|
7
|
+
# Checks that all required settings in the global $config are present and
# valid. Validation runs in the constructor.
#
# An invalid language, storage, doc type list or stop-word file is logged via
# $logger and terminates the program with `exit`. A missing docpath or output
# directory is only logged.
class OptionValidator
  def initialize
    storage  = ['memory', 'sqlite']
    language = ['german', 'english']

    reject('language must be english or german') unless language.include?($config['language'])
    reject('storage must be memory or sqlite') unless storage.include?($config['storage'])

    # Fall back to the bundled stop-word list of the (now validated) language.
    if $config['crawler']['stopwords'].nil?
      $config['crawler']['stopwords'] = File.dirname(__FILE__) + "/stoplist/#{$config['language']}/stopwords.txt"
    end

    docs = $config['crawler']['docs']
    reject('doc types must be specified') unless docs && docs.size > 0

    # File.exist? replaces File.exists?, which newer Ruby versions removed.
    reject('stopwords file does not exist') unless File.exist?($config['crawler']['stopwords'])

    unless directory_exists?($config['crawler']['docpath'])
      $logger.error('docpath does not exist')
    end

    unless base_directory_exists?($config['search_generator']['search_data_file'])
      $logger.error('path to the search data file does not exits. Please create the directory first')
    end

    # The frequency file is optional; only check its directory when it is set.
    frequency_file = $config['search_generator']['output_frequency_to']
    if frequency_file && !base_directory_exists?(frequency_file)
      $logger.error('path to the frequency file does not exits. Please create the directory first')
    end
  end

  private

  # Logs a fatal validation error and terminates the program.
  def reject(message)
    $logger.error(message)
    exit
  end

  # True when the directory that would contain +file+ exists; false for nil.
  def base_directory_exists?(file)
    !file.nil? && FileTest.directory?(File.dirname(file))
  end

  # True when +dir+ is an existing directory; false for nil.
  def directory_exists?(dir)
    !dir.nil? && FileTest.directory?(dir)
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
# generates the data for the search
|
2
|
+
#
|
3
|
+
# * $Author$
|
4
|
+
# * $Rev$
|
5
|
+
# * $LastChangedDate$
|
6
|
+
#
|
7
|
+
|
8
|
+
# Writes the JavaScript data file used by the offline search.
#
# Reads its settings from $config['search_generator'] and reports progress
# through $logger.
class SearchGenerator
  # files - collection responding to each_value; every value provides ID,
  #         title, name and page_rank.
  # terms - collection yielding |term, references| from each; every reference
  #         provides document.ID and rank.
  #
  # Opens the file named by $config['search_generator']['search_data_file']
  # for writing.
  def initialize(files, terms)
    @files = files
    @terms = terms
    $logger.info("writing data to #{$config['search_generator']['search_data_file']}")
    @search_data_file = File.new($config['search_generator']['search_data_file'], 'w')
  end

  # Generates the search data and closes the data file, even when one of the
  # generation steps raises.
  def generate
    double_metaphone? ? generate_terms_for_dm : generate_terms
    generate_files
    generate_relative_path
    generate_frequency_file if $config['search_generator']['output_frequency_to']
    generate_double_metaphone if double_metaphone?
    nil
  ensure
    cleanup
  end

  private

  # True only when the config explicitly enables double metaphone with `true`.
  def double_metaphone?
    $config['search_generator']['use_double_metaphone'] == true
  end

  # Sums the ranks per document for one term and returns the JavaScript array
  # elements ("'id-rank'", highest rank first) joined by commas.
  #
  # Because of a JavaScript performance issue with nested arrays, the page id
  # and the page rank are put into one string and split by the search on demand.
  def ranked_documents(reference)
    docs = {}
    reference.each do |r|
      if docs.has_key?(r.document.ID)
        docs[r.document.ID] += r.rank
      else
        docs[r.document.ID] = r.rank
      end
    end
    docs.sort { |a, b| a[1] <=> b[1] }.reverse.map { |doc_id, rank| "'#{doc_id}-#{rank}'" }.join(',')
  end

  # Writes `var terms = {'term':['docid-rank',...],...};` to the data file.
  # Entries are joined explicitly, so an empty term set yields a valid `{}`.
  # NOTE(review): terms are emitted unescaped - presumably they never contain
  # a single quote; confirm against the crawler.
  def generate_terms
    $logger.info("generating term base")
    entries = []
    @terms.each do |term, reference|
      entries << "'#{term}':[#{ranked_documents(reference)}]"
    end
    @search_data_file.puts "var terms = {#{entries.join(',')}};"
  end

  # Double metaphone variant: writes `var terms = {'term':index,...};` and
  # `var ranks = [['docid-rank',...],...];`, where index is the position of
  # the term's entry in ranks.
  def generate_terms_for_dm
    $logger.info("generating term base")
    term_index = []
    ranks = []
    @terms.each do |term, reference|
      term_index << "'#{term}':#{term_index.size}"
      ranks << "[#{ranked_documents(reference)}]"
    end
    @search_data_file.puts "var terms = {#{term_index.join(',')}};"
    @search_data_file.puts "var ranks = [#{ranks.join(',')}];"
  end

  # Writes `var files = {id:["title",'name',page_rank],...};`.
  # The first character of the file name is dropped.
  def generate_files
    $logger.info("generating file base")
    entries = []
    @files.each_value do |f|
      entries << "#{f.ID}:[\"#{f.title}\",'#{f.name[1..-1]}',#{f.page_rank}]"
    end
    @search_data_file.puts "var files = {#{entries.join(',')}};"
  end

  # Stores the configured relative path (with exactly one trailing slash) in
  # the JavaScript variable rel_path, if the setting is present.
  def generate_relative_path
    $logger.info("generating relative path")
    @search_data_file.puts "var rel_path = '#{$config['search_generator']['relative_path_to_files'].gsub(/\/$/,'')}/';" if $config['search_generator'].has_key?('relative_path_to_files')
  end

  # Writes one "term reference_count" line per term to the frequency file.
  def generate_frequency_file
    $logger.info("generating frequency file")
    File.open($config['search_generator']['output_frequency_to'], 'w') do |f|
      @terms.each do |term, reference|
        f.puts "#{term} #{reference.size}"
      end
    end
  end

  # Writes `var dm_data = [['primary','secondary'],...];` with one entry per
  # term, in the same order as the terms; the secondary code is omitted when
  # there is none.
  def generate_double_metaphone()
    $logger.info("generating double metaphone data")
    # Loaded lazily because it is only needed for this optional feature.
    # Lowercase name: 'Text' only resolves on case-insensitive file systems.
    require 'text'
    codes = []
    @terms.each do |t, _r|
      primary, secondary = Text::Metaphone.double_metaphone(t)
      codes << (secondary ? "['#{primary}','#{secondary}']" : "['#{primary}']")
    end
    @search_data_file.puts "var dm_data = [#{codes.join(',')}];"
  end

  # Closes the data file.
  def cleanup
    @search_data_file.close
  end
end
|
data/lib/stop_words.rb
ADDED
@@ -0,0 +1,317 @@
|
|
1
|
+
a
|
2
|
+
abandon
|
3
|
+
abandoned
|
4
|
+
abc
|
5
|
+
able
|
6
|
+
about
|
7
|
+
above
|
8
|
+
absence
|
9
|
+
absent
|
10
|
+
absolute
|
11
|
+
absolutely
|
12
|
+
ac
|
13
|
+
accordance
|
14
|
+
according
|
15
|
+
accordingly
|
16
|
+
acct
|
17
|
+
across
|
18
|
+
active
|
19
|
+
actively
|
20
|
+
actual
|
21
|
+
actually
|
22
|
+
ad
|
23
|
+
adb
|
24
|
+
adds
|
25
|
+
adequate
|
26
|
+
adequately
|
27
|
+
adjacent
|
28
|
+
adversely
|
29
|
+
after
|
30
|
+
afterwards
|
31
|
+
again
|
32
|
+
against
|
33
|
+
ago
|
34
|
+
ahead
|
35
|
+
alike
|
36
|
+
all
|
37
|
+
almost
|
38
|
+
alone
|
39
|
+
along
|
40
|
+
already
|
41
|
+
also
|
42
|
+
although
|
43
|
+
always
|
44
|
+
am
|
45
|
+
among
|
46
|
+
amongst
|
47
|
+
an
|
48
|
+
analog
|
49
|
+
and
|
50
|
+
another
|
51
|
+
any
|
52
|
+
anybody
|
53
|
+
anymore
|
54
|
+
anyone
|
55
|
+
anything
|
56
|
+
anyway
|
57
|
+
anywhere
|
58
|
+
apart
|
59
|
+
approx
|
60
|
+
are
|
61
|
+
around
|
62
|
+
as
|
63
|
+
aside
|
64
|
+
at
|
65
|
+
auto
|
66
|
+
avail
|
67
|
+
available
|
68
|
+
aware
|
69
|
+
away
|
70
|
+
awhile
|
71
|
+
badly
|
72
|
+
be
|
73
|
+
became
|
74
|
+
because
|
75
|
+
become
|
76
|
+
becomes
|
77
|
+
becoming
|
78
|
+
been
|
79
|
+
before
|
80
|
+
being
|
81
|
+
below
|
82
|
+
beside
|
83
|
+
besides
|
84
|
+
better
|
85
|
+
between
|
86
|
+
beyond
|
87
|
+
brief
|
88
|
+
briefly
|
89
|
+
but
|
90
|
+
by
|
91
|
+
bye
|
92
|
+
can
|
93
|
+
come
|
94
|
+
comes
|
95
|
+
comfortable
|
96
|
+
common
|
97
|
+
commonly
|
98
|
+
completely
|
99
|
+
cons
|
100
|
+
continually
|
101
|
+
continue
|
102
|
+
continued
|
103
|
+
continues
|
104
|
+
continuing
|
105
|
+
continuous
|
106
|
+
continuously
|
107
|
+
could
|
108
|
+
couple
|
109
|
+
course
|
110
|
+
current
|
111
|
+
currently
|
112
|
+
definitely
|
113
|
+
despite
|
114
|
+
did
|
115
|
+
directly
|
116
|
+
do
|
117
|
+
does
|
118
|
+
doing
|
119
|
+
done
|
120
|
+
during
|
121
|
+
each
|
122
|
+
either
|
123
|
+
empty
|
124
|
+
enough
|
125
|
+
even
|
126
|
+
eventually
|
127
|
+
ever
|
128
|
+
every
|
129
|
+
everybody
|
130
|
+
everyone
|
131
|
+
everything
|
132
|
+
everywhere
|
133
|
+
ex
|
134
|
+
exact
|
135
|
+
exactly
|
136
|
+
for
|
137
|
+
from
|
138
|
+
generally
|
139
|
+
had
|
140
|
+
has
|
141
|
+
have
|
142
|
+
he
|
143
|
+
hello
|
144
|
+
her
|
145
|
+
hereby
|
146
|
+
herein
|
147
|
+
hi
|
148
|
+
him
|
149
|
+
himself
|
150
|
+
his
|
151
|
+
how
|
152
|
+
however
|
153
|
+
ideally
|
154
|
+
if
|
155
|
+
in
|
156
|
+
indeed
|
157
|
+
inner
|
158
|
+
into
|
159
|
+
is
|
160
|
+
it
|
161
|
+
its
|
162
|
+
itself
|
163
|
+
just
|
164
|
+
like
|
165
|
+
lot
|
166
|
+
lots
|
167
|
+
many
|
168
|
+
may
|
169
|
+
maybe
|
170
|
+
me
|
171
|
+
mean
|
172
|
+
mere
|
173
|
+
merely
|
174
|
+
might
|
175
|
+
more
|
176
|
+
most
|
177
|
+
mostly
|
178
|
+
must
|
179
|
+
my
|
180
|
+
myself
|
181
|
+
neither
|
182
|
+
not
|
183
|
+
obvious
|
184
|
+
obviously
|
185
|
+
of
|
186
|
+
off
|
187
|
+
on
|
188
|
+
once
|
189
|
+
one
|
190
|
+
ones
|
191
|
+
ongoing
|
192
|
+
only
|
193
|
+
onto
|
194
|
+
other
|
195
|
+
others
|
196
|
+
otherwise
|
197
|
+
ought
|
198
|
+
our
|
199
|
+
ours
|
200
|
+
out
|
201
|
+
outer
|
202
|
+
over
|
203
|
+
particular
|
204
|
+
particularly
|
205
|
+
please
|
206
|
+
previous
|
207
|
+
previously
|
208
|
+
ready
|
209
|
+
really
|
210
|
+
recent
|
211
|
+
recently
|
212
|
+
relative
|
213
|
+
relatively
|
214
|
+
same
|
215
|
+
see
|
216
|
+
seldom
|
217
|
+
self
|
218
|
+
serious
|
219
|
+
seriously
|
220
|
+
set
|
221
|
+
similar
|
222
|
+
since
|
223
|
+
sincerely
|
224
|
+
so
|
225
|
+
some
|
226
|
+
somebody
|
227
|
+
someday
|
228
|
+
somehow
|
229
|
+
someone
|
230
|
+
someplace
|
231
|
+
something
|
232
|
+
sometime
|
233
|
+
sometimes
|
234
|
+
somewhat
|
235
|
+
somewhere
|
236
|
+
sorely
|
237
|
+
sorry
|
238
|
+
successful
|
239
|
+
successfully
|
240
|
+
such
|
241
|
+
suddenly
|
242
|
+
suitable
|
243
|
+
sure
|
244
|
+
surely
|
245
|
+
than
|
246
|
+
thank
|
247
|
+
thanks
|
248
|
+
that
|
249
|
+
thats
|
250
|
+
the
|
251
|
+
their
|
252
|
+
them
|
253
|
+
theme
|
254
|
+
themselves
|
255
|
+
then
|
256
|
+
there
|
257
|
+
thereby
|
258
|
+
therefore
|
259
|
+
these
|
260
|
+
they
|
261
|
+
this
|
262
|
+
thorough
|
263
|
+
thoroughly
|
264
|
+
those
|
265
|
+
though
|
266
|
+
through
|
267
|
+
throughout
|
268
|
+
throughput
|
269
|
+
to
|
270
|
+
today
|
271
|
+
together
|
272
|
+
total
|
273
|
+
totally
|
274
|
+
toward
|
275
|
+
towards
|
276
|
+
typical
|
277
|
+
typically
|
278
|
+
until
|
279
|
+
up
|
280
|
+
upon
|
281
|
+
usually
|
282
|
+
various
|
283
|
+
very
|
284
|
+
want
|
285
|
+
was
|
286
|
+
we
|
287
|
+
well
|
288
|
+
went
|
289
|
+
were
|
290
|
+
what
|
291
|
+
whatever
|
292
|
+
whats
|
293
|
+
whatsoever
|
294
|
+
when
|
295
|
+
where
|
296
|
+
which
|
297
|
+
while
|
298
|
+
who
|
299
|
+
whoever
|
300
|
+
whole
|
301
|
+
whom
|
302
|
+
whose
|
303
|
+
why
|
304
|
+
will
|
305
|
+
with
|
306
|
+
within
|
307
|
+
without
|
308
|
+
worth
|
309
|
+
worthwhile
|
310
|
+
worthy
|
311
|
+
would
|
312
|
+
yes
|
313
|
+
yet
|
314
|
+
you
|
315
|
+
your
|
316
|
+
yours
|
317
|
+
yourself
|