thomaspeklak-OfflineSearch 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +100 -0
- data/bin/OfflineSearch +3 -0
- data/lib/action_controller.rb +70 -0
- data/lib/config.yaml +31 -0
- data/lib/config_default.yaml +32 -0
- data/lib/crawler.rb +237 -0
- data/lib/entity_converter.rb +33 -0
- data/lib/generate_default_config.rb +9 -0
- data/lib/generate_default_stopwords.rb +18 -0
- data/lib/generate_default_template.rb +38 -0
- data/lib/log_init.rb +16 -0
- data/lib/offline_search.rb +11 -0
- data/lib/option_parser.rb +61 -0
- data/lib/option_validator.rb +53 -0
- data/lib/search_generator.rb +112 -0
- data/lib/stop_words.rb +7 -0
- data/lib/stoplist/english/stopwords.txt +317 -0
- data/lib/stoplist/german/stopwords.txt +662 -0
- data/lib/temporary_storage.rb +257 -0
- data/templates/base+double_metaphone/jQueryDoubleMetaphone.js +290 -0
- data/templates/base+double_metaphone/jQueryDoubleMetaphone.packed.js +1 -0
- data/templates/base+double_metaphone/jquery-1.2.2.min.js +31 -0
- data/templates/base+double_metaphone/search.css +10 -0
- data/templates/base+double_metaphone/search.html +19 -0
- data/templates/base+double_metaphone/search.js +178 -0
- data/templates/base/jquery-1.2.2.min.js +31 -0
- data/templates/base/search.css +10 -0
- data/templates/base/search.html +18 -0
- data/templates/base/search.js +99 -0
- data/tests/notestsyet.rb +0 -0
- metadata +122 -0
@@ -0,0 +1,257 @@
|
|
1
|
+
# temporary storage
|
2
|
+
# class to store crawled data before the data is written to a file
|
3
|
+
#
|
4
|
+
# options:
|
5
|
+
# database
|
6
|
+
# sqlite, mysql
|
7
|
+
# filesystem
|
8
|
+
#
|
9
|
+
# * $Author$
|
10
|
+
# * $Rev$
|
11
|
+
# * $LastChangedDate$
|
12
|
+
|
13
|
+
# Temporary storage for crawled data before it is written to the search index.
#
# The public methods are a thin facade: every call is forwarded to a storage
# handler chosen by the mode passed to the constructor:
#   'sqlite' - Temporary_Storage::Sqlite, backed by the file storage.db
#   'memory' - Temporary_Storage::Memory, backed by plain Ruby hashes
class Temporary_Storage
  attr_reader :storage_handler

  # Initializes the storage handler.
  #
  # mode - 'sqlite' or 'memory'. Any other value is reported through the
  #        global $logger and terminates the process via exit.
  def initialize(mode)
    # "when x then y" replaces the Ruby 1.8-only "when x: y" form, which is a
    # syntax error on Ruby 1.9 and later.
    @storage_handler =
      case mode
      when 'sqlite' then Sqlite.new('storage.db')
      when 'memory' then Memory.new
      else
        $logger.error("no appropriate stroage is selected\nvalid options:\n\tsqlite\n\tmemory")
        exit
      end
  end

  # Stores file name, title and page rank. The stored file becomes the
  # "current" file that subsequent store_term calls are attached to.
  # The page rank starts at 1 to enable multiplication by the page rank.
  def store_file(filename, title, pagerank = 1)
    # NOTE(review): as written this gsub replaces a double quote with itself;
    # the intended escape sequence was presumably lost - confirm.
    @storage_handler.store_file(filename.to_s, title.to_s.gsub('"', '"'), pagerank)
  end

  # Stores the term and term rank for the current file.
  def store_term(term, rank)
    @storage_handler.store_term(term, rank)
  end

  # Stores an array of links.
  def store_link(links)
    @storage_handler.store_link(links)
  end

  # Returns a hash describing a stored file, or nil when the handler has no
  # entry for filename.
  #
  # The misspelled key 'titel' is kept for existing callers; 'title' carries
  # the same value.
  def get_file(filename)
    row = @storage_handler.get_file(filename)
    return nil unless row

    { 'filename' => row[0],
      'titel'    => row[1],
      'title'    => row[1],
      'pagerank' => row[2] }
  end

  # Returns the stored files in the handler's native representation.
  def get_files
    @storage_handler.get_files
  end

  # Returns the stored links in the handler's native representation.
  def get_links
    @storage_handler.get_links
  end

  # Returns the stored terms in the handler's native representation.
  def get_terms
    @storage_handler.get_terms
  end

  # Calculates the page rank.
  # The page rank equals the number of inbound links or, if none, 1.
  def calculate_pageranks_from_links
    @storage_handler.calculate_pageranks_from_links
  end

  # Storage handler that keeps everything in memory.
  class Memory
    def initialize
      @files = {}
      @terms = Terms.new
      @links = Links.new
      # Same variable that store_file / store_term use (the original
      # initialised a differently named, never-read @current_doc).
      @current_document = nil
    end

    # Stores the file in the files hash and makes it the current document.
    def store_file(filename, title, pagerank = 1)
      @files[filename] = @current_document = Document.new(filename, title, pagerank)
    end

    # Stores a term for the current document.
    def store_term(term, rank)
      @terms.store(term, Term2Document.new(@current_document, rank))
    end

    # Counts every link of the given array as one inbound link.
    def store_link(links)
      @links.add(links)
    end

    # Returns [name, title, page_rank] of a stored file, or nil if unknown.
    # (The original body was empty and always returned nil.)
    def get_file(filename)
      doc = @files[filename]
      doc && [doc.name, doc.title, doc.page_rank]
    end

    # Returns the files hash (file name => Document).
    def get_files
      @files
    end

    # Returns the terms hash (term => array of Term2Document).
    def get_terms
      @terms.get_all
    end

    # Returns the links hash (link => number of occurrences).
    def get_links
      @links.get_all
    end

    # Calculates the page rank.
    # Every stored file that appears as a link target gets the link count as
    # its page rank; files without inbound links keep their current rank.
    def calculate_pageranks_from_links
      @links.get_all.each do |link, rank|
        @files[link].page_rank = rank if @files.key?(link)
      end
    end

    # Represents a document. Stores an internal id, the file name, title and
    # page rank. All attributes are accessible.
    class Document
      @@ID = 0 # running counter shared by all documents
      attr_accessor :ID, :name, :title, :page_rank

      def initialize(name, title, page_rank)
        @ID = @@ID += 1
        @name = name
        @title = title
        @page_rank = page_rank
      end
    end

    # Represents a hash of terms and their corresponding documents.
    class Terms
      def initialize
        @terms = {}
      end

      # Appends term2document to the entries of term, creating the entry list
      # on first use. Returns the entry list.
      def store(term, term2document)
        (@terms[term] ||= []) << term2document
      end

      # Returns the entries stored for one term (nil if the term is unknown).
      def get_one(term)
        @terms[term]
      end

      # Returns the terms hash.
      def get_all
        @terms
      end
    end

    # Represents a link from a term to a document. The link includes the
    # semantic value (rank) of the term. All attributes are accessible.
    class Term2Document
      attr_accessor :document, :rank

      def initialize(document, rank)
        @document = document
        @rank = rank
      end
    end

    # Represents the unique links of all indexed documents.
    class Links
      def initialize
        @links = {}
      end

      # Adds each link to the hash, or increases its count by one if the link
      # already exists.
      def add(links)
        links.each { |link| @links[link] = @links.fetch(link, 0) + 1 }
      end

      # Returns all links.
      def get_all
        @links
      end
    end
  end

  # Storage handler backed by a SQLite database file.
  #
  # FIXME: get_terms is not implemented here although Temporary_Storage
  # delegates to it; calling Temporary_Storage#get_terms in 'sqlite' mode
  # raises NoMethodError.
  class Sqlite
    # Creates a fresh database file db with the tables files, terms,
    # files_terms and links. An existing file of that name is deleted first.
    def initialize(db)
      # Loaded lazily so that 'memory' mode works without the sqlite3 gem.
      require 'rubygems'
      require 'sqlite3'
      @current_file_id = nil
      begin
        File.delete(db)
      rescue Errno::ENOENT
        # nothing to delete on the first run
      end
      @db = SQLite3::Database.new(db)
      @db.type_translation = true
      sql = "
        create table files(
          id integer not null primary key autoincrement,
          filename varchar2(255),
          title varchar2(255),
          pagerank integer
        );
        create table terms(
          id integer not null primary key autoincrement,
          term varchar2(255) unique not null
        );
        create table files_terms(
          file_id integer not null,
          term_id integer not null,
          rank integer not null
        );
        create table links(
          link varchar2(255) not null primary key,
          links_in integer
        )
      "
      @db.execute_batch(sql)
    end

    # Inserts a file row and remembers its id as the current file.
    def store_file(filename, title, pagerank)
      @db.execute("insert into files (filename, title, pagerank) values ( ?, ?, ? )", filename, title, pagerank)
      @current_file_id = @db.last_insert_row_id
    end

    # Links term to the current file with the given rank, inserting the term
    # first if it is not stored yet.
    def store_term(term, rank)
      term_id = @db.get_first_value('select id from terms where term = ?', term)
      unless term_id
        @db.execute("insert into terms (term) values (?)", term)
        term_id = @db.last_insert_row_id
      end
      @db.execute("insert into files_terms values (?,?,?)", @current_file_id, term_id, rank)
    end

    # Counts every link of the given array as one inbound link.
    def store_link(links)
      links.each do |link|
        links_in = @db.get_first_value("select links_in from links where link = ?", link)
        if links_in
          @db.execute("update links set links_in =? where link = ? ", links_in + 1, link)
        else
          @db.execute("insert into links values (?,?)", link, 1)
        end
      end
    end

    # Returns the row [filename, title, pagerank] of a stored file.
    # The columns are selected explicitly because Temporary_Storage#get_file
    # reads them by position; "select *" would put the id first.
    def get_file(filename)
      @db.get_first_row("select filename, title, pagerank from files where filename = ?", filename)
    end

    # Returns all files joined with their term links.
    # (The original joined the non-existent table files_term on ft.id.)
    def get_files
      @db.execute("select * from files f join files_terms ft on f.id = ft.file_id")
    end

    # Returns all rows of the links table.
    def get_links
      @db.execute("select * from links")
    end

    # Calculates the page rank.
    # Every stored file that appears as a link target gets the link count as
    # its page rank; files without inbound links keep their current rank.
    def calculate_pageranks_from_links
      @db.execute("update files set pagerank = (select links_in from links where link = files.filename) where filename in (select link from links)")
    end
  end
end
|
@@ -0,0 +1,290 @@
|
|
1
|
+
(function(){
|
2
|
+
/**
 * Computes the Double Metaphone codes of this string.
 *
 * @returns {Array} [primary, secondary]; both codes are cut to at most four
 *                  characters and secondary is null when it equals primary.
 */
String.prototype.doubleMetaphone = function () {
  var word = this.toUpperCase() + ' '; // padded so look-ahead stays in range
  var wordLength = this.length;
  var lastIndex = wordLength - 1;
  var primaryParts = [];
  var secondaryParts = [];
  var index = 0;

  // silent first letter
  if (/^GN|KN|PN|WR|PS$/.test(word.substr(0, 2))) {
    index += 1;
  }

  // a leading X is encoded as S
  if (word.substr(0, 1) == 'X') {
    primaryParts.push('S');
    secondaryParts.push('S');
    index += 1;
  }

  // main loop: one lookup per step, each lookup reports how far to advance
  var step;
  while ((primaryParts.length < 4 || secondaryParts.length < 4) && index <= wordLength) {
    step = double_metaphone_lookup(word, index, wordLength, lastIndex);
    if (step[0]) {
      primaryParts.push(step[0]);
    }
    if (step[1]) {
      secondaryParts.push(step[1]);
    }
    index += step[2];
  }

  var primaryCode = primaryParts.join('').substr(0, 4);
  var secondaryCode = secondaryParts.join('').substr(0, 4);
  if (primaryCode == secondaryCode) {
    return [primaryCode, null];
  }
  return [primaryCode, secondaryCode];
};
|
24
|
+
/**
 * Tells whether this (upper-cased) string contains one of the letter groups
 * W, K, CZ or WITZ.
 *
 * @returns {boolean}
 */
String.prototype.slavo_germanic = function () {
  var marker = /W|K|CZ|WITZ/;
  return marker.test(this);
};
|
27
|
+
/**
 * Tells whether this string is exactly one upper-case vowel (A, E, I, O, U
 * or Y).
 *
 * @returns {boolean}
 */
String.prototype.vowel = function () {
  var singleVowel = /^[AEIOUY]$/;
  return singleVowel.test(this);
};
|
30
|
+
// one variable per upper-case letter, each holding that letter as a string; used by double_metaphone_lookup for comparisons and result codes
var A='A',B='B',C='C',D='D',E='E',F='F',G='G',H='H',I='I',J='J',K='K',L='L',M='M',N='N',O='O',P='P',Q='Q',R='R',S='S',T='T',U='U',V='V',W='W',X='X',Y='Y',Z='Z';
|
31
|
+
/**
 * Encodes the letter at position pos of str according to the Double
 * Metaphone rules.
 *
 * @param {string} str    upper-cased word; doubleMetaphone appends one blank
 * @param {number} pos    index of the letter to encode
 * @param {number} length length of the word without the appended blank
 * @param {number} last   index of the last letter (length - 1)
 * @returns {Array} [primary, secondary, advance]: the code fragments to
 *                  append (null or '' for none) and the number of letters
 *                  the caller has to skip.
 */
function double_metaphone_lookup(str, pos, length, last){
  var cl = str.charAt(pos); // current letter
  var step;                 // letters consumed when it depends on a doubled letter

  // vowels are only encoded at the very beginning of the word
  if (cl.vowel())
    return pos ? [null, null, 1] : [A, A, 1];

  switch (cl) {
    case B:
      return [P, P, (B == str.charAt(pos+1)) ? 2 : 1];

    case 'Ç':
      return [S, S, 1];

    case C:
      if (pos > 1 && !str.charAt(pos-2).vowel() && 'ACH' == str.substr(pos-1,3) && str.charAt(pos+2) != I && (str.charAt(pos+2) != E || /^(B|M)ACHER$/.test(str.substr(pos-2,6))))
        return [K, K, 2];
      if (!pos && 'CAESAR' == str.substr(pos,6))
        return [S, S, 2];
      if ('CHIA' == str.substr(pos,4))
        return [K, K, 2];
      if ('CH' == str.substr(pos,2)) {
        if (pos && 'CHAE' == str.substr(pos,4))
          return [K, X, 2];
        if (!pos && ($.in_array(['HARAC', 'HARIS'], str.substr(pos+1,5)) || $.in_array(['HOR', 'HYM', 'HIA', 'HEM'], str.substr(pos+1,3))) && str.substr(0,5) != 'CHORE')
          return [K, K, 2];
        // 'VON ' / 'VAN ' carry the blank: they are compared with four
        // characters (the three-letter forms could never match)
        if ($.in_array(['VON ', 'VAN '], str.substr(0,4)) || 'SCH' == str.substr(0,3) || $.in_array(['ORCHES', 'ARCHIT', 'ORCHID'], str.substr(pos-2,6)) || /^[TS]$/.test(str.charAt(pos+2)) || ((!pos || /^[AOUE]$/.test(str.charAt(pos-1))) && /^[LRNMBHFVW ]$/.test(str.charAt(pos+2))))
          return [K, K, 2];
        if (pos)
          return [('MC' == str.substr(0,2)) ? K : X, K, 2];
        return [X, X, 2];
      }
      if (Z == str.charAt(pos+1) && 'WI' != str.substr(pos-2,2))
        return [S, X, 2];
      if ('CIA' == str.substr(pos+1,3))
        return [X, X, 3];
      // double C, except in words that start with "MCC". The original
      // condition (1 != pos && M != first) excluded every word with the CC
      // at index 1, which made the "ACC..." test below unreachable.
      if (C == str.charAt(pos+1) && !(1 == pos && M == str.charAt(0))) {
        if (/^[IEH]$/.test(str.charAt(pos+2)) && 'HU' != str.substr(pos+2,2)) {
          if ((1 == pos && A == str.charAt(pos-1)) || /^UCCE(E|S)$/.test(str.substr(pos-1,5)))
            return ['KS', 'KS', 3];
          return [X, X, 3];
        }
        return [K, K, 2];
      }
      if (/^[KGQ]$/.test(str.charAt(pos+1)))
        return [K, K, 2];
      if (/^[IEY]$/.test(str.charAt(pos+1)))
        return [S, /^I(O|E|A)$/.test(str.substr(pos+1,2)) ? X : S, 2];
      if (/^ (C|Q|G)$/.test(str.substr(pos+1,2)))
        return [K, K, 3];
      return [K, K, (/^[CKQ]$/.test(str.charAt(pos+1)) && !$.in_array(['CE', 'CI'], str.substr(pos+1,2))) ? 2 : 1];

    case D:
      if (str.charAt(pos+1) == G) {
        if (/^[IEY]$/.test(str.charAt(pos+2)))
          return [J, J, 3];
        return ['TK', 'TK', 2];
      }
      return [T, T, /^[DT]$/.test(str.charAt(pos+1)) ? 2 : 1];

    case F:
      return [F, F, (F == str.charAt(pos+1)) ? 2 : 1];

    case G:
      if (H == str.charAt(pos+1)) {
        if (pos && !str.charAt(pos-1).vowel())
          return [K, K, 2];
        if (!pos)
          return (I == str.charAt(pos+2)) ? [J, J, 2] : [K, K, 2];
        if ((pos > 1 && /^[BHD]$/.test(str.charAt(pos-2))) || (pos > 2 && /^[BHD]$/.test(str.charAt(pos-3))) || (pos > 3 && /^[BH]$/.test(str.charAt(pos-4))))
          return [null, null, 2];
        if (pos > 2 && U == str.charAt(pos-1) && /^[CGLRT]$/.test(str.charAt(pos-3)))
          return [F, F, 2];
        if (pos && I != str.charAt(pos-1))
          return [K, K, 2];
        return [null, null, 2];
      }
      if (N == str.charAt(pos+1)) {
        if (1 == pos && str.charAt(0).vowel() && !str.slavo_germanic())
          return ['KN', N, 2];
        if ('EY' != str.substr(pos+2,2) && Y != str.charAt(pos+1) && !str.slavo_germanic())
          return [N, 'KN', 2];
        return ['KN', N, 2];
      }
      if ('LI' == str.substr(pos+1,2))
        return ['KL', L, 2];
      if (!pos && (Y == str.charAt(pos+1) || /^(E(S|P|B|L|Y|I|R)|I(B|L|N|E))$/.test(str.substr(pos+1,2))))
        return [K, J, 2];
      if (('ER' == str.substr(pos+1,2) || Y == str.charAt(pos+1)) && !/^(D|R|M)ANGER$/.test(str.substr(0,6)) && !/^[EI]$/.test(str.charAt(pos-1)) && !/^(R|O)GY$/.test(str.substr(pos-1,3)))
        return [K, J, 2];
      if (/^[EIY]$/.test(str.charAt(pos+1)) || /^(A|O)GGI$/.test(str.substr(pos-1,4))) {
        if (/^V(A|O)N $/.test(str.substr(0,4)) || 'SCH' == str.substr(0,3) || 'ET' == str.substr(pos+1,2))
          return [K, K, 2];
        if ('IER ' == str.substr(pos+1,4))
          return [J, J, 2];
        return [J, K, 2];
      }
      if (G == str.charAt(pos+1))
        return [K, K, 2];
      return [K, K, 1];

    case H:
      // H is kept only before a vowel, and then only at the start of the
      // word or after another vowel. The original lacked the parentheses,
      // so every leading H was kept and two letters were skipped.
      if ((!pos || str.charAt(pos-1).vowel()) && str.charAt(pos+1).vowel())
        return [H, H, 2];
      return [null, null, 1];

    case J:
      if ('OSE' == str.substr(pos+1,3) || 'SAN ' == str.substr(0,4)) {
        if ((!pos && ' ' == str.charAt(pos+4)) || 'SAN ' == str.substr(0,4))
          return [H, H, 1];
        return [J, H, 1];
      }
      step = (J == str.charAt(pos+1)) ? 2 : 1;
      if (!pos && 'OSE' != str.substr(pos+1,3))
        return [J, A, step];
      if (str.charAt(pos-1).vowel() && !str.slavo_germanic() && /^[AO]$/.test(str.charAt(pos+1)))
        return [J, H, step];
      if (last == pos)
        return [J, null, step];
      if (!/^[LTKSNMBZ]$/.test(str.charAt(pos+1)) && !/^[SKL]$/.test(str.charAt(pos-1)))
        return [J, J, step];
      return [null, null, step];

    case K:
      return [K, K, (K == str.charAt(pos+1)) ? 2 : 1];

    case L:
      if (L == str.charAt(pos+1)) {
        // The "word ends in AS/OS/A/O" test only applies together with
        // 'ALLE'; the original grouped it as a || (b && c), which dropped the
        // secondary code for every LL word ending in AS or OS.
        if (((length-3) == pos && /^(ILL(O|A)|ALLE)$/.test(str.substr(pos-1,4))) ||
            ((/^(A|O)S$/.test(str.substr(last-1,2)) || /^[AO]$/.test(str.charAt(last))) && 'ALLE' == str.substr(pos-1,4)))
          return [L, null, 2];
        return [L, L, 2];
      }
      return [L, L, 1];

    case M:
      if (('UMB' == str.substr(pos-1,3) && (last-1 == pos || 'ER' == str.substr(pos+2,2))) || M == str.charAt(pos+1))
        return [M, M, 2];
      return [M, M, 1];

    case N:
      return [N, N, (N == str.charAt(pos+1)) ? 2 : 1];

    case 'Ñ':
      return [N, N, 1];

    case P:
      if (H == str.charAt(pos+1))
        return [F, F, 2];
      return [P, P, /^[PB]$/.test(str.charAt(pos+1)) ? 2 : 1];

    case Q:
      // look at the letter after the current one (the original read index 1)
      return [K, K, (Q == str.charAt(pos+1)) ? 2 : 1];

    case R:
      step = (R == str.charAt(pos+1)) ? 2 : 1;
      if (last == pos && !str.slavo_germanic() && 'IE' == str.substr(pos-2,2) && !/^M(E|A)$/.test(str.substr(pos-4,2)))
        return [null, R, step];
      return [R, R, step];

    case S:
      if (/^(I|Y)SL$/.test(str.substr(pos-1,3)))
        return [null, null, 1];
      if (H == str.charAt(pos+1)) {
        if (/^H(EIM|OEK|OLM|OLZ)$/.test(str.substr(pos+1,4)))
          return [S, S, 2];
        return [X, X, 2];
      }
      if (/^I(O|A)$/.test(str.substr(pos+1,2)))
        return [S, str.slavo_germanic() ? S : X, 3];
      if ((!pos && /^[MNLW]$/.test(str.charAt(pos+1))) || Z == str.charAt(pos+1))
        return [S, X, (Z == str.charAt(pos+1)) ? 2 : 1];
      if (C == str.charAt(pos+1)) {
        if (H == str.charAt(pos+2)) {
          if (/^(OO|ER|EN|UY|ED|EM)$/.test(str.substr(pos+3,2)))
            return [/^E(R|N)$/.test(str.substr(pos+3,2)) ? X : 'SK', 'SK', 3];
          return [X, ((!pos && !str.charAt(3).vowel()) && (W != str.charAt(pos+3))) ? S : X, 3];
        }
        if (/^[IEY]$/.test(str.charAt(pos+2)))
          return [S, S, 3];
        return ['SK', 'SK', 3];
      }
      return [(last == pos && /^(A|O)I$/.test(str.substr(pos-2,2))) ? null : S, S, /^[SZ]$/.test(str.charAt(pos+1)) ? 2 : 1];

    case T:
      if ('ION' == str.substr(pos+1,3) || /^(IA|CH)$/.test(str.substr(pos+1,2)))
        return [X, X, 3];
      if (H == str.charAt(pos+1) || 'TH' == str.substr(pos+1,2)) {
        if (/^(O|A)M$/.test(str.substr(pos+2,2)) || /^V(A|O)N $/.test(str.substr(0,4)) || 'SCH' == str.substr(0,3))
          return [T, T, 2];
        return ['0', T, 2];
      }
      return [T, T, /^[TD]$/.test(str.charAt(pos+1)) ? 2 : 1];

    case V:
      return [F, F, (V == str.charAt(pos+1)) ? 2 : 1];

    case W:
      if (R == str.charAt(pos+1))
        return [R, R, 2];
      var pri = '';
      var sec = '';
      // only a leading W followed by a vowel or H is encoded as 'A'; the
      // original grouping also matched "WH" in the middle of a word
      if (!pos && (str.charAt(pos+1).vowel() || H == str.charAt(pos+1))) {
        pri = A;
        sec = str.charAt(pos+1).vowel() ? F : A;
      }
      if ((last == pos && str.charAt(pos-1).vowel()) || 'SCH' == str.substr(0,3) || /^(EWSKI|EWSKY|OWSKI|OWSKY)$/.test(str.substr(pos-1,5)))
        return [pri, sec+F, 1];
      if (/^I(C|T)Z$/.test(str.substr(pos+1,3)))
        return [pri+'TS', sec+'FX', 4];
      return [pri, sec, 1];

    case X:
      step = /^[CX]$/.test(str.charAt(pos+1)) ? 2 : 1;
      if (last == pos && (/^(I|E)AU$/.test(str.substr(pos-3,3)) || /^(A|O)U$/.test(str.substr(pos-2,2))))
        return [null, null, step];
      return ['KS', 'KS', step];

    case Z:
      if (H == str.charAt(pos+1))
        return [J, J, 2];
      step = (Z == str.charAt(pos+1)) ? 2 : 1;
      if (/^Z(O|I|A)$/.test(str.substr(pos+1,2)) || (str.slavo_germanic() && (pos > 0 && T != str.charAt(pos-1))))
        return [S, 'TS', step];
      return [S, S, step];
  }

  // any other character (blank, digit, punctuation, ...) produces no code
  return [null, null, 1];
};
|
283
|
+
/**
 * Tells whether p_val occurs in arr (compared with ==).
 *
 * @param {Array} arr   values to search
 * @param {*}     p_val value to look for
 * @returns {boolean} true as soon as an element equals p_val, false otherwise
 */
$.in_array = function (arr, p_val) {
  var total = arr.length;
  var index = 0;
  while (index < total) {
    if (arr[index] == p_val) {
      return true;
    }
    index += 1;
  }
  return false;
};
|
290
|
+
})();
|