phonet 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/COPYING ADDED
@@ -0,0 +1,56 @@
1
+ Ruby is copyrighted free software by Yukihiro Matsumoto <matz@netlab.jp>.
2
+ You can redistribute it and/or modify it under either the terms of the GPL
3
+ (see the file GPL), or the conditions below:
4
+
5
+ 1. You may make and give away verbatim copies of the source form of the
6
+ software without restriction, provided that you duplicate all of the
7
+ original copyright notices and associated disclaimers.
8
+
9
+ 2. You may modify your copy of the software in any way, provided that
10
+ you do at least ONE of the following:
11
+
12
+ a) place your modifications in the Public Domain or otherwise
13
+ make them Freely Available, such as by posting said
14
+ modifications to Usenet or an equivalent medium, or by allowing
15
+ the author to include your modifications in the software.
16
+
17
+ b) use the modified software only within your corporation or
18
+ organization.
19
+
20
+ c) give non-standard binaries non-standard names, with
21
+ instructions on where to get the original software distribution.
22
+
23
+ d) make other distribution arrangements with the author.
24
+
25
+ 3. You may distribute the software in object code or binary form,
26
+ provided that you do at least ONE of the following:
27
+
28
+ a) distribute the binaries and library files of the software,
29
+ together with instructions (in the manual page or equivalent)
30
+ on where to get the original distribution.
31
+
32
+ b) accompany the distribution with the machine-readable source of
33
+ the software.
34
+
35
+ c) give non-standard binaries non-standard names, with
36
+ instructions on where to get the original software distribution.
37
+
38
+ d) make other distribution arrangements with the author.
39
+
40
+ 4. You may modify and include the part of the software into any other
41
+ software (possibly commercial). But some files in the distribution
42
+ are not written by the author, so that they are not under these terms.
43
+
44
+ For the list of those files and their copying conditions, see the
45
+ file LEGAL.
46
+
47
+ 5. The scripts and library files supplied as input to or produced as
48
+ output from the software do not automatically fall under the
49
+ copyright of the software, but belong to whomever generated them,
50
+ and may be sold commercially, and may be aggregated with this
51
+ software.
52
+
53
+ 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
54
+ IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
55
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56
+ PURPOSE.
@@ -0,0 +1,51 @@
1
+ $BK\%W%m%0%i%`$O%U%j!<%=%U%H%&%'%"$G$9!%(BGPL(the GNU General
2
+ Public License)$B$^$?$O0J2<$K<($9>r7o$GK\%W%m%0%i%`$r:FG[I[$G(B
3
+ $B$-$^$9!%(BGPL$B$K$D$$$F$O(BGPL$B%U%!%$%k$r;2>H$7$F2<$5$$!%(B
4
+
5
+ 1. $BJ#@=$O@)8B$J$/<+M3$G$9!%(B
6
+
7
+ 2. $B0J2<$N>r7o$N$$$:$l$+$rK~$?$9;~$KK\%W%m%0%i%`$N%=!<%9$r(B
8
+ $B<+M3$KJQ99$G$-$^$9!%(B
9
+
10
+ (a) $B%M%C%H%K%e!<%:$K%]%9%H$7$?$j!$:n<T$KJQ99$rAwIU$9$k(B
11
+ $B$J$I$NJ}K!$G!$JQ99$r8x3+$9$k!%(B
12
+
13
+ (b) $BJQ99$7$?K\%W%m%0%i%`$r<+J,$N=jB0$9$kAH?%FbIt$@$1$G(B
14
+ $B;H$&!%(B
15
+
16
+ (c) $BJQ99E@$rL@<($7$?$&$(!$%=%U%H%&%'%"$NL>A0$rJQ99$9$k!%(B
17
+ $B$=$N%=%U%H%&%'%"$rG[I[$9$k;~$K$OJQ99A0$NK\%W%m%0%i(B
18
+ $B%`$bF1;~$KG[I[$9$k!%$^$?$OJQ99A0$NK\%W%m%0%i%`$N%=!<(B
19
+ $B%9$NF~<jK!$rL@<($9$k!%(B
20
+
21
+ (d) $B$=$NB>$NJQ99>r7o$r:n<T$H9g0U$9$k!%(B
22
+
23
+ 3. $B0J2<$N>r7o$N$$$:$l$+$rK~$?$9;~$KK\%W%m%0%i%`$r%3%s%Q%$(B
24
+ $B%k$7$?%*%V%8%'%/%H%3!<%I$d<B9T7A<0$G$bG[I[$G$-$^$9!%(B
25
+
26
+ (a) $B%P%$%J%j$r<u$1<h$C$??M$,%=!<%9$rF~<j$G$-$k$h$&$K!$(B
27
+ $B%=!<%9$NF~<jK!$rL@<($9$k!%(B
28
+
29
+ (b) $B5!3#2DFI$J%=!<%9%3!<%I$rE:IU$9$k!%(B
30
+
31
+ (c) $BJQ99$r9T$C$?%P%$%J%j$OL>A0$rJQ99$7$?$&$(!$%*%j%8%J(B
32
+ $B%k$N%=!<%9%3!<%I$NF~<jK!$rL@<($9$k!%(B
33
+
34
+ (d) $B$=$NB>$NG[I[>r7o$r:n<T$H9g0U$9$k!%(B
35
+
36
+ 4. $BB>$N%W%m%0%i%`$X$N0zMQ$O$$$+$J$kL\E*$G$"$l<+M3$G$9!%$?(B
37
+ $B$@$7!$K\%W%m%0%i%`$K4^$^$l$kB>$N:n<T$K$h$k%3!<%I$O!$$=(B
38
+ $B$l$>$l$N:n<T$N0U8~$K$h$k@)8B$,2C$($i$l$k>l9g$,$"$j$^$9!%(B
39
+
40
+ $B$=$l$i%U%!%$%k$N0lMw$H$=$l$>$l$NG[I[>r7o$J$I$KIU$$$F$O(B
41
+ LEGAL$B%U%!%$%k$r;2>H$7$F$/$@$5$$!%(B
42
+
43
+ 5. $BK\%W%m%0%i%`$X$NF~NO$H$J$k%9%/%j%W%H$*$h$S!$K\%W%m%0%i(B
44
+ $B%`$+$i$N=PNO$N8"Mx$OK\%W%m%0%i%`$N:n<T$G$O$J$/!$$=$l$>(B
45
+ $B$l$NF~=PNO$r@8@.$7$??M$KB0$7$^$9!%$^$?!$K\%W%m%0%i%`$K(B
46
+ $BAH$_9~$^$l$k$?$a$N3HD%%i%$%V%i%j$K$D$$$F$bF1MM$G$9!%(B
47
+
48
+ 6. $BK\%W%m%0%i%`$OL5J]>Z$G$9!%:n<T$OK\%W%m%0%i%`$r%5%]!<%H(B
49
+ $B$9$k0U;V$O$"$j$^$9$,!$%W%m%0%i%`<+?H$N%P%0$"$k$$$OK\%W(B
50
+ $B%m%0%i%`$N<B9T$J$I$+$iH/@8$9$k$$$+$J$kB;32$KBP$7$F$b@U(B
51
+ $BG$$r;}$A$^$;$s!%(B
@@ -0,0 +1,12 @@
1
+ === 2.8.1 / 2009-08-21
2
+
3
+ * New features:
4
+ * Early support for Ruby 1.9
5
+
6
+ * Enhancements:
7
+ * Improved gem release process using Hoe as development dependency
8
+ * Implemented support for cross compilation
9
+ * Binary version built against MySQL 5.0.83
10
+
11
+ * Bug fixes:
12
+ * Improved detection of mysql and its configuration (mysql_config)
@@ -0,0 +1,16 @@
1
+ COPYING
2
+ COPYING.ja
3
+ History.txt
4
+ Manifest.txt
5
+ README.txt
6
+ Rakefile
7
+ ext/mysql_api/extconf.rb
8
+ ext/mysql_api/mysql.c
9
+ extra/README.html
10
+ extra/README_ja.html
11
+ extra/tommy.css
12
+ lib/mysql.rb
13
+ tasks/gem.rake
14
+ tasks/native.rake
15
+ tasks/vendor_mysql.rake
16
+ test/test_mysql.rb
@@ -0,0 +1,23 @@
1
+ = MySQL/Ruby Interface
2
+
3
+ * http://mysql-win.rubyforge.org
4
+ * http://rubyforge.org/projects/mysql-win
5
+ * http://github.com/luislaven/mysql-gem
6
+
7
+ == DESCRIPTION
8
+
9
+ This is the MySQL API module for Ruby. It provides the same functions for Ruby
10
+ programs that the MySQL C API provides for C programs.
11
+
12
+ This is a conversion of tmtm's original extension into a proper RubyGem.
13
+
14
+ === Warning about incompatible MySQL versions
15
+
16
+ Mixing MySQL versions will generate segmentation faults.
17
+
18
+ Running the binary version of this gem against a different version of MySQL
19
+ shared library <tt>libMySQL.dll</tt> will generate segmentation faults and
20
+ terminate your application.
21
+
22
+ Please use the exact same MAJOR.MINOR version of MySQL, see History.txt for
23
+ specific version of MySQL used to build the binaries.
@@ -0,0 +1,22 @@
1
+ #--
2
+ # Copyright (c) 2008 Luis Lavena
3
+ #
4
+ # This source code is released under the MIT License.
5
+ # See LICENSE file for details
6
+ #++
7
+
8
+ #
9
+ # NOTE: Keep this file clean.
10
+ # Add your customizations inside tasks directory.
11
+ # Thank You.
12
+ #
13
+
14
# Load rake directly when it is already on the load path; otherwise
# activate RubyGems first and retry.
begin
  require 'rake'
rescue LoadError
  require 'rubygems'
  require 'rake'
end

# Load every task extension found in the tasks directory, in sorted order.
Dir.glob('tasks/*.rake').sort.each do |task_file|
  load task_file
end
@@ -0,0 +1,3 @@
1
+ require 'mkmf' # mkmf supplies create_makefile, used below
2
+ create_makefile("phonet_api") # generate the Makefile for the extension target "phonet_api"
3
+
@@ -0,0 +1,82 @@
1
+ /*
2
+ * ph_ext.h
3
+ * --------
4
+ *
5
+ * Constants and prototypes for "phonet.c".
6
+ *
7
+ * Copyright (c):
8
+ * 1999-2007: Joerg MICHAEL, Adalbert-Stifter-Str. 11, 30655 Hannover, Germany
9
+ *
10
+ * SCCS: @(#) ph_ext.h 1.4.2 2007-08-27
11
+ *
12
+ * This program is subject to the GNU Lesser General Public License (LGPL),
13
+ * (formerly known as GNU Library General Public Licence)
14
+ * as published by the Free Software Foundation; either version 2 of the
15
+ * License, or (at your option) any later version.
16
+ * This program is distributed in the hope that it will be useful,
17
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
19
+ *
20
+ * You should have received a copy of the GNU Library General Public License
21
+ * along with this program; if not, write to the
22
+ * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23
+ *
24
+ * Actually, the LGPL is __less__ restrictive than the better known GNU General
25
+ * Public License (GPL). See the GNU Library General Public License or the file
26
+ * LIB_GPLP.TXT for more details and for a DISCLAIMER OF ALL WARRANTIES.
27
+ *
28
+ * There is one important restriction: If you modify this program in any way
29
+ * (e.g. add or change phonetic rules or modify the underlying logic or
30
+ * translate this program into another programming language), you must also
31
+ * release the changes under the terms of the LGPL.
32
+ * That means you have to give out the source code to your changes,
33
+ * and a very good way to do so is mailing them to the address given below.
34
+ * I think this is the best way to promote further development and use
35
+ * of this software.
36
+ *
37
+ * If you have any remarks, feel free to e-mail to:
38
+ * ct@ct.heise.de
39
+ *
40
+ * The author's email address is:
41
+ * astro.joerg@googlemail.com
42
+ */
43
+
44
+
45
+ #ifndef _PH_EXT_H_
46
+ #define _PH_EXT_H_
47
+
48
+ /**** If you want to use "phonet.c" as a library, ****/
49
+ /**** leave the following macro undefined (Note: This ****/
50
+ /**** also disables the function "check_rules"). ****/
51
+ /* #define PHONET_EXECUTABLE */
52
+
53
+
54
+ /**** rule-set selectors for the "mode_language" argument of "phonet" (do not change) ****/
55
+ #define PHONET_FIRST_RULES 0 /* no flag bit: apply the first rules */
56
+ #define PHONET_SECOND_RULES 1024 /* flag bit added to the language: apply the second rules */
57
+
58
+ /**** If you don't want rules for one or more of the ****/
59
+ /**** following languages, delete the corresponding macro ****/
60
+ #define PHONET_NO_LANGUAGE 1
61
+ #define PHONET_GERMAN 2
62
+ /**** (under construction:) #define PHONET_ENGLISH 2 ****/
63
+
64
+
65
+ /**** the following macro _must_ be defined; "phonet" falls back ****/
66
+ /**** to this language when the requested one cannot be set ****/
+ #define PHONET_DEFAULT_LANGUAGE PHONET_GERMAN
67
+
68
+
69
+
70
+ /************************************************************/
71
+ /**** function prototypes *********************************/
72
+ /************************************************************/
73
+
74
+ int phonet (char src[], char dest[], int len, int mode_language); /* converts "src" into "dest" ("len" = capacity of "dest" incl. '\0'); result >= 0 is the length of "dest", < 0 is an error */
75
+ int check_rules (int language, int trace_rule); /* only defined in "phonet.c" when PHONET_EXECUTABLE is set; checks the rules of "language" */
76
+
77
+
78
+ #endif
79
+
80
+ /************************************************************/
81
+ /**** end of file "ph_ext.h" ******************************/
82
+ /************************************************************/
@@ -0,0 +1,1672 @@
1
+ /*
2
+ * phonet.c
3
+ * --------
4
+ *
5
+ * Program for phonetic string conversion.
6
+ *
7
+ * Copyright (c):
8
+ * 1999-2007: Joerg MICHAEL, Adalbert-Stifter-Str. 11, 30655 Hannover, Germany
9
+ * and
10
+ * (version 1.0) 1999: Heise Verlag, Helstorfer Str. 7, 30625 Hannover, Germany
11
+ *
12
+ * SCCS: @(#) phonet.c 1.4.2 2007-08-27
13
+ *
14
+ * This program is subject to the GNU Lesser General Public License (LGPL)
15
+ * (formerly known as GNU Library General Public Licence)
16
+ * as published by the Free Software Foundation; either version 2 of the
17
+ * License, or (at your option) any later version.
18
+ * This program is distributed in the hope that it will be useful,
19
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
21
+ *
22
+ * You should have received a copy of the GNU Library General Public License
23
+ * along with this program; if not, write to the
24
+ * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25
+ *
26
+ * Actually, the LGPL is __less__ restrictive than the better known GNU General
27
+ * Public License (GPL). See the GNU Library General Public License or the file
28
+ * LIB_GPLP.TXT for more details and for a DISCLAIMER OF ALL WARRANTIES.
29
+ *
30
+ * There is one important restriction: If you modify this program in any way
31
+ * (e.g. add or change phonetic rules or modify the underlying logic or
32
+ * translate this program into another programming language), you must also
33
+ * release the changes under the terms of the LGPL.
34
+ * That means you have to give out the source code to your changes,
35
+ * and a very good way to do so is mailing them to the address given below.
36
+ * I think this is the best way to promote further development and use
37
+ * of this software.
38
+ *
39
+ * If you have any remarks, feel free to e-mail to:
40
+ * ct@ct.heise.de
41
+ *
42
+ * The author's email address is:
43
+ * astro.joerg@googlemail.com
44
+ */
45
+
46
+
47
+ #include <stdio.h>
48
+ #include <stdlib.h>
49
+ #include <string.h>
50
+
51
+ #include "umlaut_p.h"
52
+ #include "ph_ext.h"
53
+ #include "phonet.h"
54
+
55
+
56
+ #define TEST_char '\004' /* marker character inserted into test strings by "string_prepare" */
57
+
58
+ /**** Bit flags stored in "*phonet_init" and "internal_mode": ****/
59
+ #define PHONET_INITIALIZED 1 /* tables have been built */
60
+ #define CHECK_PHONETIC_RULES 2 /* set by "check_rules" while it validates the rules */
61
+ #define TRACE_PHONET 4 /* print trace and error messages via printf */
62
+
63
+
64
+ static int internal_mode = 0; /* combination of the bit flags above */
65
+ static int last_rule_set = -PHONET_SECOND_RULES; /* language most recently passed to "set_phonet_language" by "phonet" */
66
+ static int alpha_pos[HASH_COUNT]; /* per character: 0 = no letter, 1 = umlaut, 2..27 = position in a-z plus 2 */
67
+ static int isletter[HASH_COUNT]; /* per character: 0 = no letter, 1 = lower case, 2 = upper case */
68
+ static char upperchar[HASH_COUNT]; /* per character: its upper-case form (identity for non-letters) */
69
+
70
+
71
+
72
+ /************************************************************/
73
+ /**** private (static) functions **************************/
74
+ /************************************************************/
75
+
76
+
77
+ static int initialize_phonet (void)
78
+ /**** language dependant initializations ****/
79
+ /**** resut: 0 : success ****/
80
+ /**** -1 : an error occured ****/
81
+ {
82
+ int i,k,n,*p;
83
+ int *p_hash1,*p_hash2;
84
+ char *s,*s2;
85
+ char temp[2];
86
+
87
+ if (! (internal_mode & PHONET_INITIALIZED))
88
+ {
89
+ if ((int)strlen (letters_a_to_z) > 26)
90
+ {
91
+ if (internal_mode & TRACE_PHONET)
92
+ {
93
+ printf ("Error: %s is not allowed\n",
94
+ "strlen (letters_a_to_z) > 26");
95
+ }
96
+ return (-1);
97
+ }
98
+ if ((int)strlen (letters_a_to_z) != (int)strlen (letters_A_to_Z))
99
+ {
100
+ if (internal_mode & TRACE_PHONET)
101
+ {
102
+ printf ("Error: %s is not allowed\n",
103
+ "strlen(letters_a_to_z) != strlen(letters_a_to_z)");
104
+ }
105
+ return (-1);
106
+ }
107
+ if ((int)strlen (umlaut_lower) != (int)strlen (umlaut_upper))
108
+ {
109
+ if (internal_mode & TRACE_PHONET)
110
+ {
111
+ printf ("Error: %s is not allowed\n",
112
+ "strlen(umlaut_lower) != strlen(umlaut_upper)");
113
+ }
114
+ return (-1);
115
+ }
116
+
117
+ internal_mode = internal_mode | PHONET_INITIALIZED;
118
+
119
+ /**** generate arrays "alpha_pos", "upperchar" and "isletter" ****/
120
+ for (i=0; i< HASH_COUNT; i++)
121
+ {
122
+ alpha_pos[i] = 0;
123
+ isletter[i] = 0;
124
+ upperchar[i] = (char) i;
125
+ }
126
+
127
+ for (k=-1; k<1; k++)
128
+ {
129
+ if (k == -1)
130
+ {
131
+ /**** German and international umlauts ****/
132
+ s = umlaut_lower;
133
+ s2 = umlaut_upper;
134
+ p = &k;
135
+ }
136
+ else
137
+ {
138
+ /**** "normal" letters ('a'-'z' and 'A'-'Z') ****/
139
+ s = letters_a_to_z;
140
+ s2 = letters_A_to_Z;
141
+ p = &i;
142
+ }
143
+
144
+ for (i=0; *(s+i) != '\0'; i++)
145
+ {
146
+ n = (unsigned char) *(s2+i); /** "s2" **/
147
+ alpha_pos[n] = *p + 2;
148
+ isletter[n] = 2;
149
+ upperchar[n] = *(s2+i);
150
+
151
+ n = (unsigned char) *(s+i); /** "s" **/
152
+ alpha_pos[n] = *p + 2;
153
+ isletter[n] = 1;
154
+ upperchar[n] = *(s2+i);
155
+ }
156
+ }
157
+ }
158
+
159
+ if (phonet_init == NULL || phonet_hash == NULL || phonet_rules == NULL)
160
+ {
161
+ return (-1);
162
+ }
163
+
164
+ if (! (*phonet_init & PHONET_INITIALIZED))
165
+ {
166
+ *phonet_init = *phonet_init | PHONET_INITIALIZED;
167
+
168
+ for (i=0; i< HASH_COUNT; i++)
169
+ {
170
+ phonet_hash[i] = -1;
171
+ }
172
+
173
+ for (i=0; i<26; i++)
174
+ {
175
+ p_hash1 = (* phonet_hash_1) [i];
176
+ p_hash2 = (* phonet_hash_2) [i];
177
+
178
+ for (k=0; k<28; k++)
179
+ {
180
+ p_hash1[k] = -1;
181
+ p_hash2[k] = -1;
182
+ }
183
+ }
184
+
185
+ for (i=0; phonet_rules[i] != PHONET_END; i += 3)
186
+ {
187
+ if ((s=phonet_rules[i]) != NULL)
188
+ {
189
+ /**** calculate first hash value ****/
190
+ k = (unsigned char) *s;
191
+
192
+ if (phonet_hash[k] < 0
193
+ && (phonet_rules[i+1] != NULL || phonet_rules[i+2] != NULL))
194
+ {
195
+ phonet_hash[k] = i;
196
+ }
197
+
198
+ /**** calculate second hash values ****/
199
+ if (k != 0 && alpha_pos[k] >= 2)
200
+ {
201
+ k = alpha_pos[k];
202
+ p_hash1 = (* phonet_hash_1) [k-2];
203
+ p_hash2 = (* phonet_hash_2) [k-2];
204
+ s++;
205
+
206
+ if (*s == '(')
207
+ {
208
+ s++;
209
+ }
210
+ else if (*s == '\0')
211
+ {
212
+ s = (char *) " ";
213
+ }
214
+ else
215
+ {
216
+ sprintf (temp, "%c", *s);
217
+ s = temp;
218
+ }
219
+
220
+ while (*s != '\0' && (unsigned char) *s != ')')
221
+ {
222
+ k = alpha_pos [(unsigned char) *s];
223
+
224
+ if (k > 0)
225
+ {
226
+ /**** add hash value for this letter ****/
227
+ if (p_hash1[k] < 0)
228
+ {
229
+ p_hash1[k] = i;
230
+ p_hash2[k] = i;
231
+ }
232
+
233
+ if (p_hash2[k] >= i - 30)
234
+ {
235
+ p_hash2[k] = i;
236
+ }
237
+ else
238
+ {
239
+ k = -1;
240
+ }
241
+ }
242
+
243
+ if (k <= 0)
244
+ {
245
+ /**** add hash value for all letters ****/
246
+ if (p_hash1[0] < 0)
247
+ {
248
+ p_hash1[0] = i;
249
+ }
250
+ p_hash2[0] = i;
251
+ }
252
+ s++;
253
+ }
254
+ }
255
+ }
256
+ }
257
+ }
258
+
259
+ return (0);
260
+ }
261
+
262
+
263
+
264
+ static void trace_info (char text[], int n, char err_text[])
265
+ /**** output trace info ****/
266
+ {
267
+ char *s,*s2,*s3;
268
+ s = (phonet_rules[n] == NULL) ? (char *) "(NULL)" : phonet_rules[n];
269
+ s2 = (phonet_rules[n+1] == NULL) ? (char *) "(NULL)" : phonet_rules[n+1];
270
+ s3 = (phonet_rules[n+2] == NULL) ? (char *) "(NULL)" : phonet_rules[n+2];
271
+
272
+ printf ("%s %d: \"%s\"%s\"%s\" %s\n", text, ((n/3)+1), s,s2,s3, err_text);
273
+ }
274
+
275
+
276
+
277
+
278
+ int phonet (char src[], char dest[], int len, int mode_language)
279
+
280
+ /**** Function for phonetic conversions ****/
281
+ /**** ("dest" == "src" is allowed). ****/
282
+ /**** "len" = max. length of "dest" incl. '\0'. ****/
283
+ /**** mode_language = <language> + PHONET_FIRST_RULES : ****/
284
+ /**** Use <language> and first rules ****/
285
+ /**** mode_language = <language> + PHONET_SECOND_RULES : ****/
286
+ /**** Use <language> and second rules ****/
287
+ /**** result: >= 0 : string length of "dest" ****/
288
+ /**** < 0 : an error occured ****/
289
+ {
290
+ int i,j,k,ml,n,p,z;
291
+ int k0,n0,p0,z0;
292
+ int start1,end1,start2,end2;
293
+ int start3,end3,start4,end4;
294
+ int *p_hash1,*p_hash2;
295
+ char c,c0,*s;
296
+ char *src_2,text[51];
297
+
298
+ if (dest == NULL || src == NULL || len <= 0)
299
+ {
300
+ /**** wrong arg's ****/
301
+ if (internal_mode & TRACE_PHONET)
302
+ {
303
+ printf ("Error: wrong arguments.\n");
304
+ }
305
+ return (-1);
306
+ }
307
+
308
+ /**** select language ****/
309
+ i = 0;
310
+ k = mode_language & ~PHONET_SECOND_RULES;
311
+ if (k != last_rule_set)
312
+ {
313
+ i = set_phonet_language (k);
314
+ last_rule_set = k;
315
+ }
316
+ if (i < 0)
317
+ {
318
+ s = "Notice: language not set, use current language";
319
+ i = 0;
320
+
321
+ if (phonet_init == NULL
322
+ || phonet_hash == NULL || phonet_rules == NULL)
323
+ {
324
+ i = set_phonet_language (PHONET_DEFAULT_LANGUAGE);
325
+ s = "Notice: language not set, use default language";
326
+
327
+ if (i < 0)
328
+ {
329
+ s = "Error: language not set; default language could not be set";
330
+ }
331
+ }
332
+
333
+ if (internal_mode & TRACE_PHONET)
334
+ {
335
+ if (i >= 0)
336
+ {
337
+ printf ("%s (%s).\n", s, phonet_language);
338
+ }
339
+ else
340
+ {
341
+ printf ("%s.\n", s);
342
+ }
343
+ }
344
+
345
+ if (phonet_init == NULL
346
+ || phonet_hash == NULL || phonet_rules == NULL)
347
+ {
348
+ strcpy (dest,"");
349
+ return (-2);
350
+ }
351
+ }
352
+
353
+ if (phonet_init == NULL || ! (*phonet_init & PHONET_INITIALIZED)
354
+ || phonet_hash == NULL || phonet_rules == NULL
355
+ || ! (internal_mode & PHONET_INITIALIZED))
356
+ {
357
+ /**** initialization (must be done ****/
358
+ /**** BEFORE converting "src" to upper char) ****/
359
+ i = initialize_phonet();
360
+ if (i < 0)
361
+ {
362
+ if (internal_mode & TRACE_PHONET)
363
+ {
364
+ printf ("Error: initialization failed\n");
365
+ }
366
+ strcpy (dest,"");
367
+ return (-3);
368
+ }
369
+ }
370
+
371
+ src_2 = text;
372
+ i = (int) strlen (src);
373
+ if (i > 50)
374
+ {
375
+ /**** "oversized" string ****/
376
+ src_2 = (char *) malloc ((size_t) (i+1));
377
+ if (src_2 == NULL)
378
+ {
379
+ /**** "malloc" failed ****/
380
+ if (internal_mode & TRACE_PHONET)
381
+ {
382
+ printf ("Error: \"malloc\" for %d Bytes failed.\n", i+1);
383
+ }
384
+ strcpy (dest,"");
385
+ return (-4);
386
+ }
387
+ }
388
+
389
+ /**** "strcpy" plus conversion to upper char ****/
390
+ i = 0;
391
+ while ((c=src[i]) != '\0')
392
+ {
393
+ src_2[i] = upperchar [(unsigned char) c];
394
+ i++;
395
+ }
396
+ src_2[i] = '\0';
397
+ src = src_2;
398
+
399
+ if (mode_language & PHONET_SECOND_RULES)
400
+ {
401
+ ml = 2;
402
+ s = "second";
403
+ }
404
+ else
405
+ {
406
+ ml = 1;
407
+ s = "first";
408
+ }
409
+ if (internal_mode & TRACE_PHONET)
410
+ {
411
+ printf ("\n\nphonetic conversion for : \"%s\"\n", src_2);
412
+ printf ("(%s rules)\n", s);
413
+ }
414
+
415
+ /**** check "src" ****/
416
+ i = 0;
417
+ j = 0;
418
+ z = 0;
419
+ while ((c = src[i]) != '\0')
420
+ {
421
+ if (internal_mode & TRACE_PHONET)
422
+ {
423
+ printf ("\ncheck position %d: src = \"%s\",", j, src+i);
424
+ printf (" dest = \"%.*s\"\n", j, dest);
425
+ }
426
+
427
+ n = alpha_pos [(unsigned char) c];
428
+ if (n >= 2)
429
+ {
430
+ p_hash1 = (* phonet_hash_1) [n-2];
431
+ p_hash2 = (* phonet_hash_2) [n-2];
432
+ n = alpha_pos [(unsigned char) src[i+1]];
433
+ start1 = p_hash1 [n];
434
+ start2 = p_hash1 [0];
435
+ end1 = p_hash2 [n];
436
+ end2 = p_hash2 [0];
437
+
438
+ /**** preserve rule priorities ****/
439
+ if (start2 >= 0
440
+ && (start1 < 0 || start2 < start1))
441
+ {
442
+ n = start1; start1 = start2; start2 = n;
443
+ n = end1; end1 = end2; end2 = n;
444
+ }
445
+
446
+ if (end1 >= start2 && start2 >= 0)
447
+ {
448
+ if (end2 > end1)
449
+ {
450
+ end1 = end2;
451
+ }
452
+ start2 = -1;
453
+ end2 = -1;
454
+ }
455
+ }
456
+ else
457
+ {
458
+ n = phonet_hash [(unsigned char) c];
459
+ start1 = n;
460
+ end1 = 10000;
461
+ start2 = -1;
462
+ end2 = -1;
463
+ }
464
+
465
+ n = start1;
466
+ z0 = 0;
467
+
468
+ if (n >= 0)
469
+ {
470
+ /**** check rules for this char ****/
471
+ while (phonet_rules[n] == NULL || phonet_rules[n][0] == c)
472
+ {
473
+ if (n > end1)
474
+ {
475
+ if (start2 > 0)
476
+ {
477
+ n = start2;
478
+ start1 = start2; start2 = -1;
479
+ end1 = end2; end2 = -1;
480
+ continue;
481
+ }
482
+ break;
483
+ }
484
+
485
+ if (phonet_rules [n] == NULL || phonet_rules [n+ml] == NULL)
486
+ {
487
+ /**** no conversion rule available ****/
488
+ n += 3;
489
+ continue;
490
+ }
491
+ if (internal_mode & TRACE_PHONET)
492
+ {
493
+ trace_info ("> rule no.", n, "is being checked");
494
+ }
495
+
496
+ /**** check whole string ****/
497
+ k = 1; /**** no. of matching letters ****/
498
+ p = 5; /**** default priority ****/
499
+ s = phonet_rules[n];
500
+ s++; /**** needed by "*(s-1)" below ****/
501
+
502
+ while (src[i+k] == *s && *s != '\0'
503
+ && strchr ("0123456789(-<^$", *s) == NULL)
504
+ {
505
+ k++;
506
+ s++;
507
+ }
508
+ if (internal_mode & CHECK_PHONETIC_RULES)
509
+ {
510
+ /**** we do "CHECK_PHONETIC_RULES" ****/
511
+ while (*s != '\0' && src[i+k] == *s)
512
+ {
513
+ k++;
514
+ s++;
515
+ }
516
+ }
517
+ if (*s == '(')
518
+ {
519
+ /**** check an array of letters ****/
520
+ if (isletter [(unsigned char) src[i+k]]
521
+ && strchr (s+1, src[i+k]) != NULL)
522
+ {
523
+ k++;
524
+ while (*s != '\0' && *s != ')')
525
+ {
526
+ s++;
527
+ }
528
+ if (*s == ')')
529
+ {
530
+ s++;
531
+ }
532
+ }
533
+ }
534
+ p0 = (int) *s;
535
+ k0 = k;
536
+ while (*s == '-' && k > 1)
537
+ {
538
+ k--;
539
+ s++;
540
+ }
541
+ if (*s == '<')
542
+ {
543
+ s++;
544
+ }
545
+ if (strchr ("0123456789",*s) != NULL && *s != '\0')
546
+ {
547
+ /**** read priority ****/
548
+ p = *s - '0';
549
+ s++;
550
+ }
551
+ if (*s == '^' && *(s+1) == '^')
552
+ {
553
+ s++;
554
+ if ((internal_mode & CHECK_PHONETIC_RULES)
555
+ && ! isletter [(unsigned char) src[i+k0]])
556
+ {
557
+ /**** we do "CHECK_PHONETIC_RULES" ****/
558
+ s = s-2;
559
+ }
560
+ }
561
+
562
+ if (*s == '\0'
563
+ || (*s == '^' && (i == 0 || ! isletter [(unsigned char)src[i-1]])
564
+ && (*(s+1) != '$'
565
+ || (! isletter [(unsigned char) src[i+k0]] && src[i+k0] != '.')))
566
+ || (*s == '$' && i > 0 && isletter [(unsigned char) src[i-1]]
567
+ && (! isletter [(unsigned char) src[i+k0]] && src[i+k0] != '.')))
568
+ {
569
+ /**** look for continuation, if: ****/
570
+ /**** k > 1 and NO '-' in first string ****/
571
+ n0 = -1;
572
+
573
+ if (k > 1 && src[i+k] != '\0' && p0 != (int) '-')
574
+ {
575
+ c0 = src [i+k-1];
576
+ n0 = alpha_pos [(unsigned char) c0];
577
+
578
+ if (n0 >= 2 && src[i+k] != '\0')
579
+ {
580
+ p_hash1 = (* phonet_hash_1) [n0-2];
581
+ p_hash2 = (* phonet_hash_2) [n0-2];
582
+ n0 = alpha_pos [(unsigned char) src[i+k]];
583
+ start3 = p_hash1 [n0];
584
+ start4 = p_hash1 [0];
585
+ end3 = p_hash2 [n0];
586
+ end4 = p_hash2 [0];
587
+
588
+ /**** preserve rule priorities ****/
589
+ if (start4 >= 0
590
+ && (start3 < 0 || start4 < start3))
591
+ {
592
+ n0 = start3; start3 = start4; start4 = n0;
593
+ n0 = end3; end3 = end4; end4 = n0;
594
+ }
595
+
596
+ if (end3 >= start4 && start4 >= 0)
597
+ {
598
+ if (end4 > end3)
599
+ {
600
+ end3 = end4;
601
+ }
602
+ start4 = -1;
603
+ end4 = -1;
604
+ }
605
+ }
606
+ else
607
+ {
608
+ n0 = phonet_hash [(unsigned char) c0];
609
+ start3 = n0;
610
+ end3 = 10000;
611
+ start4 = -1;
612
+ end4 = -1;
613
+ }
614
+
615
+ n0 = start3;
616
+ }
617
+
618
+ if (n0 >= 0)
619
+ {
620
+ /**** check continuation rules for "src[i+k]" ****/
621
+ while (phonet_rules[n0] == NULL
622
+ || phonet_rules[n0][0] == c0)
623
+ {
624
+ if (n0 > end3)
625
+ {
626
+ if (start4 > 0)
627
+ {
628
+ n0 = start4;
629
+ start3 = start4; start4 = -1;
630
+ end3 = end4; end4 = -1;
631
+ continue;
632
+ }
633
+ p0 = -1; /**** important ****/
634
+ break;
635
+ }
636
+
637
+ if (phonet_rules [n0] == NULL
638
+ || phonet_rules [n0+ml] == NULL)
639
+ {
640
+ /**** no conversion rule available ****/
641
+ n0 += 3;
642
+ continue;
643
+ }
644
+ if (internal_mode & TRACE_PHONET)
645
+ {
646
+ trace_info ("> > continuation rule no.",
647
+ n0, "is being checked");
648
+ }
649
+
650
+ /**** check whole string ****/
651
+ k0 = k;
652
+ p0 = 5;
653
+ s = phonet_rules[n0];
654
+ s++;
655
+ while (src[i+k0] == *s && *s != '\0'
656
+ && strchr("0123456789(-<^$", *s) == NULL)
657
+ {
658
+ k0++;
659
+ s++;
660
+ }
661
+ if (*s == '(')
662
+ {
663
+ /**** check an array of letters ****/
664
+ if (isletter [(unsigned char) src[i+k0]]
665
+ && strchr (s+1, src[i+k0]) != NULL)
666
+ {
667
+ k0++;
668
+ while (*s != '\0' && *s != ')')
669
+ {
670
+ s++;
671
+ }
672
+ if (*s == ')')
673
+ {
674
+ s++;
675
+ }
676
+ }
677
+ }
678
+ while (*s == '-')
679
+ {
680
+ /**** "k0" is NOT decremented ****/
681
+ /**** because of "if (k0 == k)" ****/
682
+ s++;
683
+ }
684
+ if (*s == '<')
685
+ {
686
+ s++;
687
+ }
688
+ if (strchr ("0123456789",*s) != NULL && *s != '\0')
689
+ {
690
+ p0 = *s - '0';
691
+ s++;
692
+ }
693
+
694
+ if (*s == '\0'
695
+ /**** *s == '^' is not possible here ****/
696
+ || (*s == '$' && ! isletter [(unsigned char) src[i+k0]]
697
+ && src[i+k0] != '.'))
698
+ {
699
+ if (k0 == k)
700
+ {
701
+ /**** this is only a partial string ****/
702
+ if (internal_mode & TRACE_PHONET)
703
+ {
704
+ trace_info ("> > continuation rule no.",
705
+ n0, "not used (too short)");
706
+ }
707
+ n0 += 3;
708
+ continue;
709
+ }
710
+
711
+ if (p0 < p)
712
+ {
713
+ /**** priority is too low ****/
714
+ if (internal_mode & TRACE_PHONET)
715
+ {
716
+ trace_info ("> > continuation rule no.",
717
+ n0, "not used (priority)");
718
+ }
719
+ n0 += 3;
720
+ continue;
721
+ }
722
+
723
+ /**** continuation rule found ****/
724
+ break;
725
+ }
726
+
727
+ if (internal_mode & TRACE_PHONET)
728
+ {
729
+ trace_info ("> > continuation rule no.",
730
+ n0, "not used");
731
+ }
732
+ n0 += 3;
733
+ } /**** end of "while" ****/
734
+
735
+ if (p0 >= p
736
+ && (phonet_rules[n0] != NULL && phonet_rules[n0][0] == c0))
737
+ {
738
+ if (internal_mode & TRACE_PHONET)
739
+ {
740
+ trace_info ("> rule no.", n,"");
741
+ trace_info ("> not used because of continuation",n0,"");
742
+ }
743
+ n += 3;
744
+ continue;
745
+ }
746
+ }
747
+
748
+ /**** replace string ****/
749
+ if (internal_mode & TRACE_PHONET)
750
+ {
751
+ trace_info ("Rule no.", n, "is applied");
752
+ }
753
+ p0 = (phonet_rules[n][0] != '\0'
754
+ && strchr (phonet_rules[n]+1,'<') != NULL) ? 1 : 0;
755
+ s = phonet_rules [n+ml];
756
+
757
+ if (p0 == 1 && z == 0)
758
+ {
759
+ /**** rule with '<' is applied ****/
760
+ if (j > 0 && *s != '\0'
761
+ && (dest[j-1] == c || dest[j-1] == *s))
762
+ {
763
+ j--;
764
+ }
765
+ z0 = 1;
766
+ z++;
767
+ k0 = 0;
768
+ while (*s != '\0' && src[i+k0] != '\0')
769
+ {
770
+ src[i+k0] = *s;
771
+ k0++;
772
+ s++;
773
+ }
774
+ if (k0 < k)
775
+ {
776
+ strcpy (src+i+k0, src+i+k);
777
+ }
778
+ if ((internal_mode & CHECK_PHONETIC_RULES)
779
+ && (*s != '\0' || k0 > k))
780
+ {
781
+ /**** we do "CHECK_PHONETIC_RULES": ****/
782
+ /**** replacement string is too long ****/
783
+ dest[j] = '\0';
784
+ return (-200);
785
+ }
786
+ /**** new "current char" ****/
787
+ c = src[i];
788
+ }
789
+ else
790
+ {
791
+ if ((internal_mode & CHECK_PHONETIC_RULES)
792
+ && p0 == 1 && z > 0)
793
+ {
794
+ /**** we do "CHECK_PHONETIC_RULES": ****/
795
+ /**** recursion found -> error ****/
796
+ dest[j] = '\0';
797
+ return (-100);
798
+ }
799
+ i = i+k-1;
800
+ z = 0;
801
+ while (*s != '\0'
802
+ && *(s+1) != '\0' && j < len-1)
803
+ {
804
+ if (j == 0 || dest[j-1] != *s)
805
+ {
806
+ dest[j] = *s;
807
+ j++;
808
+ }
809
+ s++;
810
+ }
811
+ /**** new "current char" ****/
812
+ c = *s;
813
+
814
+ if (phonet_rules[n][0] != '\0'
815
+ && strstr (phonet_rules[n]+1, "^^") != NULL)
816
+ {
817
+ if (c != '\0')
818
+ {
819
+ dest[j] = c;
820
+ j++;
821
+ }
822
+ src += i+1;
823
+ i = 0;
824
+ z0 = 1;
825
+ }
826
+ }
827
+
828
+ break;
829
+ }
830
+
831
+ n += 3;
832
+ if (n > end1 && start2 > 0)
833
+ {
834
+ n = start2;
835
+ start1 = start2;
836
+ end1 = end2;
837
+ start2 = -1;
838
+ end2 = -1;
839
+ }
840
+ }
841
+ }
842
+
843
+ if (z0 == 0)
844
+ {
845
+ if (j < len-1 && c != '\0'
846
+ && (j == 0 || dest[j-1] != c))
847
+ {
848
+ /**** delete multiple letters only ****/
849
+ dest[j] = c;
850
+ j++;
851
+ }
852
+ i++;
853
+ z = 0;
854
+ }
855
+ }
856
+
857
+ if (src_2 != text)
858
+ {
859
+ free (src_2);
860
+ }
861
+ dest[j] = '\0';
862
+
863
+ if (internal_mode & TRACE_PHONET)
864
+ {
865
+ printf ("\n");
866
+ printf ("internal phonetic string is: '%s'\n", dest);
867
+ }
868
+
869
+ return (j);
870
+ }
871
+
872
+
873
+
874
+
875
+
876
+ /************************************************************/
877
+ /**** functions used by "main" ****************************/
878
+ /************************************************************/
879
+
880
+
881
+ #ifdef PHONET_EXECUTABLE
882
+
883
+
884
+ static void string_prepare (char *text, char *s, char *s2)
885
+ /**** Auxiliary function for "check_rules": ****/
886
+ /**** "strcpy (text,s)" plus inclusion of ****/
887
+ /**** 'TEST_char' and '-', if necessary ****/
888
+ {
889
+ if (*s != '\0')
890
+ {
891
+ *text = *s;
892
+ text++;
893
+ s++;
894
+ }
895
+ while (strchr ("0123456789-<^$", *s) == NULL && *s != '\0')
896
+ {
897
+ *text = *s;
898
+ text++;
899
+ s++;
900
+ }
901
+ if (strchr (s2,'-') != NULL || strchr (s2,'$') == NULL)
902
+ {
903
+ *text = TEST_char;
904
+ text++;
905
+ *text = '-';
906
+ text++;
907
+ }
908
+ strcpy (text, s);
909
+ }
910
+
911
+
912
+
913
+ int check_rules (int language, int trace_rule)
914
+ /**** Check all phonetic rules of the current ****/
915
+ /**** language. ****/
916
+ /**** ("trace_rule" > 0: trace this rule only) ****/
917
+ /**** Result: Number of errors ****/
918
+ {
919
+ int i,k,n,n0;
920
+ int errors = 0;
921
+ int rule_count = 0;
922
+ char *r,*r0,rule[35];
923
+ char *s,err_text[201];
924
+ char orig[35],orig2[35];
925
+ char text[35],text2[35];
926
+
927
+ /**** initialization ****/
928
+ i = set_phonet_language (language);
929
+ if (i >= 0)
930
+ {
931
+ i = initialize_phonet();
932
+ }
933
+ if (i < 0)
934
+ {
935
+ printf ("Error: initialization for language %d failed\n", language);
936
+ return (-1);
937
+ }
938
+
939
+ isletter [(unsigned char) TEST_char] = 1;
940
+ internal_mode = internal_mode | CHECK_PHONETIC_RULES;
941
+ i = 0;
942
+
943
+ while (phonet_rules[i] != PHONET_END)
944
+ {
945
+ /**** syntax check for all strings ****/
946
+ if ((i/3)+1 == trace_rule)
947
+ {
948
+ internal_mode = internal_mode | TRACE_PHONET;
949
+ }
950
+ else if (trace_rule > 0)
951
+ {
952
+ internal_mode = internal_mode & ~TRACE_PHONET;
953
+ }
954
+
955
+ strcpy (err_text,"");
956
+ k = 0;
957
+ if (i % 3 == 0)
958
+ {
959
+ if (phonet_rules[i] == NULL
960
+ || (phonet_rules[i+1] == NULL && phonet_rules[i+2] == NULL))
961
+ {
962
+ strcpy (err_text," Forbidden null pointer");
963
+ k = -10;
964
+ }
965
+ rule_count++;
966
+ }
967
+
968
+ if (k >= 0)
969
+ {
970
+ if (phonet_rules[i] == NULL)
971
+ {
972
+ i++;
973
+ continue;
974
+ }
975
+
976
+ if (i % 3 == 0)
977
+ {
978
+ /**** check first letter ****/
979
+ s = phonet_rules[i];
980
+ n = phonet_hash [(unsigned char) *s];
981
+ if (i >= n+3 && n >= 0
982
+ && (s == NULL || *s != phonet_rules[i-3][0]))
983
+ {
984
+ strcpy (err_text," Wrong first char");
985
+ k = -10;
986
+ }
987
+
988
+ if (k >= 0)
989
+ {
990
+ /**** check length of search string ****/
991
+ k = 0;
992
+ while (strchr ("0123456789()<^$", *s) == NULL && *s != '\0')
993
+ {
994
+ k++;
995
+ s++;
996
+ }
997
+ if (k == 0)
998
+ {
999
+ strcpy (err_text," Search string is empty");
1000
+ if (*s != '\0' && strchr ("()<^$", *s) == NULL)
1001
+ {
1002
+ strcpy (err_text," First char is meta char");
1003
+ }
1004
+ k = -10;
1005
+ }
1006
+ }
1007
+ }
1008
+ }
1009
+
1010
+ if (k >= 0)
1011
+ {
1012
+ /**** syntax check for string ****/
1013
+ k = 0;
1014
+ s = phonet_rules[i];
1015
+ n = 0;
1016
+ if (*s != upperchar [(unsigned char) *s])
1017
+ {
1018
+ /**** forbidden lower-case char ****/
1019
+ k = -100;
1020
+ }
1021
+ if (i % 3 == 0 && *s != '\0')
1022
+ {
1023
+ s++;
1024
+ n++;
1025
+ }
1026
+ while (*s != '\0' && k >= 0)
1027
+ {
1028
+ if (*s != upperchar [(unsigned char) *s])
1029
+ {
1030
+ /**** forbidden lower-case char ****/
1031
+ k = -100;
1032
+ break;
1033
+ }
1034
+ if (*s == '(')
1035
+ {
1036
+ if (k >= 1 || ! isletter [(unsigned char) *(s+1)])
1037
+ {
1038
+ k = -10;
1039
+ break;
1040
+ }
1041
+ s++;
1042
+ n++;
1043
+ while (isletter[(unsigned char) *s])
1044
+ {
1045
+ s++;
1046
+ }
1047
+ if (*s != ')')
1048
+ {
1049
+ k = -10;
1050
+ break;
1051
+ }
1052
+ k = 1;
1053
+ }
1054
+ else if (*s == '-')
1055
+ {
1056
+ /**** "k > 2" is correct ****/
1057
+ /**** (more than one '-' is allowed) ****/
1058
+ n--;
1059
+ if (k > 2 || n <= 0)
1060
+ {
1061
+ k = -10;
1062
+ break;
1063
+ }
1064
+ k = 2;
1065
+ }
1066
+ else if (*s == '<')
1067
+ {
1068
+ if (k >= 3)
1069
+ {
1070
+ k = -10;
1071
+ break;
1072
+ }
1073
+ k = 3;
1074
+ }
1075
+ else if (strchr ("0123456789",*s) != NULL && *s != '\0')
1076
+ {
1077
+ if (k >= 4)
1078
+ {
1079
+ k = -10;
1080
+ break;
1081
+ }
1082
+ k = 4;
1083
+ }
1084
+ else if (*s == '^')
1085
+ {
1086
+ if (k >= 5)
1087
+ {
1088
+ k = -10;
1089
+ break;
1090
+ }
1091
+ if (*(s+1) == '^')
1092
+ {
1093
+ s++;
1094
+ }
1095
+ k = 5;
1096
+ }
1097
+ else if (*s == '$')
1098
+ {
1099
+ if (k >= 6 || *(s+1) != '\0')
1100
+ {
1101
+ k = -10;
1102
+ break;
1103
+ }
1104
+ k = 6;
1105
+ }
1106
+ else if (k > 0 || *s == ')')
1107
+ {
1108
+ k = -10;
1109
+ break;
1110
+ }
1111
+ else
1112
+ {
1113
+ n++;
1114
+ }
1115
+ s++;
1116
+ }
1117
+
1118
+ if (k > 0 && i % 3 != 0)
1119
+ {
1120
+ sprintf (err_text," Meta char in replacement string");
1121
+ k = -10;
1122
+ }
1123
+ else if (k <= -100)
1124
+ {
1125
+ sprintf (err_text," Lower-case letter in string");
1126
+ }
1127
+ else if (k < 0)
1128
+ {
1129
+ sprintf (err_text," Syntax error in search string");
1130
+ }
1131
+ else if ((int) strlen (phonet_rules[i]) > 30)
1132
+ {
1133
+ sprintf (err_text," String very long ( > 30 chars)");
1134
+ k = -1;
1135
+ }
1136
+ s = phonet_rules[i];
1137
+
1138
+ if (k >= 0 && i % 3 == 0
1139
+ && n > 0 && strchr (s,'<') != NULL)
1140
+ {
1141
+ /**** check lengths of search and replacement string ****/
1142
+ if ((phonet_rules[i+1] != NULL
1143
+ && strcmp (s,phonet_rules[i+1]) == 0)
1144
+ || (phonet_rules[i+2] != NULL
1145
+ && strcmp (s,phonet_rules[i+2]) == 0))
1146
+ {
1147
+ strcpy (err_text," Replacement string too long due to '<'");
1148
+ k = -10;
1149
+ }
1150
+ if ((phonet_rules[i+1] != NULL
1151
+ && (int) strlen (phonet_rules[i+1]) > n)
1152
+ || (phonet_rules[i+2] != NULL
1153
+ && (int) strlen (phonet_rules[i+2]) > n))
1154
+ {
1155
+ strcpy (err_text," Replacement string too long due to '<'");
1156
+ k = -10;
1157
+ }
1158
+ }
1159
+ }
1160
+
1161
+ if (k < 0)
1162
+ {
1163
+ /**** output error message ****/
1164
+ s = "Possible error in rule";
1165
+ if (k < -1)
1166
+ {
1167
+ s = "Error in rule";
1168
+ }
1169
+ trace_info (s, i-(i%3), err_text);
1170
+ errors++;
1171
+ }
1172
+
1173
+
1174
+ if (k >= 0 && i % 3 != 0)
1175
+ {
1176
+ /**** do phonetic conversion and check result ****/
1177
+ n = i % 3;
1178
+ n0 = (i % 3 == 1) ? PHONET_FIRST_RULES : PHONET_SECOND_RULES;
1179
+ r = strchr (phonet_rules[i-n], '(');
1180
+ if (r == NULL)
1181
+ {
1182
+ /**** There is no regular expression in search string ****/
1183
+ r = " ";
1184
+ }
1185
+ r++;
1186
+
1187
+ while (*r != ')' && *r != '\0')
1188
+ {
1189
+ /**** Split regular expression (e.g. "GS(CH)--") ****/
1190
+ /**** into simple rules and check each of them. ****/
1191
+ r0 = phonet_rules[i-n];
1192
+ strcpy (rule, r0);
1193
+ phonet_rules[i-n] = rule;
1194
+ s = strchr (rule,'(');
1195
+
1196
+ if (s != NULL)
1197
+ {
1198
+ *s = *r;
1199
+ s++;
1200
+ while (*s != ')' && *s != '\0')
1201
+ {
1202
+ strcpy (s,s+1);
1203
+ }
1204
+ if (*s == ')')
1205
+ {
1206
+ strcpy (s,s+1);
1207
+ }
1208
+ }
1209
+
1210
+ /**** do the check ****/
1211
+ sprintf (orig, "%c%s", TEST_char, phonet_rules[i-n]);
1212
+ sprintf (orig2, "%c%s", TEST_char, phonet_rules[i]);
1213
+
1214
+ if (strchr (phonet_rules[i-n],'^') != NULL)
1215
+ {
1216
+ sprintf (orig, orig+1);
1217
+ sprintf (orig2,orig2+1);
1218
+ }
1219
+ if (strchr (phonet_rules[i-n],'-') != NULL
1220
+ || strchr (phonet_rules[i-n],'$') == NULL)
1221
+ {
1222
+ sprintf (orig, "%s%c", orig, TEST_char);
1223
+ sprintf (orig2,"%s%c", orig2,TEST_char);
1224
+ }
1225
+ if (orig2[0] == orig2[1] && orig2[2] == '\0')
1226
+ {
1227
+ /**** e.g. orig2 == "<TEST_char><TEST_char>" ****/
1228
+ orig2[1] = '\0';
1229
+ }
1230
+
1231
+ /**** check conversion result ****/
1232
+ k = phonet (orig,text, 33,n0);
1233
+ if (k > -100)
1234
+ {
1235
+ k = phonet (orig2,text2, 33,n0);
1236
+ }
1237
+
1238
+ if (k <= -100)
1239
+ {
1240
+ /**** error found ****/
1241
+ phonet_rules[i-n] = r0;
1242
+ strcpy (err_text," Recursion found");
1243
+ if (k == -200)
1244
+ {
1245
+ strcpy (err_text," Replacement string too long due to '<'");
1246
+ }
1247
+ trace_info ("Error in rule", i-(i%3), err_text);
1248
+ errors++;
1249
+ break;
1250
+ }
1251
+
1252
+ /**** second rule check ****/
1253
+ if (strcmp (text,orig2) != 0)
1254
+ {
1255
+ string_prepare (err_text+80, rule,rule);
1256
+ string_prepare (err_text, orig,orig);
1257
+
1258
+ phonet_rules[i-n] = err_text+80;
1259
+ (void) phonet (err_text, err_text+40, 33,n0);
1260
+ phonet_rules[i-n] = rule;
1261
+ err_text[0] = '\0';
1262
+ if (strcmp (err_text+40, orig2) == 0)
1263
+ {
1264
+ strcpy (text,orig2);
1265
+ }
1266
+ }
1267
+
1268
+ if (strcmp (text2,orig2) != 0
1269
+ && ((strcmp (phonet_rules[i-n],"AVIER$") == 0 && n==1
1270
+ && strcmp (phonet_rules[i],"AWIE") == 0)
1271
+ || (strcmp (phonet_rules[i-n],"GH") == 0 && n == 1
1272
+ && strcmp (phonet_rules[i],"G") == 0)
1273
+ || (strcmp (phonet_rules[i-n],"HEAD-") == 0 && n == 1
1274
+ && strcmp (phonet_rules[i],"HE") == 0)
1275
+ || (strcmp (phonet_rules[i-n],"IERRE$") == 0
1276
+ && strcmp (phonet_rules[i],"IER") == 0)
1277
+ || (strcmp (phonet_rules[i-n],"IVIER$") == 0 && n == 1
1278
+ && strcmp (phonet_rules[i],"IWIE") == 0)
1279
+ || (strcmp (phonet_rules[i-n],"SHST") == 0 && n == 1
1280
+ && strcmp (phonet_rules[i],"SHT") == 0)))
1281
+ {
1282
+ /**** these are exceptions ****/
1283
+ strcpy (text2, orig2);
1284
+ }
1285
+
1286
+ #ifdef PHONET_GERMAN
1287
+ if (strcmp (text2,orig2) != 0
1288
+ && language == PHONET_GERMAN
1289
+ && ((strncmp (phonet_rules[i-n],"GEGEN",5) == 0 && n == 1
1290
+ && strncmp (phonet_rules[i],"GEGN",4) == 0)
1291
+ || (strcmp (phonet_rules[i-n],"GGF.") == 0 && n == 1
1292
+ && strcmp (phonet_rules[i],"GF.") == 0)
1293
+ || (strcmp (phonet_rules[i-n],"HAVEN7$") == 0 && n == 1
1294
+ && strcmp (phonet_rules[i],"HAFN") == 0)
1295
+ || (strcmp (phonet_rules[i-n],"IEDENSTELLE------") == 0
1296
+ && n == 1 && strcmp (phonet_rules[i],"IDN ") == 0)
1297
+ || (strcmp (phonet_rules[i-n],"INDELERREGE------") == 0
1298
+ && n == 1 && strcmp (phonet_rules[i],"INDL ") == 0)
1299
+ || (strcmp (phonet_rules[i-n],"VAN DEN ^") == 0
1300
+ && n == 1 && strcmp (phonet_rules[i],"FANDN") == 0)))
1301
+ {
1302
+ /**** exceptions in German ****/
1303
+ strcpy (text2, orig2);
1304
+ }
1305
+ #endif
1306
+
1307
+ if (strcmp (text2,orig2) != 0
1308
+ && (s = strchr (orig2,'I')) != NULL)
1309
+ {
1310
+ /**** extra check for replacement strings with an 'I' ****/
1311
+ if (strchr (s+1,'I') != NULL)
1312
+ {
1313
+ /**** take second 'I', if found ****/
1314
+ s = strchr (s+1,'I');
1315
+ }
1316
+ *s = 'J';
1317
+ (void) phonet (orig2,text2, 33,n0);
1318
+ *s = 'I';
1319
+ }
1320
+
1321
+ /**** extra check for search strings with a '-' ****/
1322
+ s = orig;
1323
+ k = 0;
1324
+ while (strchr ("0123456789-<^$",*s) == NULL && *s != '\0')
1325
+ {
1326
+ s++;
1327
+ k++;
1328
+ }
1329
+ while (*s != '\0')
1330
+ {
1331
+ if (*s == '-')
1332
+ {
1333
+ k--;
1334
+ }
1335
+ s++;
1336
+ }
1337
+
1338
+ if (strcmp (text2,orig2) != 0
1339
+ && ((strchr (orig,'-') != NULL && k > 0)
1340
+ || (phonet_rules[i-n][0] == phonet_rules[i-n][1]
1341
+ && phonet_rules[i-n][0] == phonet_rules[i][0])
1342
+ || (strncmp (phonet_rules[i-n],"AI",2) == 0
1343
+ && phonet_rules[i][0] == 'E'
1344
+ && k > 1 && strncmp (s-2,"E$",2) == 0)))
1345
+ {
1346
+ s = orig + k;
1347
+ k = (int) strlen (orig2);
1348
+ if (k > 0)
1349
+ {
1350
+ if (orig2[k-1] == TEST_char)
1351
+ {
1352
+ k--;
1353
+ }
1354
+ strcpy (err_text+1, orig2);
1355
+ strcpy (err_text+1+k, s);
1356
+ k = 1;
1357
+
1358
+ if (phonet_rules[i-n][0] == phonet_rules[i-n][1]
1359
+ && phonet_rules[i-n][0] == phonet_rules[i][0]
1360
+ && phonet_rules[i][1] == '\0')
1361
+ {
1362
+ /**** extra check for double letters ****/
1363
+ err_text[0] = TEST_char;
1364
+ err_text[1] = phonet_rules[i][0];
1365
+ k = 0;
1366
+ }
1367
+ if (phonet_rules[i-n][0] == 'H'
1368
+ && phonet_rules[i-n][1] != '\0'
1369
+ && phonet_rules[i-n][2] == 'H'
1370
+ && phonet_rules[i-n][1] == phonet_rules[i][0]
1371
+ && phonet_rules[i-n][2] == phonet_rules[i][1])
1372
+ {
1373
+ /**** special case "H?H" ****/
1374
+ err_text[0] = TEST_char;
1375
+ err_text[1] = 'H';
1376
+ k = 0;
1377
+ }
1378
+ if (strncmp (phonet_rules[i-n],"LV",2) == 0
1379
+ && strncmp (phonet_rules[i], "LW",2) == 0)
1380
+ {
1381
+ /**** special case "LV*" ****/
1382
+ err_text[3] = 'V';
1383
+ }
1384
+ if (strncmp (phonet_rules[i-n],"AI",2) == 0
1385
+ && phonet_rules[i][0] == 'E')
1386
+ {
1387
+ /**** special case "AI*E$" ****/
1388
+ err_text[0] = TEST_char;
1389
+ err_text[1] = err_text[2];
1390
+ strcpy (err_text+2, phonet_rules[i]);
1391
+ k = 0;
1392
+ }
1393
+
1394
+ (void) phonet (err_text+k, err_text+40, 33,n0);
1395
+
1396
+ if (strcmp (err_text+40, orig2) != 0)
1397
+ {
1398
+ string_prepare (err_text+80, err_text+k,rule);
1399
+ string_prepare (err_text, rule,rule);
1400
+
1401
+ phonet_rules[i-n] = err_text;
1402
+ (void) phonet (err_text+80, err_text+40, 33,n0);
1403
+ phonet_rules[i-n] = rule;
1404
+ }
1405
+ err_text[0] = '\0';
1406
+ if (strcmp (err_text+40, orig2) == 0)
1407
+ {
1408
+ strcpy (text2, orig2);
1409
+ }
1410
+ }
1411
+ }
1412
+
1413
+ phonet_rules[i-n] = r0;
1414
+
1415
+ if (strcmp (text, orig2) != 0
1416
+ || strcmp (text2,orig2) != 0)
1417
+ {
1418
+ orig[0] = '\0';
1419
+ if (*r != ' ')
1420
+ {
1421
+ sprintf (orig," for '%c'", *r);
1422
+ }
1423
+ sprintf (err_text, " result %d%s: \"%s\"%s\"",
1424
+ n,orig, text,text2);
1425
+
1426
+ /**** delete 'TEST_char' from "error" string ****/
1427
+ s = err_text;
1428
+ while (*s != '\0')
1429
+ {
1430
+ while (*s == TEST_char)
1431
+ {
1432
+ strcpy (s,s+1);
1433
+ }
1434
+ s++;
1435
+ }
1436
+
1437
+ /**** output error message ****/
1438
+ s = "Possible error in rule";
1439
+ if (strcmp (text,orig2) != 0)
1440
+ {
1441
+ s = "Error in rule";
1442
+ }
1443
+ trace_info (s, i-(i%3), err_text);
1444
+ errors++;
1445
+ }
1446
+ r++;
1447
+ }
1448
+ }
1449
+ i++;
1450
+ }
1451
+
1452
+ if (i % 3 != 0)
1453
+ {
1454
+ printf ("Error: string count is not a multiple of 3.\n");
1455
+ errors++;
1456
+ }
1457
+ isletter [(unsigned char) TEST_char] = 0;
1458
+ internal_mode = internal_mode & ~CHECK_PHONETIC_RULES;
1459
+
1460
+ printf ("Language \"%s\" (%d phonetic rules):\n", phonet_language, rule_count);
1461
+ printf ("Check of all phonetic rules: ");
1462
+
1463
+ if (errors == 0)
1464
+ {
1465
+ printf ("No syntax error or inconsistency found.\n");
1466
+ }
1467
+ else
1468
+ {
1469
+ printf ("%d errors have been found.\n\n", errors);
1470
+ printf ("Remarks:\n");
1471
+ printf ("a) The correct syntax for search strings is:\n");
1472
+ printf (" <word> [<->..] [<] [<0-9>] [^[^]] [$]\n");
1473
+ printf (" The end of <word> may contain as a simple regular expression\n");
1474
+ printf (" one array of letters that must be enclosed in '(' and ')'.\n");
1475
+ printf ("b) Rules with a '<' demand that the replacement string may not\n");
1476
+ printf (" be longer than the search string.\n");
1477
+ printf ("c) The placement of rules determines their priority.\n");
1478
+ printf (" Therefore, the rules for \"SH\" must be placed before the rules\n");
1479
+ printf (" for \"S\" (otherwise, a conversion error will occur for \"SH\").\n");
1480
+ printf ("d) Another common source of errors is ignorance of dependencies.\n");
1481
+ printf (" For example, in German the replacement string \"NJE\" would be wrong,\n");
1482
+ printf (" because the 'J' is subject to another phonetic rule.\n");
1483
+ }
1484
+
1485
+ return (errors);
1486
+ }
1487
+
1488
+
1489
+
1490
+
1491
+ int main (int argc, char *argv[])
1492
+ {
1493
+ FILE *fr;
1494
+ char *s,text[201];
1495
+ int n=0,i=-1,r=-1;
1496
+
1497
+ if (argc < 2
1498
+ || strcmp (argv[1], "-?") == 0
1499
+ || strcmp (argv[1], "-h") == 0
1500
+ || strcmp (argv[1], "-help") == 0)
1501
+ {
1502
+ printf ("Program for phonetic string conversion (%s).\n", PHONET_VERSION);
1503
+ printf ("\n");
1504
+ printf ("Usage: phonet <orig_string> [ <language> ] [ -trace ]\n");
1505
+ printf (" or : phonet -file <file> <FIRST_RULES | SECOND_RULES> [ <language> ]\n");
1506
+ printf (" or : phonet -check_rules [ <language> ] [ -trace [<rule_no>] ]\n");
1507
+ printf ("\n");
1508
+ printf ("Options:\n");
1509
+ printf ("-file <file> : Phonetically convert the given file.\n");
1510
+ printf ("-check_rules : Check all phonetic rules. If no language is\n");
1511
+ printf (" specified, all rules of all languages are checked.\n");
1512
+ printf ("\n");
1513
+ printf ("-trace : Output trace info. If a rule number is specified\n");
1514
+ printf (" for \"-check_rules\", then only this rule will be\n");
1515
+ printf (" traced.\n\n");
1516
+ printf ("Language may be one of the following numbers:\n");
1517
+
1518
+ for (i=PHONET_FIRST_RULES; i< PHONET_SECOND_RULES; i++)
1519
+ {
1520
+ if (set_phonet_language(i) >= 0)
1521
+ {
1522
+ s = "";
1523
+ if (i == PHONET_DEFAULT_LANGUAGE)
1524
+ {
1525
+ s = " (default language)";
1526
+ }
1527
+ printf (" %2d: %s%s\n", i,phonet_language,s);
1528
+ }
1529
+ }
1530
+ return (1);
1531
+ }
1532
+
1533
+
1534
+ /**** parse arguments ****/
1535
+ if (argc >= 4 && strcmp (argv[1], "-file") == 0)
1536
+ {
1537
+ if (strncmp (argv[3], "FIRST",5) == 0
1538
+ || strncmp (argv[3], "first",5) == 0)
1539
+ {
1540
+ r = PHONET_FIRST_RULES;
1541
+ }
1542
+ else if (strncmp (argv[3], "SECOND",6) == 0
1543
+ || strncmp (argv[3], "second",6) == 0)
1544
+ {
1545
+ r = PHONET_SECOND_RULES;
1546
+ }
1547
+ else
1548
+ {
1549
+ printf ("Warning: rule set not specified; using first rules\n");
1550
+ r = PHONET_FIRST_RULES;
1551
+ }
1552
+
1553
+ i = PHONET_DEFAULT_LANGUAGE;
1554
+ if (argc >= 5)
1555
+ {
1556
+ i = atoi (argv[4]);
1557
+ }
1558
+ if (i < 0 || set_phonet_language(i) < 0)
1559
+ {
1560
+ i = PHONET_DEFAULT_LANGUAGE;
1561
+ }
1562
+ (void) set_phonet_language (i);
1563
+
1564
+ /**** convert file ****/
1565
+ if ((fr = fopen (argv[2],"r")) == NULL)
1566
+ {
1567
+ printf ("Error: could not open source file '%s'\n", argv[2]);
1568
+ return (1);
1569
+ }
1570
+
1571
+ while (! feof (fr))
1572
+ {
1573
+ /**** read data ****/
1574
+ if (fgets (text,200,fr) != NULL)
1575
+ {
1576
+ i = (int) strlen (text);
1577
+ if (i > 0 && text[i-1] == '\n')
1578
+ {
1579
+ /**** important ****/
1580
+ text[i-1] = '\0';
1581
+ i--;
1582
+ }
1583
+ if (i == 0)
1584
+ {
1585
+ continue;
1586
+ }
1587
+
1588
+ phonet (text, text,201, r);
1589
+ printf ("%s\n", text);
1590
+ }
1591
+ }
1592
+
1593
+ fclose (fr);
1594
+ return (0);
1595
+ }
1596
+
1597
+ if (argc >= 3 && argv[2][0] != '\0'
1598
+ && strchr ("0123456789", argv[2][0]) != NULL)
1599
+ {
1600
+ /**** language has been specified ****/
1601
+ i = atoi (argv[2]);
1602
+ if (argc >= 4 && strcmp (argv[3], "-trace") == 0)
1603
+ {
1604
+ if (argc >= 5 && atoi (argv[4]) > 0)
1605
+ {
1606
+ r = atoi (argv[4]);
1607
+ }
1608
+ internal_mode = internal_mode | TRACE_PHONET;
1609
+ }
1610
+ }
1611
+ if (argc >= 3 && strcmp (argv[2], "-trace") == 0)
1612
+ {
1613
+ if (argc >= 4 && atoi (argv[3]) > 0)
1614
+ {
1615
+ r = atoi (argv[3]);
1616
+ }
1617
+ internal_mode = internal_mode | TRACE_PHONET;
1618
+ }
1619
+
1620
+ /**** check_rules ****/
1621
+ if (strcmp (argv[1], "-check_rules") == 0)
1622
+ {
1623
+ if (i >= 0)
1624
+ {
1625
+ n = check_rules (i,r);
1626
+ }
1627
+ else
1628
+ {
1629
+ for (i=PHONET_FIRST_RULES; i< PHONET_SECOND_RULES; i++)
1630
+ {
1631
+ if (set_phonet_language(i) >= 0)
1632
+ {
1633
+ n += check_rules (i,r);
1634
+ printf ("\n\n");
1635
+ }
1636
+ }
1637
+ }
1638
+ return (n);
1639
+ }
1640
+
1641
+ /**** phonet conversion of string "argv[1]" ****/
1642
+ if (i < 0 || set_phonet_language(i) < 0)
1643
+ {
1644
+ i = PHONET_DEFAULT_LANGUAGE;
1645
+ }
1646
+ (void) set_phonet_language (i);
1647
+
1648
+ strcpy (text," ");
1649
+ s = argv[1];
1650
+ if ((int) strlen (s) > 200)
1651
+ {
1652
+ strcpy (text, "(too long; shortened)");
1653
+ s[200] = '\0';
1654
+ }
1655
+ printf ("Original string %s: \"%s\"\n", text, s);
1656
+ printf ("(language = %s)\n\n", phonet_language);
1657
+
1658
+ phonet (s, text,201, PHONET_FIRST_RULES);
1659
+ printf ("Conversion with first rules: \"%s\"\n", text);
1660
+
1661
+ phonet (s, text,201, PHONET_SECOND_RULES);
1662
+ printf ("Conversion with second rules: \"%s\"\n", text);
1663
+
1664
+ return (0);
1665
+ }
1666
+
1667
+ #endif
1668
+
1669
+
1670
+ /************************************************************/
1671
+ /**** end of file "phonet.c" ******************************/
1672
+ /************************************************************/