phonet 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/COPYING ADDED
@@ -0,0 +1,56 @@
1
+ Ruby is copyrighted free software by Yukihiro Matsumoto <matz@netlab.jp>.
2
+ You can redistribute it and/or modify it under either the terms of the GPL
3
+ (see the file GPL), or the conditions below:
4
+
5
+ 1. You may make and give away verbatim copies of the source form of the
6
+ software without restriction, provided that you duplicate all of the
7
+ original copyright notices and associated disclaimers.
8
+
9
+ 2. You may modify your copy of the software in any way, provided that
10
+ you do at least ONE of the following:
11
+
12
+ a) place your modifications in the Public Domain or otherwise
13
+ make them Freely Available, such as by posting said
14
+ modifications to Usenet or an equivalent medium, or by allowing
15
+ the author to include your modifications in the software.
16
+
17
+ b) use the modified software only within your corporation or
18
+ organization.
19
+
20
+ c) give non-standard binaries non-standard names, with
21
+ instructions on where to get the original software distribution.
22
+
23
+ d) make other distribution arrangements with the author.
24
+
25
+ 3. You may distribute the software in object code or binary form,
26
+ provided that you do at least ONE of the following:
27
+
28
+ a) distribute the binaries and library files of the software,
29
+ together with instructions (in the manual page or equivalent)
30
+ on where to get the original distribution.
31
+
32
+ b) accompany the distribution with the machine-readable source of
33
+ the software.
34
+
35
+ c) give non-standard binaries non-standard names, with
36
+ instructions on where to get the original software distribution.
37
+
38
+ d) make other distribution arrangements with the author.
39
+
40
+ 4. You may modify and include the part of the software into any other
41
+ software (possibly commercial). But some files in the distribution
42
+ are not written by the author, so that they are not under these terms.
43
+
44
+ For the list of those files and their copying conditions, see the
45
+ file LEGAL.
46
+
47
+ 5. The scripts and library files supplied as input to or produced as
48
+ output from the software do not automatically fall under the
49
+ copyright of the software, but belong to whomever generated them,
50
+ and may be sold commercially, and may be aggregated with this
51
+ software.
52
+
53
+ 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
54
+ IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
55
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56
+ PURPOSE.
@@ -0,0 +1,51 @@
1
+ $BK\%W%m%0%i%`$O%U%j!<%=%U%H%&%'%"$G$9!%(BGPL(the GNU General
2
+ Public License)$B$^$?$O0J2<$K<($9>r7o$GK\%W%m%0%i%`$r:FG[I[$G(B
3
+ $B$-$^$9!%(BGPL$B$K$D$$$F$O(BGPL$B%U%!%$%k$r;2>H$7$F2<$5$$!%(B
4
+
5
+ 1. 複製は制限なく自由です．
6
+
7
+ 2. 以下の条件のいずれかを満たす時に本プログラムのソースを
8
+ 自由に変更できます．
9
+
10
+ (a) ネットニューズにポストしたり，作者に変更を送付する
11
+ などの方法で，変更を公開する．
12
+
13
+ (b) 変更した本プログラムを自分の所属する組織内部だけで
14
+ 使う．
15
+
16
+ (c) 変更点を明示したうえ，ソフトウェアの名前を変更する．
17
+ そのソフトウェアを配布する時には変更前の本プログラ
18
+ ムも同時に配布する．または変更前の本プログラムのソー
19
+ スの入手法を明示する．
20
+
21
+ (d) その他の変更条件を作者と合意する．
22
+
23
+ 3. 以下の条件のいずれかを満たす時に本プログラムをコンパイ
24
+ ルしたオブジェクトコードや実行形式でも配布できます．
25
+
26
+ (a) バイナリを受け取った人がソースを入手できるように，
27
+ ソースの入手法を明示する．
28
+
29
+ (b) 機械可読なソースコードを添付する．
30
+
31
+ (c) 変更を行ったバイナリは名前を変更したうえ，オリジナ
32
+ ルのソースコードの入手法を明示する．
33
+
34
+ (d) その他の配布条件を作者と合意する．
35
+
36
+ 4. 他のプログラムへの引用はいかなる目的であれ自由です．た
37
+ だし，本プログラムに含まれる他の作者によるコードは，そ
38
+ れぞれの作者の意向による制限が加えられる場合があります．
39
+
40
+ $B$=$l$i%U%!%$%k$N0lMw$H$=$l$>$l$NG[I[>r7o$J$I$KIU$$$F$O(B
41
+ LEGAL$B%U%!%$%k$r;2>H$7$F$/$@$5$$!%(B
42
+
43
+ 5. 本プログラムへの入力となるスクリプトおよび，本プログラ
44
+ ムからの出力の権利は本プログラムの作者ではなく，それぞ
45
+ れの入出力を生成した人に属します．また，本プログラムに
46
+ 組み込まれるための拡張ライブラリについても同様です．
47
+
48
+ 6. 本プログラムは無保証です．作者は本プログラムをサポート
49
+ する意志はありますが，プログラム自身のバグあるいは本プ
50
+ ログラムの実行などから発生するいかなる損害に対しても責
51
+ 任を持ちません．
@@ -0,0 +1,12 @@
1
+ === 2.8.1 / 2009-08-21
2
+
3
+ * New features:
4
+ * Early support for Ruby 1.9
5
+
6
+ * Enhancements:
7
+ * Improved gem release process using Hoe as development dependency
8
+ * Implemented support for cross compilation
9
+ * Binary version built against MySQL 5.0.83
10
+
11
+ * Bug fixes:
12
+ * Improved detection of mysql and its configuration (mysql_config)
@@ -0,0 +1,16 @@
1
+ COPYING
2
+ COPYING.ja
3
+ History.txt
4
+ Manifest.txt
5
+ README.txt
6
+ Rakefile
7
+ ext/mysql_api/extconf.rb
8
+ ext/mysql_api/mysql.c
9
+ extra/README.html
10
+ extra/README_ja.html
11
+ extra/tommy.css
12
+ lib/mysql.rb
13
+ tasks/gem.rake
14
+ tasks/native.rake
15
+ tasks/vendor_mysql.rake
16
+ test/test_mysql.rb
@@ -0,0 +1,23 @@
1
+ = MySQL/Ruby Interface
2
+
3
+ * http://mysql-win.rubyforge.org
4
+ * http://rubyforge.org/projects/mysql-win
5
+ * http://github.com/luislaven/mysql-gem
6
+
7
+ == DESCRIPTION
8
+
9
+ This is the MySQL API module for Ruby. It provides the same functions for Ruby
10
+ programs that the MySQL C API provides for C programs.
11
+
12
+ This is a conversion of tmtm's original extension into a proper RubyGem.
13
+
14
+ === Warning about incompatible MySQL versions
15
+
16
+ Mixing MySQL versions will generate segmentation faults.
17
+
18
+ Running the binary version of this gem against a different version of MySQL
19
+ shared library <tt>libMySQL.dll</tt> will generate segmentation faults and
20
+ terminate your application.
21
+
22
+ Please use the exact same MAJOR.MINOR version of MySQL, see History.txt for
23
+ specific version of MySQL used to build the binaries.
@@ -0,0 +1,22 @@
1
+ #--
2
+ # Copyright (c) 2008 Luis Lavena
3
+ #
4
+ # This source code is released under the MIT License.
5
+ # See LICENSE file for details
6
+ #++
7
+
8
+ #
9
+ # NOTE: Keep this file clean.
10
+ # Add your customizations inside tasks directory.
11
+ # Thank You.
12
+ #
13
+
14
# Try a plain require first; if rake is not on the load path,
# load RubyGems and try again.
begin
  require 'rake'
rescue LoadError
  require 'rubygems'
  require 'rake'
end

# Pull in every task extension under tasks/, in sorted filename order.
Dir.glob('tasks/*.rake').sort.each do |task_file|
  load task_file
end
@@ -0,0 +1,3 @@
1
+ require 'mkmf' # Ruby's standard Makefile generator for C extensions
2
+ create_makefile("phonet_api") # write the Makefile for the extension named "phonet_api"
3
+
@@ -0,0 +1,82 @@
1
+ /*
2
+ * ph_ext.h
3
+ * --------
4
+ *
5
+ * Constants and prototypes for "phonet.c".
6
+ *
7
+ * Copyright (c):
8
+ * 1999-2007: Joerg MICHAEL, Adalbert-Stifter-Str. 11, 30655 Hannover, Germany
9
+ *
10
+ * SCCS: @(#) ph_ext.h 1.4.2 2007-08-27
11
+ *
12
+ * This program is subject to the GNU Lesser General Public License (LGPL),
13
+ * (formerly known as GNU Library General Public Licence)
14
+ * as published by the Free Software Foundation; either version 2 of the
15
+ * License, or (at your option) any later version.
16
+ * This program is distributed in the hope that it will be useful,
17
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
19
+ *
20
+ * You should have received a copy of the GNU Library General Public License
21
+ * along with this program; if not, write to the
22
+ * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23
+ *
24
+ * Actually, the LGPL is __less__ restrictive than the better known GNU General
25
+ * Public License (GPL). See the GNU Library General Public License or the file
26
+ * LIB_GPLP.TXT for more details and for a DISCLAIMER OF ALL WARRANTIES.
27
+ *
28
+ * There is one important restriction: If you modify this program in any way
29
+ * (e.g. add or change phonetic rules or modify the underlying logic or
30
+ * translate this program into another programming language), you must also
31
+ * release the changes under the terms of the LGPL.
32
+ * That means you have to give out the source code to your changes,
33
+ * and a very good way to do so is mailing them to the address given below.
34
+ * I think this is the best way to promote further development and use
35
+ * of this software.
36
+ *
37
+ * If you have any remarks, feel free to e-mail to:
38
+ * ct@ct.heise.de
39
+ *
40
+ * The author's email address is:
41
+ * astro.joerg@googlemail.com
42
+ */
43
+
44
+
45
+ #ifndef _PH_EXT_H_
46
+ #define _PH_EXT_H_
47
+
48
+ /**** PHONET_EXECUTABLE is left undefined below, so ****/
49
+ /**** "phonet.c" is built as a library. Defining it ****/
50
+ /**** also compiles the function "check_rules". ****/
51
+ /* #define PHONET_EXECUTABLE */
52
+
53
+
54
+ /**** rule-set selectors, combined with a language constant in "mode_language" of "phonet" (do not change) ****/
55
+ #define PHONET_FIRST_RULES 0
56
+ #define PHONET_SECOND_RULES 1024
57
+
58
+ /**** Language constants. If you don't want rules for one or more of the ****/
59
+ /**** following languages, delete the corresponding macro ****/
60
+ #define PHONET_NO_LANGUAGE 1
61
+ #define PHONET_GERMAN 2
62
+ /**** (under construction:) #define PHONET_ENGLISH 2 ****/
63
+
64
+
65
+ /**** the following macro _must_ be defined; "phonet" falls back to it when no rule set is selected ****/
66
+ #define PHONET_DEFAULT_LANGUAGE PHONET_GERMAN
67
+
68
+
69
+
70
+ /************************************************************/
71
+ /**** function prototypes *********************************/
72
+ /************************************************************/
73
+
74
+ int phonet (char src[], char dest[], int len, int mode_language); /* result: length of "dest" (>= 0), or < 0 on error */
75
+ int check_rules (int language, int trace_rule); /* defined in "phonet.c" only under PHONET_EXECUTABLE; result: number of errors */
76
+
77
+
78
+ #endif
79
+
80
+ /************************************************************/
81
+ /**** end of file "ph_ext.h" ******************************/
82
+ /************************************************************/
@@ -0,0 +1,1672 @@
1
+ /*
2
+ * phonet.c
3
+ * --------
4
+ *
5
+ * Program for phonetic string conversion.
6
+ *
7
+ * Copyright (c):
8
+ * 1999-2007: Joerg MICHAEL, Adalbert-Stifter-Str. 11, 30655 Hannover, Germany
9
+ * and
10
+ * (version 1.0) 1999: Heise Verlag, Helstorfer Str. 7, 30625 Hannover, Germany
11
+ *
12
+ * SCCS: @(#) phonet.c 1.4.2 2007-08-27
13
+ *
14
+ * This program is subject to the GNU Lesser General Public License (LGPL)
15
+ * (formerly known as GNU Library General Public Licence)
16
+ * as published by the Free Software Foundation; either version 2 of the
17
+ * License, or (at your option) any later version.
18
+ * This program is distributed in the hope that it will be useful,
19
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
21
+ *
22
+ * You should have received a copy of the GNU Library General Public License
23
+ * along with this program; if not, write to the
24
+ * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25
+ *
26
+ * Actually, the LGPL is __less__ restrictive than the better known GNU General
27
+ * Public License (GPL). See the GNU Library General Public License or the file
28
+ * LIB_GPLP.TXT for more details and for a DISCLAIMER OF ALL WARRANTIES.
29
+ *
30
+ * There is one important restriction: If you modify this program in any way
31
+ * (e.g. add or change phonetic rules or modify the underlying logic or
32
+ * translate this program into another programming language), you must also
33
+ * release the changes under the terms of the LGPL.
34
+ * That means you have to give out the source code to your changes,
35
+ * and a very good way to do so is mailing them to the address given below.
36
+ * I think this is the best way to promote further development and use
37
+ * of this software.
38
+ *
39
+ * If you have any remarks, feel free to e-mail to:
40
+ * ct@ct.heise.de
41
+ *
42
+ * The author's email address is:
43
+ * astro.joerg@googlemail.com
44
+ */
45
+
46
+
47
+ #include <stdio.h>
48
+ #include <stdlib.h>
49
+ #include <string.h>
50
+
51
+ #include "umlaut_p.h"
52
+ #include "ph_ext.h"
53
+ #include "phonet.h"
54
+
55
+
56
+ #define TEST_char '\004' /* marker character inserted into test strings by "string_prepare" */
57
+
58
+ /**** Bit flags stored in "*phonet_init" and "internal_mode": ****/
59
+ #define PHONET_INITIALIZED 1 /* lookup tables have been built */
60
+ #define CHECK_PHONETIC_RULES 2 /* set by "check_rules" while it validates the rule table */
61
+ #define TRACE_PHONET 4 /* print trace and error messages via printf */
62
+
63
+
64
+ static int internal_mode = 0; /* combination of the flags above */
65
+ static int last_rule_set = -PHONET_SECOND_RULES; /* language last requested by "phonet"; the initial value matches no masked "mode_language" */
66
+ static int alpha_pos[HASH_COUNT]; /* per character: 0 = no letter, 1 = umlaut, index+2 for 'a'-'z' / 'A'-'Z' */
67
+ static int isletter[HASH_COUNT]; /* per character: 0 = no letter, 1 = lower case, 2 = upper case */
68
+ static char upperchar[HASH_COUNT]; /* per character: its upper-case form (the character itself if it is no letter) */
69
+
70
+
71
+
72
+ /************************************************************/
73
+ /**** private (static) functions **************************/
74
+ /************************************************************/
75
+
76
+
77
+ static int initialize_phonet (void)
78
+ /**** language dependant initializations ****/
79
+ /**** resut: 0 : success ****/
80
+ /**** -1 : an error occured ****/
81
+ {
82
+ int i,k,n,*p;
83
+ int *p_hash1,*p_hash2;
84
+ char *s,*s2;
85
+ char temp[2];
86
+
87
+ if (! (internal_mode & PHONET_INITIALIZED))
88
+ {
89
+ if ((int)strlen (letters_a_to_z) > 26)
90
+ {
91
+ if (internal_mode & TRACE_PHONET)
92
+ {
93
+ printf ("Error: %s is not allowed\n",
94
+ "strlen (letters_a_to_z) > 26");
95
+ }
96
+ return (-1);
97
+ }
98
+ if ((int)strlen (letters_a_to_z) != (int)strlen (letters_A_to_Z))
99
+ {
100
+ if (internal_mode & TRACE_PHONET)
101
+ {
102
+ printf ("Error: %s is not allowed\n",
103
+ "strlen(letters_a_to_z) != strlen(letters_a_to_z)");
104
+ }
105
+ return (-1);
106
+ }
107
+ if ((int)strlen (umlaut_lower) != (int)strlen (umlaut_upper))
108
+ {
109
+ if (internal_mode & TRACE_PHONET)
110
+ {
111
+ printf ("Error: %s is not allowed\n",
112
+ "strlen(umlaut_lower) != strlen(umlaut_upper)");
113
+ }
114
+ return (-1);
115
+ }
116
+
117
+ internal_mode = internal_mode | PHONET_INITIALIZED;
118
+
119
+ /**** generate arrays "alpha_pos", "upperchar" and "isletter" ****/
120
+ for (i=0; i< HASH_COUNT; i++)
121
+ {
122
+ alpha_pos[i] = 0;
123
+ isletter[i] = 0;
124
+ upperchar[i] = (char) i;
125
+ }
126
+
127
+ for (k=-1; k<1; k++)
128
+ {
129
+ if (k == -1)
130
+ {
131
+ /**** German and international umlauts ****/
132
+ s = umlaut_lower;
133
+ s2 = umlaut_upper;
134
+ p = &k;
135
+ }
136
+ else
137
+ {
138
+ /**** "normal" letters ('a'-'z' and 'A'-'Z') ****/
139
+ s = letters_a_to_z;
140
+ s2 = letters_A_to_Z;
141
+ p = &i;
142
+ }
143
+
144
+ for (i=0; *(s+i) != '\0'; i++)
145
+ {
146
+ n = (unsigned char) *(s2+i); /** "s2" **/
147
+ alpha_pos[n] = *p + 2;
148
+ isletter[n] = 2;
149
+ upperchar[n] = *(s2+i);
150
+
151
+ n = (unsigned char) *(s+i); /** "s" **/
152
+ alpha_pos[n] = *p + 2;
153
+ isletter[n] = 1;
154
+ upperchar[n] = *(s2+i);
155
+ }
156
+ }
157
+ }
158
+
159
+ if (phonet_init == NULL || phonet_hash == NULL || phonet_rules == NULL)
160
+ {
161
+ return (-1);
162
+ }
163
+
164
+ if (! (*phonet_init & PHONET_INITIALIZED))
165
+ {
166
+ *phonet_init = *phonet_init | PHONET_INITIALIZED;
167
+
168
+ for (i=0; i< HASH_COUNT; i++)
169
+ {
170
+ phonet_hash[i] = -1;
171
+ }
172
+
173
+ for (i=0; i<26; i++)
174
+ {
175
+ p_hash1 = (* phonet_hash_1) [i];
176
+ p_hash2 = (* phonet_hash_2) [i];
177
+
178
+ for (k=0; k<28; k++)
179
+ {
180
+ p_hash1[k] = -1;
181
+ p_hash2[k] = -1;
182
+ }
183
+ }
184
+
185
+ for (i=0; phonet_rules[i] != PHONET_END; i += 3)
186
+ {
187
+ if ((s=phonet_rules[i]) != NULL)
188
+ {
189
+ /**** calculate first hash value ****/
190
+ k = (unsigned char) *s;
191
+
192
+ if (phonet_hash[k] < 0
193
+ && (phonet_rules[i+1] != NULL || phonet_rules[i+2] != NULL))
194
+ {
195
+ phonet_hash[k] = i;
196
+ }
197
+
198
+ /**** calculate second hash values ****/
199
+ if (k != 0 && alpha_pos[k] >= 2)
200
+ {
201
+ k = alpha_pos[k];
202
+ p_hash1 = (* phonet_hash_1) [k-2];
203
+ p_hash2 = (* phonet_hash_2) [k-2];
204
+ s++;
205
+
206
+ if (*s == '(')
207
+ {
208
+ s++;
209
+ }
210
+ else if (*s == '\0')
211
+ {
212
+ s = (char *) " ";
213
+ }
214
+ else
215
+ {
216
+ sprintf (temp, "%c", *s);
217
+ s = temp;
218
+ }
219
+
220
+ while (*s != '\0' && (unsigned char) *s != ')')
221
+ {
222
+ k = alpha_pos [(unsigned char) *s];
223
+
224
+ if (k > 0)
225
+ {
226
+ /**** add hash value for this letter ****/
227
+ if (p_hash1[k] < 0)
228
+ {
229
+ p_hash1[k] = i;
230
+ p_hash2[k] = i;
231
+ }
232
+
233
+ if (p_hash2[k] >= i - 30)
234
+ {
235
+ p_hash2[k] = i;
236
+ }
237
+ else
238
+ {
239
+ k = -1;
240
+ }
241
+ }
242
+
243
+ if (k <= 0)
244
+ {
245
+ /**** add hash value for all letters ****/
246
+ if (p_hash1[0] < 0)
247
+ {
248
+ p_hash1[0] = i;
249
+ }
250
+ p_hash2[0] = i;
251
+ }
252
+ s++;
253
+ }
254
+ }
255
+ }
256
+ }
257
+ }
258
+
259
+ return (0);
260
+ }
261
+
262
+
263
+
264
+ static void trace_info (char text[], int n, char err_text[])
265
+ /**** output trace info ****/
266
+ {
267
+ char *s,*s2,*s3;
268
+ s = (phonet_rules[n] == NULL) ? (char *) "(NULL)" : phonet_rules[n];
269
+ s2 = (phonet_rules[n+1] == NULL) ? (char *) "(NULL)" : phonet_rules[n+1];
270
+ s3 = (phonet_rules[n+2] == NULL) ? (char *) "(NULL)" : phonet_rules[n+2];
271
+
272
+ printf ("%s %d: \"%s\"%s\"%s\" %s\n", text, ((n/3)+1), s,s2,s3, err_text);
273
+ }
274
+
275
+
276
+
277
+
278
+ int phonet (char src[], char dest[], int len, int mode_language)
279
+
280
+ /**** Function for phonetic conversions ****/
281
+ /**** ("dest" == "src" is allowed). ****/
282
+ /**** "len" = max. length of "dest" incl. '\0'. ****/
283
+ /**** mode_language = <language> + PHONET_FIRST_RULES : ****/
284
+ /**** Use <language> and first rules ****/
285
+ /**** mode_language = <language> + PHONET_SECOND_RULES : ****/
286
+ /**** Use <language> and second rules ****/
287
+ /**** result: >= 0 : string length of "dest" ****/
288
+ /**** < 0 : an error occured ****/
289
+ {
290
+ int i,j,k,ml,n,p,z;
291
+ int k0,n0,p0,z0;
292
+ int start1,end1,start2,end2;
293
+ int start3,end3,start4,end4;
294
+ int *p_hash1,*p_hash2;
295
+ char c,c0,*s;
296
+ char *src_2,text[51];
297
+
298
+ if (dest == NULL || src == NULL || len <= 0)
299
+ {
300
+ /**** wrong arg's ****/
301
+ if (internal_mode & TRACE_PHONET)
302
+ {
303
+ printf ("Error: wrong arguments.\n");
304
+ }
305
+ return (-1);
306
+ }
307
+
308
+ /**** select language ****/
309
+ i = 0;
310
+ k = mode_language & ~PHONET_SECOND_RULES;
311
+ if (k != last_rule_set)
312
+ {
313
+ i = set_phonet_language (k);
314
+ last_rule_set = k;
315
+ }
316
+ if (i < 0)
317
+ {
318
+ s = "Notice: language not set, use current language";
319
+ i = 0;
320
+
321
+ if (phonet_init == NULL
322
+ || phonet_hash == NULL || phonet_rules == NULL)
323
+ {
324
+ i = set_phonet_language (PHONET_DEFAULT_LANGUAGE);
325
+ s = "Notice: language not set, use default language";
326
+
327
+ if (i < 0)
328
+ {
329
+ s = "Error: language not set; default language could not be set";
330
+ }
331
+ }
332
+
333
+ if (internal_mode & TRACE_PHONET)
334
+ {
335
+ if (i >= 0)
336
+ {
337
+ printf ("%s (%s).\n", s, phonet_language);
338
+ }
339
+ else
340
+ {
341
+ printf ("%s.\n", s);
342
+ }
343
+ }
344
+
345
+ if (phonet_init == NULL
346
+ || phonet_hash == NULL || phonet_rules == NULL)
347
+ {
348
+ strcpy (dest,"");
349
+ return (-2);
350
+ }
351
+ }
352
+
353
+ if (phonet_init == NULL || ! (*phonet_init & PHONET_INITIALIZED)
354
+ || phonet_hash == NULL || phonet_rules == NULL
355
+ || ! (internal_mode & PHONET_INITIALIZED))
356
+ {
357
+ /**** initialization (must be done ****/
358
+ /**** BEFORE converting "src" to upper char) ****/
359
+ i = initialize_phonet();
360
+ if (i < 0)
361
+ {
362
+ if (internal_mode & TRACE_PHONET)
363
+ {
364
+ printf ("Error: initialization failed\n");
365
+ }
366
+ strcpy (dest,"");
367
+ return (-3);
368
+ }
369
+ }
370
+
371
+ src_2 = text;
372
+ i = (int) strlen (src);
373
+ if (i > 50)
374
+ {
375
+ /**** "oversized" string ****/
376
+ src_2 = (char *) malloc ((size_t) (i+1));
377
+ if (src_2 == NULL)
378
+ {
379
+ /**** "malloc" failed ****/
380
+ if (internal_mode & TRACE_PHONET)
381
+ {
382
+ printf ("Error: \"malloc\" for %d Bytes failed.\n", i+1);
383
+ }
384
+ strcpy (dest,"");
385
+ return (-4);
386
+ }
387
+ }
388
+
389
+ /**** "strcpy" plus conversion to upper char ****/
390
+ i = 0;
391
+ while ((c=src[i]) != '\0')
392
+ {
393
+ src_2[i] = upperchar [(unsigned char) c];
394
+ i++;
395
+ }
396
+ src_2[i] = '\0';
397
+ src = src_2;
398
+
399
+ if (mode_language & PHONET_SECOND_RULES)
400
+ {
401
+ ml = 2;
402
+ s = "second";
403
+ }
404
+ else
405
+ {
406
+ ml = 1;
407
+ s = "first";
408
+ }
409
+ if (internal_mode & TRACE_PHONET)
410
+ {
411
+ printf ("\n\nphonetic conversion for : \"%s\"\n", src_2);
412
+ printf ("(%s rules)\n", s);
413
+ }
414
+
415
+ /**** check "src" ****/
416
+ i = 0;
417
+ j = 0;
418
+ z = 0;
419
+ while ((c = src[i]) != '\0')
420
+ {
421
+ if (internal_mode & TRACE_PHONET)
422
+ {
423
+ printf ("\ncheck position %d: src = \"%s\",", j, src+i);
424
+ printf (" dest = \"%.*s\"\n", j, dest);
425
+ }
426
+
427
+ n = alpha_pos [(unsigned char) c];
428
+ if (n >= 2)
429
+ {
430
+ p_hash1 = (* phonet_hash_1) [n-2];
431
+ p_hash2 = (* phonet_hash_2) [n-2];
432
+ n = alpha_pos [(unsigned char) src[i+1]];
433
+ start1 = p_hash1 [n];
434
+ start2 = p_hash1 [0];
435
+ end1 = p_hash2 [n];
436
+ end2 = p_hash2 [0];
437
+
438
+ /**** preserve rule priorities ****/
439
+ if (start2 >= 0
440
+ && (start1 < 0 || start2 < start1))
441
+ {
442
+ n = start1; start1 = start2; start2 = n;
443
+ n = end1; end1 = end2; end2 = n;
444
+ }
445
+
446
+ if (end1 >= start2 && start2 >= 0)
447
+ {
448
+ if (end2 > end1)
449
+ {
450
+ end1 = end2;
451
+ }
452
+ start2 = -1;
453
+ end2 = -1;
454
+ }
455
+ }
456
+ else
457
+ {
458
+ n = phonet_hash [(unsigned char) c];
459
+ start1 = n;
460
+ end1 = 10000;
461
+ start2 = -1;
462
+ end2 = -1;
463
+ }
464
+
465
+ n = start1;
466
+ z0 = 0;
467
+
468
+ if (n >= 0)
469
+ {
470
+ /**** check rules for this char ****/
471
+ while (phonet_rules[n] == NULL || phonet_rules[n][0] == c)
472
+ {
473
+ if (n > end1)
474
+ {
475
+ if (start2 > 0)
476
+ {
477
+ n = start2;
478
+ start1 = start2; start2 = -1;
479
+ end1 = end2; end2 = -1;
480
+ continue;
481
+ }
482
+ break;
483
+ }
484
+
485
+ if (phonet_rules [n] == NULL || phonet_rules [n+ml] == NULL)
486
+ {
487
+ /**** no conversion rule available ****/
488
+ n += 3;
489
+ continue;
490
+ }
491
+ if (internal_mode & TRACE_PHONET)
492
+ {
493
+ trace_info ("> rule no.", n, "is being checked");
494
+ }
495
+
496
+ /**** check whole string ****/
497
+ k = 1; /**** no. of matching letters ****/
498
+ p = 5; /**** default priority ****/
499
+ s = phonet_rules[n];
500
+ s++; /**** needed by "*(s-1)" below ****/
501
+
502
+ while (src[i+k] == *s && *s != '\0'
503
+ && strchr ("0123456789(-<^$", *s) == NULL)
504
+ {
505
+ k++;
506
+ s++;
507
+ }
508
+ if (internal_mode & CHECK_PHONETIC_RULES)
509
+ {
510
+ /**** we do "CHECK_PHONETIC_RULES" ****/
511
+ while (*s != '\0' && src[i+k] == *s)
512
+ {
513
+ k++;
514
+ s++;
515
+ }
516
+ }
517
+ if (*s == '(')
518
+ {
519
+ /**** check an array of letters ****/
520
+ if (isletter [(unsigned char) src[i+k]]
521
+ && strchr (s+1, src[i+k]) != NULL)
522
+ {
523
+ k++;
524
+ while (*s != '\0' && *s != ')')
525
+ {
526
+ s++;
527
+ }
528
+ if (*s == ')')
529
+ {
530
+ s++;
531
+ }
532
+ }
533
+ }
534
+ p0 = (int) *s;
535
+ k0 = k;
536
+ while (*s == '-' && k > 1)
537
+ {
538
+ k--;
539
+ s++;
540
+ }
541
+ if (*s == '<')
542
+ {
543
+ s++;
544
+ }
545
+ if (strchr ("0123456789",*s) != NULL && *s != '\0')
546
+ {
547
+ /**** read priority ****/
548
+ p = *s - '0';
549
+ s++;
550
+ }
551
+ if (*s == '^' && *(s+1) == '^')
552
+ {
553
+ s++;
554
+ if ((internal_mode & CHECK_PHONETIC_RULES)
555
+ && ! isletter [(unsigned char) src[i+k0]])
556
+ {
557
+ /**** we do "CHECK_PHONETIC_RULES" ****/
558
+ s = s-2;
559
+ }
560
+ }
561
+
562
+ if (*s == '\0'
563
+ || (*s == '^' && (i == 0 || ! isletter [(unsigned char)src[i-1]])
564
+ && (*(s+1) != '$'
565
+ || (! isletter [(unsigned char) src[i+k0]] && src[i+k0] != '.')))
566
+ || (*s == '$' && i > 0 && isletter [(unsigned char) src[i-1]]
567
+ && (! isletter [(unsigned char) src[i+k0]] && src[i+k0] != '.')))
568
+ {
569
+ /**** look for continuation, if: ****/
570
+ /**** k > 1 and NO '-' in first string ****/
571
+ n0 = -1;
572
+
573
+ if (k > 1 && src[i+k] != '\0' && p0 != (int) '-')
574
+ {
575
+ c0 = src [i+k-1];
576
+ n0 = alpha_pos [(unsigned char) c0];
577
+
578
+ if (n0 >= 2 && src[i+k] != '\0')
579
+ {
580
+ p_hash1 = (* phonet_hash_1) [n0-2];
581
+ p_hash2 = (* phonet_hash_2) [n0-2];
582
+ n0 = alpha_pos [(unsigned char) src[i+k]];
583
+ start3 = p_hash1 [n0];
584
+ start4 = p_hash1 [0];
585
+ end3 = p_hash2 [n0];
586
+ end4 = p_hash2 [0];
587
+
588
+ /**** preserve rule priorities ****/
589
+ if (start4 >= 0
590
+ && (start3 < 0 || start4 < start3))
591
+ {
592
+ n0 = start3; start3 = start4; start4 = n0;
593
+ n0 = end3; end3 = end4; end4 = n0;
594
+ }
595
+
596
+ if (end3 >= start4 && start4 >= 0)
597
+ {
598
+ if (end4 > end3)
599
+ {
600
+ end3 = end4;
601
+ }
602
+ start4 = -1;
603
+ end4 = -1;
604
+ }
605
+ }
606
+ else
607
+ {
608
+ n0 = phonet_hash [(unsigned char) c0];
609
+ start3 = n0;
610
+ end3 = 10000;
611
+ start4 = -1;
612
+ end4 = -1;
613
+ }
614
+
615
+ n0 = start3;
616
+ }
617
+
618
+ if (n0 >= 0)
619
+ {
620
+ /**** check continuation rules for "src[i+k]" ****/
621
+ while (phonet_rules[n0] == NULL
622
+ || phonet_rules[n0][0] == c0)
623
+ {
624
+ if (n0 > end3)
625
+ {
626
+ if (start4 > 0)
627
+ {
628
+ n0 = start4;
629
+ start3 = start4; start4 = -1;
630
+ end3 = end4; end4 = -1;
631
+ continue;
632
+ }
633
+ p0 = -1; /**** important ****/
634
+ break;
635
+ }
636
+
637
+ if (phonet_rules [n0] == NULL
638
+ || phonet_rules [n0+ml] == NULL)
639
+ {
640
+ /**** no conversion rule available ****/
641
+ n0 += 3;
642
+ continue;
643
+ }
644
+ if (internal_mode & TRACE_PHONET)
645
+ {
646
+ trace_info ("> > continuation rule no.",
647
+ n0, "is being checked");
648
+ }
649
+
650
+ /**** check whole string ****/
651
+ k0 = k;
652
+ p0 = 5;
653
+ s = phonet_rules[n0];
654
+ s++;
655
+ while (src[i+k0] == *s && *s != '\0'
656
+ && strchr("0123456789(-<^$", *s) == NULL)
657
+ {
658
+ k0++;
659
+ s++;
660
+ }
661
+ if (*s == '(')
662
+ {
663
+ /**** check an array of letters ****/
664
+ if (isletter [(unsigned char) src[i+k0]]
665
+ && strchr (s+1, src[i+k0]) != NULL)
666
+ {
667
+ k0++;
668
+ while (*s != '\0' && *s != ')')
669
+ {
670
+ s++;
671
+ }
672
+ if (*s == ')')
673
+ {
674
+ s++;
675
+ }
676
+ }
677
+ }
678
+ while (*s == '-')
679
+ {
680
+ /**** "k0" is NOT decremented ****/
681
+ /**** because of "if (k0 == k)" ****/
682
+ s++;
683
+ }
684
+ if (*s == '<')
685
+ {
686
+ s++;
687
+ }
688
+ if (strchr ("0123456789",*s) != NULL && *s != '\0')
689
+ {
690
+ p0 = *s - '0';
691
+ s++;
692
+ }
693
+
694
+ if (*s == '\0'
695
+ /**** *s == '^' is not possible here ****/
696
+ || (*s == '$' && ! isletter [(unsigned char) src[i+k0]]
697
+ && src[i+k0] != '.'))
698
+ {
699
+ if (k0 == k)
700
+ {
701
+ /**** this is only a partial string ****/
702
+ if (internal_mode & TRACE_PHONET)
703
+ {
704
+ trace_info ("> > continuation rule no.",
705
+ n0, "not used (too short)");
706
+ }
707
+ n0 += 3;
708
+ continue;
709
+ }
710
+
711
+ if (p0 < p)
712
+ {
713
+ /**** priority is too low ****/
714
+ if (internal_mode & TRACE_PHONET)
715
+ {
716
+ trace_info ("> > continuation rule no.",
717
+ n0, "not used (priority)");
718
+ }
719
+ n0 += 3;
720
+ continue;
721
+ }
722
+
723
+ /**** continuation rule found ****/
724
+ break;
725
+ }
726
+
727
+ if (internal_mode & TRACE_PHONET)
728
+ {
729
+ trace_info ("> > continuation rule no.",
730
+ n0, "not used");
731
+ }
732
+ n0 += 3;
733
+ } /**** end of "while" ****/
734
+
735
+ if (p0 >= p
736
+ && (phonet_rules[n0] != NULL && phonet_rules[n0][0] == c0))
737
+ {
738
+ if (internal_mode & TRACE_PHONET)
739
+ {
740
+ trace_info ("> rule no.", n,"");
741
+ trace_info ("> not used because of continuation",n0,"");
742
+ }
743
+ n += 3;
744
+ continue;
745
+ }
746
+ }
747
+
748
+ /**** replace string ****/
749
+ if (internal_mode & TRACE_PHONET)
750
+ {
751
+ trace_info ("Rule no.", n, "is applied");
752
+ }
753
+ p0 = (phonet_rules[n][0] != '\0'
754
+ && strchr (phonet_rules[n]+1,'<') != NULL) ? 1 : 0;
755
+ s = phonet_rules [n+ml];
756
+
757
+ if (p0 == 1 && z == 0)
758
+ {
759
+ /**** rule with '<' is applied ****/
760
+ if (j > 0 && *s != '\0'
761
+ && (dest[j-1] == c || dest[j-1] == *s))
762
+ {
763
+ j--;
764
+ }
765
+ z0 = 1;
766
+ z++;
767
+ k0 = 0;
768
+ while (*s != '\0' && src[i+k0] != '\0')
769
+ {
770
+ src[i+k0] = *s;
771
+ k0++;
772
+ s++;
773
+ }
774
+ if (k0 < k)
775
+ {
776
+ strcpy (src+i+k0, src+i+k);
777
+ }
778
+ if ((internal_mode & CHECK_PHONETIC_RULES)
779
+ && (*s != '\0' || k0 > k))
780
+ {
781
+ /**** we do "CHECK_PHONETIC_RULES": ****/
782
+ /**** replacement string is too long ****/
783
+ dest[j] = '\0';
784
+ return (-200);
785
+ }
786
+ /**** new "current char" ****/
787
+ c = src[i];
788
+ }
789
+ else
790
+ {
791
+ if ((internal_mode & CHECK_PHONETIC_RULES)
792
+ && p0 == 1 && z > 0)
793
+ {
794
+ /**** we do "CHECK_PHONETIC_RULES": ****/
795
+ /**** recursion found -> error ****/
796
+ dest[j] = '\0';
797
+ return (-100);
798
+ }
799
+ i = i+k-1;
800
+ z = 0;
801
+ while (*s != '\0'
802
+ && *(s+1) != '\0' && j < len-1)
803
+ {
804
+ if (j == 0 || dest[j-1] != *s)
805
+ {
806
+ dest[j] = *s;
807
+ j++;
808
+ }
809
+ s++;
810
+ }
811
+ /**** new "current char" ****/
812
+ c = *s;
813
+
814
+ if (phonet_rules[n][0] != '\0'
815
+ && strstr (phonet_rules[n]+1, "^^") != NULL)
816
+ {
817
+ if (c != '\0')
818
+ {
819
+ dest[j] = c;
820
+ j++;
821
+ }
822
+ src += i+1;
823
+ i = 0;
824
+ z0 = 1;
825
+ }
826
+ }
827
+
828
+ break;
829
+ }
830
+
831
+ n += 3;
832
+ if (n > end1 && start2 > 0)
833
+ {
834
+ n = start2;
835
+ start1 = start2;
836
+ end1 = end2;
837
+ start2 = -1;
838
+ end2 = -1;
839
+ }
840
+ }
841
+ }
842
+
843
+ if (z0 == 0)
844
+ {
845
+ if (j < len-1 && c != '\0'
846
+ && (j == 0 || dest[j-1] != c))
847
+ {
848
+ /**** delete multiple letters only ****/
849
+ dest[j] = c;
850
+ j++;
851
+ }
852
+ i++;
853
+ z = 0;
854
+ }
855
+ }
856
+
857
+ if (src_2 != text)
858
+ {
859
+ free (src_2);
860
+ }
861
+ dest[j] = '\0';
862
+
863
+ if (internal_mode & TRACE_PHONET)
864
+ {
865
+ printf ("\n");
866
+ printf ("internal phonetic string is: '%s'\n", dest);
867
+ }
868
+
869
+ return (j);
870
+ }
871
+
872
+
873
+
874
+
875
+
876
+ /************************************************************/
877
+ /**** functions used by "main" ****************************/
878
+ /************************************************************/
879
+
880
+
881
+ #ifdef PHONET_EXECUTABLE
882
+
883
+
884
+ static void string_prepare (char *text, char *s, char *s2)
885
+ /**** Auxiliary function for "check_rules": ****/
886
+ /**** "strcpy (text,s)" plus inclusion of ****/
887
+ /**** 'TEST_char' and '-', if necessary ****/
888
+ {
889
+ if (*s != '\0')
890
+ {
891
+ *text = *s;
892
+ text++;
893
+ s++;
894
+ }
895
+ while (strchr ("0123456789-<^$", *s) == NULL && *s != '\0')
896
+ {
897
+ *text = *s;
898
+ text++;
899
+ s++;
900
+ }
901
+ if (strchr (s2,'-') != NULL || strchr (s2,'$') == NULL)
902
+ {
903
+ *text = TEST_char;
904
+ text++;
905
+ *text = '-';
906
+ text++;
907
+ }
908
+ strcpy (text, s);
909
+ }
910
+
911
+
912
+
913
+ int check_rules (int language, int trace_rule)
914
+ /**** Check all phonetic rules of the current ****/
915
+ /**** language. ****/
916
+ /**** ("trace_rule" > 0: trace this rule only) ****/
917
+ /**** Result: Number of errors ****/
918
+ {
919
+ int i,k,n,n0;
920
+ int errors = 0;
921
+ int rule_count = 0;
922
+ char *r,*r0,rule[35];
923
+ char *s,err_text[201];
924
+ char orig[35],orig2[35];
925
+ char text[35],text2[35];
926
+
927
+ /**** initialization ****/
928
+ i = set_phonet_language (language);
929
+ if (i >= 0)
930
+ {
931
+ i = initialize_phonet();
932
+ }
933
+ if (i < 0)
934
+ {
935
+ printf ("Error: initialization for language %d failed\n", language);
936
+ return (-1);
937
+ }
938
+
939
+ isletter [(unsigned char) TEST_char] = 1;
940
+ internal_mode = internal_mode | CHECK_PHONETIC_RULES;
941
+ i = 0;
942
+
943
+ while (phonet_rules[i] != PHONET_END)
944
+ {
945
+ /**** syntax check for all strings ****/
946
+ if ((i/3)+1 == trace_rule)
947
+ {
948
+ internal_mode = internal_mode | TRACE_PHONET;
949
+ }
950
+ else if (trace_rule > 0)
951
+ {
952
+ internal_mode = internal_mode & ~TRACE_PHONET;
953
+ }
954
+
955
+ strcpy (err_text,"");
956
+ k = 0;
957
+ if (i % 3 == 0)
958
+ {
959
+ if (phonet_rules[i] == NULL
960
+ || (phonet_rules[i+1] == NULL && phonet_rules[i+2] == NULL))
961
+ {
962
+ strcpy (err_text," Forbidden null pointer");
963
+ k = -10;
964
+ }
965
+ rule_count++;
966
+ }
967
+
968
+ if (k >= 0)
969
+ {
970
+ if (phonet_rules[i] == NULL)
971
+ {
972
+ i++;
973
+ continue;
974
+ }
975
+
976
+ if (i % 3 == 0)
977
+ {
978
+ /**** check first letter ****/
979
+ s = phonet_rules[i];
980
+ n = phonet_hash [(unsigned char) *s];
981
+ if (i >= n+3 && n >= 0
982
+ && (s == NULL || *s != phonet_rules[i-3][0]))
983
+ {
984
+ strcpy (err_text," Wrong first char");
985
+ k = -10;
986
+ }
987
+
988
+ if (k >= 0)
989
+ {
990
+ /**** check length of search string ****/
991
+ k = 0;
992
+ while (strchr ("0123456789()<^$", *s) == NULL && *s != '\0')
993
+ {
994
+ k++;
995
+ s++;
996
+ }
997
+ if (k == 0)
998
+ {
999
+ strcpy (err_text," Search string is empty");
1000
+ if (*s != '\0' && strchr ("()<^$", *s) == NULL)
1001
+ {
1002
+ strcpy (err_text," First char is meta char");
1003
+ }
1004
+ k = -10;
1005
+ }
1006
+ }
1007
+ }
1008
+ }
1009
+
1010
+ if (k >= 0)
1011
+ {
1012
+ /**** syntax check for string ****/
1013
+ k = 0;
1014
+ s = phonet_rules[i];
1015
+ n = 0;
1016
+ if (*s != upperchar [(unsigned char) *s])
1017
+ {
1018
+ /**** forbidden lower-case char ****/
1019
+ k = -100;
1020
+ }
1021
+ if (i % 3 == 0 && *s != '\0')
1022
+ {
1023
+ s++;
1024
+ n++;
1025
+ }
1026
+ while (*s != '\0' && k >= 0)
1027
+ {
1028
+ if (*s != upperchar [(unsigned char) *s])
1029
+ {
1030
+ /**** forbidden lower-case char ****/
1031
+ k = -100;
1032
+ break;
1033
+ }
1034
+ if (*s == '(')
1035
+ {
1036
+ if (k >= 1 || ! isletter [(unsigned char) *(s+1)])
1037
+ {
1038
+ k = -10;
1039
+ break;
1040
+ }
1041
+ s++;
1042
+ n++;
1043
+ while (isletter[(unsigned char) *s])
1044
+ {
1045
+ s++;
1046
+ }
1047
+ if (*s != ')')
1048
+ {
1049
+ k = -10;
1050
+ break;
1051
+ }
1052
+ k = 1;
1053
+ }
1054
+ else if (*s == '-')
1055
+ {
1056
+ /**** "k > 2" is correct ****/
1057
+ /**** (more than one '-' is allowed) ****/
1058
+ n--;
1059
+ if (k > 2 || n <= 0)
1060
+ {
1061
+ k = -10;
1062
+ break;
1063
+ }
1064
+ k = 2;
1065
+ }
1066
+ else if (*s == '<')
1067
+ {
1068
+ if (k >= 3)
1069
+ {
1070
+ k = -10;
1071
+ break;
1072
+ }
1073
+ k = 3;
1074
+ }
1075
+ else if (strchr ("0123456789",*s) != NULL && *s != '\0')
1076
+ {
1077
+ if (k >= 4)
1078
+ {
1079
+ k = -10;
1080
+ break;
1081
+ }
1082
+ k = 4;
1083
+ }
1084
+ else if (*s == '^')
1085
+ {
1086
+ if (k >= 5)
1087
+ {
1088
+ k = -10;
1089
+ break;
1090
+ }
1091
+ if (*(s+1) == '^')
1092
+ {
1093
+ s++;
1094
+ }
1095
+ k = 5;
1096
+ }
1097
+ else if (*s == '$')
1098
+ {
1099
+ if (k >= 6 || *(s+1) != '\0')
1100
+ {
1101
+ k = -10;
1102
+ break;
1103
+ }
1104
+ k = 6;
1105
+ }
1106
+ else if (k > 0 || *s == ')')
1107
+ {
1108
+ k = -10;
1109
+ break;
1110
+ }
1111
+ else
1112
+ {
1113
+ n++;
1114
+ }
1115
+ s++;
1116
+ }
1117
+
1118
+ if (k > 0 && i % 3 != 0)
1119
+ {
1120
+ sprintf (err_text," Meta char in replacement string");
1121
+ k = -10;
1122
+ }
1123
+ else if (k <= -100)
1124
+ {
1125
+ sprintf (err_text," Lower-case letter in string");
1126
+ }
1127
+ else if (k < 0)
1128
+ {
1129
+ sprintf (err_text," Syntax error in search string");
1130
+ }
1131
+ else if ((int) strlen (phonet_rules[i]) > 30)
1132
+ {
1133
+ sprintf (err_text," String very long ( > 30 chars)");
1134
+ k = -1;
1135
+ }
1136
+ s = phonet_rules[i];
1137
+
1138
+ if (k >= 0 && i % 3 == 0
1139
+ && n > 0 && strchr (s,'<') != NULL)
1140
+ {
1141
+ /**** check lengths of search and replacement string ****/
1142
+ if ((phonet_rules[i+1] != NULL
1143
+ && strcmp (s,phonet_rules[i+1]) == 0)
1144
+ || (phonet_rules[i+2] != NULL
1145
+ && strcmp (s,phonet_rules[i+2]) == 0))
1146
+ {
1147
+ strcpy (err_text," Replacement string too long due to '<'");
1148
+ k = -10;
1149
+ }
1150
+ if ((phonet_rules[i+1] != NULL
1151
+ && (int) strlen (phonet_rules[i+1]) > n)
1152
+ || (phonet_rules[i+2] != NULL
1153
+ && (int) strlen (phonet_rules[i+2]) > n))
1154
+ {
1155
+ strcpy (err_text," Replacement string too long due to '<'");
1156
+ k = -10;
1157
+ }
1158
+ }
1159
+ }
1160
+
1161
+ if (k < 0)
1162
+ {
1163
+ /**** output error message ****/
1164
+ s = "Possible error in rule";
1165
+ if (k < -1)
1166
+ {
1167
+ s = "Error in rule";
1168
+ }
1169
+ trace_info (s, i-(i%3), err_text);
1170
+ errors++;
1171
+ }
1172
+
1173
+
1174
+ if (k >= 0 && i % 3 != 0)
1175
+ {
1176
+ /**** do phonetic conversion and check result ****/
1177
+ n = i % 3;
1178
+ n0 = (i % 3 == 1) ? PHONET_FIRST_RULES : PHONET_SECOND_RULES;
1179
+ r = strchr (phonet_rules[i-n], '(');
1180
+ if (r == NULL)
1181
+ {
1182
+ /**** There is no regular expression in search string ****/
1183
+ r = " ";
1184
+ }
1185
+ r++;
1186
+
1187
+ while (*r != ')' && *r != '\0')
1188
+ {
1189
+ /**** Split regular expression (e.g. "GS(CH)--") ****/
1190
+ /**** into simple rules and check each of them. ****/
1191
+ r0 = phonet_rules[i-n];
1192
+ strcpy (rule, r0);
1193
+ phonet_rules[i-n] = rule;
1194
+ s = strchr (rule,'(');
1195
+
1196
+ if (s != NULL)
1197
+ {
1198
+ *s = *r;
1199
+ s++;
1200
+ while (*s != ')' && *s != '\0')
1201
+ {
1202
+ strcpy (s,s+1);
1203
+ }
1204
+ if (*s == ')')
1205
+ {
1206
+ strcpy (s,s+1);
1207
+ }
1208
+ }
1209
+
1210
+ /**** do the check ****/
1211
+ sprintf (orig, "%c%s", TEST_char, phonet_rules[i-n]);
1212
+ sprintf (orig2, "%c%s", TEST_char, phonet_rules[i]);
1213
+
1214
+ if (strchr (phonet_rules[i-n],'^') != NULL)
1215
+ {
1216
+ sprintf (orig, orig+1);
1217
+ sprintf (orig2,orig2+1);
1218
+ }
1219
+ if (strchr (phonet_rules[i-n],'-') != NULL
1220
+ || strchr (phonet_rules[i-n],'$') == NULL)
1221
+ {
1222
+ sprintf (orig, "%s%c", orig, TEST_char);
1223
+ sprintf (orig2,"%s%c", orig2,TEST_char);
1224
+ }
1225
+ if (orig2[0] == orig2[1] && orig2[2] == '\0')
1226
+ {
1227
+ /**** e.g. orig2 == "<TEST_char><TEST_char>" ****/
1228
+ orig2[1] = '\0';
1229
+ }
1230
+
1231
+ /**** check conversion result ****/
1232
+ k = phonet (orig,text, 33,n0);
1233
+ if (k > -100)
1234
+ {
1235
+ k = phonet (orig2,text2, 33,n0);
1236
+ }
1237
+
1238
+ if (k <= -100)
1239
+ {
1240
+ /**** error found ****/
1241
+ phonet_rules[i-n] = r0;
1242
+ strcpy (err_text," Recursion found");
1243
+ if (k == -200)
1244
+ {
1245
+ strcpy (err_text," Replacement string too long due to '<'");
1246
+ }
1247
+ trace_info ("Error in rule", i-(i%3), err_text);
1248
+ errors++;
1249
+ break;
1250
+ }
1251
+
1252
+ /**** second rule check ****/
1253
+ if (strcmp (text,orig2) != 0)
1254
+ {
1255
+ string_prepare (err_text+80, rule,rule);
1256
+ string_prepare (err_text, orig,orig);
1257
+
1258
+ phonet_rules[i-n] = err_text+80;
1259
+ (void) phonet (err_text, err_text+40, 33,n0);
1260
+ phonet_rules[i-n] = rule;
1261
+ err_text[0] = '\0';
1262
+ if (strcmp (err_text+40, orig2) == 0)
1263
+ {
1264
+ strcpy (text,orig2);
1265
+ }
1266
+ }
1267
+
1268
+ if (strcmp (text2,orig2) != 0
1269
+ && ((strcmp (phonet_rules[i-n],"AVIER$") == 0 && n==1
1270
+ && strcmp (phonet_rules[i],"AWIE") == 0)
1271
+ || (strcmp (phonet_rules[i-n],"GH") == 0 && n == 1
1272
+ && strcmp (phonet_rules[i],"G") == 0)
1273
+ || (strcmp (phonet_rules[i-n],"HEAD-") == 0 && n == 1
1274
+ && strcmp (phonet_rules[i],"HE") == 0)
1275
+ || (strcmp (phonet_rules[i-n],"IERRE$") == 0
1276
+ && strcmp (phonet_rules[i],"IER") == 0)
1277
+ || (strcmp (phonet_rules[i-n],"IVIER$") == 0 && n == 1
1278
+ && strcmp (phonet_rules[i],"IWIE") == 0)
1279
+ || (strcmp (phonet_rules[i-n],"SHST") == 0 && n == 1
1280
+ && strcmp (phonet_rules[i],"SHT") == 0)))
1281
+ {
1282
+ /**** these are exceptions ****/
1283
+ strcpy (text2, orig2);
1284
+ }
1285
+
1286
+ #ifdef PHONET_GERMAN
1287
+ if (strcmp (text2,orig2) != 0
1288
+ && language == PHONET_GERMAN
1289
+ && ((strncmp (phonet_rules[i-n],"GEGEN",5) == 0 && n == 1
1290
+ && strncmp (phonet_rules[i],"GEGN",4) == 0)
1291
+ || (strcmp (phonet_rules[i-n],"GGF.") == 0 && n == 1
1292
+ && strcmp (phonet_rules[i],"GF.") == 0)
1293
+ || (strcmp (phonet_rules[i-n],"HAVEN7$") == 0 && n == 1
1294
+ && strcmp (phonet_rules[i],"HAFN") == 0)
1295
+ || (strcmp (phonet_rules[i-n],"IEDENSTELLE------") == 0
1296
+ && n == 1 && strcmp (phonet_rules[i],"IDN ") == 0)
1297
+ || (strcmp (phonet_rules[i-n],"INDELERREGE------") == 0
1298
+ && n == 1 && strcmp (phonet_rules[i],"INDL ") == 0)
1299
+ || (strcmp (phonet_rules[i-n],"VAN DEN ^") == 0
1300
+ && n == 1 && strcmp (phonet_rules[i],"FANDN") == 0)))
1301
+ {
1302
+ /**** exceptions in German ****/
1303
+ strcpy (text2, orig2);
1304
+ }
1305
+ #endif
1306
+
1307
+ if (strcmp (text2,orig2) != 0
1308
+ && (s = strchr (orig2,'I')) != NULL)
1309
+ {
1310
+ /**** extra check for replacement strings with an 'I' ****/
1311
+ if (strchr (s+1,'I') != NULL)
1312
+ {
1313
+ /**** take second 'I', if found ****/
1314
+ s = strchr (s+1,'I');
1315
+ }
1316
+ *s = 'J';
1317
+ (void) phonet (orig2,text2, 33,n0);
1318
+ *s = 'I';
1319
+ }
1320
+
1321
+ /**** extra check for search strings with a '-' ****/
1322
+ s = orig;
1323
+ k = 0;
1324
+ while (strchr ("0123456789-<^$",*s) == NULL && *s != '\0')
1325
+ {
1326
+ s++;
1327
+ k++;
1328
+ }
1329
+ while (*s != '\0')
1330
+ {
1331
+ if (*s == '-')
1332
+ {
1333
+ k--;
1334
+ }
1335
+ s++;
1336
+ }
1337
+
1338
+ if (strcmp (text2,orig2) != 0
1339
+ && ((strchr (orig,'-') != NULL && k > 0)
1340
+ || (phonet_rules[i-n][0] == phonet_rules[i-n][1]
1341
+ && phonet_rules[i-n][0] == phonet_rules[i][0])
1342
+ || (strncmp (phonet_rules[i-n],"AI",2) == 0
1343
+ && phonet_rules[i][0] == 'E'
1344
+ && k > 1 && strncmp (s-2,"E$",2) == 0)))
1345
+ {
1346
+ s = orig + k;
1347
+ k = (int) strlen (orig2);
1348
+ if (k > 0)
1349
+ {
1350
+ if (orig2[k-1] == TEST_char)
1351
+ {
1352
+ k--;
1353
+ }
1354
+ strcpy (err_text+1, orig2);
1355
+ strcpy (err_text+1+k, s);
1356
+ k = 1;
1357
+
1358
+ if (phonet_rules[i-n][0] == phonet_rules[i-n][1]
1359
+ && phonet_rules[i-n][0] == phonet_rules[i][0]
1360
+ && phonet_rules[i][1] == '\0')
1361
+ {
1362
+ /**** extra check for double letters ****/
1363
+ err_text[0] = TEST_char;
1364
+ err_text[1] = phonet_rules[i][0];
1365
+ k = 0;
1366
+ }
1367
+ if (phonet_rules[i-n][0] == 'H'
1368
+ && phonet_rules[i-n][1] != '\0'
1369
+ && phonet_rules[i-n][2] == 'H'
1370
+ && phonet_rules[i-n][1] == phonet_rules[i][0]
1371
+ && phonet_rules[i-n][2] == phonet_rules[i][1])
1372
+ {
1373
+ /**** special case "H?H" ****/
1374
+ err_text[0] = TEST_char;
1375
+ err_text[1] = 'H';
1376
+ k = 0;
1377
+ }
1378
+ if (strncmp (phonet_rules[i-n],"LV",2) == 0
1379
+ && strncmp (phonet_rules[i], "LW",2) == 0)
1380
+ {
1381
+ /**** special case "LV*" ****/
1382
+ err_text[3] = 'V';
1383
+ }
1384
+ if (strncmp (phonet_rules[i-n],"AI",2) == 0
1385
+ && phonet_rules[i][0] == 'E')
1386
+ {
1387
+ /**** special case "AI*E$" ****/
1388
+ err_text[0] = TEST_char;
1389
+ err_text[1] = err_text[2];
1390
+ strcpy (err_text+2, phonet_rules[i]);
1391
+ k = 0;
1392
+ }
1393
+
1394
+ (void) phonet (err_text+k, err_text+40, 33,n0);
1395
+
1396
+ if (strcmp (err_text+40, orig2) != 0)
1397
+ {
1398
+ string_prepare (err_text+80, err_text+k,rule);
1399
+ string_prepare (err_text, rule,rule);
1400
+
1401
+ phonet_rules[i-n] = err_text;
1402
+ (void) phonet (err_text+80, err_text+40, 33,n0);
1403
+ phonet_rules[i-n] = rule;
1404
+ }
1405
+ err_text[0] = '\0';
1406
+ if (strcmp (err_text+40, orig2) == 0)
1407
+ {
1408
+ strcpy (text2, orig2);
1409
+ }
1410
+ }
1411
+ }
1412
+
1413
+ phonet_rules[i-n] = r0;
1414
+
1415
+ if (strcmp (text, orig2) != 0
1416
+ || strcmp (text2,orig2) != 0)
1417
+ {
1418
+ orig[0] = '\0';
1419
+ if (*r != ' ')
1420
+ {
1421
+ sprintf (orig," for '%c'", *r);
1422
+ }
1423
+ sprintf (err_text, " result %d%s: \"%s\"%s\"",
1424
+ n,orig, text,text2);
1425
+
1426
+ /**** delete 'TEST_char' from "error" string ****/
1427
+ s = err_text;
1428
+ while (*s != '\0')
1429
+ {
1430
+ while (*s == TEST_char)
1431
+ {
1432
+ strcpy (s,s+1);
1433
+ }
1434
+ s++;
1435
+ }
1436
+
1437
+ /**** output error message ****/
1438
+ s = "Possible error in rule";
1439
+ if (strcmp (text,orig2) != 0)
1440
+ {
1441
+ s = "Error in rule";
1442
+ }
1443
+ trace_info (s, i-(i%3), err_text);
1444
+ errors++;
1445
+ }
1446
+ r++;
1447
+ }
1448
+ }
1449
+ i++;
1450
+ }
1451
+
1452
+ if (i % 3 != 0)
1453
+ {
1454
+ printf ("Error: string count is not a multiple of 3.\n");
1455
+ errors++;
1456
+ }
1457
+ isletter [(unsigned char) TEST_char] = 0;
1458
+ internal_mode = internal_mode & ~CHECK_PHONETIC_RULES;
1459
+
1460
+ printf ("Language \"%s\" (%d phonetic rules):\n", phonet_language, rule_count);
1461
+ printf ("Check of all phonetic rules: ");
1462
+
1463
+ if (errors == 0)
1464
+ {
1465
+ printf ("No syntax error or inconsistency found.\n");
1466
+ }
1467
+ else
1468
+ {
1469
+ printf ("%d errors have been found.\n\n", errors);
1470
+ printf ("Remarks:\n");
1471
+ printf ("a) The correct syntax for search strings is:\n");
1472
+ printf (" <word> [<->..] [<] [<0-9>] [^[^]] [$]\n");
1473
+ printf (" The end of <word> may contain as a simple regular expression\n");
1474
+ printf (" one array of letters that must be enclosed in '(' and ')'.\n");
1475
+ printf ("b) Rules with a '<' demand that the replacement string may not\n");
1476
+ printf (" be longer than the search string.\n");
1477
+ printf ("c) The placement of rules determines their priority.\n");
1478
+ printf (" Therefore, the rules for \"SH\" must be placed before the rules\n");
1479
+ printf (" for \"S\" (otherwise, a conversion error will occur for \"SH\").\n");
1480
+ printf ("d) Another common source of errors is ignorance of dependencies.\n");
1481
+ printf (" For example, in German the replacement string \"NJE\" would be wrong,\n");
1482
+ printf (" because the 'J' is subject to another phonetic rule.\n");
1483
+ }
1484
+
1485
+ return (errors);
1486
+ }
1487
+
1488
+
1489
+
1490
+
1491
+ int main (int argc, char *argv[])
1492
+ {
1493
+ FILE *fr;
1494
+ char *s,text[201];
1495
+ int n=0,i=-1,r=-1;
1496
+
1497
+ if (argc < 2
1498
+ || strcmp (argv[1], "-?") == 0
1499
+ || strcmp (argv[1], "-h") == 0
1500
+ || strcmp (argv[1], "-help") == 0)
1501
+ {
1502
+ printf ("Program for phonetic string conversion (%s).\n", PHONET_VERSION);
1503
+ printf ("\n");
1504
+ printf ("Usage: phonet <orig_string> [ <language> ] [ -trace ]\n");
1505
+ printf (" or : phonet -file <file> <FIRST_RULES | SECOND_RULES> [ <language> ]\n");
1506
+ printf (" or : phonet -check_rules [ <language> ] [ -trace [<rule_no>] ]\n");
1507
+ printf ("\n");
1508
+ printf ("Options:\n");
1509
+ printf ("-file <file> : Phonetically convert the given file.\n");
1510
+ printf ("-check_rules : Check all phonetic rules. If no language is\n");
1511
+ printf (" specified, all rules of all languages are checked.\n");
1512
+ printf ("\n");
1513
+ printf ("-trace : Output trace info. If a rule number is specified\n");
1514
+ printf (" for \"-check_rules\", then only this rule will be\n");
1515
+ printf (" traced.\n\n");
1516
+ printf ("Language may be one of the following numbers:\n");
1517
+
1518
+ for (i=PHONET_FIRST_RULES; i< PHONET_SECOND_RULES; i++)
1519
+ {
1520
+ if (set_phonet_language(i) >= 0)
1521
+ {
1522
+ s = "";
1523
+ if (i == PHONET_DEFAULT_LANGUAGE)
1524
+ {
1525
+ s = " (default language)";
1526
+ }
1527
+ printf (" %2d: %s%s\n", i,phonet_language,s);
1528
+ }
1529
+ }
1530
+ return (1);
1531
+ }
1532
+
1533
+
1534
+ /**** parse arguments ****/
1535
+ if (argc >= 4 && strcmp (argv[1], "-file") == 0)
1536
+ {
1537
+ if (strncmp (argv[3], "FIRST",5) == 0
1538
+ || strncmp (argv[3], "first",5) == 0)
1539
+ {
1540
+ r = PHONET_FIRST_RULES;
1541
+ }
1542
+ else if (strncmp (argv[3], "SECOND",6) == 0
1543
+ || strncmp (argv[3], "second",6) == 0)
1544
+ {
1545
+ r = PHONET_SECOND_RULES;
1546
+ }
1547
+ else
1548
+ {
1549
+ printf ("Warning: rule set not specified; using first rules\n");
1550
+ r = PHONET_FIRST_RULES;
1551
+ }
1552
+
1553
+ i = PHONET_DEFAULT_LANGUAGE;
1554
+ if (argc >= 5)
1555
+ {
1556
+ i = atoi (argv[4]);
1557
+ }
1558
+ if (i < 0 || set_phonet_language(i) < 0)
1559
+ {
1560
+ i = PHONET_DEFAULT_LANGUAGE;
1561
+ }
1562
+ (void) set_phonet_language (i);
1563
+
1564
+ /**** convert file ****/
1565
+ if ((fr = fopen (argv[2],"r")) == NULL)
1566
+ {
1567
+ printf ("Error: could not open source file '%s'\n", argv[2]);
1568
+ return (1);
1569
+ }
1570
+
1571
+ while (! feof (fr))
1572
+ {
1573
+ /**** read data ****/
1574
+ if (fgets (text,200,fr) != NULL)
1575
+ {
1576
+ i = (int) strlen (text);
1577
+ if (i > 0 && text[i-1] == '\n')
1578
+ {
1579
+ /**** important ****/
1580
+ text[i-1] = '\0';
1581
+ i--;
1582
+ }
1583
+ if (i == 0)
1584
+ {
1585
+ continue;
1586
+ }
1587
+
1588
+ phonet (text, text,201, r);
1589
+ printf ("%s\n", text);
1590
+ }
1591
+ }
1592
+
1593
+ fclose (fr);
1594
+ return (0);
1595
+ }
1596
+
1597
+ if (argc >= 3 && argv[2][0] != '\0'
1598
+ && strchr ("0123456789", argv[2][0]) != NULL)
1599
+ {
1600
+ /**** language has been specified ****/
1601
+ i = atoi (argv[2]);
1602
+ if (argc >= 4 && strcmp (argv[3], "-trace") == 0)
1603
+ {
1604
+ if (argc >= 5 && atoi (argv[4]) > 0)
1605
+ {
1606
+ r = atoi (argv[4]);
1607
+ }
1608
+ internal_mode = internal_mode | TRACE_PHONET;
1609
+ }
1610
+ }
1611
+ if (argc >= 3 && strcmp (argv[2], "-trace") == 0)
1612
+ {
1613
+ if (argc >= 4 && atoi (argv[3]) > 0)
1614
+ {
1615
+ r = atoi (argv[3]);
1616
+ }
1617
+ internal_mode = internal_mode | TRACE_PHONET;
1618
+ }
1619
+
1620
+ /**** check_rules ****/
1621
+ if (strcmp (argv[1], "-check_rules") == 0)
1622
+ {
1623
+ if (i >= 0)
1624
+ {
1625
+ n = check_rules (i,r);
1626
+ }
1627
+ else
1628
+ {
1629
+ for (i=PHONET_FIRST_RULES; i< PHONET_SECOND_RULES; i++)
1630
+ {
1631
+ if (set_phonet_language(i) >= 0)
1632
+ {
1633
+ n += check_rules (i,r);
1634
+ printf ("\n\n");
1635
+ }
1636
+ }
1637
+ }
1638
+ return (n);
1639
+ }
1640
+
1641
+ /**** phonet conversion of string "argv[1]" ****/
1642
+ if (i < 0 || set_phonet_language(i) < 0)
1643
+ {
1644
+ i = PHONET_DEFAULT_LANGUAGE;
1645
+ }
1646
+ (void) set_phonet_language (i);
1647
+
1648
+ strcpy (text," ");
1649
+ s = argv[1];
1650
+ if ((int) strlen (s) > 200)
1651
+ {
1652
+ strcpy (text, "(too long; shortened)");
1653
+ s[200] = '\0';
1654
+ }
1655
+ printf ("Original string %s: \"%s\"\n", text, s);
1656
+ printf ("(language = %s)\n\n", phonet_language);
1657
+
1658
+ phonet (s, text,201, PHONET_FIRST_RULES);
1659
+ printf ("Conversion with first rules: \"%s\"\n", text);
1660
+
1661
+ phonet (s, text,201, PHONET_SECOND_RULES);
1662
+ printf ("Conversion with second rules: \"%s\"\n", text);
1663
+
1664
+ return (0);
1665
+ }
1666
+
1667
+ #endif
1668
+
1669
+
1670
+ /************************************************************/
1671
+ /**** end of file "phonet.c" ******************************/
1672
+ /************************************************************/