ruby-sfst 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/COPYING +280 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +54 -0
  6. data/README.md +1 -1
  7. data/Rakefile +9 -18
  8. data/bin/console +7 -0
  9. data/bin/setup +6 -0
  10. data/ext/sfst/alphabet.cc +879 -0
  11. data/ext/sfst/alphabet.h +302 -0
  12. data/ext/sfst/basic.cc +85 -0
  13. data/ext/{sfst_machine → sfst}/basic.h +7 -4
  14. data/ext/sfst/compact.cc +629 -0
  15. data/ext/sfst/compact.h +100 -0
  16. data/ext/sfst/determinise.cc +279 -0
  17. data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
  18. data/ext/sfst/fst.cc +1150 -0
  19. data/ext/sfst/fst.h +374 -0
  20. data/ext/sfst/hopcroft.cc +681 -0
  21. data/ext/sfst/interface.cc +1921 -0
  22. data/ext/sfst/interface.h +171 -0
  23. data/ext/sfst/make-compact.cc +323 -0
  24. data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
  25. data/ext/sfst/mem.h +80 -0
  26. data/ext/sfst/operators.cc +1273 -0
  27. data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
  28. data/ext/sfst/sgi.h +72 -0
  29. data/ext/sfst/utf8.cc +149 -0
  30. data/ext/{sfst_machine → sfst}/utf8.h +7 -4
  31. data/lib/sfst.rb +2 -1
  32. data/lib/sfst/version.rb +1 -1
  33. data/ruby-sfst.gemspec +23 -23
  34. metadata +107 -35
  35. data/ext/sfst_machine/alphabet.cc +0 -812
  36. data/ext/sfst_machine/alphabet.h +0 -273
  37. data/ext/sfst_machine/basic.cc +0 -84
  38. data/ext/sfst_machine/compact.cc +0 -616
  39. data/ext/sfst_machine/compact.h +0 -98
  40. data/ext/sfst_machine/determinise.cc +0 -303
  41. data/ext/sfst_machine/fst.cc +0 -1000
  42. data/ext/sfst_machine/fst.h +0 -369
  43. data/ext/sfst_machine/interface.cc +0 -1842
  44. data/ext/sfst_machine/interface.h +0 -93
  45. data/ext/sfst_machine/make-compact.cc +0 -327
  46. data/ext/sfst_machine/mem.h +0 -74
  47. data/ext/sfst_machine/operators.cc +0 -1131
  48. data/ext/sfst_machine/sgi.h +0 -44
  49. data/ext/sfst_machine/utf8.cc +0 -146
  50. data/test/test_sfst.fst +0 -3
  51. data/test/test_sfst.rb +0 -114
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 19ac73a1ab1fdac44819260e4e522ad822cb99bf
4
- data.tar.gz: 16a8b37a117b2367a0701202e1e6ef178ef6907a
3
+ metadata.gz: 4892aa62071212d5f65b05afb0476d59852c75c2
4
+ data.tar.gz: a00ef4a9b22a7b1fda9c0c6e8f89df2a1fe24205
5
5
  SHA512:
6
- metadata.gz: bf06fe247b61f4d4d1f7cc516a34d9b706bfab7116f7859b419548956a16e160313ac6523a85979241d644abaa216cb79087be092b6ddb7f30fa62763e8d58b5
7
- data.tar.gz: 35e93cf5b6f8f88656a19f11dd68561a3541a4c5faf1d0f7bf15b9b78d91bffde33767e4f683bdfd46ffc51869678d83b461a4fcd651ef91bc8af05f4768681f
6
+ metadata.gz: 194dbde4538a06b21367d1083c7f32d75558539b31e4df0bdcb6a53793ec619e3ace0243e41253117a95a0ec36b26dbe4c81f65ad23e33fe366e4c0d7758a82e
7
+ data.tar.gz: 2b6074dc37444cd3bdce4c75db8b3dfb4574841c16cc67d82bc39ecc5539f2c1af36b2c0f456e0bf021ac89898efdb8898a4ae4b82dd5221ec6e9724b7ec665e
@@ -1,5 +1,6 @@
1
1
  # Changelog
2
2
 
3
+ * Update to SFST 1.4.7c
3
4
  * Dropped support for compiling FSTs from Ruby; use fst-compiler-utf8 instead
4
5
 
5
6
  ## 0.4.1
data/COPYING ADDED
@@ -0,0 +1,280 @@
1
+ GNU GENERAL PUBLIC LICENSE
2
+ Version 2, June 1991
3
+
4
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
5
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
6
+ Everyone is permitted to copy and distribute verbatim copies
7
+ of this license document, but changing it is not allowed.
8
+
9
+ Preamble
10
+
11
+ The licenses for most software are designed to take away your
12
+ freedom to share and change it. By contrast, the GNU General Public
13
+ License is intended to guarantee your freedom to share and change free
14
+ software--to make sure the software is free for all its users. This
15
+ General Public License applies to most of the Free Software
16
+ Foundation's software and to any other program whose authors commit to
17
+ using it. (Some other Free Software Foundation software is covered by
18
+ the GNU Library General Public License instead.) You can apply it to
19
+ your programs, too.
20
+
21
+ When we speak of free software, we are referring to freedom, not
22
+ price. Our General Public Licenses are designed to make sure that you
23
+ have the freedom to distribute copies of free software (and charge for
24
+ this service if you wish), that you receive source code or can get it
25
+ if you want it, that you can change the software or use pieces of it
26
+ in new free programs; and that you know you can do these things.
27
+
28
+ To protect your rights, we need to make restrictions that forbid
29
+ anyone to deny you these rights or to ask you to surrender the rights.
30
+ These restrictions translate to certain responsibilities for you if you
31
+ distribute copies of the software, or if you modify it.
32
+
33
+ For example, if you distribute copies of such a program, whether
34
+ gratis or for a fee, you must give the recipients all the rights that
35
+ you have. You must make sure that they, too, receive or can get the
36
+ source code. And you must show them these terms so they know their
37
+ rights.
38
+
39
+ We protect your rights with two steps: (1) copyright the software, and
40
+ (2) offer you this license which gives you legal permission to copy,
41
+ distribute and/or modify the software.
42
+
43
+ Also, for each author's protection and ours, we want to make certain
44
+ that everyone understands that there is no warranty for this free
45
+ software. If the software is modified by someone else and passed on, we
46
+ want its recipients to know that what they have is not the original, so
47
+ that any problems introduced by others will not reflect on the original
48
+ authors' reputations.
49
+
50
+ Finally, any free program is threatened constantly by software
51
+ patents. We wish to avoid the danger that redistributors of a free
52
+ program will individually obtain patent licenses, in effect making the
53
+ program proprietary. To prevent this, we have made it clear that any
54
+ patent must be licensed for everyone's free use or not licensed at all.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ GNU GENERAL PUBLIC LICENSE
60
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61
+
62
+ 0. This License applies to any program or other work which contains
63
+ a notice placed by the copyright holder saying it may be distributed
64
+ under the terms of this General Public License. The "Program", below,
65
+ refers to any such program or work, and a "work based on the Program"
66
+ means either the Program or any derivative work under copyright law:
67
+ that is to say, a work containing the Program or a portion of it,
68
+ either verbatim or with modifications and/or translated into another
69
+ language. (Hereinafter, translation is included without limitation in
70
+ the term "modification".) Each licensee is addressed as "you".
71
+
72
+ Activities other than copying, distribution and modification are not
73
+ covered by this License; they are outside its scope. The act of
74
+ running the Program is not restricted, and the output from the Program
75
+ is covered only if its contents constitute a work based on the
76
+ Program (independent of having been made by running the Program).
77
+ Whether that is true depends on what the Program does.
78
+
79
+ 1. You may copy and distribute verbatim copies of the Program's
80
+ source code as you receive it, in any medium, provided that you
81
+ conspicuously and appropriately publish on each copy an appropriate
82
+ copyright notice and disclaimer of warranty; keep intact all the
83
+ notices that refer to this License and to the absence of any warranty;
84
+ and give any other recipients of the Program a copy of this License
85
+ along with the Program.
86
+
87
+ You may charge a fee for the physical act of transferring a copy, and
88
+ you may at your option offer warranty protection in exchange for a fee.
89
+
90
+ 2. You may modify your copy or copies of the Program or any portion
91
+ of it, thus forming a work based on the Program, and copy and
92
+ distribute such modifications or work under the terms of Section 1
93
+ above, provided that you also meet all of these conditions:
94
+
95
+ a) You must cause the modified files to carry prominent notices
96
+ stating that you changed the files and the date of any change.
97
+
98
+ b) You must cause any work that you distribute or publish, that in
99
+ whole or in part contains or is derived from the Program or any
100
+ part thereof, to be licensed as a whole at no charge to all third
101
+ parties under the terms of this License.
102
+
103
+ c) If the modified program normally reads commands interactively
104
+ when run, you must cause it, when started running for such
105
+ interactive use in the most ordinary way, to print or display an
106
+ announcement including an appropriate copyright notice and a
107
+ notice that there is no warranty (or else, saying that you provide
108
+ a warranty) and that users may redistribute the program under
109
+ these conditions, and telling the user how to view a copy of this
110
+ License. (Exception: if the Program itself is interactive but
111
+ does not normally print such an announcement, your work based on
112
+ the Program is not required to print an announcement.)
113
+
114
+ These requirements apply to the modified work as a whole. If
115
+ identifiable sections of that work are not derived from the Program,
116
+ and can be reasonably considered independent and separate works in
117
+ themselves, then this License, and its terms, do not apply to those
118
+ sections when you distribute them as separate works. But when you
119
+ distribute the same sections as part of a whole which is a work based
120
+ on the Program, the distribution of the whole must be on the terms of
121
+ this License, whose permissions for other licensees extend to the
122
+ entire whole, and thus to each and every part regardless of who wrote it.
123
+
124
+ Thus, it is not the intent of this section to claim rights or contest
125
+ your rights to work written entirely by you; rather, the intent is to
126
+ exercise the right to control the distribution of derivative or
127
+ collective works based on the Program.
128
+
129
+ In addition, mere aggregation of another work not based on the Program
130
+ with the Program (or with a work based on the Program) on a volume of
131
+ a storage or distribution medium does not bring the other work under
132
+ the scope of this License.
133
+
134
+ 3. You may copy and distribute the Program (or a work based on it,
135
+ under Section 2) in object code or executable form under the terms of
136
+ Sections 1 and 2 above provided that you also do one of the following:
137
+
138
+ a) Accompany it with the complete corresponding machine-readable
139
+ source code, which must be distributed under the terms of Sections
140
+ 1 and 2 above on a medium customarily used for software interchange; or,
141
+
142
+ b) Accompany it with a written offer, valid for at least three
143
+ years, to give any third party, for a charge no more than your
144
+ cost of physically performing source distribution, a complete
145
+ machine-readable copy of the corresponding source code, to be
146
+ distributed under the terms of Sections 1 and 2 above on a medium
147
+ customarily used for software interchange; or,
148
+
149
+ c) Accompany it with the information you received as to the offer
150
+ to distribute corresponding source code. (This alternative is
151
+ allowed only for noncommercial distribution and only if you
152
+ received the program in object code or executable form with such
153
+ an offer, in accord with Subsection b above.)
154
+
155
+ The source code for a work means the preferred form of the work for
156
+ making modifications to it. For an executable work, complete source
157
+ code means all the source code for all modules it contains, plus any
158
+ associated interface definition files, plus the scripts used to
159
+ control compilation and installation of the executable. However, as a
160
+ special exception, the source code distributed need not include
161
+ anything that is normally distributed (in either source or binary
162
+ form) with the major components (compiler, kernel, and so on) of the
163
+ operating system on which the executable runs, unless that component
164
+ itself accompanies the executable.
165
+
166
+ If distribution of executable or object code is made by offering
167
+ access to copy from a designated place, then offering equivalent
168
+ access to copy the source code from the same place counts as
169
+ distribution of the source code, even though third parties are not
170
+ compelled to copy the source along with the object code.
171
+
172
+ 4. You may not copy, modify, sublicense, or distribute the Program
173
+ except as expressly provided under this License. Any attempt
174
+ otherwise to copy, modify, sublicense or distribute the Program is
175
+ void, and will automatically terminate your rights under this License.
176
+ However, parties who have received copies, or rights, from you under
177
+ this License will not have their licenses terminated so long as such
178
+ parties remain in full compliance.
179
+
180
+ 5. You are not required to accept this License, since you have not
181
+ signed it. However, nothing else grants you permission to modify or
182
+ distribute the Program or its derivative works. These actions are
183
+ prohibited by law if you do not accept this License. Therefore, by
184
+ modifying or distributing the Program (or any work based on the
185
+ Program), you indicate your acceptance of this License to do so, and
186
+ all its terms and conditions for copying, distributing or modifying
187
+ the Program or works based on it.
188
+
189
+ 6. Each time you redistribute the Program (or any work based on the
190
+ Program), the recipient automatically receives a license from the
191
+ original licensor to copy, distribute or modify the Program subject to
192
+ these terms and conditions. You may not impose any further
193
+ restrictions on the recipients' exercise of the rights granted herein.
194
+ You are not responsible for enforcing compliance by third parties to
195
+ this License.
196
+
197
+ 7. If, as a consequence of a court judgment or allegation of patent
198
+ infringement or for any other reason (not limited to patent issues),
199
+ conditions are imposed on you (whether by court order, agreement or
200
+ otherwise) that contradict the conditions of this License, they do not
201
+ excuse you from the conditions of this License. If you cannot
202
+ distribute so as to satisfy simultaneously your obligations under this
203
+ License and any other pertinent obligations, then as a consequence you
204
+ may not distribute the Program at all. For example, if a patent
205
+ license would not permit royalty-free redistribution of the Program by
206
+ all those who receive copies directly or indirectly through you, then
207
+ the only way you could satisfy both it and this License would be to
208
+ refrain entirely from distribution of the Program.
209
+
210
+ If any portion of this section is held invalid or unenforceable under
211
+ any particular circumstance, the balance of the section is intended to
212
+ apply and the section as a whole is intended to apply in other
213
+ circumstances.
214
+
215
+ It is not the purpose of this section to induce you to infringe any
216
+ patents or other property right claims or to contest validity of any
217
+ such claims; this section has the sole purpose of protecting the
218
+ integrity of the free software distribution system, which is
219
+ implemented by public license practices. Many people have made
220
+ generous contributions to the wide range of software distributed
221
+ through that system in reliance on consistent application of that
222
+ system; it is up to the author/donor to decide if he or she is willing
223
+ to distribute software through any other system and a licensee cannot
224
+ impose that choice.
225
+
226
+ This section is intended to make thoroughly clear what is believed to
227
+ be a consequence of the rest of this License.
228
+
229
+ 8. If the distribution and/or use of the Program is restricted in
230
+ certain countries either by patents or by copyrighted interfaces, the
231
+ original copyright holder who places the Program under this License
232
+ may add an explicit geographical distribution limitation excluding
233
+ those countries, so that distribution is permitted only in or among
234
+ countries not thus excluded. In such case, this License incorporates
235
+ the limitation as if written in the body of this License.
236
+
237
+ 9. The Free Software Foundation may publish revised and/or new versions
238
+ of the General Public License from time to time. Such new versions will
239
+ be similar in spirit to the present version, but may differ in detail to
240
+ address new problems or concerns.
241
+
242
+ Each version is given a distinguishing version number. If the Program
243
+ specifies a version number of this License which applies to it and "any
244
+ later version", you have the option of following the terms and conditions
245
+ either of that version or of any later version published by the Free
246
+ Software Foundation. If the Program does not specify a version number of
247
+ this License, you may choose any version ever published by the Free Software
248
+ Foundation.
249
+
250
+ 10. If you wish to incorporate parts of the Program into other free
251
+ programs whose distribution conditions are different, write to the author
252
+ to ask for permission. For software which is copyrighted by the Free
253
+ Software Foundation, write to the Free Software Foundation; we sometimes
254
+ make exceptions for this. Our decision will be guided by the two goals
255
+ of preserving the free status of all derivatives of our free software and
256
+ of promoting the sharing and reuse of software generally.
257
+
258
+ NO WARRANTY
259
+
260
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264
+ OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266
+ TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268
+ REPAIR OR CORRECTION.
269
+
270
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273
+ INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274
+ OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275
+ TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276
+ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277
+ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278
+ POSSIBILITY OF SUCH DAMAGES.
279
+
280
+ END OF TERMS AND CONDITIONS
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
@@ -0,0 +1,54 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ ruby-sfst (0.4.4)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ coderay (1.1.2)
10
+ diff-lcs (1.3)
11
+ docile (1.1.5)
12
+ json (2.1.0)
13
+ method_source (0.9.0)
14
+ pry (0.11.3)
15
+ coderay (~> 1.1.0)
16
+ method_source (~> 0.9.0)
17
+ rake (12.3.0)
18
+ rake-compiler (1.0.4)
19
+ rake
20
+ rspec (3.7.0)
21
+ rspec-core (~> 3.7.0)
22
+ rspec-expectations (~> 3.7.0)
23
+ rspec-mocks (~> 3.7.0)
24
+ rspec-core (3.7.0)
25
+ rspec-support (~> 3.7.0)
26
+ rspec-expectations (3.7.0)
27
+ diff-lcs (>= 1.2.0, < 2.0)
28
+ rspec-support (~> 3.7.0)
29
+ rspec-mocks (3.7.0)
30
+ diff-lcs (>= 1.2.0, < 2.0)
31
+ rspec-support (~> 3.7.0)
32
+ rspec-support (3.7.0)
33
+ simplecov (0.15.1)
34
+ docile (~> 1.1.0)
35
+ json (>= 1.8, < 3)
36
+ simplecov-html (~> 0.10.0)
37
+ simplecov-html (0.10.2)
38
+ yard (0.9.12)
39
+
40
+ PLATFORMS
41
+ ruby
42
+
43
+ DEPENDENCIES
44
+ bundler (~> 1.16)
45
+ pry (~> 0.11)
46
+ rake (~> 12.3)
47
+ rake-compiler (~> 1.0)
48
+ rspec (~> 3.7)
49
+ ruby-sfst!
50
+ simplecov (~> 0.15)
51
+ yard (~> 0.9)
52
+
53
+ BUNDLED WITH
54
+ 1.16.1
data/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  `ruby-sfst` is a wrapper for the Stuttgart Finite State Transducer Tools
4
4
  (SFST).
5
5
 
6
- The wrapper is based on SFST 1.3. See
6
+ The wrapper is based on SFST 1.4.7c. See
7
7
  http://www.cis.uni-muenchen.de/~schmid/tools/SFST/ for details on how to obtain
8
8
  SFST and how to write SFST transducers.
9
9
 
data/Rakefile CHANGED
@@ -1,23 +1,14 @@
1
- # coding: utf-8
2
- require 'bundler'
3
- Bundler::GemHelper.install_tasks
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
4
3
 
5
- namespace :doc do
6
- require 'yard'
7
- YARD::Rake::YardocTask.new do |task|
8
- task.files = ['README.md', 'lib/**/*.rb']
9
- task.options = [
10
- '--output-dir', 'doc/yard',
11
- '--markup', 'markdown',
12
- ]
13
- end
14
- end
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ require "rake/extensiontask"
15
7
 
16
- require 'rake/testtask'
8
+ task :build => :compile
17
9
 
18
- Rake::TestTask.new do |t|
19
- t.libs << 'test'
10
+ Rake::ExtensionTask.new("sfst") do |ext|
11
+ ext.lib_dir = "lib/sfst"
20
12
  end
21
13
 
22
- desc "Run tests"
23
- task :default => :test
14
+ task :default => [:clobber, :compile, :spec]
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "sfst"
5
+
6
+ require "pry"
7
+ Pry.start
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
@@ -0,0 +1,879 @@
1
+
2
+ /*******************************************************************/
3
+ /* */
4
+ /* FILE alphabet.C */
5
+ /* MODULE alphabet */
6
+ /* PROGRAM SFST */
7
+ /* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
8
+ /* */
9
+ /* PURPOSE basic FST functions */
10
+ /* */
11
+ /*******************************************************************/
12
+
13
+ #include <climits>
14
+ #include <cstring>
15
+
16
+ #include "utf8.h"
17
+ #include "alphabet.h"
18
+
19
+ #include <map>
20
+ using std::map;
21
+
22
+ namespace SFST {
23
+
24
+ using std::vector;
25
+ using std::ostream;
26
+
27
+ const int BUFFER_SIZE=100000;
28
+
29
+ char EpsilonString[]="<>";
30
+
31
+
32
+
33
+ /*******************************************************************/
34
+ /* */
35
+ /* Alphabet::add */
36
+ /* */
37
+ /*******************************************************************/
38
+
39
+ void Alphabet::add( const char *symbol, Character c )
40
+
41
+ {
42
+ char *s = fst_strdup(symbol);
43
+ cm[c] = s;
44
+ sm[s] = c;
45
+ }
46
+
47
+
48
+ /*******************************************************************/
49
+ /* */
50
+ /* Alphabet::Alphabet */
51
+ /* */
52
+ /*******************************************************************/
53
+
54
+ Alphabet::Alphabet()
55
+
56
+ {
57
+ utf8 = false;
58
+ add(EpsilonString, Label::epsilon);
59
+ }
60
+
61
+
62
+ /*******************************************************************/
63
+ /* */
64
+ /* Alphabet::clear */
65
+ /* */
66
+ /*******************************************************************/
67
+
68
+ void Alphabet::clear()
69
+
70
+ {
71
+ char **s=new char*[cm.size()];
72
+ ls.clear();
73
+ sm.clear();
74
+
75
+ size_t i, n=0;
76
+ for( CharMap::iterator it=cm.begin(); it!=cm.end(); it++ )
77
+ s[n++] = it->second;
78
+ cm.clear();
79
+
80
+ for( i=0; i<n; i++ )
81
+ free(s[i]);
82
+ delete[] s;
83
+ }
84
+
85
+
86
+ /*******************************************************************/
87
+ /* */
88
+ /* Alphabet::print */
89
+ /* */
90
+ /*******************************************************************/
91
+
92
+ void Alphabet::print(void)
93
+
94
+ {
95
+ for( LabelSet::const_iterator it=begin(); it!=end(); it++ ) {
96
+ Label l = *it;
97
+ fprintf(stderr, "%s\n", write_label(l));
98
+ }
99
+ }
100
+
101
+
102
+ /*******************************************************************/
103
+ /* */
104
+ /* Alphabet::new_marker */
105
+ /* */
106
+ /*******************************************************************/
107
+
108
+ Character Alphabet::new_marker()
109
+
110
+ {
111
+ // find some unused character code
112
+ for(Character i=1; i!=0; i++)
113
+ if (cm.find(i) == cm.end()) {
114
+ // create a unique identifier string
115
+ char symbol[100];
116
+ sprintf(symbol,">%ld<",(long)i);
117
+ add(symbol, i);
118
+ return i;
119
+ }
120
+
121
+ throw "Error: too many symbols in transducer definition";
122
+ }
123
+
124
+
125
+ /*******************************************************************/
126
+ /* */
127
+ /* is_marker_symbol */
128
+ /* */
129
+ /*******************************************************************/
130
+
131
// Recognizes strings matching the expression ">[0-9]+<", i.e. a '>',
// at least one decimal digit, a '<', and then the end of the string.
// A NULL pointer is not a marker symbol.
static bool is_marker_symbol( const char *s )
{
  if (s == NULL || s[0] != '>')
    return false;

  // skip over the run of digits that follows the opening '>'
  const char *first_digit = s + 1;
  const char *p = first_digit;
  while (*p >= '0' && *p <= '9')
    ++p;

  // need a non-empty digit run, then '<' as the very last character
  return p != first_digit && p[0] == '<' && p[1] == '\0';
}
142
+
143
+
144
+ /*******************************************************************/
145
+ /* */
146
+ /* Alphabet::delete_markers */
147
+ /* */
148
+ /*******************************************************************/
149
+
150
+ void Alphabet::delete_markers()
151
+
152
+ {
153
+ vector<char*> sym;
154
+ vector<Character> code;
155
+ vector<Label> label;
156
+
157
+ for( CharMap::const_iterator it=cm.begin(); it!=cm.end(); it++ ) {
158
+ Character c=it->first;
159
+ char *s=it->second;
160
+ if (!is_marker_symbol(s)) {
161
+ sym.push_back(fst_strdup(s));
162
+ code.push_back(c);
163
+ }
164
+ }
165
+
166
+ for( LabelSet::const_iterator it=begin(); it!=end(); it++ ) {
167
+ Label l=*it;
168
+ if (!is_marker_symbol(code2symbol(l.upper_char())) &&
169
+ !is_marker_symbol(code2symbol(l.lower_char())))
170
+ label.push_back(l);
171
+ }
172
+
173
+ clear();
174
+
175
+ for( size_t i=0; i<sym.size(); i++ ) {
176
+ add_symbol(sym[i], code[i]);
177
+ free(sym[i]);
178
+ }
179
+ for( size_t i=0; i<label.size(); i++ )
180
+ insert( label[i] );
181
+ }
182
+
183
+
184
+ /*******************************************************************/
185
+ /* */
186
+ /* Alphabet::add_symbol */
187
+ /* */
188
+ /*******************************************************************/
189
+
190
+ Character Alphabet::add_symbol(const char *symbol)
191
+
192
+ {
193
+ if (sm.find(symbol) != sm.end())
194
+ return sm[symbol];
195
+
196
+ // assign the symbol to some unused character
197
+ for(Character i=1; i!=0; i++)
198
+ if (cm.find(i) == cm.end()) {
199
+ add(symbol, i);
200
+ return i;
201
+ }
202
+
203
+ throw "Error: too many symbols in transducer definition";
204
+ }
205
+
206
+
207
+ /*******************************************************************/
208
+ /* */
209
+ /* Alphabet::add_symbol */
210
+ /* */
211
+ /*******************************************************************/
212
+
213
+ void Alphabet::add_symbol( const char *symbol, Character c )
214
+
215
+ {
216
+ // check whether the symbol was previously defined
217
+ int sc=symbol2code(symbol);
218
+ if (sc != EOF) {
219
+ if ((Character)sc == c)
220
+ return;
221
+
222
+ if (strlen(symbol) < 60) {
223
+ static char message[100];
224
+ sprintf(message, "Error: reinserting symbol '%s' in alphabet with incompatible character value %u %u", symbol, (unsigned)sc, (unsigned)c);
225
+ throw message;
226
+ }
227
+ else
228
+ throw "reinserting symbol in alphabet with incompatible character value";
229
+ }
230
+
231
+ // check whether the character is already in use
232
+ const char *s=code2symbol(c);
233
+ if (s == NULL)
234
+ add(symbol, c);
235
+ else {
236
+ if (strcmp(s, symbol) != 0) {
237
+ static char message[100];
238
+ if (strlen(symbol) < 70)
239
+ sprintf(message,"Error: defining symbol %s as character %d (previously defined as %s)", symbol, (unsigned)c, s);
240
+ else
241
+ sprintf(message,"Error: defining a (very long) symbol with previously used character");
242
+ throw message;
243
+ }
244
+ }
245
+ }
246
+
247
+
248
+ /*******************************************************************/
249
+ /* */
250
+ /* Alphabet::write_char */
251
+ /* */
252
+ /*******************************************************************/
253
+
254
+ void Alphabet::write_char( Character c, char *buffer, int *pos,
255
+ bool with_brackets) const
256
+ {
257
+ const char *s = code2symbol(c);
258
+
259
+ // quote colons
260
+ if (strcmp(s,":") == 0 || strcmp(s,"\\") == 0) {
261
+ buffer[(*pos)++] = '\\';
262
+ buffer[(*pos)++] = s[0];
263
+ }
264
+ else if (s) {
265
+ int i = 0;
266
+ int l=(int)strlen(s)-1;
267
+ if (!with_brackets && s[i] == '<' && s[l] == '>') { i++; l--; }
268
+ while (i <= l)
269
+ buffer[(*pos)++] = s[i++];
270
+ }
271
+ else {
272
+ unsigned int uc = c;
273
+ if (uc>=32 && uc<256)
274
+ buffer[(*pos)++] = (char)c;
275
+ else {
276
+ sprintf(buffer+(*pos),"\\%u", uc);
277
+ *pos += (int)strlen(buffer+(*pos));
278
+ }
279
+ }
280
+ buffer[*pos] = '\0';
281
+ }
282
+
283
+
284
+ /*******************************************************************/
285
+ /* */
286
+ /* Alphabet::write_char */
287
+ /* */
288
+ /*******************************************************************/
289
+
290
+ const char *Alphabet::write_char( Character c, bool with_brackets ) const
291
+
292
+ {
293
+ static char buffer[1000];
294
+ int n=0;
295
+
296
+ write_char( c, buffer, &n, with_brackets );
297
+ return buffer;
298
+ }
299
+
300
+
301
+ /*******************************************************************/
302
+ /* */
303
+ /* Alphabet::write_label */
304
+ /* */
305
+ /*******************************************************************/
306
+
307
+ void Alphabet::write_label( Label l, char *buffer, int *pos,
308
+ bool with_brackets ) const
309
+ {
310
+ Character lc=l.lower_char();
311
+ Character uc=l.upper_char();
312
+ write_char( lc, buffer, pos, with_brackets );
313
+ if (lc != uc) {
314
+ buffer[(*pos)++] = ':';
315
+ write_char( uc, buffer, pos, with_brackets );
316
+ }
317
+ }
318
+
319
+
320
+ /*******************************************************************/
321
+ /* */
322
+ /* Alphabet::write_label */
323
+ /* */
324
+ /*******************************************************************/
325
+
326
+ const char *Alphabet::write_label( Label l, bool with_brackets ) const
327
+
328
+ {
329
+ static char buffer[1000];
330
+ int n=0;
331
+ write_label( l, buffer, &n, with_brackets );
332
+ return buffer;
333
+ }
334
+
335
+
336
+ /*******************************************************************/
337
+ /* */
338
+ /* Alphabet::insert_symbols */
339
+ /* */
340
+ /*******************************************************************/
341
+
342
+ void Alphabet::insert_symbols( const Alphabet &a )
343
+
344
+ {
345
+ for( CharMap::const_iterator it=a.cm.begin(); it!=a.cm.end(); it++ )
346
+ add_symbol(it->second, it->first);
347
+ }
348
+
349
+
350
+ /*******************************************************************/
351
+ /* */
352
+ /* Alphabet::complement */
353
+ /* */
354
+ /*******************************************************************/
355
+
356
+ void Alphabet::complement( vector<Character> &sym )
357
+
358
+ {
359
+ vector<Character> result;
360
+ for( CharMap::const_iterator it=cm.begin(); it!=cm.end(); it++ ) {
361
+ Character c = it->first;
362
+ if (c != Label::epsilon) {
363
+ size_t i;
364
+ for( i=0; i<sym.size(); i++ )
365
+ if (sym[i] == c)
366
+ break;
367
+ if (i == sym.size())
368
+ result.push_back(c);
369
+ }
370
+ }
371
+ sym.swap(result);
372
+ }
373
+
374
+
375
+ /*******************************************************************/
376
+ /* */
377
+ /* Alphabet::copy */
378
+ /* */
379
+ /*******************************************************************/
380
+
381
+ void Alphabet::copy( const Alphabet &a, Level level )
382
+
383
+ {
384
+ utf8 = a.utf8;
385
+ sm.resize(a.sm.size());
386
+ cm.resize(a.sm.size());
387
+ insert_symbols( a );
388
+ for( LabelSet::const_iterator it=a.begin(); it!=a.end(); it++ ) {
389
+ Label l = *it;
390
+ if (level == lower)
391
+ ls.insert( Label(l.lower_char()) );
392
+ else if (level == upper)
393
+ ls.insert( Label(l.upper_char()) );
394
+ else
395
+ ls.insert( l );
396
+ }
397
+ }
398
+
399
+
400
+ /*******************************************************************/
401
+ /* */
402
+ /* Alphabet::compose */
403
+ /* */
404
+ /*******************************************************************/
405
+
406
+ void Alphabet::compose( const Alphabet &la, const Alphabet &ua )
407
+
408
+ {
409
+ // insert the symbols
410
+ insert_symbols(la);
411
+ insert_symbols(ua);
412
+ utf8 = la.utf8;
413
+
414
+ map<Character, set<Character> > cs;
415
+
416
+ // create a table for a quick lookup of the target characters
417
+ for( iterator it=ua.begin(); it!=ua.end(); it++ ) {
418
+ Character lc=it->lower_char();
419
+ if (lc == Label::epsilon)
420
+ insert(*it);
421
+ else
422
+ cs[lc].insert(it->upper_char());
423
+ }
424
+
425
+ for( iterator it=la.begin(); it!=la.end(); it++ ) {
426
+ Character uc=it->upper_char();
427
+ if (uc == Label::epsilon)
428
+ insert(*it);
429
+ else {
430
+ if (cs.find(uc) != cs.end()) {
431
+ set<Character> s=cs[uc];
432
+ Character lc=it->lower_char();
433
+ for( set<Character>::iterator it=s.begin(); it!=s.end(); it++)
434
+ insert(Label(lc, *it));
435
+ }
436
+ }
437
+ }
438
+ }
439
+
440
+
441
+ /*******************************************************************/
442
+ /* */
443
+ /* operator<<(Alphabet) */
444
+ /* */
445
+ /*******************************************************************/
446
+
447
+ ostream &operator<<( ostream &s, const Alphabet &a )
448
+
449
+ {
450
+ for( Alphabet::CharMap::const_iterator it=a.cm.begin(); it!=a.cm.end(); it++ )
451
+ s << it->first << " -> " << it->second << "\n";
452
+ for( Alphabet::iterator it=a.begin(); it!=a.end(); it++ )
453
+ s << a.write_label(*it) << " ";
454
+ s << "\n";
455
+ return s;
456
+ }
457
+
458
+
459
+ /*******************************************************************/
460
+ /* */
461
+ /* Alphabet::next_mcsym */
462
+ /* */
463
+ /* recognizes multi-character symbols which are enclosed with */
464
+ /* angle brackets <...>. If the argument flag insert is false, */
465
+ /* the multi-character symbol must already be in the lexicon in */
466
+ /* order to be recognized. */
467
+ /* */
468
+ /*******************************************************************/
469
+
470
+ int Alphabet::next_mcsym( char* &string, bool insert )
471
+
472
+ {
473
+ char *start=string;
474
+
475
+ if (*start == '<')
476
+ // symbol might start here
477
+ for( char *end=start+1; *end; end++ )
478
+ if (*end == '>') {
479
+ // matching pair of angle brackets found
480
+ // mark the end of the substring with \0
481
+ char lastc = *(++end);
482
+ *end = 0;
483
+
484
+ int c;
485
+ if (insert)
486
+ c = add_symbol( start );
487
+ else
488
+ c = symbol2code(start);
489
+ // restore the original string
490
+ *end = lastc;
491
+
492
+ if (c != EOF) {
493
+ // symbol found
494
+ // return its code
495
+ string = end;
496
+ return (Character)c;
497
+ }
498
+ else
499
+ // not a complex character
500
+ break;
501
+ }
502
+ return EOF;
503
+ }
504
+
505
+
506
+ /*******************************************************************/
507
+ /* */
508
+ /* Alphabet::next_code */
509
+ /* */
510
+ /*******************************************************************/
511
+
512
+ int Alphabet::next_code( char* &string, bool extended, bool insert )
513
+
514
+ {
515
+ if (*string == 0)
516
+ return EOF; // finished
517
+
518
+ int c = next_mcsym(string, insert);
519
+ if (c != EOF)
520
+ return c;
521
+
522
+ if (extended && *string == '\\')
523
+ string++; // remove quotation
524
+
525
+ if (utf8) {
526
+ unsigned int c = utf8toint( &string );
527
+ if (c == 0) {
528
+ fprintf(stderr, "Error in UTF-8 encoding at: <%s>\n", string);
529
+ return EOF; // error encountered in utf8 character
530
+ }
531
+ return (int)add_symbol(int2utf8(c));
532
+ }
533
+ else {
534
+ char buffer[2];
535
+ buffer[0] = *string;
536
+ buffer[1] = 0;
537
+ string++;
538
+ return (int)add_symbol(buffer);
539
+ }
540
+ }
541
+
542
+
543
+ /*******************************************************************/
544
+ /* */
545
+ /* Alphabet::next_label */
546
+ /* */
547
+ /*******************************************************************/
548
+
549
+ Label Alphabet::next_label( char* &string, bool extended )
550
+
551
+ {
552
+ // read first character
553
+ int c = next_code( string, extended );
554
+ if (c == EOF)
555
+ return Label(); // end of string reached
556
+
557
+ Character lc=(Character)c;
558
+ if (!extended || *string != ':') { // single character?
559
+ if (lc == Label::epsilon)
560
+ return next_label(string, extended); // ignore epsilon
561
+ return Label(lc);
562
+ }
563
+
564
+ // read second character
565
+ string++; // jump over ':'
566
+ c = next_code( string, extended );
567
+ if (c == EOF) {
568
+ static char buffer[1000];
569
+ sprintf(buffer,"Error: incomplete symbol in input file: %s", string);
570
+ throw buffer;
571
+ }
572
+
573
+ Label l(lc, (Character)c);
574
+ if (l.is_epsilon())
575
+ return next_label(string, extended); // ignore epsilon transitions
576
+ return l;
577
+ }
578
+
579
+
580
+ /*******************************************************************/
581
+ /* */
582
+ /* Alphabet::string2symseq */
583
+ /* */
584
+ /*******************************************************************/
585
+
586
+ void Alphabet::string2symseq( char *s, vector<Character> &ch )
587
+
588
+ {
589
+ int c;
590
+ while ((c = next_code(s, false, false)) != EOF)
591
+ ch.push_back((Character)c);
592
+ }
593
+
594
+
595
+ /*******************************************************************/
596
+ /* */
597
+ /* Alphabet::string2labelseq */
598
+ /* */
599
+ /*******************************************************************/
600
+
601
+ void Alphabet::string2labelseq( char *s, vector<Label> &labels )
602
+
603
+ {
604
+ Label l;
605
+ while ((l = next_label(s)) != Label::epsilon)
606
+ labels.push_back(l);
607
+ }
608
+
609
+
610
+ /*******************************************************************/
611
+ /* */
612
+ /* Alphabet::store */
613
+ /* */
614
+ /*******************************************************************/
615
+
616
+ void Alphabet::store( FILE *file ) const
617
+
618
+ {
619
+ char c=(utf8)? (char)1: (char)0;
620
+ fputc(c, file);
621
+
622
+ // write the symbol mapping
623
+ Character n=(Character)cm.size();
624
+ fwrite(&n, sizeof(n), 1, file);
625
+ for( CharMap::const_iterator it=cm.begin(); it!=cm.end(); it++ ) {
626
+ Character c=it->first;
627
+ char *s=it->second;
628
+ fwrite(&c, sizeof(c), 1, file);
629
+ fwrite(s, sizeof(char), strlen(s)+1, file);
630
+ }
631
+
632
+ // write the character pairs
633
+ n = (Character)size();
634
+ fwrite(&n, sizeof(n), 1, file);
635
+ for( LabelSet::const_iterator p=ls.begin(); p!=ls.end(); p++ ) {
636
+ Character c=p->lower_char();
637
+ fwrite(&c, sizeof(c), 1, file);
638
+ c = p->upper_char();
639
+ fwrite(&c, sizeof(c), 1, file);
640
+ }
641
+
642
+ if (ferror(file))
643
+ throw "Error encountered while writing alphabet to file\n";
644
+ }
645
+
646
+
647
+ /*******************************************************************/
648
+ /* */
649
+ /* Alphabet::read */
650
+ /* */
651
+ /*******************************************************************/
652
+
653
+ void Alphabet::read( FILE *file )
654
+
655
+ {
656
+ utf8 = (fgetc(file) != 0);
657
+
658
+ // read the symbol mapping
659
+ Character n=0;
660
+ read_num(&n, sizeof(n), file);
661
+ for( unsigned i=0; i<n; i++) {
662
+ char buffer[BUFFER_SIZE];
663
+ Character c;
664
+ read_num(&c, sizeof(c), file);
665
+ if (!read_string(buffer, BUFFER_SIZE, file) ||
666
+ feof(file) || ferror(file))
667
+ throw "Error1 occurred while reading alphabet!\n";
668
+ add_symbol(buffer, c);
669
+ }
670
+
671
+ // read the character pairs
672
+ read_num(&n, sizeof(n), file);
673
+ if (ferror(file))
674
+ throw "Error2 occurred while reading alphabet!\n";
675
+ for( unsigned i=0; i<n; i++) {
676
+ Character lc, uc;
677
+ read_num(&lc, sizeof(lc), file);
678
+ read_num(&uc, sizeof(uc), file);
679
+ insert(Label(lc, uc));
680
+ }
681
+ if (ferror(file))
682
+ throw "Error3 occurred while reading alphabet!\n";
683
+ }
684
+
685
+
686
+ /*******************************************************************/
687
+ /* */
688
+ /* Alphabet::compute_score */
689
+ /* */
690
+ /*******************************************************************/
691
+
692
+ int Alphabet::compute_score( Analysis &ana )
693
+
694
+ {
695
+ // check whether the morpheme boundaries are explicitly marked
696
+ // with <X> tags
697
+ int score=0;
698
+ for( size_t i=0; i<ana.size(); i++ ) {
699
+
700
+ // get next symbol
701
+ const char *sym=write_char(ana[i].lower_char());
702
+
703
+ if (strcmp(sym,"<X>") == 0)
704
+ score--;
705
+ }
706
+ if (score < 0)
707
+ return score;
708
+
709
+ // No explicit morpheme boundary markers have been found.
710
+ // Count the number of part-of-speech and PREF tags.
711
+ for( size_t i=0; i<ana.size(); i++ ) {
712
+
713
+ // get next symbol
714
+ const char *sym=write_char(ana[i].lower_char());
715
+
716
+ // Is it not a multi-character symbol
717
+ if (sym[0] != '<' || sym[1] == 0)
718
+ continue;
719
+
720
+ // Is it a POS tag starting with "+" like <+NN>?
721
+ if (sym[1] == '+') {
722
+ const char *t=sym+2;
723
+ for( ; *t >= 'A' && *t <= 'Z'; t++) ;
724
+ if (t > sym+2 && *t == '>')
725
+ return score;
726
+ }
727
+
728
+ // Is it a potential POS tag (i.e. all uppercase)?
729
+ const char *t = sym+1;
730
+ for( ; *t >= 'A' && *t <= 'Z'; t++) ;
731
+ if (t == sym+1 || *t != '>')
732
+ continue;
733
+
734
+ // uppercase symbol found
735
+ if (strcmp(sym,"<SUFF>") == 0 ||
736
+ strcmp(sym,"<OLDORTH>") == 0 ||
737
+ strcmp(sym,"<NEWORTH>") == 0)
738
+ continue; // not what we are looking for
739
+
740
+ // disprefer nouns with prefixes
741
+ if (strcmp(sym,"<PREF>") == 0)
742
+ score-=2;
743
+
744
+ if (strcmp(sym,"<V>") == 0 || strcmp(sym,"<ADJ>") == 0) {
745
+ bool is_verb=(strcmp(sym,"<V>")==0);
746
+ // get the next non-empty symbol
747
+ Character c=Label::epsilon;
748
+ size_t k;
749
+ for( k=i+1; k<ana.size(); k++ )
750
+ if ((c = ana[k].lower_char()) != Label::epsilon)
751
+ break;
752
+ // Is it a participle
753
+ if (c != Label::epsilon) {
754
+ sym = write_char(c);
755
+ if (strcmp(sym,"<OLDORTH>") == 0 ||
756
+ strcmp(sym,"<NEWORTH>") == 0 ||
757
+ strcmp(sym,"<SUFF>") == 0) {
758
+ for( k++; k<ana.size(); k++ )
759
+ if ((c = ana[k].lower_char()) != Label::epsilon)
760
+ break;
761
+ if (c != Label::epsilon)
762
+ sym = write_char(c);
763
+ }
764
+ if (is_verb &&
765
+ (strcmp(sym,"<PPres>") == 0 || strcmp(sym,"<PPast>") == 0))
766
+ continue; // don't consider participles as complex
767
+ if (!is_verb &&
768
+ (strcmp(sym,"<Sup>") == 0 || strcmp(sym,"<Comp>") == 0))
769
+ continue;
770
+ }
771
+ }
772
+ score--;
773
+ }
774
+ return score;
775
+ }
776
+
777
+
778
+
779
+ /*******************************************************************/
780
+ /* */
781
+ /* Alphabet::disambiguate */
782
+ /* */
783
+ /*******************************************************************/
784
+
785
+ void Alphabet::disambiguate( vector<Analysis> &analyses )
786
+
787
+ {
788
+ // compute the scores
789
+ int bestscore=INT_MIN;
790
+ vector<int> score;
791
+
792
+ for( size_t i=0; i<analyses.size(); i++ ) {
793
+ score.push_back(compute_score(analyses[i]));
794
+ if (bestscore < score[i])
795
+ bestscore = score[i];
796
+ }
797
+
798
+ // delete suboptimal analyses
799
+ size_t k=0;
800
+ for( size_t i=0; i<analyses.size(); i++ )
801
+ if (score[i] == bestscore)
802
+ analyses[k++] = analyses[i];
803
+ analyses.resize(k);
804
+ }
805
+
806
+
807
+
808
+ /*******************************************************************/
809
+ /* */
810
+ /* Alphabet::print_analysis */
811
+ /* */
812
+ /*******************************************************************/
813
+
814
+ char *Alphabet::print_analysis( Analysis &ana, bool both_layers )
815
+
816
+ {
817
+ vector<char> ch;
818
+
819
+ // for each transition
820
+ for( size_t i=0; i<ana.size(); i++ ) {
821
+
822
+ // get the transition label
823
+ Label l=ana[i];
824
+ const char *s;
825
+
826
+ // either print the analysis symbol or the whole label
827
+ if (both_layers)
828
+ s = write_label(l);
829
+ else if (l.lower_char() != Label::epsilon)
830
+ s = write_char(l.lower_char());
831
+ else
832
+ continue;
833
+
834
+ // copy the characters to the character array
835
+ while (*s)
836
+ ch.push_back(*(s++));
837
+ }
838
+ ch.push_back(0); // terminate the string
839
+
840
+ static char *result=NULL;
841
+ if (result != NULL)
842
+ delete[] result;
843
+ result = new char[ch.size()];
844
+ for( size_t i=0; i<ch.size(); i++ )
845
+ result[i] = ch[i];
846
+
847
+ return result;
848
+ }
849
+
850
+
851
+ /*******************************************************************/
852
+ /* */
853
+ /* Alphabet::operator== */
854
+ /* */
855
+ /*******************************************************************/
856
+
857
+ bool Alphabet::operator==(const Alphabet &alpha) const
858
+
859
+ {
860
+ for ( SymbolMap::const_iterator it = this->sm.begin(); it != this->sm.end(); it++ )
861
+ {
862
+ SymbolMap::const_iterator alpha_it = alpha.sm.find(it->first);
863
+ if ( alpha_it == alpha.sm.end() )
864
+ return false;
865
+ if ( alpha_it->second == it->second )
866
+ return false;
867
+ }
868
+ for ( SymbolMap::const_iterator alpha_it = alpha.sm.begin(); alpha_it != alpha.sm.end(); alpha_it++ )
869
+ {
870
+ SymbolMap::const_iterator it = this->sm.find(alpha_it->first);
871
+ if ( it == this->sm.end() )
872
+ return false;
873
+ if ( it->second == alpha_it->second )
874
+ return false;
875
+ }
876
+ return true;
877
+ }
878
+
879
+ }