RubyGems - ruby-sfst - Versions diffs - 0.4.3 → 0.4.4 - Mend

ruby-sfst 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +1 -0
data/COPYING +280 -0
data/Gemfile +3 -0
data/Gemfile.lock +54 -0
data/README.md +1 -1
data/Rakefile +9 -18
data/bin/console +7 -0
data/bin/setup +6 -0
data/ext/sfst/alphabet.cc +879 -0
data/ext/sfst/alphabet.h +302 -0
data/ext/sfst/basic.cc +85 -0
data/ext/{sfst_machine → sfst}/basic.h +7 -4
data/ext/sfst/compact.cc +629 -0
data/ext/sfst/compact.h +100 -0
data/ext/sfst/determinise.cc +279 -0
data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
data/ext/sfst/fst.cc +1150 -0
data/ext/sfst/fst.h +374 -0
data/ext/sfst/hopcroft.cc +681 -0
data/ext/sfst/interface.cc +1921 -0
data/ext/sfst/interface.h +171 -0
data/ext/sfst/make-compact.cc +323 -0
data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
data/ext/sfst/mem.h +80 -0
data/ext/sfst/operators.cc +1273 -0
data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
data/ext/sfst/sgi.h +72 -0
data/ext/sfst/utf8.cc +149 -0
data/ext/{sfst_machine → sfst}/utf8.h +7 -4
data/lib/sfst.rb +2 -1
data/lib/sfst/version.rb +1 -1
data/ruby-sfst.gemspec +23 -23
metadata +107 -35
data/ext/sfst_machine/alphabet.cc +0 -812
data/ext/sfst_machine/alphabet.h +0 -273
data/ext/sfst_machine/basic.cc +0 -84
data/ext/sfst_machine/compact.cc +0 -616
data/ext/sfst_machine/compact.h +0 -98
data/ext/sfst_machine/determinise.cc +0 -303
data/ext/sfst_machine/fst.cc +0 -1000
data/ext/sfst_machine/fst.h +0 -369
data/ext/sfst_machine/interface.cc +0 -1842
data/ext/sfst_machine/interface.h +0 -93
data/ext/sfst_machine/make-compact.cc +0 -327
data/ext/sfst_machine/mem.h +0 -74
data/ext/sfst_machine/operators.cc +0 -1131
data/ext/sfst_machine/sgi.h +0 -44
data/ext/sfst_machine/utf8.cc +0 -146
data/test/test_sfst.fst +0 -3
data/test/test_sfst.rb +0 -114

data/ext/{sfst_machine → sfst}/utf8.h RENAMED

@@ -12,8 +12,11 @@
 #ifndef _UTF8_H_
 #define _UTF8_H_
-unsigned int utf8toint( char *s );
-unsigned int utf8toint( char **s );
-char *int2utf8( unsigned int );
+namespace SFST {
+  unsigned int utf8toint( char *s );
+  unsigned int utf8toint( char **s );
+  char *int2utf8( unsigned int );
+}
 #endif

data/lib/sfst.rb CHANGED

@@ -4,7 +4,8 @@
 #
 # Written by Marius L. Jøhndal, 2008.
 #
-require 'sfst_machine'
+require 'sfst/version'
+require 'sfst/sfst'
 module SFST
   # Compiles an SFST transducer +source+ and saves it as +machine+.

data/lib/sfst/version.rb CHANGED

@@ -1,3 +1,3 @@
 module SFST
-  VERSION = "0.4.3" unless defined?(SFST::VERSION)
+  VERSION = "0.4.4" unless defined?(SFST::VERSION)
 end

data/ruby-sfst.gemspec CHANGED

@@ -3,28 +3,28 @@ lib = File.expand_path('../lib', __FILE__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'sfst/version'
-Gem::Specification.new do |s|
-  s.authors = ["Marius L. Jøhndal"]
-  s.description = %q{A wrapper for the Stuttgart Finite State Transducer Tools (SFST).}
-  s.summary = %q{Stuttgart Finite State Transducer Tools interface}
-  s.email = ['mariuslj (at) ifi [dot] uio (dot) no']
-  s.files = %w(CHANGELOG.md README.md Rakefile ruby-sfst.gemspec)
-  s.files += Dir.glob("ext/**/*.C")
-  s.files += Dir.glob("ext/**/*.h")
-  s.files += Dir.glob("ext/**/*.rb")
-  s.files += Dir.glob("ext/**/*.cc")
-  s.files += Dir.glob("lib/**/*.rb")
-  s.files += Dir.glob("test/*.fst")
-  s.files += Dir.glob("test/*.rb")
-  s.homepage = "http://github.com/mlj/ruby-sfst"
-  s.licenses = ['GPL2']
-  s.name = "ruby-sfst"
-  s.require_paths = ["lib"]
-  s.required_rubygems_version = '>= 1.3.5'
-  s.extensions = ["ext/sfst_machine/extconf.rb"]
-  s.test_files += Dir.glob("test/*.rb")
-  s.version = SFST::VERSION
+Gem::Specification.new do |spec|
+  spec.name          = "ruby-sfst"
+  spec.version       = SFST::VERSION
+  spec.authors       = ["Marius L. Jøhndal"]
+  spec.email         = ["mariuslj@ifi.uio.no"]
+  spec.license       = 'GPL2'
-  s.add_development_dependency 'bundler', '~> 1.0'
-  s.add_development_dependency 'test-unit', '~> 3.0'
+  spec.summary       = %q{Stuttgart Finite State Transducer Tools interface}
+  spec.description   = %q{A wrapper for the Stuttgart Finite State Transducer Tools (SFST).}
+  spec.homepage      = "http://github.com/mlj/ruby-sfst"
+  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^spec/}) } - %w(.gitignore .rspec .travis.yml)
+  spec.require_paths = ["lib"]
+  spec.extensions    = ["ext/sfst/extconf.rb"]
+  spec.required_ruby_version = '>= 1.9'
+  spec.add_development_dependency 'bundler', '~> 1.16'
+  spec.add_development_dependency 'rake', '~> 12.3'
+  spec.add_development_dependency 'rake-compiler', '~> 1.0'
+  spec.add_development_dependency 'rspec', '~> 3.7'
+  spec.add_development_dependency 'pry', '~> 0.11'
+  spec.add_development_dependency 'simplecov', '~> 0.15'
+  spec.add_development_dependency 'yard', '~> 0.9'
 end

metadata CHANGED

@@ -1,17 +1,45 @@
 --- !ruby/object:Gem::Specification
 name: ruby-sfst
 version: !ruby/object:Gem::Version
-  version: 0.4.3
+  version: 0.4.4
 platform: ruby
 authors:
 - Marius L. Jøhndal
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-05-18 00:00:00.000000000 Z
+date: 2017-12-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.16'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.16'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '12.3'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '12.3'
+- !ruby/object:Gem::Dependency
+  name: rake-compiler
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
@@ -25,55 +53,101 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '1.0'
 - !ruby/object:Gem::Dependency
-  name: test-unit
+  name: rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.0'
+        version: '3.7'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.0'
+        version: '3.7'
+- !ruby/object:Gem::Dependency
+  name: pry
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.11'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.11'
+- !ruby/object:Gem::Dependency
+  name: simplecov
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.15'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.15'
+- !ruby/object:Gem::Dependency
+  name: yard
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.9'
 description: A wrapper for the Stuttgart Finite State Transducer Tools (SFST).
 email:
-- mariuslj (at) ifi [dot] uio (dot) no
+- mariuslj@ifi.uio.no
 executables: []
 extensions:
-- ext/sfst_machine/extconf.rb
+- ext/sfst/extconf.rb
 extra_rdoc_files: []
 files:
 - CHANGELOG.md
+- COPYING
+- Gemfile
+- Gemfile.lock
 - README.md
 - Rakefile
-- ext/sfst_machine/alphabet.cc
-- ext/sfst_machine/alphabet.h
-- ext/sfst_machine/basic.cc
-- ext/sfst_machine/basic.h
-- ext/sfst_machine/compact.cc
-- ext/sfst_machine/compact.h
-- ext/sfst_machine/determinise.cc
-- ext/sfst_machine/extconf.rb
-- ext/sfst_machine/fst.cc
-- ext/sfst_machine/fst.h
-- ext/sfst_machine/interface.cc
-- ext/sfst_machine/interface.h
-- ext/sfst_machine/make-compact.cc
-- ext/sfst_machine/make-compact.h
-- ext/sfst_machine/mem.h
-- ext/sfst_machine/operators.cc
-- ext/sfst_machine/sfst_machine.cc
-- ext/sfst_machine/sgi.h
-- ext/sfst_machine/utf8.cc
-- ext/sfst_machine/utf8.h
+- bin/console
+- bin/setup
+- ext/sfst/alphabet.cc
+- ext/sfst/alphabet.h
+- ext/sfst/basic.cc
+- ext/sfst/basic.h
+- ext/sfst/compact.cc
+- ext/sfst/compact.h
+- ext/sfst/determinise.cc
+- ext/sfst/extconf.rb
+- ext/sfst/fst.cc
+- ext/sfst/fst.h
+- ext/sfst/hopcroft.cc
+- ext/sfst/interface.cc
+- ext/sfst/interface.h
+- ext/sfst/make-compact.cc
+- ext/sfst/make-compact.h
+- ext/sfst/mem.h
+- ext/sfst/operators.cc
+- ext/sfst/sfst_machine.cc
+- ext/sfst/sgi.h
+- ext/sfst/utf8.cc
+- ext/sfst/utf8.h
 - lib/sfst.rb
 - lib/sfst/version.rb
 - ruby-sfst.gemspec
-- test/test_sfst.fst
-- test/test_sfst.rb
 homepage: http://github.com/mlj/ruby-sfst
 licenses:
 - GPL2
@@ -86,18 +160,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '0'
+      version: '1.9'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 1.3.5
+      version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.5
+rubygems_version: 2.6.14
 signing_key:
 specification_version: 4
 summary: Stuttgart Finite State Transducer Tools interface
-test_files:
-- test/test_sfst.rb
-has_rdoc:
+test_files: []

data/ext/sfst_machine/alphabet.cc DELETED

@@ -1,812 +0,0 @@
-/*******************************************************************/
-/*                                                                 */
-/*  FILE     alphabet.C                                            */
-/*  MODULE   alphabet                                              */
-/*  PROGRAM  SFST                                                  */
-/*  AUTHOR   Helmut Schmid, IMS, University of Stuttgart           */
-/*                                                                 */
-/*  PURPOSE  basic FST functions                                   */
-/*                                                                 */
-/*******************************************************************/
-#include <climits>
-#include <cstring>
-#include "utf8.h"
-#include "alphabet.h"
-using std::vector;
-using std::ostream;
-const int BUFFER_SIZE=100000;
-char EpsilonString[]="<>";
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::add                                                  */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::add( const char *symbol, Character c )
-{
-  char *s = fst_strdup(symbol);
-  cm[c] = s;
-  sm[s] = c;
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::Alphabet                                             */
-/*                                                                 */
-/*******************************************************************/
-Alphabet::Alphabet()
-{
-  utf8 = false;
-  add(EpsilonString, Label::epsilon);
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::clear                                                */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::clear()
-{
-  char **s=new char*[cm.size()];
-  ls.clear();
-  sm.clear();
-  size_t i, n=0;
-  for( CharMap::iterator it=cm.begin(); it!=cm.end(); it++ )
-    s[n++] = it->second;
-  cm.clear();
-  for( i=0; i<n; i++ )
-    free(s[i]);
-  delete[] s;
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::new_marker                                           */
-/*                                                                 */
-/*******************************************************************/
-Character Alphabet::new_marker()
-{
-  // find some unused character code
-  for(Character i=1; i!=0; i++)
-    if (cm.find(i) == cm.end()) {
-      // create a unique identifier string
-      char symbol[100];
-      sprintf(symbol,">%ld<",(long)i);
-      add(symbol, i);
-      return i;
-    }
-  throw "Error: too many symbols in transducer definition";
-}
-/*******************************************************************/
-/*                                                                 */
-/*  is_marker_symbol                                               */
-/*                                                                 */
-/*******************************************************************/
-static bool is_marker_symbol( const char *s )
-{
-  // recogize strings matching the expression ">[0-9]+<"
-  if (s != NULL && *s == '>') {
-    do { s++; } while (*s >= '0' && *s <= '9');
-    if (*s=='<' && *(s+1) == 0 && *(s-1) != '>')
-      return true;
-  }
-  return false;
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::delete_markers                                       */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::delete_markers()
-{
-  vector<char*> sym;
-  vector<Character> code;
-  vector<Label> label;
-  for( CharMap::const_iterator it=cm.begin(); it!=cm.end(); it++ ) {
-    Character c=it->first;
-    char *s=it->second;
-    if (!is_marker_symbol(s)) {
-      sym.push_back(fst_strdup(s));
-      code.push_back(c);
-    }
-  }
-  for( LabelSet::const_iterator it=begin(); it!=end(); it++ ) {
-    Label l=*it;
-    if (!is_marker_symbol(code2symbol(l.upper_char())) &&
-	!is_marker_symbol(code2symbol(l.lower_char())))
-      label.push_back(l);
-  }
-  clear();
-  for( size_t i=0; i<sym.size(); i++ ) {
-    add_symbol(sym[i], code[i]);
-    free(sym[i]);
-  }
-  for( size_t i=0; i<label.size(); i++ )
-    insert( label[i] );
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::add_symbol                                           */
-/*                                                                 */
-/*******************************************************************/
-Character Alphabet::add_symbol(const char *symbol)
-{
-  if (sm.find(symbol) != sm.end())
-    return sm[symbol];
-  // assign the symbol to some unused character
-  for(Character i=1; i!=0; i++)
-    if (cm.find(i) == cm.end()) {
-      add(symbol, i);
-      return i;
-    }
-  throw "Error: too many symbols in transducer definition";
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::add_symbol                                           */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::add_symbol( const char *symbol, Character c )
-{
-  // check whether the symbol was previously defined
-  int sc=symbol2code(symbol);
-  if (sc != EOF) {
-    if ((Character)sc == c)
-      return;
-    if (strlen(symbol) < 60) {
-      static char message[100];
-      sprintf(message, "Error: reinserting symbol '%s' in alphabet with incompatible character value %u %u", symbol, (unsigned)sc, (unsigned)c);
-      throw message;
-    }
-    else
-      throw "reinserting symbol in alphabet with incompatible character value";
-  }
-  // check whether the character is already in use
-  const char *s=code2symbol(c);
-  if (s == NULL)
-    add(symbol, c);
-  else {
-    if (strcmp(s, symbol) != 0) {
-      static char message[100];
-      if (strlen(symbol) < 70)
-	sprintf(message,"Error: defining symbol %s as character %d (previously defined as %s)", symbol, (unsigned)c, s);
-      else
-	sprintf(message,"Error: defining a (very long) symbol with previously used character");
-      throw message;
-    }
-  }
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::write_char                                           */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::write_char( Character c, char *buffer, int *pos,
-			   bool with_brackets) const
-{
-  const char *s = code2symbol(c);
-  if (s) {
-    int i = 0;
-    int l=strlen(s)-1;
-    if (!with_brackets && s[i] == '<' && s[l] == '>') { i++; l--; }
-    while (i <= l)
-      buffer[(*pos)++] = s[i++];
-  }
-  else {
-    unsigned int uc = c;
-    if (uc>=32 && uc<256)
-      buffer[(*pos)++] = (char)c;
-    else {
-      sprintf(buffer+(*pos),"\\%u", uc);
-      *pos += strlen(buffer+(*pos));
-    }
-  }
-  buffer[*pos] = '\0';
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::write_char                                           */
-/*                                                                 */
-/*******************************************************************/
-const char *Alphabet::write_char( Character c, bool with_brackets ) const
-{
-  static char buffer[1000];
-  int n=0;
-  write_char( c, buffer, &n, with_brackets );
-  return buffer;
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::write_label                                          */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::write_label( Label l, char *buffer, int *pos,
-			    bool with_brackets ) const
-{
-  Character lc=l.lower_char();
-  Character uc=l.upper_char();
-  write_char( lc, buffer, pos, with_brackets );
-  if (lc != uc) {
-    buffer[(*pos)++] = ':';
-    write_char( uc, buffer, pos, with_brackets );
-  }
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::write_label                                          */
-/*                                                                 */
-/*******************************************************************/
-const char *Alphabet::write_label( Label l, bool with_brackets  ) const
-{
-  static char buffer[1000];
-  int n=0;
-  write_label( l, buffer, &n, with_brackets );
-  return buffer;
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::insert_symbols                                       */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::insert_symbols( const Alphabet &a )
-{
-  for( CharMap::const_iterator it=a.cm.begin(); it!=a.cm.end(); it++ )
-    add_symbol(it->second, it->first);
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::complement                                           */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::complement( vector<Character> &sym )
-{
-  vector<Character> result;
-  for( CharMap::const_iterator it=cm.begin(); it!=cm.end(); it++ ) {
-    Character c = it->first;
-    if (c != Label::epsilon) {
-      size_t i;
-      for( i=0; i<sym.size(); i++ )
-	if (sym[i] == c)
-	  break;
-      if (i == sym.size())
-	result.push_back(c);
-    }
-  }
-  sym.swap(result);
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::copy                                                 */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::copy( const Alphabet &a )
-{
-  insert_symbols( a );
-  utf8 = a.utf8;
-  for( LabelSet::const_iterator it=a.begin(); it!=a.end(); it++ )
-    ls.insert( *it );
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::compose                                              */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::compose( const Alphabet &la, const Alphabet &ua )
-{
-  // insert the symbols
-  insert_symbols(la);
-  insert_symbols(ua);
-  utf8 = la.utf8;
-  hash_map<Character, hash_set<Character> > cs;
-  // create a hash table for a quick lookup of the target characters
-  for( iterator it=ua.begin(); it!=ua.end(); it++ ) {
-    Character lc=it->lower_char();
-    if (lc == Label::epsilon)
-      insert(*it);
-    else
-      cs[lc].insert(it->upper_char());
-  }
-  for( iterator it=la.begin(); it!=la.end(); it++ ) {
-    Character uc=it->upper_char();
-    if (uc == Label::epsilon)
-      insert(*it);
-    else {
-      if (cs.find(uc) != cs.end()) {
-	hash_set<Character> s=cs[uc];
-	Character lc=it->lower_char();
-	for( hash_set<Character>::iterator it=s.begin(); it!=s.end(); it++)
-	  insert(Label(lc, *it));
-      }
-    }
-  }
-}
-/*******************************************************************/
-/*                                                                 */
-/*  operator<<(Alphabet)                                           */
-/*                                                                 */
-/*******************************************************************/
-ostream &operator<<( ostream &s, const Alphabet &a )
-{
-  for( Alphabet::CharMap::const_iterator it=a.cm.begin(); it!=a.cm.end(); it++ )
-    s << it->first << " -> " << it->second << "\n";
-  for( Alphabet::iterator it=a.begin(); it!=a.end(); it++ )
-    s << a.write_label(*it) << " ";
-  s << "\n";
-  return s;
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::next_mcsym                                           */
-/*                                                                 */
-/*  recognizes multi-character symbols which are enclosed with     */
-/*  angle brackets <...>. If the argument flag insert is true,     */
-/*  the multi-character symbol must be already in the lexicon in   */
-/*  order to be recognized.                                        */
-/*                                                                 */
-/*******************************************************************/
-int Alphabet::next_mcsym( char* &string, bool insert )
-{
-  char *start=string;
-  if (*start == '<')
-    // symbol might start here
-    for( char *end=start+1; *end; end++ )
-      if (*end == '>') {
-	// matching pair of angle brackets found
-	// mark the end of the substring with \0
-	char lastc = *(++end);
-	*end = 0;
-	int c;
-	if (insert)
-	  c = add_symbol( start );
-	else
-	  c = symbol2code(start);
-	// restore the original string
-	*end = lastc;
-	if (c != EOF) {
-	  // symbol found
-	  // return its code
-	  string = end;
-	  return (Character)c;
-	}
-	else
-	  // not a complex character
-	  break;
-      }
-  return EOF;
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::next_code                                            */
-/*                                                                 */
-/*******************************************************************/
-int Alphabet::next_code( char* &string, bool extended, bool insert )
-{
-  if (*string == 0)
-    return EOF; // finished
-  int c = next_mcsym(string, insert);
-  if (c != EOF)
-    return c;
-  if (extended && *string == '\\')
-    string++; // remove quotation
-  if (utf8) {
-    unsigned int c = utf8toint( &string );
-    return (int)add_symbol(int2utf8(c));
-  }
-  else {
-    char buffer[2];
-    buffer[0] = *string;
-    buffer[1] = 0;
-    string++;
-    return (int)add_symbol(buffer);
-  }
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::next_label                                           */
-/*                                                                 */
-/*******************************************************************/
-Label Alphabet::next_label( char* &string, bool extended )
-{
-  // read first character
-  int c = next_code( string, extended );
-  if (c == EOF)
-    return Label(); // end of string reached
-  Character lc=(Character)c;
-  if (!extended || *string != ':') { // single character?
-    if (lc == Label::epsilon)
-      return next_label(string, extended); // ignore epsilon
-    return Label(lc);
-  }
-  // read second character
-  string++; // jump over ':'
-  c = next_code( string );
-  if (c == EOF) {
-    static char buffer[1000];
-    sprintf(buffer,"Error: incomplete symbol in input file: %s", string);
-    throw buffer;
-  }
-  Label l(lc, (Character)c);
-  if (l.is_epsilon())
-    return next_label(string, extended); // ignore epsilon transitions
-  return l;
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::string2symseq                                        */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::string2symseq( char *s, vector<Character> &ch )
-{
-  int c;
-  while ((c = next_code(s, false)) != EOF)
-    ch.push_back((Character)c);
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::string2labelseq                                      */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::string2labelseq( char *s, vector<Label> &labels )
-{
-  Label l;
-  while ((l = next_label(s)) != Label::epsilon)
-    labels.push_back(l);
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::store                                                */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::store( FILE *file ) const
-{
-  char c=(utf8)? 1: 0;
-  fputc(c, file);
-  // write the symbol mapping
-  Character n=cm.size();
-  fwrite(&n, sizeof(n), 1, file);
-  for( CharMap::const_iterator it=cm.begin(); it!=cm.end(); it++ ) {
-    Character c=it->first;
-    char *s=it->second;
-    fwrite(&c, sizeof(c), 1, file);
-    fwrite(s, sizeof(char), strlen(s)+1, file);
-  }
-  // write the character pairs
-  n = size();
-  fwrite(&n, sizeof(n), 1, file);
-  for( LabelSet::const_iterator p=ls.begin(); p!=ls.end(); p++ ) {
-    Character c=p->lower_char();
-    fwrite(&c, sizeof(c), 1, file);
-    c = p->upper_char();
-    fwrite(&c, sizeof(c), 1, file);
-  }
-  if (ferror(file))
-    throw "Error encountered while writing alphabet to file\n";
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::read                                                 */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::read( FILE *file )
-{
-  utf8 = (fgetc(file) != 0);
-  // read the symbol mapping
-  Character n=0;
-  read_num(&n, sizeof(n), file);
-  for( unsigned i=0; i<n; i++) {
-    char buffer[BUFFER_SIZE];
-    Character c;
-    read_num(&c, sizeof(c), file);
-    if (!read_string(buffer, BUFFER_SIZE, file) ||
-	feof(file) || ferror(file))
-      throw "Error1 occurred while reading alphabet!\n";
-    add_symbol(buffer, c);
-  }
-  // read the character pairs
-  read_num(&n, sizeof(n), file);
-  if (ferror(file))
-    throw "Error2 occurred while reading alphabet!\n";
-  for( unsigned i=0; i<n; i++) {
-    Character lc, uc;
-    read_num(&lc, sizeof(lc), file);
-    read_num(&uc, sizeof(uc), file);
-    insert(Label(lc, uc));
-  }
-  if (ferror(file))
-    throw "Error3 occurred while reading alphabet!\n";
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::compute_score                                        */
-/*                                                                 */
-/*******************************************************************/
-int Alphabet::compute_score( Analysis &ana )
-{
-  // check whether the morpheme boundaries are explicitly marked
-  // with <X> tags
-  int score=0;
-  for( size_t i=0; i<ana.size(); i++ ) {
-    // get next symbol
-    const char *sym=write_char(ana[i].lower_char());
-    if (strcmp(sym,"<X>") == 0)
-      score--;
-  }
-  if (score <  0)
-    return score;
-  // No explicit morphome boundary markers have been found.
-  // Count the number of part-of-speech and PREF tags.
-  for( size_t i=0; i<ana.size(); i++ ) {
-    // get next symbol
-    const char *sym=write_char(ana[i].lower_char());
-    // Is it not a multi-character symbol
-    if (sym[0] != '<' || sym[1] == 0)
-      continue;
-    // Is it a POS tag starting with "+" like <+NN>?
-    if (sym[1] == '+') {
-      const char *t=sym+2;
-      for( ; *t >= 'A' && *t <= 'Z'; t++) ;
-      if (t > sym+2 && *t == '>')
-	return score;
-    }
-    // Is it a potential POS tag (i.e. all uppercase)?
-    const char *t = sym+1;
-    for( ; *t >= 'A' && *t <= 'Z'; t++) ;
-    if (t == sym+1 || *t != '>')
-      continue;
-    // uppercase symbol found
-    if (strcmp(sym,"<SUFF>") == 0 ||
-	strcmp(sym,"<OLDORTH>") == 0 ||
-	strcmp(sym,"<NEWORTH>") == 0)
-      continue; // not what we are looking for
-    // disprefer nouns with prefixes
-    if (strcmp(sym,"<PREF>") == 0)
-      score-=2;
-    if (strcmp(sym,"<V>") == 0 || strcmp(sym,"<ADJ>") == 0) {
-      bool is_verb=(strcmp(sym,"<V>")==0);
-      // get the next non-empty symbol
-      Character c=Label::epsilon;
-      size_t k;
-      for( k=i+1; k<ana.size(); k++ )
-	if ((c = ana[k].lower_char()) != Label::epsilon)
-	  break;
-      // Is it a participle
-      if (c != Label::epsilon) {
-	sym = write_char(c);
-	if (strcmp(sym,"<OLDORTH>") == 0 || strcmp(sym,"<NEWORTH>") == 0) {
-	  for( k++; k<ana.size(); k++ )
-	    if ((c = ana[k].lower_char()) != Label::epsilon)
-	      break;
-	  if (c != Label::epsilon)
-	    sym = write_char(c);
-	}
-	if (is_verb &&
-	    (strcmp(sym,"<PPres>") == 0 || strcmp(sym,"<PPast>") == 0))
-	  continue; // don't consider participles as complex
-	if (!is_verb &&
-	    (strcmp(sym,"<Sup>") == 0 || strcmp(sym,"<Comp>") == 0))
-	  continue; // don't consider participles as complex
-      }
-    }
-    score--;
-  }
-  return score;
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::disambiguate                                         */
-/*                                                                 */
-/*******************************************************************/
-void Alphabet::disambiguate( vector<Analysis> &analyses )
-{
-  // compute the scores
-  int bestscore=INT_MIN;
-  vector<int> score;
-  for( size_t i=0; i<analyses.size(); i++ ) {
-    score.push_back(compute_score(analyses[i]));
-    if (bestscore < score[i])
-      bestscore = score[i];
-  }
-  // delete suboptimal analyses
-  size_t k=0;
-  for( size_t i=0; i<analyses.size(); i++ )
-    if (score[i] == bestscore)
-      analyses[k++] = analyses[i];
-  analyses.resize(k);
-}
-/*******************************************************************/
-/*                                                                 */
-/*  Alphabet::print_analysis                                       */
-/*                                                                 */
-/*******************************************************************/
-char *Alphabet::print_analysis( Analysis &ana, bool both_layers )
-{
-  vector<char> ch;
-  // for each transition
-  for( size_t i=0; i<ana.size(); i++ ) {
-    // get the transition label
-    Label l=ana[i];
-    const char *s;
-    // either print the analysis symbol or the whole label
-    if (both_layers) {
-      s = write_label(l);
-      // quote colons
-      if (strcmp(s,":") == 0)
-	ch.push_back('\\');
-    }
-    else if (l.lower_char() != Label::epsilon)
-      s = write_char(l.lower_char());
-    else
-      continue;
-    // copy the characters to the character array
-    while (*s)
-      ch.push_back(*(s++));
-  }
-  ch.push_back(0); // terminate the string
-  static char *result=NULL;
-  if (result != NULL)
-    delete[] result;
-  result = new char[ch.size()];
-  for( size_t i=0; i<ch.size(); i++ )
-    result[i] = ch[i];
-  return result;
-}