ruby-sfst 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +25 -0
- data/{README.rdoc → README.md} +14 -10
- data/Rakefile +11 -13
- data/lib/sfst/version.rb +3 -0
- data/ruby-sfst.gemspec +23 -68
- metadata +51 -42
- data/CHANGELOG +0 -5
- data/Manifest +0 -32
- data/VERSION +0 -1
- data/ext/sfst_machine/.gitignore +0 -3
- data/ext/sfst_machine/fst-compiler.yy +0 -213
- data/ext/sfst_machine/utf8-scanner.ll +0 -175
- data/lib/.gitignore +0 -1
- data/test/.gitignore +0 -1
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 33ebe15e44bf43836a7b0f40b47ea75a9203a45f
|
4
|
+
data.tar.gz: 12ae1d808be8c3602a72191138cff2da664fc54c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4fce794f851b888960202b2d624d9ebcee447a5fe396e600826412ea2bf0859dd7ced63fe3f472b8deeb51cae5792fe0d15a66ffdb96483912e57a11bbaadd5a
|
7
|
+
data.tar.gz: 6212bba57e50c5d6ebaab638ebe89d3690ae9bc9baadcd9581b13953b91f2dbd1a618bf5461a61178030e12abb07f8bdccbed385d5aa673709c8b0707b2b938c
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
## 0.4.0
|
4
|
+
|
5
|
+
* Replaced jeweler with a plain Rakefile
|
6
|
+
|
7
|
+
## 0.3.1
|
8
|
+
|
9
|
+
* Fixes for GCC 4.7
|
10
|
+
|
11
|
+
## 0.3.0
|
12
|
+
|
13
|
+
* Fixes for Ruby 1.9
|
14
|
+
|
15
|
+
## 0.2.0
|
16
|
+
|
17
|
+
* Update to SFST 1.3
|
18
|
+
|
19
|
+
## 0.1.1
|
20
|
+
|
21
|
+
* Overflow and g++ 4.3 fixes
|
22
|
+
|
23
|
+
## 0.1.0
|
24
|
+
|
25
|
+
* Initial release
|
data/{README.rdoc → README.md}
RENAMED
@@ -1,18 +1,18 @@
|
|
1
|
-
|
1
|
+
# ruby-sfst
|
2
2
|
|
3
3
|
A wrapper for the Stuttgart Finite State Transducer Tools (SFST).
|
4
4
|
|
5
5
|
The wrapper is based on SFST 1.3. See
|
6
|
-
http://www.
|
6
|
+
http://www.cis.uni-muenchen.de/~schmid/tools/SFST/ for
|
7
7
|
details on how to obtain SFST and how to write SFST transducers.
|
8
8
|
|
9
|
-
|
9
|
+
## Installation
|
10
10
|
|
11
|
-
|
11
|
+
gem install ruby-sfst
|
12
12
|
|
13
|
-
|
13
|
+
## Usage
|
14
14
|
|
15
|
-
If you have the following compiled as
|
15
|
+
If you have the following compiled as `test.a`
|
16
16
|
|
17
17
|
ALPHABET = [a-z]
|
18
18
|
$test$ = ({bar}:{foo} | {baz}:{foo})
|
@@ -20,6 +20,7 @@ If you have the following compiled as <tt>test.a</tt>
|
|
20
20
|
|
21
21
|
it can be used like this:
|
22
22
|
|
23
|
+
```ruby
|
23
24
|
fst = SFST::RegularTransducer.new("test.a")
|
24
25
|
fst.analyse('foo')
|
25
26
|
# => ['bar', 'baz']
|
@@ -32,21 +33,24 @@ it can be used like this:
|
|
32
33
|
fst.generate_language { |f| p f }
|
33
34
|
# [["b", "f"], ["a", "o"], ["z", "o"]]
|
34
35
|
# [["b", "f"], ["a", "o"], ["r", "o"]]
|
36
|
+
```
|
35
37
|
|
36
38
|
You can also compile an SFST file. This requires the SFST tools to be
|
37
39
|
installed and available on the current search path.
|
38
40
|
|
41
|
+
```ruby
|
39
42
|
SFST::compile("test.fst", "test.a", :compact => true)
|
43
|
+
```
|
40
44
|
|
41
|
-
Currently,
|
45
|
+
Currently, `ruby-sfst` only supports simple compilation, analysis
|
42
46
|
and generation using regular and compact transducers. It also only
|
43
47
|
supports UTF-8.
|
44
48
|
|
45
|
-
|
49
|
+
## Development
|
46
50
|
|
47
51
|
The project is hosted on github on http://github.com/mlj/ruby-sfst.
|
48
52
|
|
49
|
-
|
53
|
+
## License
|
50
54
|
|
51
55
|
As it includes the SFST code directly, the wrapper inherits the GPL2
|
52
|
-
license of the SFST project.
|
56
|
+
license of the SFST project.
|
data/Rakefile
CHANGED
@@ -1,16 +1,14 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
1
|
+
# coding: utf-8
|
2
|
+
require 'bundler'
|
3
|
+
Bundler::GemHelper.install_tasks
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
5
|
+
namespace :doc do
|
6
|
+
require 'yard'
|
7
|
+
YARD::Rake::YardocTask.new do |task|
|
8
|
+
task.files = ['README.md', 'lib/**/*.rb']
|
9
|
+
task.options = [
|
10
|
+
'--output-dir', 'doc/yard',
|
11
|
+
'--markup', 'markdown',
|
12
|
+
]
|
13
13
|
end
|
14
|
-
rescue LoadError
|
15
|
-
puts "Jeweler not available. Install it with: sudo gem install jeweler"
|
16
14
|
end
|
data/lib/sfst/version.rb
ADDED
data/ruby-sfst.gemspec
CHANGED
@@ -1,73 +1,28 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'sfst/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
|
-
s.
|
8
|
-
s.
|
9
|
-
|
10
|
-
s.
|
11
|
-
s.
|
12
|
-
s.
|
13
|
-
s.
|
14
|
-
s.
|
15
|
-
s.
|
16
|
-
s.
|
17
|
-
|
18
|
-
|
19
|
-
s.files
|
20
|
-
"CHANGELOG",
|
21
|
-
"Manifest",
|
22
|
-
"README.rdoc",
|
23
|
-
"Rakefile",
|
24
|
-
"VERSION",
|
25
|
-
"ext/sfst_machine/.gitignore",
|
26
|
-
"ext/sfst_machine/alphabet.C",
|
27
|
-
"ext/sfst_machine/alphabet.h",
|
28
|
-
"ext/sfst_machine/basic.C",
|
29
|
-
"ext/sfst_machine/basic.h",
|
30
|
-
"ext/sfst_machine/compact.C",
|
31
|
-
"ext/sfst_machine/compact.h",
|
32
|
-
"ext/sfst_machine/determinise.C",
|
33
|
-
"ext/sfst_machine/extconf.rb",
|
34
|
-
"ext/sfst_machine/fst-compiler.C",
|
35
|
-
"ext/sfst_machine/fst-compiler.h",
|
36
|
-
"ext/sfst_machine/fst-compiler.yy",
|
37
|
-
"ext/sfst_machine/fst.C",
|
38
|
-
"ext/sfst_machine/fst.h",
|
39
|
-
"ext/sfst_machine/interface.C",
|
40
|
-
"ext/sfst_machine/interface.h",
|
41
|
-
"ext/sfst_machine/make-compact.C",
|
42
|
-
"ext/sfst_machine/make-compact.h",
|
43
|
-
"ext/sfst_machine/mem.h",
|
44
|
-
"ext/sfst_machine/operators.C",
|
45
|
-
"ext/sfst_machine/sfst_machine.cc",
|
46
|
-
"ext/sfst_machine/sgi.h",
|
47
|
-
"ext/sfst_machine/utf8-scanner.C",
|
48
|
-
"ext/sfst_machine/utf8-scanner.ll",
|
49
|
-
"ext/sfst_machine/utf8.C",
|
50
|
-
"ext/sfst_machine/utf8.h",
|
51
|
-
"lib/.gitignore",
|
52
|
-
"lib/sfst.rb",
|
53
|
-
"ruby-sfst.gemspec",
|
54
|
-
"test/.gitignore",
|
55
|
-
"test/test_sfst.fst",
|
56
|
-
"test/test_sfst.rb"
|
57
|
-
]
|
7
|
+
s.add_development_dependency 'bundler', '~> 1.0'
|
8
|
+
s.authors = ["Marius L. Jøhndal"]
|
9
|
+
s.description = %q{A wrapper for the Stuttgart Finite State Transducer Tools (SFST).}
|
10
|
+
s.summary = %q{Stuttgart Finite State Transducer Tools interface}
|
11
|
+
s.email = ['mariuslj (at) ifi [dot] uio (dot) no']
|
12
|
+
s.files = %w(CHANGELOG.md README.md Rakefile ruby-sfst.gemspec)
|
13
|
+
s.files += Dir.glob("ext/**/*.C")
|
14
|
+
s.files += Dir.glob("ext/**/*.h")
|
15
|
+
s.files += Dir.glob("ext/**/*.rb")
|
16
|
+
s.files += Dir.glob("ext/**/*.cc")
|
17
|
+
s.files += Dir.glob("lib/**/*.rb")
|
18
|
+
s.files += Dir.glob("test/*.fst")
|
19
|
+
s.files += Dir.glob("test/*.rb")
|
58
20
|
s.homepage = "http://github.com/mlj/ruby-sfst"
|
21
|
+
s.licenses = ['GPL2']
|
22
|
+
s.name = "ruby-sfst"
|
59
23
|
s.require_paths = ["lib"]
|
60
|
-
s.
|
61
|
-
s.
|
62
|
-
s.
|
63
|
-
|
64
|
-
if s.respond_to? :specification_version then
|
65
|
-
s.specification_version = 3
|
66
|
-
|
67
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
68
|
-
else
|
69
|
-
end
|
70
|
-
else
|
71
|
-
end
|
24
|
+
s.required_rubygems_version = '>= 1.3.5'
|
25
|
+
s.extensions = ["ext/sfst_machine/extconf.rb"]
|
26
|
+
s.test_files += Dir.glob("test/*.rb")
|
27
|
+
s.version = SFST::VERSION
|
72
28
|
end
|
73
|
-
|
metadata
CHANGED
@@ -1,83 +1,92 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-sfst
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.4.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Marius L. Jøhndal
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
13
|
-
dependencies:
|
11
|
+
date: 2013-09-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
14
27
|
description: A wrapper for the Stuttgart Finite State Transducer Tools (SFST).
|
15
|
-
email:
|
28
|
+
email:
|
29
|
+
- mariuslj (at) ifi [dot] uio (dot) no
|
16
30
|
executables: []
|
17
31
|
extensions:
|
18
32
|
- ext/sfst_machine/extconf.rb
|
19
|
-
extra_rdoc_files:
|
20
|
-
- README.rdoc
|
33
|
+
extra_rdoc_files: []
|
21
34
|
files:
|
22
|
-
- CHANGELOG
|
23
|
-
-
|
24
|
-
- README.rdoc
|
35
|
+
- CHANGELOG.md
|
36
|
+
- README.md
|
25
37
|
- Rakefile
|
26
|
-
-
|
27
|
-
- ext/sfst_machine/.gitignore
|
28
|
-
- ext/sfst_machine/alphabet.C
|
29
|
-
- ext/sfst_machine/alphabet.h
|
30
|
-
- ext/sfst_machine/basic.C
|
31
|
-
- ext/sfst_machine/basic.h
|
38
|
+
- ruby-sfst.gemspec
|
32
39
|
- ext/sfst_machine/compact.C
|
33
|
-
- ext/sfst_machine/
|
40
|
+
- ext/sfst_machine/fst.C
|
41
|
+
- ext/sfst_machine/interface.C
|
42
|
+
- ext/sfst_machine/make-compact.C
|
34
43
|
- ext/sfst_machine/determinise.C
|
35
|
-
- ext/sfst_machine/extconf.rb
|
36
44
|
- ext/sfst_machine/fst-compiler.C
|
37
|
-
- ext/sfst_machine/
|
38
|
-
- ext/sfst_machine/
|
39
|
-
- ext/sfst_machine/
|
45
|
+
- ext/sfst_machine/utf8.C
|
46
|
+
- ext/sfst_machine/utf8-scanner.C
|
47
|
+
- ext/sfst_machine/alphabet.C
|
48
|
+
- ext/sfst_machine/basic.C
|
49
|
+
- ext/sfst_machine/operators.C
|
40
50
|
- ext/sfst_machine/fst.h
|
41
|
-
- ext/sfst_machine/
|
51
|
+
- ext/sfst_machine/fst-compiler.h
|
52
|
+
- ext/sfst_machine/mem.h
|
53
|
+
- ext/sfst_machine/basic.h
|
54
|
+
- ext/sfst_machine/alphabet.h
|
42
55
|
- ext/sfst_machine/interface.h
|
43
|
-
- ext/sfst_machine/make-compact.C
|
44
56
|
- ext/sfst_machine/make-compact.h
|
45
|
-
- ext/sfst_machine/
|
46
|
-
- ext/sfst_machine/operators.C
|
47
|
-
- ext/sfst_machine/sfst_machine.cc
|
57
|
+
- ext/sfst_machine/compact.h
|
48
58
|
- ext/sfst_machine/sgi.h
|
49
|
-
- ext/sfst_machine/utf8-scanner.C
|
50
|
-
- ext/sfst_machine/utf8-scanner.ll
|
51
|
-
- ext/sfst_machine/utf8.C
|
52
59
|
- ext/sfst_machine/utf8.h
|
53
|
-
-
|
60
|
+
- ext/sfst_machine/extconf.rb
|
61
|
+
- ext/sfst_machine/sfst_machine.cc
|
54
62
|
- lib/sfst.rb
|
55
|
-
-
|
56
|
-
- test/.gitignore
|
63
|
+
- lib/sfst/version.rb
|
57
64
|
- test/test_sfst.fst
|
58
65
|
- test/test_sfst.rb
|
59
66
|
homepage: http://github.com/mlj/ruby-sfst
|
60
|
-
licenses:
|
67
|
+
licenses:
|
68
|
+
- GPL2
|
69
|
+
metadata: {}
|
61
70
|
post_install_message:
|
62
71
|
rdoc_options: []
|
63
72
|
require_paths:
|
64
73
|
- lib
|
65
74
|
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
-
none: false
|
67
75
|
requirements:
|
68
|
-
- -
|
76
|
+
- - '>='
|
69
77
|
- !ruby/object:Gem::Version
|
70
78
|
version: '0'
|
71
79
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
-
none: false
|
73
80
|
requirements:
|
74
|
-
- -
|
81
|
+
- - '>='
|
75
82
|
- !ruby/object:Gem::Version
|
76
|
-
version:
|
83
|
+
version: 1.3.5
|
77
84
|
requirements: []
|
78
|
-
rubyforge_project:
|
79
|
-
rubygems_version:
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 2.0.3
|
80
87
|
signing_key:
|
81
|
-
specification_version:
|
88
|
+
specification_version: 4
|
82
89
|
summary: Stuttgart Finite State Transducer Tools interface
|
83
|
-
test_files:
|
90
|
+
test_files:
|
91
|
+
- test/test_sfst.rb
|
92
|
+
has_rdoc:
|
data/CHANGELOG
DELETED
data/Manifest
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
README.rdoc
|
2
|
-
Rakefile
|
3
|
-
Manifest
|
4
|
-
test/test_sfst.rb
|
5
|
-
test/test_sfst.fst
|
6
|
-
CHANGELOG
|
7
|
-
ext/sfst_machine/fst-compiler.h
|
8
|
-
ext/sfst_machine/utf8.C
|
9
|
-
ext/sfst_machine/operators.C
|
10
|
-
ext/sfst_machine/utf8-scanner.ll
|
11
|
-
ext/sfst_machine/determinise.C
|
12
|
-
ext/sfst_machine/interface.C
|
13
|
-
ext/sfst_machine/compact.h
|
14
|
-
ext/sfst_machine/sgi.h
|
15
|
-
ext/sfst_machine/basic.h
|
16
|
-
ext/sfst_machine/fst.h
|
17
|
-
ext/sfst_machine/make-compact.h
|
18
|
-
ext/sfst_machine/fst-compiler.yy
|
19
|
-
ext/sfst_machine/mem.h
|
20
|
-
ext/sfst_machine/compact.C
|
21
|
-
ext/sfst_machine/basic.C
|
22
|
-
ext/sfst_machine/interface.h
|
23
|
-
ext/sfst_machine/sfst_machine.cc
|
24
|
-
ext/sfst_machine/extconf.rb
|
25
|
-
ext/sfst_machine/alphabet.C
|
26
|
-
ext/sfst_machine/fst.C
|
27
|
-
ext/sfst_machine/alphabet.h
|
28
|
-
ext/sfst_machine/make-compact.C
|
29
|
-
ext/sfst_machine/fst-compiler.C
|
30
|
-
ext/sfst_machine/utf8.h
|
31
|
-
ext/sfst_machine/utf8-scanner.C
|
32
|
-
lib/sfst.rb
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.3.1
|
data/ext/sfst_machine/.gitignore
DELETED
data/ext/sfst_machine/fst-compiler.yy
DELETED
@@ -1,213 +0,0 @@
|
|
1
|
-
%{
|
2
|
-
/*******************************************************************/
|
3
|
-
/* */
|
4
|
-
/* FILE fst-compiler.yy */
|
5
|
-
/* MODULE fst-compiler */
|
6
|
-
/* PROGRAM SFST */
|
7
|
-
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
|
8
|
-
/* */
|
9
|
-
/*******************************************************************/
|
10
|
-
|
11
|
-
#include <stdio.h>
|
12
|
-
|
13
|
-
#include "make-compact.h"
|
14
|
-
#include "interface.h"
|
15
|
-
|
16
|
-
using std::cerr;
|
17
|
-
|
18
|
-
extern int yylineno;
|
19
|
-
extern char *yytext;
|
20
|
-
|
21
|
-
void yyerror(char *text);
|
22
|
-
void warn(char *text);
|
23
|
-
void warn2(char *text, char *text2);
|
24
|
-
int yylex( void );
|
25
|
-
int yyparse( void );
|
26
|
-
|
27
|
-
static int Switch=0;
|
28
|
-
Transducer *Result;
|
29
|
-
%}
|
30
|
-
|
31
|
-
%union {
|
32
|
-
int number;
|
33
|
-
Twol_Type type;
|
34
|
-
Repl_Type rtype;
|
35
|
-
char *name;
|
36
|
-
char *value;
|
37
|
-
unsigned char uchar;
|
38
|
-
unsigned int longchar;
|
39
|
-
Character character;
|
40
|
-
Transducer *expression;
|
41
|
-
Range *range;
|
42
|
-
Ranges *ranges;
|
43
|
-
Contexts *contexts;
|
44
|
-
}
|
45
|
-
|
46
|
-
%token <number> NEWLINE ALPHA COMPOSE PRINT POS INSERT SWITCH
|
47
|
-
%token <type> ARROW
|
48
|
-
%token <rtype> REPLACE
|
49
|
-
%token <name> SYMBOL VAR SVAR RVAR RSVAR
|
50
|
-
%token <value> STRING STRING2 UTF8CHAR
|
51
|
-
%token <uchar> CHARACTER
|
52
|
-
|
53
|
-
%type <uchar> SCHAR
|
54
|
-
%type <longchar> LCHAR
|
55
|
-
%type <character> CODE
|
56
|
-
%type <expression> RE
|
57
|
-
%type <range> RANGE VALUE VALUES
|
58
|
-
%type <ranges> RANGES
|
59
|
-
%type <contexts> CONTEXT CONTEXT2 CONTEXTS CONTEXTS2
|
60
|
-
|
61
|
-
%left PRINT INSERT
|
62
|
-
%left ARROW REPLACE
|
63
|
-
%left COMPOSE
|
64
|
-
%left '|'
|
65
|
-
%left '-'
|
66
|
-
%left '&'
|
67
|
-
%left SEQ
|
68
|
-
%left '!' '^' '_'
|
69
|
-
%left '*' '+'
|
70
|
-
%%
|
71
|
-
|
72
|
-
ALL: ASSIGNMENTS RE NEWLINES { Result=result($2, Switch); }
|
73
|
-
;
|
74
|
-
|
75
|
-
ASSIGNMENTS: ASSIGNMENTS ASSIGNMENT {}
|
76
|
-
| ASSIGNMENTS NEWLINE {}
|
77
|
-
| /* nothing */ {}
|
78
|
-
;
|
79
|
-
|
80
|
-
ASSIGNMENT: VAR '=' RE { if (def_var($1,$3)) warn2("assignment of empty transducer to",$1); }
|
81
|
-
| RVAR '=' RE { if (def_rvar($1,$3)) warn2("assignment of empty transducer to",$1); }
|
82
|
-
| SVAR '=' VALUES { if (def_svar($1,$3)) warn2("assignment of empty symbol range to",$1); }
|
83
|
-
| RSVAR '=' VALUES { if (def_svar($1,$3)) warn2("assignment of empty symbol range to",$1); }
|
84
|
-
| RE PRINT STRING { write_to_file($1, $3); }
|
85
|
-
| ALPHA RE { def_alphabet($2); }
|
86
|
-
;
|
87
|
-
|
88
|
-
RE: RE ARROW CONTEXTS2 { $$ = restriction($1,$2,$3,0); }
|
89
|
-
| RE '^' ARROW CONTEXTS2 { $$ = restriction($1,$3,$4,1); }
|
90
|
-
| RE '_' ARROW CONTEXTS2 { $$ = restriction($1,$3,$4,-1); }
|
91
|
-
| RE REPLACE CONTEXT2 { $$ = replace_in_context(minimise(explode($1)),$2,$3,false); }
|
92
|
-
| RE REPLACE '?' CONTEXT2 { $$ = replace_in_context(minimise(explode($1)),$2,$4,true);}
|
93
|
-
| RE REPLACE '(' ')' { $$ = replace(minimise(explode($1)), $2, false); }
|
94
|
-
| RE REPLACE '?' '(' ')' { $$ = replace(minimise(explode($1)), $2, true); }
|
95
|
-
| RE RANGE ARROW RANGE RE { $$ = make_rule($1,$2,$3,$4,$5); }
|
96
|
-
| RE RANGE ARROW RANGE { $$ = make_rule($1,$2,$3,$4,NULL); }
|
97
|
-
| RANGE ARROW RANGE RE { $$ = make_rule(NULL,$1,$2,$3,$4); }
|
98
|
-
| RANGE ARROW RANGE { $$ = make_rule(NULL,$1,$2,$3,NULL); }
|
99
|
-
| RE COMPOSE RE { $$ = composition($1, $3); }
|
100
|
-
| '{' RANGES '}' ':' '{' RANGES '}' { $$ = make_mapping($2,$6); }
|
101
|
-
| RANGE ':' '{' RANGES '}' { $$ = make_mapping(add_range($1,NULL),$4); }
|
102
|
-
| '{' RANGES '}' ':' RANGE { $$ = make_mapping($2,add_range($5,NULL)); }
|
103
|
-
| RE INSERT CODE ':' CODE { $$ = freely_insert($1, $3, $5); }
|
104
|
-
| RE INSERT CODE { $$ = freely_insert($1, $3, $3); }
|
105
|
-
| RANGE ':' RANGE { $$ = new_transducer($1,$3); }
|
106
|
-
| RANGE { $$ = new_transducer($1,$1); }
|
107
|
-
| VAR { $$ = var_value($1); }
|
108
|
-
| RVAR { $$ = rvar_value($1); }
|
109
|
-
| RE '*' { $$ = repetition($1); }
|
110
|
-
| RE '+' { $$ = repetition2($1); }
|
111
|
-
| RE '?' { $$ = optional($1); }
|
112
|
-
| RE RE %prec SEQ { $$ = catenate($1, $2); }
|
113
|
-
| '!' RE { $$ = negation($2); }
|
114
|
-
| SWITCH RE { $$ = switch_levels($2); }
|
115
|
-
| '^' RE { $$ = upper_level($2); }
|
116
|
-
| '_' RE { $$ = lower_level($2); }
|
117
|
-
| RE '&' RE { $$ = conjunction($1, $3); }
|
118
|
-
| RE '-' RE { $$ = subtraction($1, $3); }
|
119
|
-
| RE '|' RE { $$ = disjunction($1, $3); }
|
120
|
-
| '(' RE ')' { $$ = $2; }
|
121
|
-
| STRING { $$ = read_words($1); }
|
122
|
-
| STRING2 { $$ = read_transducer($1); }
|
123
|
-
;
|
124
|
-
|
125
|
-
RANGES: RANGE RANGES { $$ = add_range($1,$2); }
|
126
|
-
| { $$ = NULL; }
|
127
|
-
;
|
128
|
-
|
129
|
-
RANGE: '[' VALUES ']' { $$=$2; }
|
130
|
-
| '[' '^' VALUES ']' { $$=complement_range($3); }
|
131
|
-
| '[' RSVAR ']' { $$=rsvar_value($2); }
|
132
|
-
| '.' { $$=NULL; }
|
133
|
-
| CODE { $$=add_value($1,NULL); }
|
134
|
-
;
|
135
|
-
|
136
|
-
CONTEXTS2: CONTEXTS { $$ = $1; }
|
137
|
-
| '(' CONTEXTS ')' { $$ = $2; }
|
138
|
-
;
|
139
|
-
|
140
|
-
CONTEXTS: CONTEXT ',' CONTEXTS { $$ = add_context($1,$3); }
|
141
|
-
| CONTEXT { $$ = $1; }
|
142
|
-
;
|
143
|
-
|
144
|
-
CONTEXT2: CONTEXT { $$ = $1; }
|
145
|
-
| '(' CONTEXT ')' { $$ = $2; }
|
146
|
-
;
|
147
|
-
|
148
|
-
CONTEXT : RE POS RE { $$ = make_context($1, $3); }
|
149
|
-
| POS RE { $$ = make_context(NULL, $2); }
|
150
|
-
| RE POS { $$ = make_context($1, NULL); }
|
151
|
-
;
|
152
|
-
|
153
|
-
VALUES: VALUE VALUES { $$=append_values($1,$2); }
|
154
|
-
| VALUE { $$ = $1; }
|
155
|
-
;
|
156
|
-
|
157
|
-
VALUE: LCHAR '-' LCHAR { $$=add_values($1,$3,NULL); }
|
158
|
-
| SVAR { $$=svar_value($1); }
|
159
|
-
| LCHAR { $$=add_value(character_code($1),NULL); }
|
160
|
-
| CODE { $$=add_value($1,NULL); }
|
161
|
-
| SCHAR { $$=add_value($1,NULL); }
|
162
|
-
;
|
163
|
-
|
164
|
-
LCHAR: CHARACTER { $$=$1; }
|
165
|
-
| UTF8CHAR { $$=utf8toint($1); free($1); }
|
166
|
-
| SCHAR { $$=$1; }
|
167
|
-
;
|
168
|
-
|
169
|
-
CODE: CHARACTER { $$=character_code($1); }
|
170
|
-
| UTF8CHAR { $$=symbol_code($1); }
|
171
|
-
| SYMBOL { $$=symbol_code($1); }
|
172
|
-
;
|
173
|
-
|
174
|
-
SCHAR: '.' { $$=character_code('.'); }
|
175
|
-
| '!' { $$=character_code('!'); }
|
176
|
-
| '?' { $$=character_code('?'); }
|
177
|
-
| '{' { $$=character_code('{'); }
|
178
|
-
| '}' { $$=character_code('}'); }
|
179
|
-
| ')' { $$=character_code(')'); }
|
180
|
-
| '(' { $$=character_code('('); }
|
181
|
-
| '&' { $$=character_code('&'); }
|
182
|
-
| '|' { $$=character_code('|'); }
|
183
|
-
| '*' { $$=character_code('*'); }
|
184
|
-
| '+' { $$=character_code('+'); }
|
185
|
-
| ':' { $$=character_code(':'); }
|
186
|
-
| ',' { $$=character_code(','); }
|
187
|
-
| '=' { $$=character_code('='); }
|
188
|
-
| '_' { $$=character_code('_'); }
|
189
|
-
| '^' { $$=character_code('^'); }
|
190
|
-
| '-' { $$=character_code('-'); }
|
191
|
-
;
|
192
|
-
|
193
|
-
NEWLINES: NEWLINE NEWLINES {}
|
194
|
-
| /* nothing */ {}
|
195
|
-
;
|
196
|
-
|
197
|
-
%%
|
198
|
-
|
199
|
-
extern FILE *yyin;
|
200
|
-
|
201
|
-
/*******************************************************************/
|
202
|
-
/* */
|
203
|
-
/* yyerror */
|
204
|
-
/* */
|
205
|
-
/*******************************************************************/
|
206
|
-
|
207
|
-
void yyerror(char *text)
|
208
|
-
|
209
|
-
{
|
210
|
-
cerr << "\n" << FileName << ":" << yylineno << ": " << text << " at: ";
|
211
|
-
cerr << yytext << "\naborted.\n";
|
212
|
-
exit(1);
|
213
|
-
}
|
data/ext/sfst_machine/utf8-scanner.ll
DELETED
@@ -1,175 +0,0 @@
|
|
1
|
-
%option 8Bit batch yylineno noyywrap
|
2
|
-
|
3
|
-
/* the "incl" state is used to pick up the name of an include file */
|
4
|
-
%x incl
|
5
|
-
|
6
|
-
%{
|
7
|
-
/*******************************************************************/
|
8
|
-
/* */
|
9
|
-
/* FILE scanner.ll */
|
10
|
-
/* MODULE scanner */
|
11
|
-
/* PROGRAM SFST */
|
12
|
-
/* AUTHOR Helmut Schmid, IMS, University of Stuttgart */
|
13
|
-
/* */
|
14
|
-
/*******************************************************************/
|
15
|
-
|
16
|
-
#include <string.h>
|
17
|
-
|
18
|
-
#include "interface.h"
|
19
|
-
#include "fst-compiler.h"
|
20
|
-
|
21
|
-
#define MAX_INCLUDE_DEPTH 10
|
22
|
-
|
23
|
-
int Include_Stack_Ptr = 0;
|
24
|
-
YY_BUFFER_STATE Include_Stack[MAX_INCLUDE_DEPTH];
|
25
|
-
char *Name_Stack[MAX_INCLUDE_DEPTH];
|
26
|
-
int Lineno_Stack[MAX_INCLUDE_DEPTH];
|
27
|
-
|
28
|
-
char *FileName;
|
29
|
-
|
30
|
-
bool UTF8=true;
|
31
|
-
|
32
|
-
static char *unquote(char *string, bool del_quote=true) {
|
33
|
-
char *s=string, *result=string;
|
34
|
-
if (del_quote)
|
35
|
-
string++;
|
36
|
-
|
37
|
-
while (*string) {
|
38
|
-
if (*string == '\\')
|
39
|
-
string++;
|
40
|
-
*(s++) = *(string++);
|
41
|
-
}
|
42
|
-
|
43
|
-
if (del_quote)
|
44
|
-
s--;
|
45
|
-
*s = '\0';
|
46
|
-
|
47
|
-
return fst_strdup(result);
|
48
|
-
}
|
49
|
-
|
50
|
-
static void print_lineno() {
|
51
|
-
if (!Verbose)
|
52
|
-
return;
|
53
|
-
fputc('\r',stderr);
|
54
|
-
for( int i=0; i<Include_Stack_Ptr; i++ )
|
55
|
-
fputs(" ", stderr);
|
56
|
-
fprintf(stderr,"%s: %d", FileName, yylineno);
|
57
|
-
}
|
58
|
-
|
59
|
-
extern void yyerror(char *text);
|
60
|
-
|
61
|
-
%}
|
62
|
-
|
63
|
-
CC [\x80-\xbf]
|
64
|
-
C1 [A-Za-z0-9._/\-]
|
65
|
-
C2 [A-Za-z0-9._/\-&()+,=?\^|~]
|
66
|
-
C3 [A-Za-z0-9._/\-&()+,=?\^|~#<>]
|
67
|
-
C4 [A-Za-z0-9._/\-&()+,=?\^|~$<>]
|
68
|
-
C5 [\!-;\?-\[\]-\~=]
|
69
|
-
FN [A-Za-z0-9._/\-*+]
|
70
|
-
|
71
|
-
%%
|
72
|
-
|
73
|
-
#include BEGIN(incl);
|
74
|
-
<incl>[ \t]* /* eat the whitespace */
|
75
|
-
<incl>{FN}+ { error2("Missing quotes",yytext); }
|
76
|
-
<incl>\"{FN}+\" { /* got the include file name */
|
77
|
-
FILE *file;
|
78
|
-
char *name=unquote(yytext);
|
79
|
-
if ( Include_Stack_Ptr >= MAX_INCLUDE_DEPTH ) {
|
80
|
-
fprintf( stderr, "Includes nested too deeply" );
|
81
|
-
exit( 1 );
|
82
|
-
}
|
83
|
-
if (Verbose) fputc('\n', stderr);
|
84
|
-
file = fopen( name, "rt" );
|
85
|
-
if (!file)
|
86
|
-
error2("Can't open include file", name);
|
87
|
-
else {
|
88
|
-
Name_Stack[Include_Stack_Ptr] = FileName;
|
89
|
-
FileName = name;
|
90
|
-
Lineno_Stack[Include_Stack_Ptr] = yylineno;
|
91
|
-
yylineno = 1;
|
92
|
-
Include_Stack[Include_Stack_Ptr++]=YY_CURRENT_BUFFER;
|
93
|
-
yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
|
94
|
-
yyin = file;
|
95
|
-
print_lineno();
|
96
|
-
BEGIN(INITIAL);
|
97
|
-
}
|
98
|
-
}
|
99
|
-
<<EOF>> {
|
100
|
-
if (Verbose)
|
101
|
-
fputc('\n', stderr);
|
102
|
-
if ( --Include_Stack_Ptr < 0 )
|
103
|
-
yyterminate();
|
104
|
-
else {
|
105
|
-
free(FileName);
|
106
|
-
FileName = Name_Stack[Include_Stack_Ptr];
|
107
|
-
yylineno = Lineno_Stack[Include_Stack_Ptr];
|
108
|
-
yy_delete_buffer( YY_CURRENT_BUFFER );
|
109
|
-
yy_switch_to_buffer(Include_Stack[Include_Stack_Ptr]);
|
110
|
-
}
|
111
|
-
}
|
112
|
-
|
113
|
-
|
114
|
-
^[ \t]*\%.*\r?\n { print_lineno(); /* ignore comments */ }
|
115
|
-
|
116
|
-
\%.*\\[ \t]*\r?\n { print_lineno(); /* ignore comments */ }
|
117
|
-
|
118
|
-
\%.* { /* ignore comments */ }
|
119
|
-
|
120
|
-
|
121
|
-
^[ \t]*ALPHABET[ \t]*= { return ALPHA; }
|
122
|
-
|
123
|
-
\|\| { return COMPOSE; }
|
124
|
-
"<=>" { yylval.type = twol_both; return ARROW; }
|
125
|
-
"=>" { yylval.type = twol_right;return ARROW; }
|
126
|
-
"<=" { yylval.type = twol_left; return ARROW; }
|
127
|
-
"^->" { yylval.rtype = repl_up; return REPLACE; }
|
128
|
-
"_->" { yylval.rtype = repl_down; return REPLACE; }
|
129
|
-
"/->" { yylval.rtype = repl_right;return REPLACE; }
|
130
|
-
"\\->" { yylval.rtype = repl_left; return REPLACE; }
|
131
|
-
">>" { return PRINT; }
|
132
|
-
"<<" { return INSERT; }
|
133
|
-
"__" { return POS; }
|
134
|
-
"^_" { return SWITCH; }
|
135
|
-
|
136
|
-
[.,{}\[\]()&!?|*+:=_\^\-] { return yytext[0]; }
|
137
|
-
|
138
|
-
\$=({C3}|(\\.))+\$ { yylval.name = fst_strdup(yytext); return RVAR; }
|
139
|
-
|
140
|
-
\$({C3}|(\\.))+\$ { yylval.name = fst_strdup(yytext); return VAR; }
|
141
|
-
|
142
|
-
#=({C4}|(\\.))+# { yylval.name = fst_strdup(yytext); return RSVAR; }
|
143
|
-
|
144
|
-
#({C4}|(\\.))+# { yylval.name = fst_strdup(yytext); return SVAR; }
|
145
|
-
|
146
|
-
\<({C5}|\\.)*\> { yylval.name = unquote(yytext,false); return SYMBOL; }
|
147
|
-
|
148
|
-
\"<{FN}+>\" {
|
149
|
-
yylval.value = unquote(yytext)+1;
|
150
|
-
yylval.value[strlen(yylval.value)-1] = 0;
|
151
|
-
return STRING2;
|
152
|
-
}
|
153
|
-
|
154
|
-
\"{FN}+\" {
|
155
|
-
yylval.value = unquote(yytext);
|
156
|
-
return STRING;
|
157
|
-
}
|
158
|
-
|
159
|
-
[ \t] { /* ignored */ }
|
160
|
-
\\[ \t]*([ \t]\%.*)?\r?\n { print_lineno(); /* ignored */ }
|
161
|
-
\r?\n { print_lineno(); return NEWLINE; }
|
162
|
-
|
163
|
-
\\[0-9]+ { long l=atol(yytext+1);
|
164
|
-
if (l <= 1114112) { yylval.uchar=l; return CHARACTER; }
|
165
|
-
yyerror("invalid expression");
|
166
|
-
}
|
167
|
-
|
168
|
-
|
169
|
-
\\. { yylval.value=fst_strdup(yytext+1); return UTF8CHAR; }
|
170
|
-
[\x00-\x7f] { yylval.value=fst_strdup(yytext); return UTF8CHAR; }
|
171
|
-
[\xc0-\xdf]{CC} { yylval.value=fst_strdup(yytext); return UTF8CHAR; }
|
172
|
-
[\xe0-\xef]{CC}{2} { yylval.value=fst_strdup(yytext); return UTF8CHAR; }
|
173
|
-
[\xf0-\xff]{CC}{3} { yylval.value=fst_strdup(yytext); return UTF8CHAR; }
|
174
|
-
|
175
|
-
%%
|
data/lib/.gitignore
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
*.so
|
data/test/.gitignore
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
*.a
|