ruby-sfst 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/COPYING +280 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +54 -0
  6. data/README.md +1 -1
  7. data/Rakefile +9 -18
  8. data/bin/console +7 -0
  9. data/bin/setup +6 -0
  10. data/ext/sfst/alphabet.cc +879 -0
  11. data/ext/sfst/alphabet.h +302 -0
  12. data/ext/sfst/basic.cc +85 -0
  13. data/ext/{sfst_machine → sfst}/basic.h +7 -4
  14. data/ext/sfst/compact.cc +629 -0
  15. data/ext/sfst/compact.h +100 -0
  16. data/ext/sfst/determinise.cc +279 -0
  17. data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
  18. data/ext/sfst/fst.cc +1150 -0
  19. data/ext/sfst/fst.h +374 -0
  20. data/ext/sfst/hopcroft.cc +681 -0
  21. data/ext/sfst/interface.cc +1921 -0
  22. data/ext/sfst/interface.h +171 -0
  23. data/ext/sfst/make-compact.cc +323 -0
  24. data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
  25. data/ext/sfst/mem.h +80 -0
  26. data/ext/sfst/operators.cc +1273 -0
  27. data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
  28. data/ext/sfst/sgi.h +72 -0
  29. data/ext/sfst/utf8.cc +149 -0
  30. data/ext/{sfst_machine → sfst}/utf8.h +7 -4
  31. data/lib/sfst.rb +2 -1
  32. data/lib/sfst/version.rb +1 -1
  33. data/ruby-sfst.gemspec +23 -23
  34. metadata +107 -35
  35. data/ext/sfst_machine/alphabet.cc +0 -812
  36. data/ext/sfst_machine/alphabet.h +0 -273
  37. data/ext/sfst_machine/basic.cc +0 -84
  38. data/ext/sfst_machine/compact.cc +0 -616
  39. data/ext/sfst_machine/compact.h +0 -98
  40. data/ext/sfst_machine/determinise.cc +0 -303
  41. data/ext/sfst_machine/fst.cc +0 -1000
  42. data/ext/sfst_machine/fst.h +0 -369
  43. data/ext/sfst_machine/interface.cc +0 -1842
  44. data/ext/sfst_machine/interface.h +0 -93
  45. data/ext/sfst_machine/make-compact.cc +0 -327
  46. data/ext/sfst_machine/mem.h +0 -74
  47. data/ext/sfst_machine/operators.cc +0 -1131
  48. data/ext/sfst_machine/sgi.h +0 -44
  49. data/ext/sfst_machine/utf8.cc +0 -146
  50. data/test/test_sfst.fst +0 -3
  51. data/test/test_sfst.rb +0 -114
--- data/ext/sfst_machine/sgi.h
+++ /dev/null
@@ -1,44 +0,0 @@
1
-
2
- /*******************************************************************/
3
- /* */
4
- /* File: sgi.h */
5
- /* Author: Helmut Schmid */
6
- /* Purpose: */
7
- /* Created: Thu Sep 11 15:58:25 2008 */
8
- /* Modified: Fri Sep 12 08:17:03 2008 (schmid) */
9
- /* */
10
- /*******************************************************************/
11
-
12
- #ifndef _SGI_INCLUDED
13
- #define _SGI_INCLUDED
14
-
15
-
16
- #ifdef SGIext
17
-
18
- #include <ext/hash_map>
19
- #include <ext/hash_set>
20
- using std::hash_map;
21
- using std::hash_set;
22
- using std::hash;
23
-
24
- #else
25
-
26
- #ifdef SGI__gnu_cxx
27
-
28
- #include <ext/hash_map>
29
- #include <ext/hash_set>
30
-
31
- #else
32
-
33
- #include <backward/hash_map>
34
- #include <backward/hash_set>
35
-
36
- #endif
37
-
38
- using __gnu_cxx::hash_map;
39
- using __gnu_cxx::hash_set;
40
- using __gnu_cxx::hash;
41
-
42
- #endif
43
-
44
- #endif
--- data/ext/sfst_machine/utf8.cc
+++ /dev/null
@@ -1,146 +0,0 @@
1
-
2
- /*******************************************************************/
3
- /* */
4
- /* File: utf8.C */
5
- /* Author: Helmut Schmid */
6
- /* Purpose: */
7
- /* Created: Mon Sep 5 17:49:16 2005 */
8
- /* Modified: Mon Mar 3 11:00:53 2008 (schmid) */
9
- /* */
10
- /*******************************************************************/
11
-
12
- #include "string.h"
13
-
14
- #include "utf8.h"
15
-
16
- const unsigned char get3LSbits=7;
17
- const unsigned char get4LSbits=15;
18
- const unsigned char get5LSbits=31;
19
- const unsigned char get6LSbits=63;
20
-
21
- const unsigned char set1MSbits=128;
22
- const unsigned char set2MSbits=192;
23
- const unsigned char set3MSbits=224;
24
- const unsigned char set4MSbits=240;
25
-
26
-
27
-
28
- /*******************************************************************/
29
- /* */
30
- /* int2utf8 */
31
- /* */
32
- /*******************************************************************/
33
-
34
- char *int2utf8( unsigned int sym )
35
-
36
- {
37
- static unsigned char ch[5];
38
-
39
- if (sym < 128) {
40
- // 1-byte UTF8 symbol, 7 bits
41
- ch[0] = sym;
42
- ch[1] = 0;
43
- }
44
-
45
- else if (sym < 2048) {
46
- // 2-byte UTF8 symbol, 5+6 bits
47
- ch[0] = (sym >> 6) | set2MSbits;
48
- ch[1] = (sym & get6LSbits) | set1MSbits;
49
- ch[2] = 0;
50
- }
51
-
52
- else if (sym < 65536) {
53
- // 3-byte UTF8 symbol, 4+6+6 bits
54
- ch[0] = (sym >> 12) | set3MSbits;
55
- ch[1] = ((sym >> 6) & get6LSbits) | set1MSbits;
56
- ch[2] = (sym & get6LSbits) | set1MSbits;
57
- ch[3] = 0;
58
- }
59
-
60
- else if (sym < 2097152) {
61
- // 4-byte UTF8 symbol, 3+6+6+6 bits
62
- ch[0] = (sym >> 18) | set4MSbits;
63
- ch[1] = ((sym >> 12) & get6LSbits) | set1MSbits;
64
- ch[2] = ((sym >> 6) & get6LSbits) | set1MSbits;
65
- ch[3] = (sym & get6LSbits) | set1MSbits;
66
- ch[4] = 0;
67
- }
68
-
69
- else
70
- return NULL;
71
-
72
- return (char*)ch;
73
- }
74
-
75
-
76
- /*******************************************************************/
77
- /* */
78
- /* utf8toint */
79
- /* */
80
- /*******************************************************************/
81
-
82
- unsigned int utf8toint( char **s )
83
-
84
- {
85
- int bytes_to_come;
86
- unsigned int result=0;
87
- unsigned char c=(unsigned char)**s;
88
-
89
- if (c >= (unsigned char)set4MSbits) { // 1111xxxx
90
- bytes_to_come = 3;
91
- result = (result << 3) | (c & get3LSbits);
92
- }
93
-
94
- else if (c >= (unsigned char) set3MSbits) { // 1110xxxx
95
- // start of a three-byte symbol
96
- bytes_to_come = 2;
97
- result = (result << 4) | (c & get4LSbits);
98
- }
99
-
100
- else if (c >= (unsigned char) set2MSbits) { // 1100xxxx
101
- // start of a two-byte symbol
102
- bytes_to_come = 1;
103
- result = (result << 5) | (c & get5LSbits);
104
- }
105
-
106
- else if (c < (unsigned char) set1MSbits) { // 0100xxxx
107
- // one-byte symbol
108
- bytes_to_come = 0;
109
- result = c;
110
- }
111
-
112
- else
113
- return 0; // error
114
-
115
- while (bytes_to_come > 0) {
116
- bytes_to_come--;
117
- (*s)++;
118
- c = (unsigned char)**s;
119
- if (c < (unsigned char) set2MSbits &&
120
- c >= (unsigned char) set1MSbits) // 1000xxxx
121
- {
122
- result = (result << 6) | (c & get6LSbits);
123
- }
124
- else
125
- return 0;
126
- }
127
-
128
- (*s)++;
129
- return result;
130
- }
131
-
132
-
133
- /*******************************************************************/
134
- /* */
135
- /* utf8toint */
136
- /* */
137
- /*******************************************************************/
138
-
139
- unsigned int utf8toint( char *s )
140
-
141
- {
142
- unsigned int result = utf8toint( &s );
143
- if (*s == 0) // all bytes converted?
144
- return result;
145
- return 0;
146
- }
--- data/test/test_sfst.fst
+++ /dev/null
@@ -1,3 +0,0 @@
1
- ALPHABET = [a-z]
2
- $test$ = ({bar}:{foo} | {baz}:{foo})
3
- $test$
--- data/test/test_sfst.rb
+++ /dev/null
@@ -1,114 +0,0 @@
1
- require 'sfst'
2
- require 'test/unit'
3
-
4
- TEST_DIRECTORY = File.expand_path(File.dirname(__FILE__))
5
- TEST_COMPILED_COMPACT_FILE = File.join(TEST_DIRECTORY, 'test_sfst_compact.a')
6
- TEST_COMPILED_REGULAR_FILE = File.join(TEST_DIRECTORY, 'test_sfst_regular.a')
7
-
8
- class RegularTransducerTestCase < Test::Unit::TestCase
9
- def test_analyze_acceptance
10
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
11
- assert_equal true, fst.accepted_analysis?('foo')
12
- assert_equal false, fst.accepted_analysis?('fox')
13
- end
14
-
15
- def test_analyze
16
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
17
- assert_equal ['bar', 'baz'], fst.analyse('foo').sort
18
-
19
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
20
- assert_equal [], fst.analyse('fox').sort
21
- end
22
-
23
- def test_analyze_symbol_sequence
24
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
25
- assert_equal [['b', 'a', 'r'], ['b', 'a', 'z']], fst.analyse('foo', :symbol_sequence => true).sort
26
-
27
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
28
- assert_equal [], fst.analyse('fox', :symbol_sequence => true).sort
29
- end
30
-
31
- def test_generate_acceptance
32
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
33
- assert_equal true, fst.accepted_generating?('bar')
34
- assert_equal true, fst.accepted_generating?('baz')
35
- assert_equal false, fst.accepted_generating?('bax')
36
- end
37
-
38
- def test_generate
39
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
40
- assert_equal ['foo'], fst.generate('bar').sort
41
-
42
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
43
- assert_equal ['foo'], fst.generate('baz').sort
44
-
45
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
46
- assert_equal [], fst.generate('bax').sort
47
- end
48
-
49
- def test_generate_language_default
50
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
51
- a = []
52
- fst.generate_language do |u|
53
- a << u.collect { |pair| pair.join(':') }.join
54
- end
55
- assert_equal ['b:fa:or:o', 'b:fa:oz:o'], a.sort
56
- end
57
-
58
- def test_generate_language_both
59
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
60
- a = []
61
- fst.generate_language(:levels => :both) do |u|
62
- a << u.collect { |pair| pair.join(':') }.join
63
- end
64
- assert_equal ['b:fa:or:o', 'b:fa:oz:o'], a.sort
65
- end
66
-
67
- def test_generate_language_upper
68
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
69
- a = []
70
- fst.generate_language(:levels => :upper) do |u|
71
- a << u.join
72
- end
73
- assert_equal ['foo'], a.sort
74
- end
75
-
76
- def test_generate_language_lower
77
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
78
- a = []
79
- fst.generate_language(:levels => :lower) do |u|
80
- a << u.join
81
- end
82
- assert_equal ['bar', 'baz'], a.sort
83
- end
84
- end
85
-
86
- class CompactTransducerTestCase < Test::Unit::TestCase
87
- def test_analyze_acceptance
88
- fst = SFST::CompactTransducer.new(TEST_COMPILED_COMPACT_FILE)
89
- assert_equal true, fst.accepted_analysis?('foo')
90
- assert_equal false, fst.accepted_analysis?('fox')
91
- end
92
-
93
- def test_analyze
94
- fst = SFST::CompactTransducer.new(TEST_COMPILED_COMPACT_FILE)
95
- assert_equal ['bar', 'baz'], fst.analyse('foo').sort
96
- assert_equal [], fst.analyse('fox').sort
97
- end
98
- end
99
-
100
- class StressTestCase < Test::Unit::TestCase
101
- def test_repeated_analyses_regular
102
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
103
- 65536.times do
104
- assert_equal ['bar', 'baz'], fst.analyse('foo').sort
105
- end
106
- end
107
-
108
- def test_repeated_analyses_compact
109
- fst = SFST::CompactTransducer.new(TEST_COMPILED_COMPACT_FILE)
110
- 65536.times do
111
- assert_equal ['bar', 'baz'], fst.analyse('foo').sort
112
- end
113
- end
114
- end