ruby-sfst 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/COPYING +280 -0
  4. data/Gemfile +3 -0
  5. data/Gemfile.lock +54 -0
  6. data/README.md +1 -1
  7. data/Rakefile +9 -18
  8. data/bin/console +7 -0
  9. data/bin/setup +6 -0
  10. data/ext/sfst/alphabet.cc +879 -0
  11. data/ext/sfst/alphabet.h +302 -0
  12. data/ext/sfst/basic.cc +85 -0
  13. data/ext/{sfst_machine → sfst}/basic.h +7 -4
  14. data/ext/sfst/compact.cc +629 -0
  15. data/ext/sfst/compact.h +100 -0
  16. data/ext/sfst/determinise.cc +279 -0
  17. data/ext/{sfst_machine → sfst}/extconf.rb +2 -1
  18. data/ext/sfst/fst.cc +1150 -0
  19. data/ext/sfst/fst.h +374 -0
  20. data/ext/sfst/hopcroft.cc +681 -0
  21. data/ext/sfst/interface.cc +1921 -0
  22. data/ext/sfst/interface.h +171 -0
  23. data/ext/sfst/make-compact.cc +323 -0
  24. data/ext/{sfst_machine → sfst}/make-compact.h +15 -13
  25. data/ext/sfst/mem.h +80 -0
  26. data/ext/sfst/operators.cc +1273 -0
  27. data/ext/{sfst_machine → sfst}/sfst_machine.cc +89 -78
  28. data/ext/sfst/sgi.h +72 -0
  29. data/ext/sfst/utf8.cc +149 -0
  30. data/ext/{sfst_machine → sfst}/utf8.h +7 -4
  31. data/lib/sfst.rb +2 -1
  32. data/lib/sfst/version.rb +1 -1
  33. data/ruby-sfst.gemspec +23 -23
  34. metadata +107 -35
  35. data/ext/sfst_machine/alphabet.cc +0 -812
  36. data/ext/sfst_machine/alphabet.h +0 -273
  37. data/ext/sfst_machine/basic.cc +0 -84
  38. data/ext/sfst_machine/compact.cc +0 -616
  39. data/ext/sfst_machine/compact.h +0 -98
  40. data/ext/sfst_machine/determinise.cc +0 -303
  41. data/ext/sfst_machine/fst.cc +0 -1000
  42. data/ext/sfst_machine/fst.h +0 -369
  43. data/ext/sfst_machine/interface.cc +0 -1842
  44. data/ext/sfst_machine/interface.h +0 -93
  45. data/ext/sfst_machine/make-compact.cc +0 -327
  46. data/ext/sfst_machine/mem.h +0 -74
  47. data/ext/sfst_machine/operators.cc +0 -1131
  48. data/ext/sfst_machine/sgi.h +0 -44
  49. data/ext/sfst_machine/utf8.cc +0 -146
  50. data/test/test_sfst.fst +0 -3
  51. data/test/test_sfst.rb +0 -114
@@ -1,44 +0,0 @@
1
-
2
- /*******************************************************************/
3
- /* */
4
- /* File: sgi.h */
5
- /* Author: Helmut Schmid */
6
- /* Purpose: */
7
- /* Created: Thu Sep 11 15:58:25 2008 */
8
- /* Modified: Fri Sep 12 08:17:03 2008 (schmid) */
9
- /* */
10
- /*******************************************************************/
11
-
12
- #ifndef _SGI_INCLUDED // include guard: the body below is processed once per translation unit
13
- #define _SGI_INCLUDED
14
-
15
-
16
- #ifdef SGIext // variant 1: <ext/...> headers, names imported from namespace std
17
-
18
- #include <ext/hash_map>
19
- #include <ext/hash_set>
20
- using std::hash_map;
21
- using std::hash_set;
22
- using std::hash;
23
-
24
- #else // !SGIext: names are imported from namespace __gnu_cxx (see the using-declarations below)
25
-
26
- #ifdef SGI__gnu_cxx // variant 2: headers under <ext/...>
27
-
28
- #include <ext/hash_map>
29
- #include <ext/hash_set>
30
-
31
- #else // !SGI__gnu_cxx, variant 3 (default when neither macro is defined): headers under <backward/...>
32
-
33
- #include <backward/hash_map>
34
- #include <backward/hash_set>
35
-
36
- #endif // SGI__gnu_cxx
37
-
38
- using __gnu_cxx::hash_map; // shared by variants 2 and 3
39
- using __gnu_cxx::hash_set;
40
- using __gnu_cxx::hash;
41
-
42
- #endif // SGIext
43
-
44
- #endif // _SGI_INCLUDED
@@ -1,146 +0,0 @@
1
-
2
- /*******************************************************************/
3
- /* */
4
- /* File: utf8.C */
5
- /* Author: Helmut Schmid */
6
- /* Purpose: */
7
- /* Created: Mon Sep 5 17:49:16 2005 */
8
- /* Modified: Mon Mar 3 11:00:53 2008 (schmid) */
9
- /* */
10
- /*******************************************************************/
11
-
12
- #include "string.h"
13
-
14
- #include "utf8.h"
15
-
16
- const unsigned char get3LSbits=7; // 0x07: AND-mask keeping the 3 least significant bits
17
- const unsigned char get4LSbits=15; // 0x0F: AND-mask keeping the 4 least significant bits
18
- const unsigned char get5LSbits=31; // 0x1F: AND-mask keeping the 5 least significant bits
19
- const unsigned char get6LSbits=63; // 0x3F: AND-mask keeping the 6 least significant bits
20
-
21
- const unsigned char set1MSbits=128; // 0x80 = 10000000: OR-ed into continuation bytes
22
- const unsigned char set2MSbits=192; // 0xC0 = 11000000: OR-ed into the lead byte of a 2-byte symbol
23
- const unsigned char set3MSbits=224; // 0xE0 = 11100000: OR-ed into the lead byte of a 3-byte symbol
24
- const unsigned char set4MSbits=240; // 0xF0 = 11110000: OR-ed into the lead byte of a 4-byte symbol
25
-
26
-
27
-
28
- /*******************************************************************/
29
- /* */
30
- /* int2utf8: encodes the code `sym` as a 0-terminated UTF-8 byte sequence of 1 to 4 bytes. Returns a pointer to a function-static buffer that every call overwrites (copy the result before calling again), or NULL when sym >= 2097152, i.e. needs more than 21 bits. */
31
- /* */
32
- /*******************************************************************/
33
-
34
- char *int2utf8( unsigned int sym )
35
-
36
- {
37
- static unsigned char ch[5]; // at most 4 encoded bytes plus the terminating 0; shared by all calls
38
-
39
- if (sym < 128) {
40
- // 1-byte UTF8 symbol, 7 bits
41
- ch[0] = sym;
42
- ch[1] = 0;
43
- }
44
-
45
- else if (sym < 2048) {
46
- // 2-byte UTF8 symbol, 5+6 bits
47
- ch[0] = (sym >> 6) | set2MSbits; // lead byte: 110 prefix + upper 5 bits
48
- ch[1] = (sym & get6LSbits) | set1MSbits; // continuation byte: 10 prefix + lower 6 bits
49
- ch[2] = 0;
50
- }
51
-
52
- else if (sym < 65536) {
53
- // 3-byte UTF8 symbol, 4+6+6 bits (no special handling of the surrogate range 0xD800-0xDFFF)
54
- ch[0] = (sym >> 12) | set3MSbits; // lead byte: 1110 prefix + upper 4 bits
55
- ch[1] = ((sym >> 6) & get6LSbits) | set1MSbits;
56
- ch[2] = (sym & get6LSbits) | set1MSbits;
57
- ch[3] = 0;
58
- }
59
-
60
- else if (sym < 2097152) {
61
- // 4-byte UTF8 symbol, 3+6+6+6 bits (accepts every 21-bit value, including those above 0x10FFFF)
62
- ch[0] = (sym >> 18) | set4MSbits; // lead byte: 11110 prefix + upper 3 bits
63
- ch[1] = ((sym >> 12) & get6LSbits) | set1MSbits;
64
- ch[2] = ((sym >> 6) & get6LSbits) | set1MSbits;
65
- ch[3] = (sym & get6LSbits) | set1MSbits;
66
- ch[4] = 0;
67
- }
68
-
69
- else
70
- return NULL; // sym does not fit into 21 bits: not encodable by this function
71
-
72
- return (char*)ch;
73
- }
74
-
75
-
76
- /*******************************************************************/
77
- /* */
78
- /* utf8toint (advancing overload): decodes the UTF-8 symbol starting at *s and returns its code. On success *s is advanced past the symbol. Returns 0 on malformed input; note that 0 is also the result of decoding a 0 byte, so the return value alone does not distinguish the two. */
79
- /* */
80
- /*******************************************************************/
81
-
82
- unsigned int utf8toint( char **s )
83
-
84
- {
85
- int bytes_to_come; // number of continuation bytes still expected after the lead byte
86
- unsigned int result=0;
87
- unsigned char c=(unsigned char)**s;
88
-
89
- if (c >= (unsigned char)set4MSbits) { // 1111xxxx: start of a four-byte symbol. NOTE(review): this test also admits 0xF8-0xFF; a strict UTF-8 check would require 11110xxx
90
- bytes_to_come = 3;
91
- result = (result << 3) | (c & get3LSbits); // result is still 0 here, so this just keeps the low 3 bits of the lead byte
92
- }
93
-
94
- else if (c >= (unsigned char) set3MSbits) { // 1110xxxx
95
- // start of a three-byte symbol
96
- bytes_to_come = 2;
97
- result = (result << 4) | (c & get4LSbits);
98
- }
99
-
100
- else if (c >= (unsigned char) set2MSbits) { // 110xxxxx
101
- // start of a two-byte symbol
102
- bytes_to_come = 1;
103
- result = (result << 5) | (c & get5LSbits);
104
- }
105
-
106
- else if (c < (unsigned char) set1MSbits) { // 0xxxxxxx
107
- // one-byte symbol
108
- bytes_to_come = 0;
109
- result = c;
110
- }
111
-
112
- else
113
- return 0; // error: the first byte is 10xxxxxx (a continuation byte); *s is not advanced
114
-
115
- while (bytes_to_come > 0) {
116
- bytes_to_come--;
117
- (*s)++;
118
- c = (unsigned char)**s;
119
- if (c < (unsigned char) set2MSbits &&
120
- c >= (unsigned char) set1MSbits) // 10xxxxxx
121
- {
122
- result = (result << 6) | (c & get6LSbits); // append the 6 payload bits of the continuation byte
123
- }
124
- else
125
- return 0; // error: not a continuation byte (this includes the terminating 0); *s is left pointing at the offending byte
126
- }
127
-
128
- (*s)++; // step past the last byte of the decoded symbol
129
- return result;
130
- }
131
-
132
-
133
- /*******************************************************************/
134
- /* */
135
- /* utf8toint (whole-string overload): returns the code of the UTF-8 symbol in `s` if the string consists of exactly that one symbol; returns 0 if decoding fails or if bytes remain after the first symbol. */
136
- /* */
137
- /*******************************************************************/
138
-
139
- unsigned int utf8toint( char *s )
140
-
141
- {
142
- unsigned int result = utf8toint( &s ); // advances the local copy of s; the caller's pointer is unaffected
143
- if (*s == 0) // all bytes converted? NOTE(review): for an empty string the call above steps past the terminating 0, so this reads the byte after the terminator
144
- return result;
145
- return 0;
146
- }
@@ -1,3 +0,0 @@
1
- ALPHABET = [a-z]
2
- $test$ = ({bar}:{foo} | {baz}:{foo})
3
- $test$
@@ -1,114 +0,0 @@
1
- require 'sfst'
2
- require 'test/unit'
3
-
4
- TEST_DIRECTORY = File.expand_path(File.dirname(__FILE__)) # absolute path of the directory containing this test file
5
- TEST_COMPILED_COMPACT_FILE = File.join(TEST_DIRECTORY, 'test_sfst_compact.a') # file loaded by SFST::CompactTransducer in the tests below
6
- TEST_COMPILED_REGULAR_FILE = File.join(TEST_DIRECTORY, 'test_sfst_regular.a') # file loaded by SFST::RegularTransducer in the tests below
7
-
8
- class RegularTransducerTestCase < Test::Unit::TestCase # exercises SFST::RegularTransducer loaded from TEST_COMPILED_REGULAR_FILE
9
- def test_analyze_acceptance # 'foo' is accepted for analysis, 'fox' is not
10
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
11
- assert_equal true, fst.accepted_analysis?('foo')
12
- assert_equal false, fst.accepted_analysis?('fox')
13
- end
14
-
15
- def test_analyze # analysing 'foo' yields 'bar' and 'baz'; an unknown form yields an empty array
16
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
17
- assert_equal ['bar', 'baz'], fst.analyse('foo').sort # sorted so the check does not depend on result order
18
-
19
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
20
- assert_equal [], fst.analyse('fox').sort
21
- end
22
-
23
- def test_analyze_symbol_sequence # with :symbol_sequence => true each analysis is an array of single symbols
24
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
25
- assert_equal [['b', 'a', 'r'], ['b', 'a', 'z']], fst.analyse('foo', :symbol_sequence => true).sort
26
-
27
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
28
- assert_equal [], fst.analyse('fox', :symbol_sequence => true).sort
29
- end
30
-
31
- def test_generate_acceptance # 'bar' and 'baz' are accepted for generation, 'bax' is not
32
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
33
- assert_equal true, fst.accepted_generating?('bar')
34
- assert_equal true, fst.accepted_generating?('baz')
35
- assert_equal false, fst.accepted_generating?('bax')
36
- end
37
-
38
- def test_generate # generating from 'bar' or 'baz' yields 'foo'; an unknown form yields an empty array
39
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
40
- assert_equal ['foo'], fst.generate('bar').sort
41
-
42
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
43
- assert_equal ['foo'], fst.generate('baz').sort
44
-
45
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
46
- assert_equal [], fst.generate('bax').sort
47
- end
48
-
49
- def test_generate_language_default # without options the block receives an array of symbol pairs (same expectation as :levels => :both below)
50
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
51
- a = []
52
- fst.generate_language do |u|
53
- a << u.collect { |pair| pair.join(':') }.join # render each pair as "x:y" and concatenate the pairs into one string
54
- end
55
- assert_equal ['b:fa:or:o', 'b:fa:oz:o'], a.sort
56
- end
57
-
58
- def test_generate_language_both # :levels => :both yields pairs of symbols
59
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
60
- a = []
61
- fst.generate_language(:levels => :both) do |u|
62
- a << u.collect { |pair| pair.join(':') }.join
63
- end
64
- assert_equal ['b:fa:or:o', 'b:fa:oz:o'], a.sort
65
- end
66
-
67
- def test_generate_language_upper # :levels => :upper yields only the 'foo' side
68
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
69
- a = []
70
- fst.generate_language(:levels => :upper) do |u|
71
- a << u.join
72
- end
73
- assert_equal ['foo'], a.sort
74
- end
75
-
76
- def test_generate_language_lower # :levels => :lower yields only the 'bar'/'baz' side
77
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
78
- a = []
79
- fst.generate_language(:levels => :lower) do |u|
80
- a << u.join
81
- end
82
- assert_equal ['bar', 'baz'], a.sort
83
- end
84
- end
85
-
86
- class CompactTransducerTestCase < Test::Unit::TestCase # exercises SFST::CompactTransducer loaded from TEST_COMPILED_COMPACT_FILE; only analysis is tested here
87
- def test_analyze_acceptance # 'foo' is accepted for analysis, 'fox' is not
88
- fst = SFST::CompactTransducer.new(TEST_COMPILED_COMPACT_FILE)
89
- assert_equal true, fst.accepted_analysis?('foo')
90
- assert_equal false, fst.accepted_analysis?('fox')
91
- end
92
-
93
- def test_analyze # same expectations as the regular transducer; here one instance serves both calls
94
- fst = SFST::CompactTransducer.new(TEST_COMPILED_COMPACT_FILE)
95
- assert_equal ['bar', 'baz'], fst.analyse('foo').sort
96
- assert_equal [], fst.analyse('fox').sort
97
- end
98
- end
99
-
100
- class StressTestCase < Test::Unit::TestCase # repeats the same analysis many times on a single transducer instance
101
- def test_repeated_analyses_regular # 65536 identical analyses must all return the same result
102
- fst = SFST::RegularTransducer.new(TEST_COMPILED_REGULAR_FILE)
103
- 65536.times do
104
- assert_equal ['bar', 'baz'], fst.analyse('foo').sort
105
- end
106
- end
107
-
108
- def test_repeated_analyses_compact # same check against the compact transducer
109
- fst = SFST::CompactTransducer.new(TEST_COMPILED_COMPACT_FILE)
110
- 65536.times do
111
- assert_equal ['bar', 'baz'], fst.analyse('foo').sort
112
- end
113
- end
114
- end