formosa 0.0.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/ext/native_syllable_composer/LibHolo.h +43 -40
- data/lib/ext/native_syllable_composer/compose.cpp +6 -3
- data/lib/ext/native_syllable_composer/compose.h +1 -1
- data/lib/ext/native_syllable_composer/native_syllable_composer.c +7 -6
- data/lib/formosa/syllable_utility.rb +2 -2
- data/lib/formosa/version.rb +2 -2
- data/website/index.html +25 -10
- data/website/index.txt +22 -8
- metadata +3 -3
@@ -565,12 +565,17 @@ namespace LibHolo
|
|
565
565
|
if (FLV("a")) SETLOUDEST(p);
|
566
566
|
|
567
567
|
// the last "ere" override
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
568
|
+
string lastSymbolStr = _symvec[end-1].symbolInLowerCase();
|
569
|
+
|
570
|
+
if (end >= 4 && (lastSymbolStr=="t" || lastSymbolStr=="p" || lastSymbolStr=="k" || lastSymbolStr=="h") &&
|
571
|
+
_symvec[end-2].symbolInLowerCase() == "e" && _symvec[end-3].symbolInLowerCase() == "r" && _symvec[end-4].symbolInLowerCase() == "e")
|
572
|
+
{
|
573
|
+
SETLOUDEST(end-2);
|
574
|
+
}
|
575
|
+
else if (end >= 3 && lastSymbolStr == "e" && _symvec[end-2].symbolInLowerCase() == "r" && _symvec[end-3].symbolInLowerCase() == "e")
|
576
|
+
{
|
577
|
+
SETLOUDEST(end-1);
|
578
|
+
}
|
574
579
|
|
575
580
|
if (loudestVowel==end) return;
|
576
581
|
// fprintf(stderr, "found loudest vowel=%d (%s), loudest tone=%d\n", loudestVowel, _symvec[loudestVowel].symbol().c_str(), loudestTone);
|
@@ -580,8 +585,6 @@ namespace LibHolo
|
|
580
585
|
|
581
586
|
for (unsigned int i=0; i<end; i++) _symvec[i].setTone(0);
|
582
587
|
|
583
|
-
string lastSymbolStr = _symvec[end-1].symbolInLowerCase();
|
584
|
-
|
585
588
|
// if the symbol is "i", and there's a next "u", we shift
|
586
589
|
// the vowel to "u"
|
587
590
|
|
@@ -646,12 +649,12 @@ namespace LibHolo
|
|
646
649
|
// detect case
|
647
650
|
if (str1[0] == tolower(str1[0])) {
|
648
651
|
syl.insertCharacterAtCursor('o', sym1.tone());
|
649
|
-
syl.insertCharacterAtCursor('u');
|
652
|
+
syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'u' : 'U');
|
650
653
|
}
|
651
654
|
else
|
652
655
|
{
|
653
656
|
syl.insertCharacterAtCursor('O', sym1.tone());
|
654
|
-
syl.insertCharacterAtCursor('U');
|
657
|
+
syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'u' : 'U');
|
655
658
|
}
|
656
659
|
continue;
|
657
660
|
}
|
@@ -667,11 +670,11 @@ namespace LibHolo
|
|
667
670
|
// detect case
|
668
671
|
if (str1[0] == tolower(str1[0])) {
|
669
672
|
syl.insertCharacterAtCursor('c');
|
670
|
-
syl.insertCharacterAtCursor('h');
|
673
|
+
syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 'h' : 'H');
|
671
674
|
}
|
672
675
|
else {
|
673
676
|
syl.insertCharacterAtCursor('C');
|
674
|
-
syl.insertCharacterAtCursor('H');
|
677
|
+
syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 'h' : 'H');
|
675
678
|
}
|
676
679
|
|
677
680
|
i++;
|
@@ -683,11 +686,11 @@ namespace LibHolo
|
|
683
686
|
// detect case
|
684
687
|
if (str1[0] == tolower(str1[0])) {
|
685
688
|
syl.insertCharacterAtCursor('o', sym1.tone());
|
686
|
-
syl.insertCharacterAtCursor(
|
689
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
687
690
|
}
|
688
691
|
else {
|
689
692
|
syl.insertCharacterAtCursor('O', sym1.tone());
|
690
|
-
syl.insertCharacterAtCursor(
|
693
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
691
694
|
}
|
692
695
|
|
693
696
|
i++;
|
@@ -699,11 +702,11 @@ namespace LibHolo
|
|
699
702
|
// detect case
|
700
703
|
if (str1[0] == tolower(str1[0])) {
|
701
704
|
syl.insertCharacterAtCursor('o', sym1.tone());
|
702
|
-
syl.insertCharacterAtCursor(
|
705
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
703
706
|
}
|
704
707
|
else {
|
705
708
|
syl.insertCharacterAtCursor('O', sym1.tone());
|
706
|
-
syl.insertCharacterAtCursor(
|
709
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
707
710
|
}
|
708
711
|
|
709
712
|
i++;
|
@@ -715,11 +718,11 @@ namespace LibHolo
|
|
715
718
|
// detect case
|
716
719
|
if (str1[0] == tolower(str1[0])) {
|
717
720
|
syl.insertCharacterAtCursor('e', sym1.tone());
|
718
|
-
syl.insertCharacterAtCursor(
|
721
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
719
722
|
}
|
720
723
|
else {
|
721
724
|
syl.insertCharacterAtCursor('E', sym1.tone());
|
722
|
-
syl.insertCharacterAtCursor(
|
725
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
723
726
|
}
|
724
727
|
|
725
728
|
i++;
|
@@ -736,13 +739,13 @@ namespace LibHolo
|
|
736
739
|
// detect case
|
737
740
|
if (str1[0] == tolower(str1[0])) {
|
738
741
|
syl.insertCharacterAtCursor('e', sym1.tone());
|
739
|
-
syl.insertCharacterAtCursor(
|
740
|
-
syl.insertCharacterAtCursor(
|
742
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
743
|
+
syl.insertCharacterAtCursor(str3[0], sym3.tone());
|
741
744
|
}
|
742
745
|
else {
|
743
746
|
syl.insertCharacterAtCursor('E', sym1.tone());
|
744
|
-
syl.insertCharacterAtCursor(
|
745
|
-
syl.insertCharacterAtCursor(
|
747
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
748
|
+
syl.insertCharacterAtCursor(str3[0], sym3.tone());
|
746
749
|
}
|
747
750
|
|
748
751
|
i+=2;
|
@@ -753,12 +756,12 @@ namespace LibHolo
|
|
753
756
|
if (lowstr1=="o" && lowstr2=="u" && lowstr3=="h") {
|
754
757
|
// detect case
|
755
758
|
if (str2[0] == tolower(str2[0])) {
|
756
|
-
syl.insertCharacterAtCursor(
|
757
|
-
syl.insertCharacterAtCursor(
|
759
|
+
syl.insertCharacterAtCursor(str1[0], sym1.tone());
|
760
|
+
syl.insertCharacterAtCursor(str3[0], sym2.tone());
|
758
761
|
}
|
759
762
|
else {
|
760
|
-
syl.insertCharacterAtCursor(
|
761
|
-
syl.insertCharacterAtCursor(
|
763
|
+
syl.insertCharacterAtCursor(str1[0], sym1.tone());
|
764
|
+
syl.insertCharacterAtCursor(str3[0], sym2.tone());
|
762
765
|
}
|
763
766
|
|
764
767
|
i+=2;
|
@@ -799,12 +802,12 @@ namespace LibHolo
|
|
799
802
|
// detect case
|
800
803
|
if (str1[0] == tolower(str1[0])) {
|
801
804
|
syl.insertCharacterAtCursor('o', sym1.tone());
|
802
|
-
syl.insertCharacterAtCursor('o');
|
805
|
+
syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'o' : 'O');
|
803
806
|
}
|
804
807
|
else
|
805
808
|
{
|
806
809
|
syl.insertCharacterAtCursor('O', sym1.tone());
|
807
|
-
syl.insertCharacterAtCursor('O');
|
810
|
+
syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'o' : 'O');
|
808
811
|
}
|
809
812
|
continue;
|
810
813
|
}
|
@@ -820,11 +823,11 @@ namespace LibHolo
|
|
820
823
|
// detect case
|
821
824
|
if (str1[0] == tolower(str1[0])) {
|
822
825
|
syl.insertCharacterAtCursor('t');
|
823
|
-
syl.insertCharacterAtCursor('s');
|
826
|
+
syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 's' : 'H');
|
824
827
|
}
|
825
828
|
else {
|
826
829
|
syl.insertCharacterAtCursor('T');
|
827
|
-
syl.insertCharacterAtCursor('
|
830
|
+
syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 's' : 'H');
|
828
831
|
}
|
829
832
|
|
830
833
|
i++;
|
@@ -836,11 +839,11 @@ namespace LibHolo
|
|
836
839
|
// detect case
|
837
840
|
if (str1[0] == tolower(str1[0])) {
|
838
841
|
syl.insertCharacterAtCursor('u', sym1.tone());
|
839
|
-
syl.insertCharacterAtCursor(
|
842
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
840
843
|
}
|
841
844
|
else {
|
842
845
|
syl.insertCharacterAtCursor('U', sym1.tone());
|
843
|
-
syl.insertCharacterAtCursor(
|
846
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
844
847
|
}
|
845
848
|
|
846
849
|
i++;
|
@@ -852,11 +855,11 @@ namespace LibHolo
|
|
852
855
|
// detect case
|
853
856
|
if (str1[0] == tolower(str1[0])) {
|
854
857
|
syl.insertCharacterAtCursor('u', sym1.tone());
|
855
|
-
syl.insertCharacterAtCursor(
|
858
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
856
859
|
}
|
857
860
|
else {
|
858
861
|
syl.insertCharacterAtCursor('U', sym1.tone());
|
859
|
-
syl.insertCharacterAtCursor(
|
862
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
860
863
|
}
|
861
864
|
|
862
865
|
i++;
|
@@ -868,11 +871,11 @@ namespace LibHolo
|
|
868
871
|
// detect case
|
869
872
|
if (str1[0] == tolower(str1[0])) {
|
870
873
|
syl.insertCharacterAtCursor('i', sym1.tone());
|
871
|
-
syl.insertCharacterAtCursor(
|
874
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
872
875
|
}
|
873
876
|
else {
|
874
877
|
syl.insertCharacterAtCursor('I', sym1.tone());
|
875
|
-
syl.insertCharacterAtCursor(
|
878
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
876
879
|
}
|
877
880
|
|
878
881
|
i++;
|
@@ -889,13 +892,13 @@ namespace LibHolo
|
|
889
892
|
// detect case
|
890
893
|
if (str1[0] == tolower(str1[0])) {
|
891
894
|
syl.insertCharacterAtCursor('i', sym1.tone());
|
892
|
-
syl.insertCharacterAtCursor(
|
893
|
-
syl.insertCharacterAtCursor(
|
895
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
896
|
+
syl.insertCharacterAtCursor(str3[0], sym3.tone());
|
894
897
|
}
|
895
898
|
else {
|
896
899
|
syl.insertCharacterAtCursor('I', sym1.tone());
|
897
|
-
syl.insertCharacterAtCursor(
|
898
|
-
syl.insertCharacterAtCursor(
|
900
|
+
syl.insertCharacterAtCursor(str2[0], sym2.tone());
|
901
|
+
syl.insertCharacterAtCursor(str3[0], sym3.tone());
|
899
902
|
}
|
900
903
|
|
901
904
|
i+=2;
|
@@ -8,7 +8,7 @@ using namespace LibHolo;
|
|
8
8
|
|
9
9
|
#include "ruby.h"
|
10
10
|
|
11
|
-
VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable)
|
11
|
+
VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable, int forcePOJStyle)
|
12
12
|
{
|
13
13
|
int c;
|
14
14
|
bool composing = false;
|
@@ -45,8 +45,11 @@ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable)
|
|
45
45
|
|
46
46
|
if (outputType == POJSyllable)
|
47
47
|
output += syl.convertToPOJSyllable().composedForm();
|
48
|
-
else
|
49
|
-
|
48
|
+
else {
|
49
|
+
HoloSyllable tl = syl.convertToTLSyllable();
|
50
|
+
tl.setForcePOJStyle(forcePOJStyle ? true : false);
|
51
|
+
output += tl.composedForm();
|
52
|
+
}
|
50
53
|
|
51
54
|
if (emitchar) {
|
52
55
|
output += string(1, emitchar);
|
@@ -5,16 +5,17 @@
|
|
5
5
|
VALUE syllable_composer = Qnil;
|
6
6
|
|
7
7
|
void Init_native_syllable_composer();
|
8
|
-
VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable);
|
8
|
+
VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable, VALUE rForcePOJStyleOutput);
|
9
9
|
|
10
10
|
void Init_native_syllable_composer() {
|
11
11
|
syllable_composer = rb_define_module("NativeSyllableComposer");
|
12
|
-
rb_define_singleton_method(syllable_composer, "compose", native_syllable_composer_compose,
|
12
|
+
rb_define_singleton_method(syllable_composer, "compose", native_syllable_composer_compose, 4);
|
13
13
|
}
|
14
14
|
|
15
|
-
VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable) {
|
15
|
+
VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable, VALUE rForcePOJStyleOutput) {
|
16
16
|
int inputType = NUM2INT(rInputType);
|
17
17
|
int outputType = NUM2INT(rOutputType);
|
18
|
+
int forcePOJStyleOutput = NUM2INT(rForcePOJStyleOutput);
|
18
19
|
|
19
20
|
VALUE rStr = StringValue(rSyllable);
|
20
21
|
|
@@ -23,10 +24,10 @@ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutp
|
|
23
24
|
|
24
25
|
if (!rStrPtr) return Qnil;
|
25
26
|
|
26
|
-
char *string = (char*)calloc(1, rStrLen);
|
27
|
-
memcpy(string, rStrPtr,rStrLen);
|
27
|
+
char *string = (char*)calloc(1, rStrLen + 1);
|
28
|
+
memcpy(string, rStrPtr, rStrLen);
|
28
29
|
|
29
|
-
VALUE result = ComposeTLSyllable(inputType, outputType, string);
|
30
|
+
VALUE result = ComposeTLSyllable(inputType, outputType, string, forcePOJStyleOutput);
|
30
31
|
free(string);
|
31
32
|
|
32
33
|
return result;
|
@@ -16,8 +16,8 @@ module Formosa
|
|
16
16
|
# * syllable: the query form of the syllable, such as "goa2", "tai5"
|
17
17
|
#
|
18
18
|
# Conversion is done automatically when input and output types are different
|
19
|
-
def self.compose_syllable(input_type, output_type, syllable)
|
20
|
-
NativeSyllableComposer.compose(input_type, output_type, syllable)
|
19
|
+
def self.compose_syllable(input_type, output_type, syllable, force_poj_style_output = false)
|
20
|
+
NativeSyllableComposer.compose(input_type, output_type, syllable, force_poj_style_output ? 1 : 0)
|
21
21
|
end
|
22
22
|
|
23
23
|
TONE_SAMPLE = ["a", "a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋"]
|
data/lib/formosa/version.rb
CHANGED
data/website/index.html
CHANGED
@@ -35,12 +35,22 @@
|
|
35
35
|
<p>Get Version</p>
|
36
36
|
<a href="http://rubyforge.org/projects/formosa" class="numbers">0.0.1</a>
|
37
37
|
</div>
|
38
|
-
<
|
38
|
+
<p>→ ‘Taiwanese language processing’</p>
|
39
39
|
|
40
40
|
|
41
41
|
<h2>What</h2>
|
42
42
|
|
43
43
|
|
44
|
+
<p>Formosa is a Ruby library for processing Taiwanese languages. Major languages
|
45
|
+
spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
|
46
|
+
people. Formosa is the Ruby branch of the lib-formosa project
|
47
|
+
(http://code.google.com/p/lib-formosa/).</p>
|
48
|
+
|
49
|
+
|
50
|
+
<p>Currently, we focus on the processing of the Holo (Southern Min) language,
|
51
|
+
with necessary tools such as SyllableComposer available for general use.</p>
|
52
|
+
|
53
|
+
|
44
54
|
<h2>Installing</h2>
|
45
55
|
|
46
56
|
|
@@ -52,13 +62,18 @@
|
|
52
62
|
<h2>Demonstration of usage</h2>
|
53
63
|
|
54
64
|
|
55
|
-
|
65
|
+
<pre>
|
66
|
+
$KCODE="u" # set the Ruby environment to use UTF-8
|
67
|
+
require "rubygems"
|
68
|
+
require "formosa"
|
69
|
+
include Formosa::Holo
|
70
|
+
poj = SyllableType::POJ
|
71
|
+
tl = SyllableType::TL
|
72
|
+
SyllableUtility.compose_syllable(poj, tl, "goa2") # => guá
|
73
|
+
SyllableUtility.compose_syllable(tl, poj, "gua2") # => goá
|
74
|
+
</pre>
|
56
75
|
|
57
|
-
|
58
|
-
<p><a href="http://groups.google.com/group/formosa">http://groups.google.com/group/formosa</a></p>
|
59
|
-
|
60
|
-
|
61
|
-
<p><span class="caps">TODO</span> – create Google Group – formosa</p>
|
76
|
+
<h2>Forum</h2>
|
62
77
|
|
63
78
|
|
64
79
|
<h2>How to submit patches</h2>
|
@@ -73,15 +88,15 @@
|
|
73
88
|
<h2>License</h2>
|
74
89
|
|
75
90
|
|
76
|
-
<p>This code is free to use under the terms of the <span class="caps">
|
91
|
+
<p>This code is free to use under the terms of the New <span class="caps">BSD</span> license.</p>
|
77
92
|
|
78
93
|
|
79
94
|
<h2>Contact</h2>
|
80
95
|
|
81
96
|
|
82
|
-
<p>Comments are welcome. Send an email to
|
97
|
+
<p>Comments are welcome. Send an email to lukhnos (at) gmail (dot) com.</p>
|
83
98
|
<p class="coda">
|
84
|
-
<a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>,
|
99
|
+
<a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 30th July 2007<br>
|
85
100
|
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
86
101
|
</p>
|
87
102
|
</div>
|
data/website/index.txt
CHANGED
@@ -1,11 +1,20 @@
|
|
1
1
|
h1. formosa
|
2
2
|
|
3
|
-
|
3
|
+
→ 'Taiwanese language processing'
|
4
4
|
|
5
5
|
|
6
6
|
h2. What
|
7
7
|
|
8
8
|
|
9
|
+
Formosa is a Ruby library for processing Taiwanese languages. Major languages
|
10
|
+
spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
|
11
|
+
people. Formosa is the Ruby branch of the lib-formosa project
|
12
|
+
(http://code.google.com/p/lib-formosa/).
|
13
|
+
|
14
|
+
Currently, we focus on the processing of the Holo (Southern Min) language,
|
15
|
+
with necessary tools such as SyllableComposer available for general use.
|
16
|
+
|
17
|
+
|
9
18
|
h2. Installing
|
10
19
|
|
11
20
|
<pre syntax="ruby">sudo gem install formosa</pre>
|
@@ -15,14 +24,19 @@ h2. The basics
|
|
15
24
|
|
16
25
|
h2. Demonstration of usage
|
17
26
|
|
18
|
-
|
27
|
+
<pre>
|
28
|
+
$KCODE="u" # set the Ruby environment to use UTF-8
|
29
|
+
require "rubygems"
|
30
|
+
require "formosa"
|
31
|
+
include Formosa::Holo
|
32
|
+
poj = SyllableType::POJ
|
33
|
+
tl = SyllableType::TL
|
34
|
+
SyllableUtility.compose_syllable(poj, tl, "goa2") # => guá
|
35
|
+
SyllableUtility.compose_syllable(tl, poj, "gua2") # => goá
|
36
|
+
</pre>
|
19
37
|
|
20
38
|
h2. Forum
|
21
39
|
|
22
|
-
"http://groups.google.com/group/formosa":http://groups.google.com/group/formosa
|
23
|
-
|
24
|
-
TODO - create Google Group - formosa
|
25
|
-
|
26
40
|
h2. How to submit patches
|
27
41
|
|
28
42
|
Read the "8 steps for fixing other people's code":http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/ and for section "8b: Submit patch to Google Groups":http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups, use the Google Group above.
|
@@ -31,8 +45,8 @@ The trunk repository is <code>svn://rubyforge.org/var/svn/formosa/trunk</code> f
|
|
31
45
|
|
32
46
|
h2. License
|
33
47
|
|
34
|
-
This code is free to use under the terms of the
|
48
|
+
This code is free to use under the terms of the New BSD license.
|
35
49
|
|
36
50
|
h2. Contact
|
37
51
|
|
38
|
-
Comments are welcome. Send an email to
|
52
|
+
Comments are welcome. Send an email to lukhnos (at) gmail (dot) com.
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.4
|
3
3
|
specification_version: 1
|
4
4
|
name: formosa
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0
|
7
|
-
date:
|
6
|
+
version: 0.2.0
|
7
|
+
date: 2008-01-20 00:00:00 +08:00
|
8
8
|
summary: A collection of libraries for Taiwanese languages processing
|
9
9
|
require_paths:
|
10
10
|
- lib
|