formosa 0.0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -565,12 +565,17 @@ namespace LibHolo
565
565
  if (FLV("a")) SETLOUDEST(p);
566
566
 
567
567
  // the last "ere" override
568
- if (end >= 3) {
569
- if (_symvec[end-1].symbolInLowerCase() == "e" && _symvec[end-2].symbolInLowerCase() == "r" && _symvec[end-3].symbolInLowerCase() == "e")
570
- {
571
- SETLOUDEST(end-1);
572
- }
573
- }
568
+ string lastSymbolStr = (end >= 1) ? _symvec[end-1].symbolInLowerCase() : string(); // guard the index: this read now happens before the loudestVowel==end early return, so end may still be 0 here; an empty string makes both "ere" checks below fall through
569
+
570
+ if (end >= 4 && (lastSymbolStr=="t" || lastSymbolStr=="p" || lastSymbolStr=="k" || lastSymbolStr=="h") &&
571
+ _symvec[end-2].symbolInLowerCase() == "e" && _symvec[end-3].symbolInLowerCase() == "r" && _symvec[end-4].symbolInLowerCase() == "e")
572
+ {
573
+ SETLOUDEST(end-2);
574
+ }
575
+ else if (end >= 3 && lastSymbolStr == "e" && _symvec[end-2].symbolInLowerCase() == "r" && _symvec[end-3].symbolInLowerCase() == "e")
576
+ {
577
+ SETLOUDEST(end-1);
578
+ }
574
579
 
575
580
  if (loudestVowel==end) return;
576
581
  // fprintf(stderr, "found loudest vowel=%d (%s), loudest tone=%d\n", loudestVowel, _symvec[loudestVowel].symbol().c_str(), loudestTone);
@@ -580,8 +585,6 @@ namespace LibHolo
580
585
 
581
586
  for (unsigned int i=0; i<end; i++) _symvec[i].setTone(0);
582
587
 
583
- string lastSymbolStr = _symvec[end-1].symbolInLowerCase();
584
-
585
588
  // if the symbol is "i", and there's a next "u", we shift
586
589
  // the vowel to "u"
587
590
 
@@ -646,12 +649,12 @@ namespace LibHolo
646
649
  // detect case
647
650
  if (str1[0] == tolower(str1[0])) {
648
651
  syl.insertCharacterAtCursor('o', sym1.tone());
649
- syl.insertCharacterAtCursor('u');
652
+ syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'u' : 'U');
650
653
  }
651
654
  else
652
655
  {
653
656
  syl.insertCharacterAtCursor('O', sym1.tone());
654
- syl.insertCharacterAtCursor('U');
657
+ syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'u' : 'U');
655
658
  }
656
659
  continue;
657
660
  }
@@ -667,11 +670,11 @@ namespace LibHolo
667
670
  // detect case
668
671
  if (str1[0] == tolower(str1[0])) {
669
672
  syl.insertCharacterAtCursor('c');
670
- syl.insertCharacterAtCursor('h');
673
+ syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 'h' : 'H');
671
674
  }
672
675
  else {
673
676
  syl.insertCharacterAtCursor('C');
674
- syl.insertCharacterAtCursor('H');
677
+ syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 'h' : 'H');
675
678
  }
676
679
 
677
680
  i++;
@@ -683,11 +686,11 @@ namespace LibHolo
683
686
  // detect case
684
687
  if (str1[0] == tolower(str1[0])) {
685
688
  syl.insertCharacterAtCursor('o', sym1.tone());
686
- syl.insertCharacterAtCursor('e', sym2.tone());
689
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
687
690
  }
688
691
  else {
689
692
  syl.insertCharacterAtCursor('O', sym1.tone());
690
- syl.insertCharacterAtCursor('E', sym2.tone());
693
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
691
694
  }
692
695
 
693
696
  i++;
@@ -699,11 +702,11 @@ namespace LibHolo
699
702
  // detect case
700
703
  if (str1[0] == tolower(str1[0])) {
701
704
  syl.insertCharacterAtCursor('o', sym1.tone());
702
- syl.insertCharacterAtCursor('a', sym2.tone());
705
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
703
706
  }
704
707
  else {
705
708
  syl.insertCharacterAtCursor('O', sym1.tone());
706
- syl.insertCharacterAtCursor('A', sym2.tone());
709
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
707
710
  }
708
711
 
709
712
  i++;
@@ -715,11 +718,11 @@ namespace LibHolo
715
718
  // detect case
716
719
  if (str1[0] == tolower(str1[0])) {
717
720
  syl.insertCharacterAtCursor('e', sym1.tone());
718
- syl.insertCharacterAtCursor('k', sym2.tone());
721
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
719
722
  }
720
723
  else {
721
724
  syl.insertCharacterAtCursor('E', sym1.tone());
722
- syl.insertCharacterAtCursor('K', sym2.tone());
725
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
723
726
  }
724
727
 
725
728
  i++;
@@ -736,13 +739,13 @@ namespace LibHolo
736
739
  // detect case
737
740
  if (str1[0] == tolower(str1[0])) {
738
741
  syl.insertCharacterAtCursor('e', sym1.tone());
739
- syl.insertCharacterAtCursor('n', sym2.tone());
740
- syl.insertCharacterAtCursor('g', sym3.tone());
742
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
743
+ syl.insertCharacterAtCursor(str3[0], sym3.tone());
741
744
  }
742
745
  else {
743
746
  syl.insertCharacterAtCursor('E', sym1.tone());
744
- syl.insertCharacterAtCursor('N', sym2.tone());
745
- syl.insertCharacterAtCursor('G', sym3.tone());
747
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
748
+ syl.insertCharacterAtCursor(str3[0], sym3.tone());
746
749
  }
747
750
 
748
751
  i+=2;
@@ -753,12 +756,12 @@ namespace LibHolo
753
756
  if (lowstr1=="o" && lowstr2=="u" && lowstr3=="h") {
754
757
  // detect case
755
758
  if (str2[0] == tolower(str2[0])) {
756
- syl.insertCharacterAtCursor('o', sym1.tone());
757
- syl.insertCharacterAtCursor('h', sym2.tone());
759
+ syl.insertCharacterAtCursor(str1[0], sym1.tone());
760
+ syl.insertCharacterAtCursor(str3[0], sym2.tone());
758
761
  }
759
762
  else {
760
- syl.insertCharacterAtCursor('O', sym1.tone());
761
- syl.insertCharacterAtCursor('H', sym2.tone());
763
+ syl.insertCharacterAtCursor(str1[0], sym1.tone());
764
+ syl.insertCharacterAtCursor(str3[0], sym2.tone());
762
765
  }
763
766
 
764
767
  i+=2;
@@ -799,12 +802,12 @@ namespace LibHolo
799
802
  // detect case
800
803
  if (str1[0] == tolower(str1[0])) {
801
804
  syl.insertCharacterAtCursor('o', sym1.tone());
802
- syl.insertCharacterAtCursor('o');
805
+ syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'o' : 'O');
803
806
  }
804
807
  else
805
808
  {
806
809
  syl.insertCharacterAtCursor('O', sym1.tone());
807
- syl.insertCharacterAtCursor('O');
810
+ syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'o' : 'O');
808
811
  }
809
812
  continue;
810
813
  }
@@ -820,11 +823,11 @@ namespace LibHolo
820
823
  // detect case
821
824
  if (str1[0] == tolower(str1[0])) {
822
825
  syl.insertCharacterAtCursor('t');
823
- syl.insertCharacterAtCursor('s');
826
+ syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 's' : 'S'); // second letter of "ts" follows the case of the source's second letter; the uppercase form is 'S' (was 'H', carried over from the "ch" branch)
824
827
  }
825
828
  else {
826
829
  syl.insertCharacterAtCursor('T');
827
- syl.insertCharacterAtCursor('S');
830
+ syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 's' : 'S'); // second letter of "TS" follows the case of the source's second letter; the uppercase form is 'S' (was 'H', carried over from the "ch" branch)
828
831
  }
829
832
 
830
833
  i++;
@@ -836,11 +839,11 @@ namespace LibHolo
836
839
  // detect case
837
840
  if (str1[0] == tolower(str1[0])) {
838
841
  syl.insertCharacterAtCursor('u', sym1.tone());
839
- syl.insertCharacterAtCursor('e', sym2.tone());
842
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
840
843
  }
841
844
  else {
842
845
  syl.insertCharacterAtCursor('U', sym1.tone());
843
- syl.insertCharacterAtCursor('E', sym2.tone());
846
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
844
847
  }
845
848
 
846
849
  i++;
@@ -852,11 +855,11 @@ namespace LibHolo
852
855
  // detect case
853
856
  if (str1[0] == tolower(str1[0])) {
854
857
  syl.insertCharacterAtCursor('u', sym1.tone());
855
- syl.insertCharacterAtCursor('a', sym2.tone());
858
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
856
859
  }
857
860
  else {
858
861
  syl.insertCharacterAtCursor('U', sym1.tone());
859
- syl.insertCharacterAtCursor('A', sym2.tone());
862
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
860
863
  }
861
864
 
862
865
  i++;
@@ -868,11 +871,11 @@ namespace LibHolo
868
871
  // detect case
869
872
  if (str1[0] == tolower(str1[0])) {
870
873
  syl.insertCharacterAtCursor('i', sym1.tone());
871
- syl.insertCharacterAtCursor('k', sym2.tone());
874
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
872
875
  }
873
876
  else {
874
877
  syl.insertCharacterAtCursor('I', sym1.tone());
875
- syl.insertCharacterAtCursor('K', sym2.tone());
878
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
876
879
  }
877
880
 
878
881
  i++;
@@ -889,13 +892,13 @@ namespace LibHolo
889
892
  // detect case
890
893
  if (str1[0] == tolower(str1[0])) {
891
894
  syl.insertCharacterAtCursor('i', sym1.tone());
892
- syl.insertCharacterAtCursor('n', sym2.tone());
893
- syl.insertCharacterAtCursor('g', sym3.tone());
895
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
896
+ syl.insertCharacterAtCursor(str3[0], sym3.tone());
894
897
  }
895
898
  else {
896
899
  syl.insertCharacterAtCursor('I', sym1.tone());
897
- syl.insertCharacterAtCursor('N', sym2.tone());
898
- syl.insertCharacterAtCursor('G', sym3.tone());
900
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
901
+ syl.insertCharacterAtCursor(str3[0], sym3.tone());
899
902
  }
900
903
 
901
904
  i+=2;
@@ -8,7 +8,7 @@ using namespace LibHolo;
8
8
 
9
9
  #include "ruby.h"
10
10
 
11
- VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable)
11
+ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable, int forcePOJStyle)
12
12
  {
13
13
  int c;
14
14
  bool composing = false;
@@ -45,8 +45,11 @@ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable)
45
45
 
46
46
  if (outputType == POJSyllable)
47
47
  output += syl.convertToPOJSyllable().composedForm();
48
- else
49
- output += syl.convertToTLSyllable().composedForm();
48
+ else {
49
+ HoloSyllable tl = syl.convertToTLSyllable();
50
+ tl.setForcePOJStyle(forcePOJStyle ? true : false);
51
+ output += tl.composedForm();
52
+ }
50
53
 
51
54
  if (emitchar) {
52
55
  output += string(1, emitchar);
@@ -4,7 +4,7 @@
4
4
  extern "C" {
5
5
  #endif
6
6
 
7
- VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable);
7
+ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable, int forcePOJStyle);
8
8
 
9
9
  #ifdef __cplusplus
10
10
  }
@@ -5,16 +5,17 @@
5
5
  VALUE syllable_composer = Qnil;
6
6
 
7
7
  void Init_native_syllable_composer();
8
- VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable);
8
+ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable, VALUE rForcePOJStyleOutput);
9
9
 
10
10
  void Init_native_syllable_composer() {
11
11
  syllable_composer = rb_define_module("NativeSyllableComposer");
12
- rb_define_singleton_method(syllable_composer, "compose", native_syllable_composer_compose, 3);
12
+ rb_define_singleton_method(syllable_composer, "compose", native_syllable_composer_compose, 4);
13
13
  }
14
14
 
15
- VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable) {
15
+ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable, VALUE rForcePOJStyleOutput) {
16
16
  int inputType = NUM2INT(rInputType);
17
17
  int outputType = NUM2INT(rOutputType);
18
+ int forcePOJStyleOutput = NUM2INT(rForcePOJStyleOutput);
18
19
 
19
20
  VALUE rStr = StringValue(rSyllable);
20
21
 
@@ -23,10 +24,10 @@ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutp
23
24
 
24
25
  if (!rStrPtr) return Qnil;
25
26
 
26
- char *string = (char*)calloc(1, rStrLen);
27
- memcpy(string, rStrPtr,rStrLen);
27
+ char *string = (char*)calloc(1, rStrLen + 1);
28
+ memcpy(string, rStrPtr, rStrLen);
28
29
 
29
- VALUE result = ComposeTLSyllable(inputType, outputType, string);
30
+ VALUE result = ComposeTLSyllable(inputType, outputType, string, forcePOJStyleOutput);
30
31
  free(string);
31
32
 
32
33
  return result;
@@ -16,8 +16,8 @@ module Formosa
16
16
  # * syllable: the query form of the syllable, such as "goa2", "tai5"
17
17
  #
18
18
  # Conversion is done automatically when input and output types are different
19
- def self.compose_syllable(input_type, output_type, syllable)
20
- NativeSyllableComposer.compose(input_type, output_type, syllable)
19
+ def self.compose_syllable(input_type, output_type, syllable, force_poj_style_output = false)
20
+ NativeSyllableComposer.compose(input_type, output_type, syllable, force_poj_style_output ? 1 : 0)
21
21
  end
22
22
 
23
23
  TONE_SAMPLE = ["a", "a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋"]
@@ -1,8 +1,8 @@
1
1
  module Formosa #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 0
5
- TINY = 1
4
+ MINOR = 2
5
+ TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/website/index.html CHANGED
@@ -35,12 +35,22 @@
35
35
  <p>Get Version</p>
36
36
  <a href="http://rubyforge.org/projects/formosa" class="numbers">0.0.1</a>
37
37
  </div>
38
- <h1>&#x2192; &#8216;formosa&#8217;</h1>
38
+ <p>&#x2192; &#8216;Taiwanese language processing&#8217;</p>
39
39
 
40
40
 
41
41
  <h2>What</h2>
42
42
 
43
43
 
44
+ <p>Formosa is a Ruby library for processing Taiwanese languages. Major languages
45
+ spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
46
+ people. Formosa is the Ruby branch of the lib-formosa project
47
+ (http://code.google.com/p/lib-formosa/).</p>
48
+
49
+
50
+ <p>Currently, we focus on the processing of the Holo (Southern Min) language,
51
+ with necessary tools such as SyllableComposer available for general use.</p>
52
+
53
+
44
54
  <h2>Installing</h2>
45
55
 
46
56
 
@@ -52,13 +62,18 @@
52
62
  <h2>Demonstration of usage</h2>
53
63
 
54
64
 
55
- <h2>Forum</h2>
65
+ <pre>
66
+ $KCODE="u" # set the Ruby environment to use UTF-8
67
+ require "rubygems"
68
+ require "formosa"
69
+ include Formosa::Holo
70
+ poj = SyllableType::POJ
71
+ tl = SyllableType::TL
72
+ SyllableUtility.compose_syllable(poj, tl, "goa2") # =&gt; guá
73
+ SyllableUtility.compose_syllable(tl, poj, "gua2") # =&gt; goá
74
+ </pre>
56
75
 
57
-
58
- <p><a href="http://groups.google.com/group/formosa">http://groups.google.com/group/formosa</a></p>
59
-
60
-
61
- <p><span class="caps">TODO</span> &#8211; create Google Group &#8211; formosa</p>
76
+ <h2>Forum</h2>
62
77
 
63
78
 
64
79
  <h2>How to submit patches</h2>
@@ -73,15 +88,15 @@
73
88
  <h2>License</h2>
74
89
 
75
90
 
76
- <p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
91
+ <p>This code is free to use under the terms of the New <span class="caps">BSD</span> license.</p>
77
92
 
78
93
 
79
94
  <h2>Contact</h2>
80
95
 
81
96
 
82
- <p>Comments are welcome. Send an email to <a href="mailto:FIXME"><span class="caps">FIXME</span> full name</a> email.</p>
97
+ <p>Comments are welcome. Send an email to lukhnos (at) gmail (dot) com.</p>
83
98
  <p class="coda">
84
- <a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 29th July 2007<br>
99
+ <a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 30th July 2007<br>
85
100
  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
86
101
  </p>
87
102
  </div>
data/website/index.txt CHANGED
@@ -1,11 +1,20 @@
1
1
  h1. formosa
2
2
 
3
- h1. &#x2192; 'formosa'
3
+ &#x2192; 'Taiwanese language processing'
4
4
 
5
5
 
6
6
  h2. What
7
7
 
8
8
 
9
+ Formosa is a Ruby library for processing Taiwanese languages. Major languages
10
+ spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
11
+ people. Formosa is the Ruby branch of the lib-formosa project
12
+ (http://code.google.com/p/lib-formosa/).
13
+
14
+ Currently, we focus on the processing of the Holo (Southern Min) language,
15
+ with necessary tools such as SyllableComposer available for general use.
16
+
17
+
9
18
  h2. Installing
10
19
 
11
20
  <pre syntax="ruby">sudo gem install formosa</pre>
@@ -15,14 +24,19 @@ h2. The basics
15
24
 
16
25
  h2. Demonstration of usage
17
26
 
18
-
27
+ <pre>
28
+ $KCODE="u" # set the Ruby environment to use UTF-8
29
+ require "rubygems"
30
+ require "formosa"
31
+ include Formosa::Holo
32
+ poj = SyllableType::POJ
33
+ tl = SyllableType::TL
34
+ SyllableUtility.compose_syllable(poj, tl, "goa2") # => guá
35
+ SyllableUtility.compose_syllable(tl, poj, "gua2") # => goá
36
+ </pre>
19
37
 
20
38
  h2. Forum
21
39
 
22
- "http://groups.google.com/group/formosa":http://groups.google.com/group/formosa
23
-
24
- TODO - create Google Group - formosa
25
-
26
40
  h2. How to submit patches
27
41
 
28
42
  Read the "8 steps for fixing other people's code":http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/ and for section "8b: Submit patch to Google Groups":http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups, use the Google Group above.
@@ -31,8 +45,8 @@ The trunk repository is <code>svn://rubyforge.org/var/svn/formosa/trunk</code> f
31
45
 
32
46
  h2. License
33
47
 
34
- This code is free to use under the terms of the MIT license.
48
+ This code is free to use under the terms of the New BSD license.
35
49
 
36
50
  h2. Contact
37
51
 
38
- Comments are welcome. Send an email to "FIXME full name":mailto:FIXME email.
52
+ Comments are welcome. Send an email to lukhnos (at) gmail (dot) com.
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.2
2
+ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: formosa
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.1
7
- date: 2007-07-30 00:00:00 +08:00
6
+ version: 0.2.0
7
+ date: 2008-01-20 00:00:00 +08:00
8
8
  summary: A collection of libraries for Taiwanese languages processing
9
9
  require_paths:
10
10
  - lib