formosa 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -565,12 +565,17 @@ namespace LibHolo
565
565
  if (FLV("a")) SETLOUDEST(p);
566
566
 
567
567
  // the last "ere" override
568
- if (end >= 3) {
569
- if (_symvec[end-1].symbolInLowerCase() == "e" && _symvec[end-2].symbolInLowerCase() == "r" && _symvec[end-3].symbolInLowerCase() == "e")
570
- {
571
- SETLOUDEST(end-1);
572
- }
573
- }
568
+ string lastSymbolStr = (end > 0) ? _symvec[end-1].symbolInLowerCase() : string(); // guard: end-1 would wrap around on an empty syllable, and this now runs before the loudestVowel==end early return
569
+
570
+ if (end >= 4 && (lastSymbolStr=="t" || lastSymbolStr=="p" || lastSymbolStr=="k" || lastSymbolStr=="h") &&
571
+ _symvec[end-2].symbolInLowerCase() == "e" && _symvec[end-3].symbolInLowerCase() == "r" && _symvec[end-4].symbolInLowerCase() == "e")
572
+ {
573
+ SETLOUDEST(end-2);
574
+ }
575
+ else if (end >= 3 && lastSymbolStr == "e" && _symvec[end-2].symbolInLowerCase() == "r" && _symvec[end-3].symbolInLowerCase() == "e")
576
+ {
577
+ SETLOUDEST(end-1);
578
+ }
574
579
 
575
580
  if (loudestVowel==end) return;
576
581
  // fprintf(stderr, "found loudest vowel=%d (%s), loudest tone=%d\n", loudestVowel, _symvec[loudestVowel].symbol().c_str(), loudestTone);
@@ -580,8 +585,6 @@ namespace LibHolo
580
585
 
581
586
  for (unsigned int i=0; i<end; i++) _symvec[i].setTone(0);
582
587
 
583
- string lastSymbolStr = _symvec[end-1].symbolInLowerCase();
584
-
585
588
  // if the symbol is "i", and there's a next "u", we shift
586
589
  // the vowel to "u"
587
590
 
@@ -646,12 +649,12 @@ namespace LibHolo
646
649
  // detect case
647
650
  if (str1[0] == tolower(str1[0])) {
648
651
  syl.insertCharacterAtCursor('o', sym1.tone());
649
- syl.insertCharacterAtCursor('u');
652
+ syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'u' : 'U');
650
653
  }
651
654
  else
652
655
  {
653
656
  syl.insertCharacterAtCursor('O', sym1.tone());
654
- syl.insertCharacterAtCursor('U');
657
+ syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'u' : 'U');
655
658
  }
656
659
  continue;
657
660
  }
@@ -667,11 +670,11 @@ namespace LibHolo
667
670
  // detect case
668
671
  if (str1[0] == tolower(str1[0])) {
669
672
  syl.insertCharacterAtCursor('c');
670
- syl.insertCharacterAtCursor('h');
673
+ syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 'h' : 'H');
671
674
  }
672
675
  else {
673
676
  syl.insertCharacterAtCursor('C');
674
- syl.insertCharacterAtCursor('H');
677
+ syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 'h' : 'H');
675
678
  }
676
679
 
677
680
  i++;
@@ -683,11 +686,11 @@ namespace LibHolo
683
686
  // detect case
684
687
  if (str1[0] == tolower(str1[0])) {
685
688
  syl.insertCharacterAtCursor('o', sym1.tone());
686
- syl.insertCharacterAtCursor('e', sym2.tone());
689
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
687
690
  }
688
691
  else {
689
692
  syl.insertCharacterAtCursor('O', sym1.tone());
690
- syl.insertCharacterAtCursor('E', sym2.tone());
693
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
691
694
  }
692
695
 
693
696
  i++;
@@ -699,11 +702,11 @@ namespace LibHolo
699
702
  // detect case
700
703
  if (str1[0] == tolower(str1[0])) {
701
704
  syl.insertCharacterAtCursor('o', sym1.tone());
702
- syl.insertCharacterAtCursor('a', sym2.tone());
705
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
703
706
  }
704
707
  else {
705
708
  syl.insertCharacterAtCursor('O', sym1.tone());
706
- syl.insertCharacterAtCursor('A', sym2.tone());
709
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
707
710
  }
708
711
 
709
712
  i++;
@@ -715,11 +718,11 @@ namespace LibHolo
715
718
  // detect case
716
719
  if (str1[0] == tolower(str1[0])) {
717
720
  syl.insertCharacterAtCursor('e', sym1.tone());
718
- syl.insertCharacterAtCursor('k', sym2.tone());
721
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
719
722
  }
720
723
  else {
721
724
  syl.insertCharacterAtCursor('E', sym1.tone());
722
- syl.insertCharacterAtCursor('K', sym2.tone());
725
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
723
726
  }
724
727
 
725
728
  i++;
@@ -736,13 +739,13 @@ namespace LibHolo
736
739
  // detect case
737
740
  if (str1[0] == tolower(str1[0])) {
738
741
  syl.insertCharacterAtCursor('e', sym1.tone());
739
- syl.insertCharacterAtCursor('n', sym2.tone());
740
- syl.insertCharacterAtCursor('g', sym3.tone());
742
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
743
+ syl.insertCharacterAtCursor(str3[0], sym3.tone());
741
744
  }
742
745
  else {
743
746
  syl.insertCharacterAtCursor('E', sym1.tone());
744
- syl.insertCharacterAtCursor('N', sym2.tone());
745
- syl.insertCharacterAtCursor('G', sym3.tone());
747
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
748
+ syl.insertCharacterAtCursor(str3[0], sym3.tone());
746
749
  }
747
750
 
748
751
  i+=2;
@@ -753,12 +756,12 @@ namespace LibHolo
753
756
  if (lowstr1=="o" && lowstr2=="u" && lowstr3=="h") {
754
757
  // detect case
755
758
  if (str2[0] == tolower(str2[0])) {
756
- syl.insertCharacterAtCursor('o', sym1.tone());
757
- syl.insertCharacterAtCursor('h', sym2.tone());
759
+ syl.insertCharacterAtCursor(str1[0], sym1.tone());
760
+ syl.insertCharacterAtCursor(str3[0], sym2.tone());
758
761
  }
759
762
  else {
760
- syl.insertCharacterAtCursor('O', sym1.tone());
761
- syl.insertCharacterAtCursor('H', sym2.tone());
763
+ syl.insertCharacterAtCursor(str1[0], sym1.tone());
764
+ syl.insertCharacterAtCursor(str3[0], sym2.tone());
762
765
  }
763
766
 
764
767
  i+=2;
@@ -799,12 +802,12 @@ namespace LibHolo
799
802
  // detect case
800
803
  if (str1[0] == tolower(str1[0])) {
801
804
  syl.insertCharacterAtCursor('o', sym1.tone());
802
- syl.insertCharacterAtCursor('o');
805
+ syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'o' : 'O');
803
806
  }
804
807
  else
805
808
  {
806
809
  syl.insertCharacterAtCursor('O', sym1.tone());
807
- syl.insertCharacterAtCursor('O');
810
+ syl.insertCharacterAtCursor(str1[1] == tolower(str1[1]) ? 'o' : 'O');
808
811
  }
809
812
  continue;
810
813
  }
@@ -820,11 +823,11 @@ namespace LibHolo
820
823
  // detect case
821
824
  if (str1[0] == tolower(str1[0])) {
822
825
  syl.insertCharacterAtCursor('t');
823
- syl.insertCharacterAtCursor('s');
826
+ syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 's' : 'H');
824
827
  }
825
828
  else {
826
829
  syl.insertCharacterAtCursor('T');
827
- syl.insertCharacterAtCursor('S');
830
+ syl.insertCharacterAtCursor(str2[0] == tolower(str2[0]) ? 's' : 'H');
828
831
  }
829
832
 
830
833
  i++;
@@ -836,11 +839,11 @@ namespace LibHolo
836
839
  // detect case
837
840
  if (str1[0] == tolower(str1[0])) {
838
841
  syl.insertCharacterAtCursor('u', sym1.tone());
839
- syl.insertCharacterAtCursor('e', sym2.tone());
842
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
840
843
  }
841
844
  else {
842
845
  syl.insertCharacterAtCursor('U', sym1.tone());
843
- syl.insertCharacterAtCursor('E', sym2.tone());
846
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
844
847
  }
845
848
 
846
849
  i++;
@@ -852,11 +855,11 @@ namespace LibHolo
852
855
  // detect case
853
856
  if (str1[0] == tolower(str1[0])) {
854
857
  syl.insertCharacterAtCursor('u', sym1.tone());
855
- syl.insertCharacterAtCursor('a', sym2.tone());
858
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
856
859
  }
857
860
  else {
858
861
  syl.insertCharacterAtCursor('U', sym1.tone());
859
- syl.insertCharacterAtCursor('A', sym2.tone());
862
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
860
863
  }
861
864
 
862
865
  i++;
@@ -868,11 +871,11 @@ namespace LibHolo
868
871
  // detect case
869
872
  if (str1[0] == tolower(str1[0])) {
870
873
  syl.insertCharacterAtCursor('i', sym1.tone());
871
- syl.insertCharacterAtCursor('k', sym2.tone());
874
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
872
875
  }
873
876
  else {
874
877
  syl.insertCharacterAtCursor('I', sym1.tone());
875
- syl.insertCharacterAtCursor('K', sym2.tone());
878
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
876
879
  }
877
880
 
878
881
  i++;
@@ -889,13 +892,13 @@ namespace LibHolo
889
892
  // detect case
890
893
  if (str1[0] == tolower(str1[0])) {
891
894
  syl.insertCharacterAtCursor('i', sym1.tone());
892
- syl.insertCharacterAtCursor('n', sym2.tone());
893
- syl.insertCharacterAtCursor('g', sym3.tone());
895
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
896
+ syl.insertCharacterAtCursor(str3[0], sym3.tone());
894
897
  }
895
898
  else {
896
899
  syl.insertCharacterAtCursor('I', sym1.tone());
897
- syl.insertCharacterAtCursor('N', sym2.tone());
898
- syl.insertCharacterAtCursor('G', sym3.tone());
900
+ syl.insertCharacterAtCursor(str2[0], sym2.tone());
901
+ syl.insertCharacterAtCursor(str3[0], sym3.tone());
899
902
  }
900
903
 
901
904
  i+=2;
@@ -8,7 +8,7 @@ using namespace LibHolo;
8
8
 
9
9
  #include "ruby.h"
10
10
 
11
- VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable)
11
+ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable, int forcePOJStyle)
12
12
  {
13
13
  int c;
14
14
  bool composing = false;
@@ -45,8 +45,11 @@ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable)
45
45
 
46
46
  if (outputType == POJSyllable)
47
47
  output += syl.convertToPOJSyllable().composedForm();
48
- else
49
- output += syl.convertToTLSyllable().composedForm();
48
+ else {
49
+ HoloSyllable tl = syl.convertToTLSyllable();
50
+ tl.setForcePOJStyle(forcePOJStyle ? true : false);
51
+ output += tl.composedForm();
52
+ }
50
53
 
51
54
  if (emitchar) {
52
55
  output += string(1, emitchar);
@@ -4,7 +4,7 @@
4
4
  extern "C" {
5
5
  #endif
6
6
 
7
- VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable);
7
+ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable, int forcePOJStyle);
8
8
 
9
9
  #ifdef __cplusplus
10
10
  }
@@ -5,16 +5,17 @@
5
5
  VALUE syllable_composer = Qnil;
6
6
 
7
7
  void Init_native_syllable_composer();
8
- VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable);
8
+ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable, VALUE rForcePOJStyleOutput);
9
9
 
10
10
  void Init_native_syllable_composer() {
11
11
  syllable_composer = rb_define_module("NativeSyllableComposer");
12
- rb_define_singleton_method(syllable_composer, "compose", native_syllable_composer_compose, 3);
12
+ rb_define_singleton_method(syllable_composer, "compose", native_syllable_composer_compose, 4);
13
13
  }
14
14
 
15
- VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable) {
15
+ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable, VALUE rForcePOJStyleOutput) {
16
16
  int inputType = NUM2INT(rInputType);
17
17
  int outputType = NUM2INT(rOutputType);
18
+ int forcePOJStyleOutput = NUM2INT(rForcePOJStyleOutput);
18
19
 
19
20
  VALUE rStr = StringValue(rSyllable);
20
21
 
@@ -23,10 +24,10 @@ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutp
23
24
 
24
25
  if (!rStrPtr) return Qnil;
25
26
 
26
- char *string = (char*)calloc(1, rStrLen);
27
- memcpy(string, rStrPtr,rStrLen);
27
+ char *string = (char*)calloc(1, rStrLen + 1);
28
+ memcpy(string, rStrPtr, rStrLen);
28
29
 
29
- VALUE result = ComposeTLSyllable(inputType, outputType, string);
30
+ VALUE result = ComposeTLSyllable(inputType, outputType, string, forcePOJStyleOutput);
30
31
  free(string);
31
32
 
32
33
  return result;
@@ -16,8 +16,8 @@ module Formosa
16
16
  # * syllable: the query form of the syllable, such as "goa2", "tai5"
17
17
  #
18
18
  # Conversion is done automatically when input and output types are different
19
- def self.compose_syllable(input_type, output_type, syllable)
20
- NativeSyllableComposer.compose(input_type, output_type, syllable)
19
+ def self.compose_syllable(input_type, output_type, syllable, force_poj_style_output = false)
20
+ NativeSyllableComposer.compose(input_type, output_type, syllable, force_poj_style_output ? 1 : 0)
21
21
  end
22
22
 
23
23
  TONE_SAMPLE = ["a", "a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋"]
@@ -1,8 +1,8 @@
1
1
  module Formosa #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 0
5
- TINY = 1
4
+ MINOR = 2
5
+ TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/website/index.html CHANGED
@@ -35,12 +35,22 @@
35
35
  <p>Get Version</p>
36
36
  <a href="http://rubyforge.org/projects/formosa" class="numbers">0.0.1</a>
37
37
  </div>
38
- <h1>&#x2192; &#8216;formosa&#8217;</h1>
38
+ <p>&#x2192; &#8216;Taiwanese language processing&#8217;</p>
39
39
 
40
40
 
41
41
  <h2>What</h2>
42
42
 
43
43
 
44
+ <p>Formosa is a Ruby library for processing Taiwanese languages. Major languages
45
+ spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
46
+ people. Formosa is the Ruby branch of the lib-formosa project
47
+ (http://code.google.com/p/lib-formosa/).</p>
48
+
49
+
50
+ <p>Currently, we focus on the processing of the Holo (Southern Min) language,
51
+ with necessary tools such as SyllableComposer available for general use.</p>
52
+
53
+
44
54
  <h2>Installing</h2>
45
55
 
46
56
 
@@ -52,13 +62,18 @@
52
62
  <h2>Demonstration of usage</h2>
53
63
 
54
64
 
55
- <h2>Forum</h2>
65
+ <pre>
66
+ $KCODE="u" # set the Ruby environment to use UTF-8
67
+ require "rubygems"
68
+ require "formosa"
69
+ include Formosa::Holo
70
+ poj = SyllableType::POJ
71
+ tl = SyllableType::TL
72
+ SyllableUtility.compose_syllable(poj, tl, "goa2") # =&gt; guá
73
+ SyllableUtility.compose_syllable(tl, poj, "gua2") # =&gt; goá
74
+ </pre>
56
75
 
57
-
58
- <p><a href="http://groups.google.com/group/formosa">http://groups.google.com/group/formosa</a></p>
59
-
60
-
61
- <p><span class="caps">TODO</span> &#8211; create Google Group &#8211; formosa</p>
76
+ <h2>Forum</h2>
62
77
 
63
78
 
64
79
  <h2>How to submit patches</h2>
@@ -73,15 +88,15 @@
73
88
  <h2>License</h2>
74
89
 
75
90
 
76
- <p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
91
+ <p>This code is free to use under the terms of the New <span class="caps">BSD</span> license.</p>
77
92
 
78
93
 
79
94
  <h2>Contact</h2>
80
95
 
81
96
 
82
- <p>Comments are welcome. Send an email to <a href="mailto:FIXME"><span class="caps">FIXME</span> full name</a> email.</p>
97
+ <p>Comments are welcome. Send an email to lukhnos (at) gmail (dot) com.</p>
83
98
  <p class="coda">
84
- <a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 29th July 2007<br>
99
+ <a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 30th July 2007<br>
85
100
  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
86
101
  </p>
87
102
  </div>
data/website/index.txt CHANGED
@@ -1,11 +1,20 @@
1
1
  h1. formosa
2
2
 
3
- h1. &#x2192; 'formosa'
3
+ &#x2192; 'Taiwanese language processing'
4
4
 
5
5
 
6
6
  h2. What
7
7
 
8
8
 
9
+ Formosa is a Ruby library for processing Taiwanese languages. Major languages
10
+ spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
11
+ people. Formosa is the Ruby branch of the lib-formosa project
12
+ (http://code.google.com/p/lib-formosa/).
13
+
14
+ Currently, we focus on the processing of the Holo (Southern Min) language,
15
+ with necessary tools such as SyllableComposer available for general use.
16
+
17
+
9
18
  h2. Installing
10
19
 
11
20
  <pre syntax="ruby">sudo gem install formosa</pre>
@@ -15,14 +24,19 @@ h2. The basics
15
24
 
16
25
  h2. Demonstration of usage
17
26
 
18
-
27
+ <pre>
28
+ $KCODE="u" # set the Ruby environment to use UTF-8
29
+ require "rubygems"
30
+ require "formosa"
31
+ include Formosa::Holo
32
+ poj = SyllableType::POJ
33
+ tl = SyllableType::TL
34
+ SyllableUtility.compose_syllable(poj, tl, "goa2") # => guá
35
+ SyllableUtility.compose_syllable(tl, poj, "gua2") # => goá
36
+ </pre>
19
37
 
20
38
  h2. Forum
21
39
 
22
- "http://groups.google.com/group/formosa":http://groups.google.com/group/formosa
23
-
24
- TODO - create Google Group - formosa
25
-
26
40
  h2. How to submit patches
27
41
 
28
42
  Read the "8 steps for fixing other people's code":http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/ and for section "8b: Submit patch to Google Groups":http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups, use the Google Group above.
@@ -31,8 +45,8 @@ The trunk repository is <code>svn://rubyforge.org/var/svn/formosa/trunk</code> f
31
45
 
32
46
  h2. License
33
47
 
34
- This code is free to use under the terms of the MIT license.
48
+ This code is free to use under the terms of the New BSD license.
35
49
 
36
50
  h2. Contact
37
51
 
38
- Comments are welcome. Send an email to "FIXME full name":mailto:FIXME email.
52
+ Comments are welcome. Send an email to lukhnos (at) gmail (dot) com.
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.2
2
+ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: formosa
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.1
7
- date: 2007-07-30 00:00:00 +08:00
6
+ version: 0.2.0
7
+ date: 2008-01-20 00:00:00 +08:00
8
8
  summary: A collection of libraries for Taiwanese languages processing
9
9
  require_paths:
10
10
  - lib