formosa 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,67 @@
1
+ #include "compose.h"
2
+ #include <string>
3
+ #include <stdio.h>
4
+ #include "LibHolo.h"
5
+
6
+ using namespace std;
7
+ using namespace LibHolo;
8
+
9
+ #include "ruby.h"
10
+
11
+ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable)
12
+ {
13
+ int c;
14
+ bool composing = false;
15
+ HoloSyllable syl;
16
+ string output;
17
+
18
+ if (inputType != 0 && inputType != 1)
19
+ syl.setInputType(TLSyllable);
20
+ else
21
+ syl.setInputType((SyllableType)inputType);
22
+
23
+ while (1) {
24
+ c = *syllable++;
25
+
26
+ if (isalpha(c)) {
27
+ if (!composing) composing = true;
28
+ syl.insertCharacterAtCursor(c);
29
+ }
30
+ else {
31
+ if (composing) {
32
+ int finaltone = 0;
33
+ int emitchar = 0;
34
+
35
+ // some UNIX doesn't have isnumber(c)
36
+ if (c >= '0' && c <= '9') {
37
+ finaltone = c - '0';
38
+ }
39
+ else {
40
+ // since it's non-numeric char, we'll emit it after the composed syllable
41
+ emitchar = c;
42
+ }
43
+
44
+ syl.normalize(finaltone);
45
+
46
+ if (outputType == POJSyllable)
47
+ output += syl.convertToPOJSyllable().composedForm();
48
+ else
49
+ output += syl.convertToTLSyllable().composedForm();
50
+
51
+ if (emitchar) {
52
+ output += string(1, emitchar);
53
+ }
54
+
55
+ composing = false;
56
+ syl.clear();
57
+ }
58
+ else {
59
+ if (c) output += string(1, c);
60
+ }
61
+ }
62
+
63
+ if (!c) break;
64
+ }
65
+
66
+ return rb_str_new2(output.c_str());
67
+ }
@@ -0,0 +1,11 @@
1
+ #include "ruby.h"
2
+
3
+ #ifdef __cplusplus
4
+ extern "C" {
5
+ #endif
6
+
7
+ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable);
8
+
9
+ #ifdef __cplusplus
10
+ }
11
+ #endif
@@ -0,0 +1,7 @@
1
require 'mkmf'

# The extension is linked with g++ instead of the C compiler driver.
# LDSHARED may name the compiler literally ("cc ...", "gcc ...") or through a
# make variable ("$(CC) ..."), so both spellings are rewritten.
#
# The literal form is matched as a whole word: a bare /cc/ would also hit the
# "cc" inside "gcc" and leave the non-existent command "gg++" behind.
CONFIG['LDSHARED'].gsub!(/\bg?cc\b/, "g++")
CONFIG['LDSHARED'].gsub!(/\(CC\)/, "(CXX)")

create_makefile "native_syllable_composer/native_syllable_composer"
@@ -0,0 +1,34 @@
1
+ // cf. http://www.rubyinside.com/how-to-create-a-ruby-extension-in-c-in-under-5-minutes-100.html
2
+ // cf. http://cesare.seesaa.net/article/32850625.html
3
+ #include "ruby.h"
4
+ #include "compose.h"
5
+ VALUE syllable_composer = Qnil;
6
+
7
+ void Init_native_syllable_composer();
8
+ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable);
9
+
10
+ void Init_native_syllable_composer() {
11
+ syllable_composer = rb_define_module("NativeSyllableComposer");
12
+ rb_define_singleton_method(syllable_composer, "compose", native_syllable_composer_compose, 3);
13
+ }
14
+
15
+ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable) {
16
+ int inputType = NUM2INT(rInputType);
17
+ int outputType = NUM2INT(rOutputType);
18
+
19
+ VALUE rStr = StringValue(rSyllable);
20
+
21
+ size_t rStrLen = RSTRING(rStr)->len;
22
+ char *rStrPtr = RSTRING(rStr)->ptr;
23
+
24
+ if (!rStrPtr) return Qnil;
25
+
26
+ char *string = (char*)calloc(1, rStrLen);
27
+ memcpy(string, rStrPtr,rStrLen);
28
+
29
+ VALUE result = ComposeTLSyllable(inputType, outputType, string);
30
+ free(string);
31
+
32
+ return result;
33
+ }
34
+
@@ -0,0 +1,86 @@
1
module Formosa
  # Holo language (Southern Min) module
  module Holo
    # Syllable types supported by Formosa
    module SyllableType
      POJ = 0
      TL = 1
    end

    # Holo syllable utility
    module SyllableUtility
      # Compose Holo syllable from the query form ("ASCII form")
      #
      # * input_type: can be either SyllableType::POJ or SyllableType::TL
      # * output_type: same as above
      # * syllable: the query form of the syllable, such as "goa2", "tai5"
      #
      # The work is delegated to NativeSyllableComposer.compose.
      # Conversion is done automatically when input and output types are different
      def self.compose_syllable(input_type, output_type, syllable)
        NativeSyllableComposer.compose(input_type, output_type, syllable)
      end

      # One sample vowel per tone; the array index is the tone number.
      TONE_SAMPLE = ["a", "a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋"]

      # Maps a tone mark (the second code point of a decomposed sample, packed
      # back into a UTF-8 string) to its tone number.
      LOOKUP_TABLE = {}

      # Walk the whole sample list so that the last entry (tone 9) is
      # registered as well. Samples that decompose to a single code point
      # carry no tone mark and are skipped.
      TONE_SAMPLE.each_with_index do |sample, tone|
        decomposed = sample.chars.decompose
        next unless decomposed.size > 1

        LOOKUP_TABLE[[decomposed[1]].pack("U")] = tone
      end

      # Converts a long text of composed syllables into the query form.
      #
      # The text is split on every character outside A-Z, a-z and \x7f-\xff
      # (the separators are kept), each piece is converted with
      # convert_syllable_into_query_form, and the results are joined back
      # together in order.
      def self.convert_text_into_query_form(input_type, input_string)
        pieces = input_string.split(/([^A-Za-z\x7f-\xff])/)
        pieces.map { |str| convert_syllable_into_query_form(input_type, str) }.join
      end

      # Converts a composed syllable (the "composed form") into the query form.
      # For example, "guá" is converted to "gua2"
      #
      # * input_type: SyllableType::TL spells "o" + U+0358 as "oo"; any other
      #   value spells it as "ou"
      # * string: the composed syllable
      def self.convert_syllable_into_query_form(input_type, string)
        # decomposed code points
        dcps = string.chars.decompose

        loudest_tone = 0
        composed = []

        # Tone marks are dropped from the output; the last one seen decides
        # the tone digit. Every other code point is kept.
        dcps.size.times do |index|
          code_point = dcps[index]

          if (tone = LOOKUP_TABLE[[code_point].pack("U")])
            loudest_tone = tone
          else
            composed << code_point
          end
        end

        result = composed.pack("U*") + (loudest_tone > 0 ? loudest_tone.to_s : "")

        # \315\230 is U+0358 written as its two UTF-8 bytes
        if input_type == SyllableType::TL
          result.gsub!(/O\315\230/, "OO")
          result.gsub!(/o\315\230/, "oo")
        else
          result.gsub!(/O\315\230/, "OU")
          result.gsub!(/o\315\230/, "ou")
        end

        result.gsub!(/ⁿ/, "nn")
        result
      end

    end
  end
end
@@ -0,0 +1,9 @@
1
module Formosa #:nodoc:
  module VERSION #:nodoc:
    MAJOR = 0
    MINOR = 0
    TINY  = 1

    # Dotted "MAJOR.MINOR.TINY" string. Frozen so that code holding a
    # reference to it cannot change the reported version in place.
    STRING = [MAJOR, MINOR, TINY].join('.').freeze
  end
end
data/lib/formosa.rb ADDED
@@ -0,0 +1,31 @@
1
+ # Authors:: Lukhnos D. Liu (mailto:lukhnos@handlino.com)
2
+ # Copyright:: Copyright (c) 2007 Lukhnos D. Liu
3
+ # License:: Distributed under the New BSD License
4
+ #
5
+ # Formosa is a Ruby library for processing Taiwanese languages. Major languages
6
+ # spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
7
+ # people. Formosa is the Ruby branch of the lib-formosa project
8
+ # (http://code.google.com/p/lib-formosa/).
9
+ #
10
+ # Currently, we focus on the processing of the Holo (Southern Min) language,
11
+ # with necessary tools such as SyllableUtility available for general use.
12
+ #
13
+ # The following example shows how to use Formosa:
14
+ #
15
+ # $KCODE="u" # set the Ruby environment to use UTF-8
16
+ # require "rubygems"
17
+ # require "formosa"
18
+ # include Formosa::Holo
19
+ # poj = SyllableType::POJ
20
+ # tl = SyllableType::TL
21
+ # SyllableUtility.compose_syllable(poj, tl, "goa2") # => guá
22
+ # SyllableUtility.compose_syllable(tl, poj, "gua2") # => goá
23
+
24
+ require 'active_support'
25
+ require 'formosa/version'
26
+ require "native_syllable_composer/native_syllable_composer"
27
+ require 'formosa/syllable_utility'
28
+
29
# Top-level namespace of the library; nothing is defined in this body.
module Formosa; end
31
+
data/scripts/txt2html ADDED
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'redcloth'
5
+ require 'syntax/convertors/html'
6
+ require 'erb'
7
+ require File.dirname(__FILE__) + '/../lib/formosa/version.rb'
8
+
9
+ version = Formosa::VERSION::STRING
10
+ download = 'http://rubyforge.org/projects/formosa'
11
+
12
# Adds English ordinal suffixes to integers.
#
# Integer is reopened instead of Fixnum: on Rubies that still have Fixnum it
# inherits from Integer, so existing callers keep working, and the patch also
# takes effect on Rubies where Fixnum is no longer defined.
class Integer
  # Returns the ordinal suffix for this number: 'st', 'nd', 'rd' or 'th'.
  def ordinal
    # teens: a remainder of 10..19 modulo 100 always takes 'th' (11th, 112th)
    return 'th' if (10..19).include?(self % 100)

    # others: decided by the last digit
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
25
+
26
class Time
  # Formats the receiver as "<day><ordinal suffix> <full month name> <year>",
  # with the three parts separated by single spaces.
  def pretty
    day = mday
    [day.to_s + day.ordinal, strftime('%B'), year].join(' ')
  end
end
31
+
32
# Converts +source+ with the Syntax HTML convertor selected for +syntax+, then
# removes a "<pre>" at the start of a line and a "</pre>" at the end of a line
# from the converted text.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  html = convertor.convert(source)
  html.gsub(%r!^<pre>|</pre>$!,'')
end
35
+
36
# Main script: renders a Textile source file through an ERB template and
# writes the resulting HTML to standard output.
#
# The template is evaluated with this script's binding, so the local variables
# assigned below (title, body, created, modified, ...) are its inputs; their
# names must not change.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'

else
  puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit!
end

# File.read closes the file once it has been read
template = ERB.new(File.read(template))

title = nil
body = nil
File.open(src) do |fsrc|
  # the first line is the title, the remainder is the body
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []
  # Swap every <pre>/<code> element that carries a syntax attribute for a
  # numbered placeholder. \1 makes the closing tag match the opening one.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html
  # Put the highlighted fragments back; (\d+) captures the placeholder number.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)