formosa 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +23 -0
- data/README.txt +18 -0
- data/Rakefile +127 -0
- data/lib/ext/native_syllable_composer/HoloVowels.h +242 -0
- data/lib/ext/native_syllable_composer/LibHolo.h +1223 -0
- data/lib/ext/native_syllable_composer/compose.cpp +67 -0
- data/lib/ext/native_syllable_composer/compose.h +11 -0
- data/lib/ext/native_syllable_composer/extconf.rb +7 -0
- data/lib/ext/native_syllable_composer/native_syllable_composer.c +34 -0
- data/lib/formosa/syllable_utility.rb +86 -0
- data/lib/formosa/version.rb +9 -0
- data/lib/formosa.rb +31 -0
- data/scripts/txt2html +67 -0
- data/setup.rb +1585 -0
- data/test/test_formosa.rb +11 -0
- data/test/test_helper.rb +2 -0
- data/website/index.html +92 -0
- data/website/index.txt +38 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +74 -0
@@ -0,0 +1,67 @@
|
|
1
|
+
#include "compose.h"
|
2
|
+
#include <string>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include "LibHolo.h"
|
5
|
+
|
6
|
+
using namespace std;
|
7
|
+
using namespace LibHolo;
|
8
|
+
|
9
|
+
#include "ruby.h"
|
10
|
+
|
11
|
+
// Scans `syllable` (a NUL-terminated byte string) and replaces every run of
// alphabetic characters, optionally followed by one tone digit, with the
// composed form produced by LibHolo. All other bytes are copied through
// unchanged.
//
//   inputType  - cast to SyllableType when it is 0 or 1; any other value
//                falls back to TLSyllable.
//   outputType - POJSyllable selects POJ output; anything else selects TL.
//   syllable   - the text to convert; a null pointer yields an empty string.
//
// Returns a new Ruby String holding the converted text.
VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable)
{
    HoloSyllable syl;
    string output;
    bool composing = false;

    // Robustness: there is nothing to scan behind a null pointer.
    if (!syllable) return rb_str_new2("");

    if (inputType != 0 && inputType != 1)
        syl.setInputType(TLSyllable);
    else
        syl.setInputType((SyllableType)inputType);

    while (1) {
        int c = *syllable++;

        // isalpha() is only defined for EOF and values representable as
        // unsigned char. Bytes >= 0x80 are negative where plain char is
        // signed, so convert before classifying.
        if (isalpha((unsigned char)c)) {
            composing = true;
            syl.insertCharacterAtCursor(c);
        }
        else {
            if (composing) {
                int finaltone = 0;
                int emitchar = 0;

                // some UNIX doesn't have isnumber(c)
                if (c >= '0' && c <= '9') {
                    // The digit is consumed as the tone of the pending syllable.
                    finaltone = c - '0';
                }
                else {
                    // Not a tone digit: emit it after the composed syllable.
                    // The terminating NUL lands here as 0 and is not emitted.
                    emitchar = c;
                }

                syl.normalize(finaltone);

                if (outputType == POJSyllable)
                    output += syl.convertToPOJSyllable().composedForm();
                else
                    output += syl.convertToTLSyllable().composedForm();

                if (emitchar) {
                    output += string(1, (char)emitchar);
                }

                composing = false;
                syl.clear();
            }
            else {
                // Outside a syllable: copy the byte through, except the NUL.
                if (c) output += string(1, (char)c);
            }
        }

        // The terminator is processed above first so that a syllable ending
        // exactly at the end of the input is still flushed.
        if (!c) break;
    }

    return rb_str_new2(output.c_str());
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
// cf. http://www.rubyinside.com/how-to-create-a-ruby-extension-in-c-in-under-5-minutes-100.html
|
2
|
+
// cf. http://cesare.seesaa.net/article/32850625.html
|
3
|
+
#include "ruby.h"
|
4
|
+
#include "compose.h"
|
5
|
+
// Handle to the NativeSyllableComposer module; assigned in Init_native_syllable_composer().
VALUE syllable_composer = Qnil;
|
6
|
+
|
7
|
+
// Forward declarations for the extension entry point and the method it registers.
void Init_native_syllable_composer();
|
8
|
+
VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable);
|
9
|
+
|
10
|
+
// Extension entry point: defines the NativeSyllableComposer module and
// registers its singleton method `compose`, which takes 3 arguments.
void Init_native_syllable_composer() {
|
11
|
+
syllable_composer = rb_define_module("NativeSyllableComposer");
|
12
|
+
rb_define_singleton_method(syllable_composer, "compose", native_syllable_composer_compose, 3);
|
13
|
+
}
|
14
|
+
|
15
|
+
// Compatibility shim: very old Ruby headers only expose the RSTRING() struct,
// while newer ones require the accessor macros.
#ifndef RSTRING_LEN
#define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
#ifndef RSTRING_PTR
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

// NativeSyllableComposer.compose(input_type, output_type, syllable)
//
//   rInputType  - converted with NUM2INT and passed on as the input type.
//   rOutputType - converted with NUM2INT and passed on as the output type.
//   rSyllable   - converted with StringValue; its bytes are the text to compose.
//
// Returns the VALUE produced by ComposeTLSyllable(), or Qnil when the string
// has no data pointer. Raises NoMemoryError if the scratch buffer cannot be
// allocated.
VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable) {
    int inputType = NUM2INT(rInputType);
    int outputType = NUM2INT(rOutputType);

    VALUE rStr = StringValue(rSyllable);

    size_t rStrLen = RSTRING_LEN(rStr);
    const char *rStrPtr = RSTRING_PTR(rStr);
    char *buffer;
    VALUE result;

    if (!rStrPtr) return Qnil;

    // ComposeTLSyllable() scans until it meets a NUL byte, so the copy needs
    // one extra zeroed byte as terminator. Allocating only rStrLen bytes left
    // the buffer unterminated and let the scan run past its end.
    buffer = (char *)calloc(rStrLen + 1, 1);
    if (!buffer) rb_raise(rb_eNoMemError, "failed to allocate syllable buffer");
    memcpy(buffer, rStrPtr, rStrLen);

    result = ComposeTLSyllable(inputType, outputType, buffer);
    free(buffer);

    return result;
}
|
34
|
+
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Formosa
  # Holo language (Southern Min) module
  module Holo
    # Syllable types supported by Formosa
    module SyllableType
      POJ = 0
      TL = 1
    end

    # Holo syllable utility
    module SyllableUtility
      # Compose Holo syllable from the query form ("ASCII form")
      #
      # * input_type: can be either SyllableType::POJ or SyllableType::TL
      # * output_type: same as above
      # * syllable: the query form of the syllable, such as "goa2", "tai5"
      #
      # Conversion is done automatically when input and output types are different
      def self.compose_syllable(input_type, output_type, syllable)
        NativeSyllableComposer.compose(input_type, output_type, syllable)
      end

      # The vowel "a" written with each tone's diacritic; the array index is
      # the tone number. Entries that are a plain "a" carry no diacritic.
      TONE_SAMPLE = ["a", "a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋"]

      # Maps a combining tone mark (packed as a UTF-8 string) to its tone number.
      LOOKUP_TABLE = {}

      # Register every sample. The loop used to run 9 times and therefore
      # skipped the last entry (index 9), so its mark was never recognised.
      TONE_SAMPLE.size.times do |index|
        t = TONE_SAMPLE[index]
        # NOTE(review): relies on String#chars returning an object that
        # responds to #decompose (ActiveSupport multibyte) - confirm the
        # ActiveSupport version in use.
        c = t.chars.decompose

        # A decomposed sample longer than one element is base letter + mark.
        if c.size > 1
          tone_char = [c[1]].pack("U")
          LOOKUP_TABLE[tone_char] = index
        end
      end

      # Converts a long text of composed syllables into the query form
      #
      # * input_type: SyllableType::POJ or SyllableType::TL, passed through to
      #   convert_syllable_into_query_form
      # * input_string: the text to convert
      #
      # The text is split on every character outside A-Z, a-z and \x7f-\xff;
      # the separators are kept and each piece is converted on its own.
      def self.convert_text_into_query_form(input_type, input_string)
        output_string = ""

        input_string.split(/([^A-Za-z\x7f-\xff])/).each do |str|
          output_string << self.convert_syllable_into_query_form(input_type, str)
        end

        output_string
      end

      # Converts a composed syllable (the "composed form") into the query form.
      # For example, "guá" is converted to "gua2"
      #
      # * input_type: SyllableType::TL rewrites "o" + U+0358 as "oo"; any other
      #   value rewrites it as "ou"
      # * string: the composed syllable
      def self.convert_syllable_into_query_form(input_type, string)
        c = string.chars

        # decomposed code points
        dcps = c.decompose

        # When several tone marks are present, the last one seen wins.
        loudest_tone = 0

        composed = []

        dcps.size.times do |index|
          chr = [dcps[index]].pack("U")

          if t = LOOKUP_TABLE[chr]
            # Tone marks are dropped from the letters and remembered as a number.
            loudest_tone = t
          else
            composed << dcps[index]
          end
        end

        result = composed.pack("U*") + (loudest_tone > 0 ? loudest_tone.to_s : "")

        # \315\230 are the UTF-8 bytes of U+0358 (combining dot above right).
        if input_type == SyllableType::TL
          result.gsub!(/O\315\230/, "OO")
          result.gsub!(/o\315\230/, "oo")
        else
          result.gsub!(/O\315\230/, "OU")
          result.gsub!(/o\315\230/, "ou")
        end

        result.gsub!(/ⁿ/, "nn")
        result
      end

    end
  end
end
|
data/lib/formosa.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Authors:: Lukhnos D. Liu (mailto:lukhnos@handlino.com)
|
2
|
+
# Copyright:: Copyright (c) 2007 Lukhnos D. Liu
|
3
|
+
# License:: Distributed under the New BSD License
|
4
|
+
#
|
5
|
+
# Formosa is a Ruby library for processing Taiwanese languages. Major languages
|
6
|
+
# spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
|
7
|
+
# people. Formosa is the Ruby branch of the lib-formosa project
|
8
|
+
# (http://code.google.com/p/lib-formosa/).
|
9
|
+
#
|
10
|
+
# Currently, we focus on the processing of the Holo (Southern Min) language,
|
11
|
+
# with necessary tools such as SyllableUtility available for general use.
|
12
|
+
#
|
13
|
+
# The following example shows how to use Formosa:
|
14
|
+
#
|
15
|
+
# $KCODE="u" # set the Ruby environment to use UTF-8
|
16
|
+
# require "rubygems"
|
17
|
+
# require "formosa"
|
18
|
+
# include Formosa::Holo
|
19
|
+
# poj = SyllableType::POJ
|
20
|
+
# tl = SyllableType::TL
|
21
|
+
# SyllableUtility.compose_syllable(poj, tl, "goa2") # => guá
|
22
|
+
# SyllableUtility.compose_syllable(tl, poj, "gua2") # => goá
|
23
|
+
|
24
|
+
require 'active_support'
|
25
|
+
require 'formosa/version'
|
26
|
+
require "native_syllable_composer/native_syllable_composer"
|
27
|
+
require 'formosa/syllable_utility'
|
28
|
+
|
29
|
+
# Top-level namespace of the Formosa library. It is empty in this file; the
# files required above reopen it to add their own definitions.
module Formosa
|
30
|
+
end
|
31
|
+
|
data/scripts/txt2html
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'redcloth'
|
5
|
+
require 'syntax/convertors/html'
|
6
|
+
require 'erb'
|
7
|
+
require File.dirname(__FILE__) + '/../lib/formosa/version.rb'
|
8
|
+
|
9
|
+
# Version string of the library, read from the version file required above.
version = Formosa::VERSION::STRING
|
10
|
+
# Project page URL. Not referenced again in this script; presumably read by the ERB template through `binding` (TODO confirm).
download = 'http://rubyforge.org/projects/formosa'
|
11
|
+
|
12
|
+
# Adds English ordinal suffixes to integers.
#
# Integer is reopened instead of Fixnum: Fixnum inherits from Integer on old
# Rubies and no longer exists on current ones, so this works on both.
class Integer
  # Returns the English ordinal suffix for this number:
  # 'st', 'nd', 'rd' or 'th' (e.g. 1 -> 'st', 12 -> 'th', 23 -> 'rd').
  def ordinal
    # teens (any number whose last two digits are 10..19) always take 'th'
    return 'th' if (10..19).include?(self % 100)

    # others: decided by the last digit
    # `when ... then` replaces the `when 1:` form, which only old Rubies parse.
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
|
25
|
+
|
26
|
+
class Time
  # Formats the date as "<day><ordinal suffix> <full month name> <year>".
  # Depends on the `ordinal` method being available on the day-of-month integer.
  def pretty
    day = mday
    month_name = strftime('%B')
    "#{day}#{day.ordinal} #{month_name} #{year}"
  end
end
|
31
|
+
|
32
|
+
# Highlights `source` as HTML using the Syntax converter registered for
# `syntax`, then removes a leading "<pre>" / trailing "</pre>" found at line
# boundaries so the caller can supply its own wrapper element.
def convert_syntax(syntax, source)
  converter = Syntax::Convertors::HTML.for_syntax(syntax)
  highlighted = converter.convert(source)
  highlighted.gsub(%r!^<pre>|</pre>$!,'')
end
|
35
|
+
|
36
|
+
# Script body: renders a Textile source file through an ERB template and
# writes the resulting HTML to standard output.
#
# Usage: txt2html source.txt [template.rhtml] > output.html
if ARGV.length >= 1
  src, template = ARGV
  # Fall back to the bundled website template when none is given.
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'
else
  puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit!
end

# File.read closes the template file; File.open(...).read left the handle open.
template = ERB.new(File.read(template))

# NOTE(review): title, body, created and modified are not used again in this
# script; presumably the template reads them through `binding` - confirm
# against the template before renaming any of them.
title = nil
body = nil
File.open(src) do |fsrc|
  # The first line of the source is the page title, the rest is the body.
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []

  # Pull every <pre>/<code> element carrying a syntax="..." attribute out of
  # the body and leave a placeholder, so RedCloth does not process the code.
  # `\1` makes the closing tag match the opening one; without the backslash
  # the pattern looked for a literal "</>" and never matched.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }

  # The title is rendered and then stripped of all markup.
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html

  # Swap the placeholders back for the highlighted code. `\d+` captures the
  # placeholder index; without the backslash it only matched literal "d"s.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)
|