formosa 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +23 -0
- data/README.txt +18 -0
- data/Rakefile +127 -0
- data/lib/ext/native_syllable_composer/HoloVowels.h +242 -0
- data/lib/ext/native_syllable_composer/LibHolo.h +1223 -0
- data/lib/ext/native_syllable_composer/compose.cpp +67 -0
- data/lib/ext/native_syllable_composer/compose.h +11 -0
- data/lib/ext/native_syllable_composer/extconf.rb +7 -0
- data/lib/ext/native_syllable_composer/native_syllable_composer.c +34 -0
- data/lib/formosa/syllable_utility.rb +86 -0
- data/lib/formosa/version.rb +9 -0
- data/lib/formosa.rb +31 -0
- data/scripts/txt2html +67 -0
- data/setup.rb +1585 -0
- data/test/test_formosa.rb +11 -0
- data/test/test_helper.rb +2 -0
- data/website/index.html +92 -0
- data/website/index.txt +38 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +74 -0
@@ -0,0 +1,67 @@
|
|
1
|
+
#include "compose.h"
|
2
|
+
#include <string>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include "LibHolo.h"
|
5
|
+
|
6
|
+
using namespace std;
|
7
|
+
using namespace LibHolo;
|
8
|
+
|
9
|
+
#include "ruby.h"
|
10
|
+
|
11
|
+
// Composes every syllable found in a NUL-terminated query-form string and
// returns the result as a new Ruby String.
//
// A run of alphabetic characters is fed into a HoloSyllable. The first
// non-alphabetic character ends the run: a digit is used as the tone number,
// anything else leaves the tone at 0 and is copied to the output right after
// the composed syllable. Characters outside of a run are copied unchanged.
//
// inputType:  0 or 1 is passed to setInputType() as a SyllableType; any other
//             value selects TLSyllable.
// outputType: POJSyllable produces POJ output; any other value produces TL.
// syllable:   NUL-terminated input text; must not be NULL.
VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable)
{
    bool composing = false;
    HoloSyllable syl;
    string output;

    if (inputType != 0 && inputType != 1)
        syl.setInputType(TLSyllable);
    else
        syl.setInputType((SyllableType)inputType);

    while (1) {
        int c = *syllable++;

        // isalpha() is only defined for EOF and for values representable as
        // unsigned char. Where plain char is signed, the bytes of a multi-byte
        // UTF-8 sequence arrive here as negative ints, so cast before the call.
        if (isalpha((unsigned char)c)) {
            composing = true;
            syl.insertCharacterAtCursor(c);
        }
        else if (composing) {
            int finaltone = 0;
            int emitchar = 0;

            // some UNIX systems don't have isnumber(c), so test the range by hand
            if (c >= '0' && c <= '9') {
                finaltone = c - '0';
            }
            else {
                // a non-numeric terminator is emitted after the composed syllable
                emitchar = c;
            }

            syl.normalize(finaltone);

            if (outputType == POJSyllable)
                output += syl.convertToPOJSyllable().composedForm();
            else
                output += syl.convertToTLSyllable().composedForm();

            // emitchar is 0 when the terminator was a digit or the final NUL
            if (emitchar) {
                output += string(1, emitchar);
            }

            composing = false;
            syl.clear();
        }
        else {
            if (c) output += string(1, c);
        }

        // the NUL terminator is processed above (it may end a syllable) before stopping
        if (!c) break;
    }

    return rb_str_new2(output.c_str());
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
// cf. http://www.rubyinside.com/how-to-create-a-ruby-extension-in-c-in-under-5-minutes-100.html
|
2
|
+
// cf. http://cesare.seesaa.net/article/32850625.html
|
3
|
+
#include "ruby.h"
|
4
|
+
#include "compose.h"
|
5
|
+
VALUE syllable_composer = Qnil;
|
6
|
+
|
7
|
+
void Init_native_syllable_composer();
|
8
|
+
VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable);
|
9
|
+
|
10
|
+
void Init_native_syllable_composer() {
|
11
|
+
syllable_composer = rb_define_module("NativeSyllableComposer");
|
12
|
+
rb_define_singleton_method(syllable_composer, "compose", native_syllable_composer_compose, 3);
|
13
|
+
}
|
14
|
+
|
15
|
+
/* RSTRING_LEN / RSTRING_PTR are the accessor macros for a String's length and
 * data. Fall back to direct struct access only when the Ruby headers in use
 * do not provide them. */
#ifndef RSTRING_LEN
#define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
#ifndef RSTRING_PTR
#define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

/* NativeSyllableComposer.compose(input_type, output_type, syllable)
 *
 * Converts the two type arguments to C ints, coerces `syllable` to a String,
 * copies its bytes into a NUL-terminated buffer and passes that buffer to
 * ComposeTLSyllable(). Returns ComposeTLSyllable()'s result, or nil when the
 * String has no data pointer. Raises NoMemoryError if the buffer cannot be
 * allocated.
 */
VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable) {
    int inputType = NUM2INT(rInputType);
    int outputType = NUM2INT(rOutputType);

    VALUE rStr = StringValue(rSyllable);

    size_t rStrLen = (size_t)RSTRING_LEN(rStr);
    const char *rStrPtr = RSTRING_PTR(rStr);
    char *buffer;
    VALUE result;

    if (!rStrPtr) return Qnil;

    /* ComposeTLSyllable() scans until it sees a NUL byte, so the copy needs
     * one extra zeroed byte after the string data. */
    buffer = (char *)calloc(rStrLen + 1, 1);
    if (!buffer) rb_raise(rb_eNoMemError, "failed to allocate syllable buffer");
    memcpy(buffer, rStrPtr, rStrLen);

    result = ComposeTLSyllable(inputType, outputType, buffer);
    free(buffer);

    return result;
}
|
34
|
+
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Formosa
  # Holo language (Southern Min) module
  module Holo
    # Syllable types supported by Formosa
    module SyllableType
      POJ = 0
      TL = 1
    end

    # Holo syllable utility
    module SyllableUtility
      # Compose Holo syllable from the query form ("ASCII form")
      #
      # * input_type: can be either SyllableType::POJ or SyllableType::TL
      # * output_type: same as above
      # * syllable: the query form of the syllable, such as "goa2", "tai5"
      #
      # Conversion is done automatically when input and output types are different
      def self.compose_syllable(input_type, output_type, syllable)
        NativeSyllableComposer.compose(input_type, output_type, syllable)
      end

      # One sample vowel per tone number; the array index is the tone number.
      # Entries without a diacritic contribute nothing to LOOKUP_TABLE.
      TONE_SAMPLE = ["a", "a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋"]

      # Maps a combining tone mark (as a one-character string) to its tone number
      LOOKUP_TABLE = {}

      # Walk every sample, including the last one (tone 9), so that each
      # diacritic listed in TONE_SAMPLE gets an entry.
      TONE_SAMPLE.each_with_index do |sample, tone|
        decomposed = sample.chars.decompose

        if decomposed.size > 1
          # the second code point of the decomposed sample is the tone mark
          tone_char = [decomposed[1]].pack("U")
          LOOKUP_TABLE[tone_char] = tone
        end
      end

      # Converts a long text of composed syllables into the query form
      #
      # * input_type: passed through to convert_syllable_into_query_form
      # * input_string: the text to convert
      #
      # The text is split on the delimiter pattern below (delimiters are kept),
      # each piece is converted on its own, and the pieces are joined again.
      def self.convert_text_into_query_form(input_type, input_string)
        pieces = input_string.split(/([^A-Za-z\x7f-\xff])/)
        pieces.map { |str| convert_syllable_into_query_form(input_type, str) }.join
      end

      # Converts a composed syllable (the "composed form") into the query form.
      # For example, "guá" is converted to "gua2"
      #
      # * input_type: SyllableType::TL writes the dotted o as "oo"/"OO";
      #   any other value writes it as "ou"/"OU"
      # * string: the composed syllable
      def self.convert_syllable_into_query_form(input_type, string)
        # decomposed code points
        dcps = string.chars.decompose

        loudest_tone = 0
        composed = []

        dcps.size.times do |index|
          chr = [dcps[index]].pack("U")

          if t = LOOKUP_TABLE[chr]
            # a tone mark: remember the tone and drop the mark itself
            loudest_tone = t
          else
            composed << dcps[index]
          end
        end

        result = composed.pack("U*") + (loudest_tone > 0 ? loudest_tone.to_s : "")

        if input_type == SyllableType::TL
          result.gsub!(/O\315\230/, "OO")
          result.gsub!(/o\315\230/, "oo")
        else
          result.gsub!(/O\315\230/, "OU")
          result.gsub!(/o\315\230/, "ou")
        end

        result.gsub!(/ⁿ/, "nn")
        result
      end
    end
  end
end
|
data/lib/formosa.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Authors:: Lukhnos D. Liu (mailto:lukhnos@handlino.com)
|
2
|
+
# Copyright:: Copyright (c) 2007 Lukhnos D. Liu
|
3
|
+
# License:: Distributed under the New BSD License
|
4
|
+
#
|
5
|
+
# Formosa is a Ruby library for processing Taiwanese languages. Major languages
|
6
|
+
# spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
|
7
|
+
# people. Formosa is the Ruby branch of the lib-formosa project
|
8
|
+
# (http://code.google.com/p/lib-formosa/).
|
9
|
+
#
|
10
|
+
# Currently, we focus on the processing of the Holo (Southern Min) language,
|
11
|
+
# with necessary tools such as SyllableUtility available for general use.
|
12
|
+
#
|
13
|
+
# The following example shows how to use Formosa:
|
14
|
+
#
|
15
|
+
# $KCODE="u" # set the Ruby environment to use UTF-8
|
16
|
+
# require "rubygems"
|
17
|
+
# require "formosa"
|
18
|
+
# include Formosa::Holo
|
19
|
+
# poj = SyllableType::POJ
|
20
|
+
# tl = SyllableType::TL
|
21
|
+
# SyllableUtility.compose_syllable(poj, tl, "goa2") # => guá
|
22
|
+
# SyllableUtility.compose_syllable(tl, poj, "gua2") # => goá
|
23
|
+
|
24
|
+
require 'active_support'
|
25
|
+
require 'formosa/version'
|
26
|
+
require "native_syllable_composer/native_syllable_composer"
|
27
|
+
require 'formosa/syllable_utility'
|
28
|
+
|
29
|
+
# Root namespace of the library; its contents are defined in the files required above.
module Formosa; end
|
31
|
+
|
data/scripts/txt2html
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'redcloth'
|
5
|
+
require 'syntax/convertors/html'
|
6
|
+
require 'erb'
|
7
|
+
require File.dirname(__FILE__) + '/../lib/formosa/version.rb'
|
8
|
+
|
9
|
+
# Top-level locals holding the library version string and the project URL.
# NOTE(review): presumably read by the ERB template through `binding` - confirm against template.rhtml.
version = Formosa::VERSION::STRING
|
10
|
+
download = 'http://rubyforge.org/projects/formosa'
|
11
|
+
|
12
|
+
# Adds an English ordinal suffix helper to integers.
#
# Integer is reopened instead of Fixnum: Fixnum is a subclass of Integer on
# old Rubies (so the method is still inherited there) and no longer exists as
# a separate class on current ones.
class Integer
  # Returns the English ordinal suffix for this number:
  # 'st', 'nd', 'rd' or 'th' (e.g. 1 -> 'st', 12 -> 'th', 23 -> 'rd').
  def ordinal
    # teens: 10..19 always take 'th' (11th, 12th, 13th)
    return 'th' if (10..19).include?(self % 100)

    # others: decided by the last digit
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
|
25
|
+
|
26
|
+
class Time
  # Formats the time as "<day of month><ordinal suffix> <full month name> <year>".
  # Relies on the integer `ordinal` helper for the suffix.
  def pretty
    day_with_suffix = "#{mday}#{mday.ordinal}"
    [day_with_suffix, strftime('%B'), year].join(' ')
  end
end
|
31
|
+
|
32
|
+
# Runs +source+ through the Syntax HTML convertor for +syntax+ and removes a
# "<pre>" at the start of a line and a "</pre>" at the end of a line from the
# converted text.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  highlighted = convertor.convert(source)
  highlighted.gsub(%r!^<pre>|</pre>$!, '')
end
|
35
|
+
|
36
|
+
# Command-line entry point.
#
# Reads the source file named by the first argument, converts it with
# RedCloth, and writes the result of the ERB template (second argument, or
# website/template.rhtml next to this script) to stdout.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'
else
  # stdout is expected to be redirected into the generated page, so the usage
  # text goes to stderr; abort also exits with a failure status.
  abort("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
end

# File.read closes the template file once its content has been read
template = ERB.new(File.read(template))

title = nil
body = nil
File.open(src) do |fsrc|
  # the first line is the page title, the rest is the page body
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []

  # Pull out every <pre>/<code> element carrying a syntax="..." attribute,
  # highlight it, and leave a "syntax-temp-N" placeholder in the text.
  # The \1 back-reference makes the closing tag match the opening one.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m) {
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }

  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!, '').strip
  body = RedCloth.new(body_text).to_html

  # Swap each placeholder (with any <pre><code> wrapper around it) back for
  # its highlighted element; \d+ captures the placeholder's index.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!) { syntax_items[$1.to_i] }
end

stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

# the template is evaluated against the top-level binding, so it can read the locals set above
$stdout << template.result(binding)
|