formosa 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
1
#include "compose.h"
#include <ctype.h>
#include <stdio.h>
#include <string>
#include "LibHolo.h"
5
+
6
+ using namespace std;
7
+ using namespace LibHolo;
8
+
9
+ #include "ruby.h"
10
+
11
+ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable)
12
+ {
13
+ int c;
14
+ bool composing = false;
15
+ HoloSyllable syl;
16
+ string output;
17
+
18
+ if (inputType != 0 && inputType != 1)
19
+ syl.setInputType(TLSyllable);
20
+ else
21
+ syl.setInputType((SyllableType)inputType);
22
+
23
+ while (1) {
24
+ c = *syllable++;
25
+
26
+ if (isalpha(c)) {
27
+ if (!composing) composing = true;
28
+ syl.insertCharacterAtCursor(c);
29
+ }
30
+ else {
31
+ if (composing) {
32
+ int finaltone = 0;
33
+ int emitchar = 0;
34
+
35
+ // some UNIX doesn't have isnumber(c)
36
+ if (c >= '0' && c <= '9') {
37
+ finaltone = c - '0';
38
+ }
39
+ else {
40
+ // since it's non-numeric char, we'll emit it after the composed syllable
41
+ emitchar = c;
42
+ }
43
+
44
+ syl.normalize(finaltone);
45
+
46
+ if (outputType == POJSyllable)
47
+ output += syl.convertToPOJSyllable().composedForm();
48
+ else
49
+ output += syl.convertToTLSyllable().composedForm();
50
+
51
+ if (emitchar) {
52
+ output += string(1, emitchar);
53
+ }
54
+
55
+ composing = false;
56
+ syl.clear();
57
+ }
58
+ else {
59
+ if (c) output += string(1, c);
60
+ }
61
+ }
62
+
63
+ if (!c) break;
64
+ }
65
+
66
+ return rb_str_new2(output.c_str());
67
+ }
@@ -0,0 +1,11 @@
1
+ #include "ruby.h"
2
+
3
+ #ifdef __cplusplus
4
+ extern "C" {
5
+ #endif
6
+
7
+ VALUE ComposeTLSyllable(int inputType, int outputType, const char *syllable);
8
+
9
+ #ifdef __cplusplus
10
+ }
11
+ #endif
@@ -0,0 +1,7 @@
1
require 'mkmf'

# The extension contains C++ code, so the shared object has to be linked with
# the C++ driver rather than the C compiler mkmf selects by default.
#
# Only the leading compiler command word (cc, gcc, /usr/bin/gcc,
# x86_64-linux-gnu-gcc, ...) is rewritten. A bare /cc/ substitution would turn
# "gcc" into "gg++" and mangle any option or path that happens to contain "cc".
CONFIG['LDSHARED'].sub!(%r{\A((?:\S*[/-])?)g?cc(?=\s|\z)}) { "#{$1}g++" }

# When LDSHARED refers to the make variable instead, point it at $(CXX).
CONFIG['LDSHARED'].gsub!(/\(CC\)/, "(CXX)")

create_makefile "native_syllable_composer/native_syllable_composer"
@@ -0,0 +1,34 @@
1
+ // cf. http://www.rubyinside.com/how-to-create-a-ruby-extension-in-c-in-under-5-minutes-100.html
2
+ // cf. http://cesare.seesaa.net/article/32850625.html
3
+ #include "ruby.h"
4
+ #include "compose.h"
5
+ VALUE syllable_composer = Qnil;
6
+
7
+ void Init_native_syllable_composer();
8
+ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable);
9
+
10
+ void Init_native_syllable_composer() {
11
+ syllable_composer = rb_define_module("NativeSyllableComposer");
12
+ rb_define_singleton_method(syllable_composer, "compose", native_syllable_composer_compose, 3);
13
+ }
14
+
15
+ VALUE native_syllable_composer_compose(VALUE self, VALUE rInputType, VALUE rOutputType, VALUE rSyllable) {
16
+ int inputType = NUM2INT(rInputType);
17
+ int outputType = NUM2INT(rOutputType);
18
+
19
+ VALUE rStr = StringValue(rSyllable);
20
+
21
+ size_t rStrLen = RSTRING(rStr)->len;
22
+ char *rStrPtr = RSTRING(rStr)->ptr;
23
+
24
+ if (!rStrPtr) return Qnil;
25
+
26
+ char *string = (char*)calloc(1, rStrLen);
27
+ memcpy(string, rStrPtr,rStrLen);
28
+
29
+ VALUE result = ComposeTLSyllable(inputType, outputType, string);
30
+ free(string);
31
+
32
+ return result;
33
+ }
34
+
@@ -0,0 +1,86 @@
1
module Formosa
  # Holo language (Southern Min) module
  module Holo
    # Syllable types supported by Formosa
    module SyllableType
      POJ = 0
      TL = 1
    end

    # Holo syllable utility
    #
    # NOTE(review): the conversion methods call String#chars.decompose, which
    # plain Ruby does not provide; presumably it comes from the multibyte
    # support of the 'active_support' version this gem is loaded with.
    module SyllableUtility
      # Compose Holo syllable from the query form ("ASCII form")
      #
      # * input_type: can be either SyllableType::POJ or SyllableType::TL
      # * output_type: same as above
      # * syllable: the query form of the syllable, such as "goa2", "tai5"
      #
      # Conversion is done automatically when input and output types are different
      def self.compose_syllable(input_type, output_type, syllable)
        NativeSyllableComposer.compose(input_type, output_type, syllable)
      end

      # One sample vowel per tone number; the array index is the tone number.
      # Entries that are a bare "a" carry no tone mark.
      TONE_SAMPLE = ["a", "a", "á", "à", "a", "â", "ǎ", "ā", "a̍", "a̋"]

      # Maps a combining tone mark (as a UTF-8 string) to its tone number.
      LOOKUP_TABLE = {}

      # Walk every sample, including the last one (tone 9), which a fixed
      # "9.times" loop would leave out of the table.
      TONE_SAMPLE.each_with_index do |sample, tone|
        code_points = sample.chars.decompose

        # A decomposed sample longer than one code point is base letter plus
        # tone mark; the mark is the second code point.
        if code_points.size > 1
          tone_char = [code_points[1]].pack("U")
          LOOKUP_TABLE[tone_char] = tone
        end
      end

      # Converts a long text of composed syllables into the query form.
      #
      # The text is split on every character that is neither an ASCII letter
      # nor a byte in \x7f-\xff; the separators are kept, and each piece is
      # converted on its own with convert_syllable_into_query_form.
      def self.convert_text_into_query_form(input_type, input_string)
        output_string = ""

        input_string.split(/([^A-Za-z\x7f-\xff])/).each do |str|
          output_string << convert_syllable_into_query_form(input_type, str)
        end

        output_string
      end

      # Converts a composed syllable (the "composed form") into the query form.
      # For example, "guá" is converted to "gua2"
      #
      # * input_type: SyllableType::TL spells the dotted o as "oo"; any other
      #   value spells it as "ou"
      # * string: the composed syllable
      def self.convert_syllable_into_query_form(input_type, string)
        # decomposed code points
        dcps = string.chars.decompose

        loudest_tone = 0
        composed = []

        # Indexed access is kept on purpose: dcps is whatever chars.decompose
        # returns, and only #size and #[] are known to work on it.
        dcps.size.times do |index|
          chr = [dcps[index]].pack("U")

          if t = LOOKUP_TABLE[chr]
            # A tone mark: remember it (the last one seen wins) and drop it.
            loudest_tone = t
          else
            composed << dcps[index]
          end
        end

        # Tone 0 means no mark was found, so no digit is appended.
        result = composed.pack("U*") + (loudest_tone > 0 ? loudest_tone.to_s : "")

        # \315\230 is the UTF-8 encoding of U+0358 (combining dot above right).
        if input_type == SyllableType::TL
          result.gsub!(/O\315\230/, "OO")
          result.gsub!(/o\315\230/, "oo")
        else
          result.gsub!(/O\315\230/, "OU")
          result.gsub!(/o\315\230/, "ou")
        end

        result.gsub!(/ⁿ/, "nn")
        result
      end

    end
  end
end
@@ -0,0 +1,9 @@
1
module Formosa #:nodoc:
  module VERSION #:nodoc:
    MAJOR = 0
    MINOR = 0
    TINY  = 1

    # Dotted "MAJOR.MINOR.TINY" form of the version.
    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
data/lib/formosa.rb ADDED
@@ -0,0 +1,31 @@
1
+ # Authors:: Lukhnos D. Liu (mailto:lukhnos@handlino.com)
2
+ # Copyright:: Copyright (c) 2007 Lukhnos D. Liu
3
+ # License:: Distributed under the New BSD License
4
+ #
5
+ # Formosa is a Ruby library for processing Taiwanese languages. Major languages
6
+ # spoken in Taiwan include Holo, Hakka, Mandarin and those of the indigenous
7
+ # people. Formosa is the Ruby branch of the lib-formosa project
8
+ # (http://code.google.com/p/lib-formosa/).
9
+ #
10
+ # Currently, we focus on the processing of the Holo (Southern Min) language,
11
+ # with necessary tools such as SyllableComposer available for general use.
12
+ #
13
+ # The following example shows how to use Formosa:
14
+ #
15
+ # $KCODE="u" # set the Ruby environment to use UTF-8
16
+ # require "rubygems"
17
+ # require "formosa"
18
+ # include Formosa::Holo
19
+ # poj = SyllableType::POJ
20
+ # tl = SyllableType::TL
21
+ # SyllableUtility.compose_syllable(poj, tl, "goa2") # => guá
22
+ # SyllableUtility.compose_syllable(tl, poj, "gua2") # => goá
23
+
24
+ require 'active_support'
25
+ require 'formosa/version'
26
+ require "native_syllable_composer/native_syllable_composer"
27
+ require 'formosa/syllable_utility'
28
+
29
# Root namespace of the library; its contents are defined by the files
# required above.
module Formosa; end
31
+
data/scripts/txt2html ADDED
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'redcloth'
5
+ require 'syntax/convertors/html'
6
+ require 'erb'
7
+ require File.dirname(__FILE__) + '/../lib/formosa/version.rb'
8
+
9
+ version = Formosa::VERSION::STRING
10
+ download = 'http://rubyforge.org/projects/formosa'
11
+
12
# Integer is reopened instead of Fixnum: on old Ruby Fixnum inherits from
# Integer, and on current Ruby the Fixnum class no longer exists, so this
# definition works on both.
class Integer
  # Returns the English ordinal suffix for this number: 'st', 'nd', 'rd' or
  # 'th' (1 => 'st', 2 => 'nd', 3 => 'rd', 4 => 'th', 11 => 'th', 21 => 'st').
  def ordinal
    # teens: 10..19 in the last two digits always take 'th' (11th, 112th)
    return 'th' if (10..19).include?(self % 100)

    # others: decided by the last digit
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
25
+
26
class Time
  # Formats the time as day of month with its ordinal suffix, the full month
  # name from strftime('%B'), and the year, separated by single spaces.
  def pretty
    day_with_suffix = "#{mday}#{mday.ordinal}"
    [day_with_suffix, strftime('%B'), year].join(' ')
  end
end
31
+
32
# Runs +source+ through the Syntax HTML convertor for the +syntax+ language and
# returns the markup with a <pre> at the start of a line and a </pre> at the
# end of a line removed.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  highlighted = convertor.convert(source)
  highlighted.gsub(%r!^<pre>|</pre>$!,'')
end
35
+
36
# Command line: source.txt [template.rhtml]; the template defaults to the
# website template that sits next to this script's directory.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'

else
  puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit!
end

# File.read closes the file again; File.open(...).read left the handle open.
template = ERB.new(File.read(template))

title = nil
body = nil
File.open(src) do |fsrc|
  # The first line of the source is the title, the remainder is the body.
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []

  # Pull every <pre>/<code> element that carries a syntax="..." attribute out
  # of the text, highlight it, and leave a numbered placeholder behind so the
  # highlighted markup does not go through RedCloth. The closing tag must be a
  # backreference to the opening one (</\1>); a literal "</>" never matches.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }

  # The title is rendered and then stripped of all tags.
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html

  # Swap each placeholder (and a <pre><code> wrapper around it, if present)
  # for its highlighted block. The index is matched with \d+; a bare "d+"
  # would only match the letter d and leave the placeholders in the page.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end

# Source file timestamps; in scope for the template through the binding below.
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)