sanzang 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.rdoc +5 -0
- data/lib/sanzang/translation_table.rb +25 -24
- data/lib/sanzang/version.rb +1 -1
- data/test/tc_simple_translation.rb +8 -8
- data/test/utf-8/table.txt +8 -8
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed9732f3291f8bebe17d1abc1cceb68765aed70e
|
4
|
+
data.tar.gz: c3560faf30781e7e30b76ef8b6c41234725606e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 64942f119d7618ba75ed87e0e47911b0c8179445d4bdd2425b6b85665e6c5967a32a57492adba2ad3c5a9a45423dbec5540f74b3440bbce182c43408758d3b07
|
7
|
+
data.tar.gz: 85624b92a30b276e0148b3d7a1449fe6b987be2fb70b1ffc1eebb1f59b0e2d1d200c07731efdb38168304e1c2d494ba2d7d54c254b8b2a866bda36ad924983d1
|
data/NEWS.rdoc
CHANGED
@@ -2,6 +2,11 @@
|
|
2
2
|
|
3
3
|
== Release History
|
4
4
|
|
5
|
+
=== v1.1.2
|
6
|
+
* Cleaned up table loading code to be faster and simpler
|
7
|
+
* Added RDoc option to set documentation encoding to UTF-8 (RDoc 3.x)
|
8
|
+
* Adjusted example and test translation tables to not use leading spaces
|
9
|
+
|
5
10
|
=== v1.1.1
|
6
11
|
* Updated horizontal space handling to be more robust.
|
7
12
|
* Horizontal spaces will not be added at the end of any lines.
|
data/lib/sanzang/translation_table.rb
CHANGED
@@ -31,9 +31,7 @@ module Sanzang
|
|
31
31
|
#
|
32
32
|
attr_reader :source_encoding
|
33
33
|
|
34
|
-
#
|
35
|
-
# string is in the format of delimited text. The text format can be
|
36
|
-
# summarized as follows:
|
34
|
+
# The translation table file format is summarized as follows:
|
37
35
|
#
|
38
36
|
# - Each line of text is a record for a translation rule.
|
39
37
|
# - Fields in the record are separated by the "|" character.
|
@@ -41,44 +39,47 @@ module Sanzang
|
|
41
39
|
# - Subsequent fields are equivalent terms in destination languages.
|
42
40
|
# - The number of columns must be consistent for the entire table.
|
43
41
|
#
|
44
|
-
# The first element in a record is a term in the source language, and
|
45
|
-
# subsequent elements are are equivalent terms in destination languages.
|
46
|
-
# The number of "columns" in a translation table must be consistent across
|
47
|
-
# the entire table.
|
48
|
-
#
|
49
42
|
def initialize(rules)
|
50
43
|
contents = rules.kind_of?(String) ? rules : rules.read
|
51
44
|
@source_encoding = contents.encoding
|
52
45
|
contents.encode!(Encoding::UTF_8)
|
53
46
|
|
54
|
-
contents
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
47
|
+
if contents =~ /~\||\|~|\| / # If there is any old formatting...
|
48
|
+
contents.gsub!(/~\||\|~/, "") # Rm old style "~|" and "|~"
|
49
|
+
contents.gsub!(/^\s+|\s+$/, "") # Rm WS around lines
|
50
|
+
contents.gsub!(/\s*\|\s*/, "|") # Rm WS around delimiters
|
51
|
+
end
|
59
52
|
|
60
|
-
@records = contents.split("\n").collect {|r| r.split("|") }
|
53
|
+
@records = contents.strip.split("\n").collect {|r| r.strip.split("|") }
|
54
|
+
check_dims
|
55
|
+
sort!
|
56
|
+
end
|
61
57
|
|
62
|
-
|
58
|
+
# Retrieve a record by its numeric index.
|
59
|
+
#
|
60
|
+
def [](index)
|
61
|
+
@records[index]
|
62
|
+
end
|
63
|
+
|
64
|
+
# Check the basic dimensions of the translation table
|
65
|
+
#
|
66
|
+
def check_dims
|
67
|
+
if @records.size < 1
|
63
68
|
raise "Table must have at least 1 row"
|
64
|
-
elsif
|
69
|
+
elsif records[0].size < 2
|
65
70
|
raise "Table must have at least 2 columns"
|
66
71
|
end
|
67
|
-
|
68
|
-
width = records[0].length
|
69
72
|
@records.each do |r|
|
70
|
-
if r.
|
73
|
+
if r.size != width
|
71
74
|
raise "Column mismatch: Line #{i + 1}"
|
72
75
|
end
|
73
76
|
end
|
74
|
-
|
75
|
-
@records.sort! {|x,y| y[0].length <=> x[0].length }
|
76
77
|
end
|
77
78
|
|
78
|
-
#
|
79
|
+
# Reverse sort all records by length
|
79
80
|
#
|
80
|
-
def
|
81
|
-
@records[
|
81
|
+
def sort!
|
82
|
+
@records.sort! {|x,y| y[0].size <=> x[0].size }
|
82
83
|
end
|
83
84
|
|
84
85
|
# The text encoding used internally for all translation table data
|
data/test/tc_simple_translation.rb
CHANGED
@@ -7,14 +7,14 @@ require_relative File.join("..", "lib", "sanzang")
|
|
7
7
|
class TestSanzang < Test::Unit::TestCase
|
8
8
|
|
9
9
|
def table_string
|
10
|
-
"
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
10
|
+
"三藏|sānzàng|tripiṭaka|
|
11
|
+
法師|fǎshī|dharma-master
|
12
|
+
玄奘|xuánzàng|xuanzang
|
13
|
+
奉|fèng|reverently
|
14
|
+
唐|táng|tang
|
15
|
+
大|dà|great
|
16
|
+
詔|zhào|imperial-order
|
17
|
+
譯|yì|translate/interpret"
|
18
18
|
end
|
19
19
|
|
20
20
|
def stage_1
|
data/test/utf-8/table.txt
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
三藏|
|
2
|
-
法師|
|
3
|
-
玄奘|
|
4
|
-
奉|
|
5
|
-
唐|
|
6
|
-
大|
|
7
|
-
詔|
|
8
|
-
譯|
|
1
|
+
三藏|sānzàng|tripiṭaka
|
2
|
+
法師|fǎshī|dharma-master
|
3
|
+
玄奘|xuánzàng|xuanzang
|
4
|
+
奉|fèng|reverently
|
5
|
+
唐|táng|tang
|
6
|
+
大|dà|great
|
7
|
+
詔|zhào|imperial-order
|
8
|
+
譯|yì|translate/interpret
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanzang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lapis Lazuli Texts
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: parallel
|
@@ -73,7 +73,8 @@ licenses:
|
|
73
73
|
- GPL-3
|
74
74
|
metadata: {}
|
75
75
|
post_install_message:
|
76
|
-
rdoc_options:
|
76
|
+
rdoc_options:
|
77
|
+
- "--encoding=UTF-8"
|
77
78
|
require_paths:
|
78
79
|
- lib
|
79
80
|
required_ruby_version: !ruby/object:Gem::Requirement
|