sanzang 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.rdoc +5 -0
- data/lib/sanzang/translation_table.rb +25 -24
- data/lib/sanzang/version.rb +1 -1
- data/test/tc_simple_translation.rb +8 -8
- data/test/utf-8/table.txt +8 -8
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ed9732f3291f8bebe17d1abc1cceb68765aed70e
|
|
4
|
+
data.tar.gz: c3560faf30781e7e30b76ef8b6c41234725606e9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 64942f119d7618ba75ed87e0e47911b0c8179445d4bdd2425b6b85665e6c5967a32a57492adba2ad3c5a9a45423dbec5540f74b3440bbce182c43408758d3b07
|
|
7
|
+
data.tar.gz: 85624b92a30b276e0148b3d7a1449fe6b987be2fb70b1ffc1eebb1f59b0e2d1d200c07731efdb38168304e1c2d494ba2d7d54c254b8b2a866bda36ad924983d1
|
data/NEWS.rdoc
CHANGED
|
@@ -2,6 +2,11 @@
|
|
|
2
2
|
|
|
3
3
|
== Release History
|
|
4
4
|
|
|
5
|
+
=== v1.1.2
|
|
6
|
+
* Cleaned up table loading code to be faster and simpler
|
|
7
|
+
* Added RDoc option to set documentation encoding to UTF-8 (RDoc 3.x)
|
|
8
|
+
* Adjusted example and test translation tables to not use leading spaces
|
|
9
|
+
|
|
5
10
|
=== v1.1.1
|
|
6
11
|
* Updated horizontal space handling to be more robust.
|
|
7
12
|
* Horizontal spaces will not be added at the end of any lines.
|
|
@@ -31,9 +31,7 @@ module Sanzang
|
|
|
31
31
|
#
|
|
32
32
|
attr_reader :source_encoding
|
|
33
33
|
|
|
34
|
-
#
|
|
35
|
-
# string is in the format of delimited text. The text format can be
|
|
36
|
-
# summarized as follows:
|
|
34
|
+
# The translation table file format is summarized as follows:
|
|
37
35
|
#
|
|
38
36
|
# - Each line of text is a record for a translation rule.
|
|
39
37
|
# - Fields in the record are separated by the "|" character.
|
|
@@ -41,44 +39,47 @@ module Sanzang
|
|
|
41
39
|
# - Subsequent fields are equivalent terms in destination languages.
|
|
42
40
|
# - The number of columns must be consistent for the entire table.
|
|
43
41
|
#
|
|
44
|
-
# The first element in a record is a term in the source language, and
|
|
45
|
-
# subsequent elements are are equivalent terms in destination languages.
|
|
46
|
-
# The number of "columns" in a translation table must be consistent across
|
|
47
|
-
# the entire table.
|
|
48
|
-
#
|
|
49
42
|
def initialize(rules)
|
|
50
43
|
contents = rules.kind_of?(String) ? rules : rules.read
|
|
51
44
|
@source_encoding = contents.encoding
|
|
52
45
|
contents.encode!(Encoding::UTF_8)
|
|
53
46
|
|
|
54
|
-
contents
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
47
|
+
if contents =~ /~\||\|~|\| / # If there is any old formatting...
|
|
48
|
+
contents.gsub!(/~\||\|~/, "") # Rm old style "~|" and "|~"
|
|
49
|
+
contents.gsub!(/^\s+|\s+$/, "") # Rm WS around lines
|
|
50
|
+
contents.gsub!(/\s*\|\s*/, "|") # Rm WS around delimiters
|
|
51
|
+
end
|
|
59
52
|
|
|
60
|
-
@records = contents.split("\n").collect {|r| r.split("|") }
|
|
53
|
+
@records = contents.strip.split("\n").collect {|r| r.strip.split("|") }
|
|
54
|
+
check_dims
|
|
55
|
+
sort!
|
|
56
|
+
end
|
|
61
57
|
|
|
62
|
-
|
|
58
|
+
# Retrieve a record by its numeric index.
|
|
59
|
+
#
|
|
60
|
+
def [](index)
|
|
61
|
+
@records[index]
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Check the basic dimensions of the translation table
|
|
65
|
+
#
|
|
66
|
+
def check_dims
|
|
67
|
+
if @records.size < 1
|
|
63
68
|
raise "Table must have at least 1 row"
|
|
64
|
-
elsif
|
|
69
|
+
elsif records[0].size < 2
|
|
65
70
|
raise "Table must have at least 2 columns"
|
|
66
71
|
end
|
|
67
|
-
|
|
68
|
-
width = records[0].length
|
|
69
72
|
@records.each do |r|
|
|
70
|
-
if r.
|
|
73
|
+
if r.size != width
|
|
71
74
|
raise "Column mismatch: Line #{i + 1}"
|
|
72
75
|
end
|
|
73
76
|
end
|
|
74
|
-
|
|
75
|
-
@records.sort! {|x,y| y[0].length <=> x[0].length }
|
|
76
77
|
end
|
|
77
78
|
|
|
78
|
-
#
|
|
79
|
+
# Reverse sort all records by length
|
|
79
80
|
#
|
|
80
|
-
def
|
|
81
|
-
@records[
|
|
81
|
+
def sort!
|
|
82
|
+
@records.sort! {|x,y| y[0].size <=> x[0].size }
|
|
82
83
|
end
|
|
83
84
|
|
|
84
85
|
# The text encoding used internally for all translation table data
|
data/lib/sanzang/version.rb
CHANGED
|
@@ -7,14 +7,14 @@ require_relative File.join("..", "lib", "sanzang")
|
|
|
7
7
|
class TestSanzang < Test::Unit::TestCase
|
|
8
8
|
|
|
9
9
|
def table_string
|
|
10
|
-
"
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
10
|
+
"三藏|sānzàng|tripiṭaka|
|
|
11
|
+
法師|fǎshī|dharma-master
|
|
12
|
+
玄奘|xuánzàng|xuanzang
|
|
13
|
+
奉|fèng|reverently
|
|
14
|
+
唐|táng|tang
|
|
15
|
+
大|dà|great
|
|
16
|
+
詔|zhào|imperial-order
|
|
17
|
+
譯|yì|translate/interpret"
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def stage_1
|
data/test/utf-8/table.txt
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
三藏|
|
|
2
|
-
法師|
|
|
3
|
-
玄奘|
|
|
4
|
-
奉|
|
|
5
|
-
唐|
|
|
6
|
-
大|
|
|
7
|
-
詔|
|
|
8
|
-
譯|
|
|
1
|
+
三藏|sānzàng|tripiṭaka
|
|
2
|
+
法師|fǎshī|dharma-master
|
|
3
|
+
玄奘|xuánzàng|xuanzang
|
|
4
|
+
奉|fèng|reverently
|
|
5
|
+
唐|táng|tang
|
|
6
|
+
大|dà|great
|
|
7
|
+
詔|zhào|imperial-order
|
|
8
|
+
譯|yì|translate/interpret
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sanzang
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.
|
|
4
|
+
version: 1.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Lapis Lazuli Texts
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-
|
|
11
|
+
date: 2014-02-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: parallel
|
|
@@ -73,7 +73,8 @@ licenses:
|
|
|
73
73
|
- GPL-3
|
|
74
74
|
metadata: {}
|
|
75
75
|
post_install_message:
|
|
76
|
-
rdoc_options:
|
|
76
|
+
rdoc_options:
|
|
77
|
+
- "--encoding=UTF-8"
|
|
77
78
|
require_paths:
|
|
78
79
|
- lib
|
|
79
80
|
required_ruby_version: !ruby/object:Gem::Requirement
|