interscript 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.adoc +65 -0
- data/bin/interscript +22 -0
- data/lib/interscript/version.rb +3 -0
- data/lib/interscript.rb +53 -0
- metadata +89 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 19905541367b50886e3d130aac4132cbb5e9050bff1409f924b23848ea5b3a32
|
4
|
+
data.tar.gz: d9d6a9531009778797f2826cf51dfc2b68b27651608be5c91ed7f343d75ff75f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 247a535a7fd1203b2331b2c6c99fa9a9e0747c3fc5ff1f5476c034d1c72ae5bd16bbf2866cf639cadd321989dd653a461eda2b0db662db303c381b2de8b18285
|
7
|
+
data.tar.gz: a9c259539a46c33dc5c7de38edc9b5d330727064509dbc6cc17cdfa3baf1ae950fa3e8c645e45e9e04fa3afc213f50f3e1cfe8c44470bf8024e93f3621e32b87
|
data/README.adoc
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
= Interscript: Interoperable Script Conversion Systems and a Ruby implementation
|
2
|
+
|
3
|
+
== Introduction
|
4
|
+
|
5
|
+
This repository contains a number of transliteration schemes from:
|
6
|
+
|
7
|
+
* BGN/PCGN
|
8
|
+
* ICAO
|
9
|
+
* ISO
|
10
|
+
* UN (by UNGEGN)
|
11
|
+
|
12
|
+
The goal is to achieve interoperable transliteration schemes allowing quality comparisons.
|
13
|
+
|
14
|
+
|
15
|
+
== STATUS (work in progress!)
|
16
|
+
|
17
|
+
These transliteration systems currently work:
|
18
|
+
|
19
|
+
`bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian
|
20
|
+
`iso-rus-Cyrl-Latn-iso9`:: ISO 9 Romanization of Russian
|
21
|
+
`icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian
|
22
|
+
`bas-rus-Cyrl-Latn-bss`:: Bulgarian Academy of Sciences Streamlined System for Russian
|
23
|
+
|
24
|
+
|
25
|
+
== Usage
|
26
|
+
|
27
|
+
|
28
|
+
[source,sh]
|
29
|
+
----
|
30
|
+
# Transliterating Russian Cyrillic to Latin using the Streamlined System for Russian
|
31
|
+
interscript samples/rus-Cyrl.txt --system=bas-rus-Cyrl-Latn-bss --output=rus-Latn.txt
|
32
|
+
|
33
|
+
# Transliterating Russian Cyrillic to Latin using the BGN/PCGN Romanization of Russian
|
34
|
+
interscript samples/rus-Cyrl.txt --system=bgnpcgn-rus-Cyrl-Latn-1947 --output=rus-Latn.txt
|
35
|
+
----
|
36
|
+
|
37
|
+
|
38
|
+
== ISCS system codes
|
39
|
+
|
40
|
+
The system code identifying a script conversion system has a few components:
|
41
|
+
|
42
|
+
e.g. `bgnpcgn-rus-Cyrl-Latn-1947`
|
43
|
+
|
44
|
+
`bgnpcgn`:: the authority identifier
|
45
|
+
`rus`:: an ISO 639-2 3-letter language code that this system applies to
|
46
|
+
`Cyrl`:: an ISO 15924 script code, identifying the source script
|
47
|
+
`Latn`:: an ISO 15924 script code, identifying the target script
|
48
|
+
`1947`:: an identifier unique within the authority that identifies this system
|
49
|
+
|
50
|
+
|
51
|
+
== Covered languages
|
52
|
+
|
53
|
+
Currently the schemes cover Cyrillic, Armenian, Greek, Arabic and Hebrew.
|
54
|
+
|
55
|
+
|
56
|
+
== Sources
|
57
|
+
|
58
|
+
* `rus-Cyrl-1.txt`: Copied from the XLS output from http://www.primorsk.vybory.izbirkom.ru/region/primorsk?action=show&global=true&root=254017025&tvd=4254017212287&vrn=100100067795849&prver=0&pronetvd=0&region=25&sub_region=25&type=242&vibid=4254017212287
|
59
|
+
|
60
|
+
* `rus-Cyrl-2.txt`: Copied from the XLS output from http://www.yaroslavl.vybory.izbirkom.ru/region/yaroslavl?action=show&root=764013001&tvd=4764013188704&vrn=4764013188693&prver=0&pronetvd=0&region=76&sub_region=76&type=426&vibid=4764013188704
|
61
|
+
|
62
|
+
|
63
|
+
== Credits
|
64
|
+
|
65
|
+
This is a Ribose project.
|
data/bin/interscript
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require_relative '../lib/interscript'
|
4
|
+
|
5
|
+
# Command-line entry point.
#
# Usage: interscript INPUT_FILE --system=SYSTEM_CODE [--output=OUTPUT_FILE]
#
# Transliterates INPUT_FILE with the system named by --system. The result is
# written to the --output file when one is given, otherwise printed to stdout.
if ARGV.empty?
  puts "write source file, source format, and output file"
else
  # Separate option arguments ("-name" / "--name[=value]") from positional
  # ones so the input file may appear before or after the options.
  option_args, positional_args = ARGV.partition { |arg| arg.start_with?("-") }

  # The pattern is anchored to the whole argument: an unanchored scan would
  # treat the hyphen inside a file name such as "rus-Cyrl.txt" as an option.
  # When an option is repeated, the last occurrence wins.
  args = option_args.each_with_object({}) do |arg, parsed|
    match = arg.match(/\A--?([^=\s]+)(?:=(.+))?\z/)
    parsed[match[1]] = match[2] if match
  end

  input = positional_args.first
  system_code = args["system"]
  output_file = args["output"]

  raise "Please enter the system code with --system={system_code}" unless system_code
  raise "Please provide the source file as the first non-option argument" unless input

  if output_file
    Interscript.instance.transliterate_file(system_code, input, output_file)
  else
    # File.read, unlike IO.read, never interprets a leading "|" in the
    # argument as a command to execute.
    puts Interscript.instance.transliterate(system_code, File.read(input))
  end
end
|
21
|
+
|
22
|
+
|
data/lib/interscript.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'singleton'
|
3
|
+
|
4
|
+
# Transliterates text using character maps read from YAML system definitions.
#
# The class mixes in Singleton, so callers obtain the shared object through
# +Interscript.instance+. Loaded definitions are cached per system code for
# the lifetime of that instance.
class Interscript
  include Singleton

  # Directory that holds the "<system_code>.yaml" definition files.
  SYSTEM_DEFINITIONS_PATH = File.expand_path('../../maps', __FILE__)

  # Starts with an empty cache of system definitions.
  def initialize
    @systems = {}
  end

  # Reads +input_file+, transliterates its contents and writes the result to
  # +output_file+, then prints a confirmation line to stdout.
  #
  # @param system_code [String] code of the system definition to apply
  # @param input_file [String] path of the file to read
  # @param output_file [String] path of the file to write (overwritten)
  def transliterate_file(system_code, input_file, output_file)
    input = File.read(input_file)
    output = transliterate(system_code, input)

    File.open(output_file, "w") do |f|
      f.puts(output)
    end
    puts "Output written to: #{output_file}"
  end

  # Loads the definition for +system_code+ from SYSTEM_DEFINITIONS_PATH unless
  # it is already cached, and returns it.
  #
  # NOTE(review): +system_code+ is interpolated into the file path unchecked;
  # confirm callers never pass untrusted values containing "../".
  #
  # @param system_code [String] base name (without ".yaml") of the definition
  # @return [Object] the parsed YAML document
  def load_system_definition(system_code)
    @systems[system_code] ||= YAML.load_file(File.join(SYSTEM_DEFINITIONS_PATH, "#{system_code}.yaml"))
  end

  # Returns the cached definition for +system_code+, or nil when it has not
  # been loaded yet. This method never triggers a load.
  def get_system(system_code)
    @systems[system_code]
  end

  # Returns the "characters" entry under "map" of a loaded definition.
  # Raises NoMethodError when the system has not been loaded.
  def system_char_map(system_code)
    get_system(system_code)["map"]["characters"]
  end

  # Returns the "rules" entry under "map" of a loaded definition.
  # Raises NoMethodError when the system has not been loaded.
  def system_rules(system_code)
    get_system(system_code)["map"]["rules"]
  end

  # Transliterates +string+ character by character using the character map
  # of +system_code+, loading the definition first if necessary.
  #
  # Characters without a (truthy) entry in the map are copied through
  # unchanged. The input string is never modified, so frozen strings are
  # accepted.
  #
  # @param system_code [String] code of the system definition to apply
  # @param string [String] text to transliterate
  # @return [String] a new string holding the transliterated text
  def transliterate(system_code, string)
    load_system_definition(system_code)

    # TODO: also need to support regular expressions via system_rules(system_code), before system_char_map

    character_map = system_char_map(system_code)

    # A pure lookup per character: the previous in-place `string[char] = ...`
    # mutated the caller's argument and searched the string on every step.
    string.each_char.map { |char| character_map[char] || char }.join
  end

end
|
53
|
+
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: interscript
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- project_contibutors
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-11-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Interoperable script conversion systems
|
56
|
+
email:
|
57
|
+
executables:
|
58
|
+
- interscript
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- README.adoc
|
63
|
+
- bin/interscript
|
64
|
+
- lib/interscript.rb
|
65
|
+
- lib/interscript/version.rb
|
66
|
+
homepage: ''
|
67
|
+
licenses:
|
68
|
+
- MIT
|
69
|
+
metadata: {}
|
70
|
+
post_install_message:
|
71
|
+
rdoc_options: []
|
72
|
+
require_paths:
|
73
|
+
- lib
|
74
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
requirements: []
|
85
|
+
rubygems_version: 3.0.3
|
86
|
+
signing_key:
|
87
|
+
specification_version: 4
|
88
|
+
summary: Interoperable script conversion systems
|
89
|
+
test_files: []
|