interscript 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.adoc +65 -0
- data/bin/interscript +22 -0
- data/lib/interscript/version.rb +3 -0
- data/lib/interscript.rb +53 -0
- metadata +89 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 19905541367b50886e3d130aac4132cbb5e9050bff1409f924b23848ea5b3a32
|
4
|
+
data.tar.gz: d9d6a9531009778797f2826cf51dfc2b68b27651608be5c91ed7f343d75ff75f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 247a535a7fd1203b2331b2c6c99fa9a9e0747c3fc5ff1f5476c034d1c72ae5bd16bbf2866cf639cadd321989dd653a461eda2b0db662db303c381b2de8b18285
|
7
|
+
data.tar.gz: a9c259539a46c33dc5c7de38edc9b5d330727064509dbc6cc17cdfa3baf1ae950fa3e8c645e45e9e04fa3afc213f50f3e1cfe8c44470bf8024e93f3621e32b87
|
data/README.adoc
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
= Interscript: Interoperable Script Conversion Systems and a Ruby implementation
|
2
|
+
|
3
|
+
== Introduction
|
4
|
+
|
5
|
+
This repository contains a number of transliteration schemes from:
|
6
|
+
|
7
|
+
* BGN/PCGN
|
8
|
+
* ICAO
|
9
|
+
* ISO
|
10
|
+
* UN (by UNGEGN)
|
11
|
+
|
12
|
+
The goal is to achieve interoperable transliteration schemes allowing quality comparisons.
|
13
|
+
|
14
|
+
|
15
|
+
== STATUS (work in progress!)
|
16
|
+
|
17
|
+
These transliteration systems currently work:
|
18
|
+
|
19
|
+
`bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian
|
20
|
+
`iso-rus-Cyrl-Latn-iso9`:: ISO 9 Romanization of Russian
|
21
|
+
`icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian
|
22
|
+
`bas-rus-Cyrl-Latn-bss`:: Bulgarian Academy of Sciences Streamlined System for Russian
|
23
|
+
|
24
|
+
|
25
|
+
== Usage
|
26
|
+
|
27
|
+
|
28
|
+
[source,sh]
|
29
|
+
----
|
30
|
+
# Transliterating Russian Cyrillic to Latin using the Streamlined System for Russian
|
31
|
+
interscript samples/rus-Cyrl.txt --system=bas-rus-Cyrl-Latn-bss --output=rus-Latn.txt
|
32
|
+
|
33
|
+
# Transliterating Russian Cyrillic to Latin using the BGN/PCGN Romanization of Russian
|
34
|
+
interscript samples/rus-Cyrl.txt --system=bgnpcgn-rus-Cyrl-Latn-1947 --output=rus-Latn.txt
|
35
|
+
----
|
36
|
+
|
37
|
+
|
38
|
+
== ISCS system codes
|
39
|
+
|
40
|
+
The system code identifying a script conversion system has a few components:
|
41
|
+
|
42
|
+
e.g. `bgnpcgn-rus-Cyrl-Latn-1947`
|
43
|
+
|
44
|
+
`bgnpcgn`:: the authority identifier
|
45
|
+
`rus`:: an ISO 639-2 3-letter language code that this system applies to
|
46
|
+
`Cyrl`:: an ISO 15924 script code, identifying the source script
|
47
|
+
`Latn`:: an ISO 15924 script code, identifying the target script
|
48
|
+
`1947`:: an identifier, unique within the authority, that identifies this system
|
49
|
+
|
50
|
+
|
51
|
+
== Covered languages
|
52
|
+
|
53
|
+
Currently the schemes cover Cyrillic, Armenian, Greek, Arabic and Hebrew.
|
54
|
+
|
55
|
+
|
56
|
+
== Sources
|
57
|
+
|
58
|
+
* `rus-Cyrl-1.txt`: Copied from the XLS output from http://www.primorsk.vybory.izbirkom.ru/region/primorsk?action=show&global=true&root=254017025&tvd=4254017212287&vrn=100100067795849&prver=0&pronetvd=0&region=25&sub_region=25&type=242&vibid=4254017212287
|
59
|
+
|
60
|
+
* `rus-Cyrl-2.txt`: Copied from the XLS output from http://www.yaroslavl.vybory.izbirkom.ru/region/yaroslavl?action=show&root=764013001&tvd=4764013188704&vrn=4764013188693&prver=0&pronetvd=0&region=76&sub_region=76&type=426&vibid=4764013188704
|
61
|
+
|
62
|
+
|
63
|
+
== Credits
|
64
|
+
|
65
|
+
This is a Ribose project.
|
data/bin/interscript
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require_relative '../lib/interscript'
|
4
|
+
|
5
|
+
# Command-line entry point.
#
# Usage: interscript INPUT_FILE --system=SYSTEM_CODE [--output=OUTPUT_FILE]
#
# With --output the transliterated text is written to that file; otherwise it
# is printed to standard output.
if ARGV.empty?
  puts "write source file, source format, and output file"
else
  # Separate "--key=value" style options from positional arguments so the input
  # path is found wherever it appears, and so that dashes inside the path are
  # never mistaken for options by the regex below.
  option_words, positional = ARGV.partition { |word| word.start_with?("-") }

  # Each option word yields one [key, value] pair (value is nil for bare flags).
  args = Hash[option_words.flat_map { |s| s.scan(/--?([^=\s]+)(?:=(\S+))?/) }]
  input = positional.first
  system_code = args["system"]
  output_file = args["output"]

  raise "Please enter the system code with --system={system_code}" unless system_code
  raise "Please enter the input file to transliterate" unless input

  if output_file
    Interscript.instance.transliterate_file(system_code, input, output_file)
  else
    # File.read, unlike IO.read, never interprets a leading "|" in the path as
    # a command to spawn, so a user-supplied path cannot start a subprocess.
    puts Interscript.instance.transliterate(system_code, File.read(input))
  end
end
|
21
|
+
|
22
|
+
|
data/lib/interscript.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'singleton'
|
3
|
+
|
4
|
+
# Converts text from one script to another using per-system character maps
# stored as YAML files. Used as a singleton: call +Interscript.instance+.
class Interscript
  include Singleton

  # Directory expected to hold one "<system_code>.yaml" definition per system.
  SYSTEM_DEFINITIONS_PATH = File.expand_path('../../maps', __FILE__)

  def initialize
    # Cache of parsed system definitions, keyed by system code.
    @systems = {}
  end

  # Reads +input_file+, transliterates its content with the system identified
  # by +system_code+, writes the result to +output_file+ and prints a
  # confirmation line to standard output.
  def transliterate_file(system_code, input_file, output_file)
    input = File.read(input_file)
    output = transliterate(system_code, input)

    File.open(output_file, "w") do |f|
      f.puts(output)
    end
    puts "Output written to: #{output_file}"
  end

  # Loads and caches the YAML definition for +system_code+. The file is only
  # read the first time a given code is requested.
  def load_system_definition(system_code)
    @systems[system_code] ||= YAML.load_file(File.join(SYSTEM_DEFINITIONS_PATH, "#{system_code}.yaml"))
  end

  # Returns the cached definition for +system_code+, or nil when it has not
  # been loaded yet.
  def get_system(system_code)
    @systems[system_code]
  end

  # Returns the "characters" table of a loaded system definition.
  def system_char_map(system_code)
    get_system(system_code)["map"]["characters"]
  end

  # Returns the "rules" entry of a loaded system definition.
  def system_rules(system_code)
    get_system(system_code)["map"]["rules"]
  end

  # Transliterates +string+ character by character using the character map of
  # +system_code+ and returns the result as a new String. Characters without a
  # mapping are passed through unchanged. The input string is never modified,
  # so frozen strings are accepted.
  def transliterate(system_code, string)
    load_system_definition(system_code)

    # TODO: also need to support regular expressions via system_rules(system_code), before system_char_map

    character_map = system_char_map(system_code)

    # Pure lookup per character; a nil/false map entry falls back to the
    # source character itself.
    string.each_char.map { |char| character_map[char] || char }.join
  end
end
|
53
|
+
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: interscript
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- project_contibutors
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-11-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Interoperable script conversion systems
|
56
|
+
email:
|
57
|
+
executables:
|
58
|
+
- interscript
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- README.adoc
|
63
|
+
- bin/interscript
|
64
|
+
- lib/interscript.rb
|
65
|
+
- lib/interscript/version.rb
|
66
|
+
homepage: ''
|
67
|
+
licenses:
|
68
|
+
- MIT
|
69
|
+
metadata: {}
|
70
|
+
post_install_message:
|
71
|
+
rdoc_options: []
|
72
|
+
require_paths:
|
73
|
+
- lib
|
74
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
requirements: []
|
85
|
+
rubygems_version: 3.0.3
|
86
|
+
signing_key:
|
87
|
+
specification_version: 4
|
88
|
+
summary: Interoperable script conversion systems
|
89
|
+
test_files: []
|