interscript 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 19905541367b50886e3d130aac4132cbb5e9050bff1409f924b23848ea5b3a32
4
+ data.tar.gz: d9d6a9531009778797f2826cf51dfc2b68b27651608be5c91ed7f343d75ff75f
5
+ SHA512:
6
+ metadata.gz: 247a535a7fd1203b2331b2c6c99fa9a9e0747c3fc5ff1f5476c034d1c72ae5bd16bbf2866cf639cadd321989dd653a461eda2b0db662db303c381b2de8b18285
7
+ data.tar.gz: a9c259539a46c33dc5c7de38edc9b5d330727064509dbc6cc17cdfa3baf1ae950fa3e8c645e45e9e04fa3afc213f50f3e1cfe8c44470bf8024e93f3621e32b87
data/README.adoc ADDED
@@ -0,0 +1,65 @@
1
+ = Interscript: Interoperable Script Conversion Systems and a Ruby implementation
2
+
3
+ == Introduction
4
+
5
+ This repository contains a number of transliteration schemes from:
6
+
7
+ * BGN/PCGN
8
+ * ICAO
9
+ * ISO
10
+ * UN (by UNGEGN)
11
+
12
+ The goal is to achieve interoperable transliteration schemes allowing quality comparisons.
13
+
14
+
15
+ == STATUS (work in progress!)
16
+
17
+ These transliteration systems currently work:
18
+
19
+ `bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian
20
+ `iso-rus-Cyrl-Latn-iso9`:: ISO 9 Romanization of Russian
21
+ `icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian
22
+ `bas-rus-Cyrl-Latn-bss`:: Bulgarian Academy of Sciences Streamlined System for Russian
23
+
24
+
25
+ == Usage
26
+
27
+
28
+ [source,sh]
29
+ ----
30
+ # Transliterating Russian Cyrillic to Latin using the Streamlined System for Russian
31
+ interscript samples/rus-Cyrl.txt --system=bas-rus-Cyrl-Latn-bss --output=rus-Latn.txt
32
+
33
+ # Transliterating Russian Cyrillic to Latin using the BGN/PCGN Romanization of Russian
34
+ interscript samples/rus-Cyrl.txt --system=bgnpcgn-rus-Cyrl-Latn-1947 --output=rus-Latn.txt
35
+ ----
36
+
37
+
38
+ == ISCS system codes
39
+
40
+ The system code identifying a script conversion system has a few components:
41
+
42
+ e.g. `bgnpcgn-rus-Cyrl-Latn-1947`
43
+
44
+ `bgnpcgn`:: the authority identifier
45
+ `rus`:: an ISO 639-2 3-letter language code that this system applies to
46
+ `Cyrl`:: an ISO 15924 script code, identifying the source script
47
+ `Latn`:: an ISO 15924 script code, identifying the target script
48
+ `1947`:: an identifier unit within the authority to identify this system
49
+
50
+
51
+ == Covered languages
52
+
53
+ Currently the schemes cover Cyrillic, Armenian, Greek, Arabic and Hebrew.
54
+
55
+
56
+ == Sources
57
+
58
+ * `rus-Cyrl-1.txt`: Copied from the XLS output from http://www.primorsk.vybory.izbirkom.ru/region/primorsk?action=show&global=true&root=254017025&tvd=4254017212287&vrn=100100067795849&prver=0&pronetvd=0&region=25&sub_region=25&type=242&vibid=4254017212287
59
+
60
+ * `rus-Cyrl-2.txt`: Copied from the XLS output from http://www.yaroslavl.vybory.izbirkom.ru/region/yaroslavl?action=show&root=764013001&tvd=4764013188704&vrn=4764013188693&prver=0&pronetvd=0&region=76&sub_region=76&type=426&vibid=4764013188704
61
+
62
+
63
+ == Credits
64
+
65
+ This is a Ribose project.
data/bin/interscript ADDED
@@ -0,0 +1,22 @@
1
#!/usr/bin/env ruby
# Command-line entry point for Interscript.
#
# Usage:
#   interscript INPUT_FILE --system=SYSTEM_CODE [--output=OUTPUT_FILE]
#
# The first argument that does not start with "-" is taken as the input file.
# With --output the transliterated text is written to that file; otherwise it
# is printed to standard output.
require 'rubygems'
require_relative '../lib/interscript'

if ARGV.empty?
  puts "write source file, source format, and output file"
else
  # Split the command line into option arguments ("-x" / "--x[=value]") and
  # positional arguments, so the input file may appear before or after options.
  option_args, positional_args = ARGV.partition { |arg| arg.start_with?("-") }

  # Anchored so only a whole "--key[=value]" argument is parsed; the value may
  # contain spaces (e.g. a quoted output path). A flag without a value maps to nil.
  args = Hash[option_args.flat_map { |s| s.scan(/\A--?([^=\s]+)(?:=(.+))?\z/) }]
  input = positional_args.first
  system_code = args["system"]
  output_file = args["output"]

  raise "Please enter the system code with --system={system_code}" unless system_code
  raise "Please enter the source file to transliterate" unless input

  if output_file
    Interscript.instance.transliterate_file(system_code, input, output_file)
  else
    # File.read rather than IO.read: IO.read treats a name starting with "|"
    # as a command to execute, which must never happen for a user-supplied path.
    puts Interscript.instance.transliterate(system_code, File.read(input))
  end
end
21
+
22
+
@@ -0,0 +1,3 @@
1
# Version constant for the interscript gem.
#
# Declared on a class (not a module) because lib/interscript.rb defines
# Interscript as a class; reopening it with a different keyword raises
# TypeError when both files are loaded in the same process.
class Interscript
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,53 @@
1
+ require 'yaml'
2
+ require 'singleton'
3
+
4
# Transliterates text using conversion systems defined in YAML files.
#
# Used as a singleton (Interscript.instance). A system definition is read from
# "<SYSTEM_DEFINITIONS_PATH>/<system_code>.yaml" the first time it is needed
# and cached in memory afterwards.
class Interscript
  include Singleton

  # Directory that holds one "<system_code>.yaml" file per conversion system.
  SYSTEM_DEFINITIONS_PATH = File.expand_path('../../maps', __FILE__)

  def initialize
    # Cache of parsed system definitions, keyed by system code.
    @systems = {}
  end

  # Transliterates the contents of input_file and writes the result to
  # output_file, then reports the output path on standard output.
  #
  # @param system_code [String] code of the conversion system to apply
  # @param input_file [String] path of the file to read
  # @param output_file [String] path of the file to (over)write
  def transliterate_file(system_code, input_file, output_file)
    output = transliterate(system_code, File.read(input_file))

    File.open(output_file, "w") do |f|
      f.puts(output)
    end
    puts "Output written to: #{output_file}"
  end

  # Loads (once) and returns the parsed definition for system_code.
  #
  # @param system_code [String]
  # @return [Object] the parsed YAML document
  # @raise [Errno::ENOENT] if no definition file exists for system_code
  def load_system_definition(system_code)
    @systems[system_code] ||= YAML.load_file(File.join(SYSTEM_DEFINITIONS_PATH, "#{system_code}.yaml"))
  end

  # Returns the cached definition for system_code, or nil if it has not been
  # loaded yet. Does not touch the filesystem.
  def get_system(system_code)
    @systems[system_code]
  end

  # Returns the "map" -> "characters" entry of the system definition,
  # loading the definition first if necessary.
  def system_char_map(system_code)
    load_system_definition(system_code)["map"]["characters"]
  end

  # Returns the "map" -> "rules" entry of the system definition,
  # loading the definition first if necessary.
  def system_rules(system_code)
    load_system_definition(system_code)["map"]["rules"]
  end

  # Converts string character by character using the system's character map.
  # Characters without a mapping are passed through unchanged. The input
  # string is never modified, so frozen strings are accepted.
  #
  # @param system_code [String] code of the conversion system to apply
  # @param string [String] text to transliterate
  # @return [String] a new string with mapped characters replaced
  def transliterate(system_code, string)
    # TODO: also need to support regular expressions via system_rules(system_code), before system_char_map

    character_map = system_char_map(system_code)

    # Build the result from the converted characters only; writing back into
    # `string` would clobber the caller's data and fail on frozen input.
    string.each_char.map { |char| character_map[char] || char }.join
  end
end
53
+
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: interscript
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - project_contibutors
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-11-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Interoperable script conversion systems
56
+ email:
57
+ executables:
58
+ - interscript
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - README.adoc
63
+ - bin/interscript
64
+ - lib/interscript.rb
65
+ - lib/interscript/version.rb
66
+ homepage: ''
67
+ licenses:
68
+ - MIT
69
+ metadata: {}
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubygems_version: 3.0.3
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Interoperable script conversion systems
89
+ test_files: []