interscript 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 19905541367b50886e3d130aac4132cbb5e9050bff1409f924b23848ea5b3a32
4
+ data.tar.gz: d9d6a9531009778797f2826cf51dfc2b68b27651608be5c91ed7f343d75ff75f
5
+ SHA512:
6
+ metadata.gz: 247a535a7fd1203b2331b2c6c99fa9a9e0747c3fc5ff1f5476c034d1c72ae5bd16bbf2866cf639cadd321989dd653a461eda2b0db662db303c381b2de8b18285
7
+ data.tar.gz: a9c259539a46c33dc5c7de38edc9b5d330727064509dbc6cc17cdfa3baf1ae950fa3e8c645e45e9e04fa3afc213f50f3e1cfe8c44470bf8024e93f3621e32b87
data/README.adoc ADDED
@@ -0,0 +1,65 @@
1
+ = Interscript: Interoperable Script Conversion Systems and a Ruby implementation
2
+
3
+ == Introduction
4
+
5
+ This repository contains a number of transliteration schemes from:
6
+
7
+ * BGN/PCGN
8
+ * ICAO
9
+ * ISO
10
+ * UN (by UNGEGN)
11
+
12
+ The goal is to achieve interoperable transliteration schemes allowing quality comparisons.
13
+
14
+
15
+ == STATUS (work in progress!)
16
+
17
+ These transliteration systems currently work:
18
+
19
+ `bgnpcgn-rus-Cyrl-Latn-1947`:: BGN/PCGN Romanization of Russian
20
+ `iso-rus-Cyrl-Latn-iso9`:: ISO 9 Romanization of Russian
21
+ `icao-rus-Cyrl-Latn-9303`:: ICAO MRZ Romanization of Russian
22
+ `bas-rus-Cyrl-Latn-bss`:: Bulgarian Academy of Sciences Streamlined System for Russian
23
+
24
+
25
+ == Usage
26
+
27
+
28
+ [source,sh]
29
+ ----
30
+ # Transliterating Russian Cyrillic to Latin using the Streamlined System for Russian
31
+ interscript samples/rus-Cyrl.txt --system=bas-rus-Cyrl-Latn-bss --output=rus-Latn.txt
32
+
33
+ # Transliterating Russian Cyrillic to Latin using the BGN/PCGN Romanization of Russian
34
+ interscript samples/rus-Cyrl.txt --system=bgnpcgn-rus-Cyrl-Latn-1947 --output=rus-Latn.txt
35
+ ----
36
+
37
+
38
+ == ISCS system codes
39
+
40
+ The system code identifying a script conversion system has a few components:
41
+
42
+ e.g. `bgnpcgn-rus-Cyrl-Latn-1947`
43
+
44
+ `bgnpcgn`:: the authority identifier
45
+ `rus`:: an ISO 639-2 3-letter language code that this system applies to
46
+ `Cyrl`:: an ISO 15924 script code, identifying the source script
47
+ `Latn`:: an ISO 15924 script code, identifying the target script
48
+ `1947`:: an identifier unit within the authority to identify this system
49
+
50
+
51
+ == Covered languages
52
+
53
+ Currently the schemes cover Cyrillic, Armenian, Greek, Arabic and Hebrew.
54
+
55
+
56
+ == Sources
57
+
58
+ * `rus-Cyrl-1.txt`: Copied from the XLS output from http://www.primorsk.vybory.izbirkom.ru/region/primorsk?action=show&global=true&root=254017025&tvd=4254017212287&vrn=100100067795849&prver=0&pronetvd=0&region=25&sub_region=25&type=242&vibid=4254017212287
59
+
60
+ * `rus-Cyrl-2.txt`: Copied from the XLS output from http://www.yaroslavl.vybory.izbirkom.ru/region/yaroslavl?action=show&root=764013001&tvd=4764013188704&vrn=4764013188693&prver=0&pronetvd=0&region=76&sub_region=76&type=426&vibid=4764013188704
61
+
62
+
63
+ == Credits
64
+
65
+ This is a Ribose project.
data/bin/interscript ADDED
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require_relative '../lib/interscript'
4
+
5
# Splits command-line arguments into named options and positional arguments.
#
# An argument of the form "--name=value" or "--name" (a single leading dash
# is accepted as well) is an option; anything else is positional. The pattern
# is anchored to the whole argument so that dashes inside a file name such as
# "rus-Cyrl.txt" are not mistaken for options, and the value is everything
# after the first "=", so paths containing spaces survive intact.
#
# @param argv [Array<String>] raw command-line arguments
# @return [Array] a two-element array: a Hash of option name => value
#   (value is nil when no "=value" part was given) and an Array of the
#   positional arguments in their original order
def parse_cli_arguments(argv)
  options = {}
  positional = []

  argv.each do |arg|
    match = /\A--?([^=\s]+)(?:=(.*))?\z/m.match(arg)
    if match
      options[match[1]] = match[2]
    else
      positional << arg
    end
  end

  [options, positional]
end

if ARGV.empty?
  puts "write source file, source format, and output file"
else
  args, inputs = parse_cli_arguments(ARGV)
  # The input file is the first non-option argument, wherever it appears.
  input = inputs.first
  system_code = args["system"]
  output_file = args["output"]

  # abort prints the message to stderr and exits with status 1, without a backtrace.
  abort "Please enter the system code with --system={system_code}" if system_code.to_s.empty?
  abort "Please enter the source file to transliterate" unless input

  if output_file
    Interscript.instance.transliterate_file(system_code, input, output_file)
  else
    # File.read never interprets a leading "|" as a command, unlike IO.read.
    puts Interscript.instance.transliterate(system_code, File.read(input))
  end
end
21
+
22
+
@@ -0,0 +1,3 @@
1
# Namespace holder for the gem version.
#
# Interscript is declared as a class rather than a module because the
# library's main file (lib/interscript.rb) defines `class Interscript`;
# Ruby raises TypeError when the same constant is opened once as a module
# and once as a class within a single process.
class Interscript
  # Released gem version.
  VERSION = "0.1.0"
end
@@ -0,0 +1,53 @@
1
+ require 'yaml'
2
+ require 'singleton'
3
+
4
# Registry and engine for script conversion (transliteration) systems.
#
# Each system is described by a YAML file named "<system_code>.yaml" inside
# SYSTEM_DEFINITIONS_PATH. A definition is parsed the first time it is needed
# and then cached in the singleton instance.
class Interscript
  include Singleton

  # Directory holding the per-system YAML definition files.
  SYSTEM_DEFINITIONS_PATH = File.expand_path('../../maps', __FILE__)

  def initialize
    # Cache of parsed definitions, keyed by system code.
    @systems = {}
  end

  # Transliterates the contents of a file and writes the result to another file.
  #
  # @param system_code [String] code of the conversion system to apply
  # @param input_file [String] path of the file to read
  # @param output_file [String] path of the file to (over)write
  # @return [nil]
  def transliterate_file(system_code, input_file, output_file)
    input = File.read(input_file)
    output = transliterate(system_code, input)

    File.open(output_file, "w") do |f|
      f.puts(output)
    end
    puts "Output written to: #{output_file}"
  end

  # Loads the definition for +system_code+ from its YAML file, or returns the
  # cached copy when it was loaded before.
  #
  # @param system_code [String]
  # @return [Object] the parsed YAML document
  def load_system_definition(system_code)
    @systems[system_code] ||= YAML.load_file(File.join(SYSTEM_DEFINITIONS_PATH, "#{system_code}.yaml"))
  end

  # Returns the cached definition for +system_code+, or nil when it has not
  # been loaded yet. This never touches the file system.
  def get_system(system_code)
    @systems[system_code]
  end

  # Returns the "characters" entry under "map" of the system definition,
  # loading the definition on demand.
  def system_char_map(system_code)
    load_system_definition(system_code)["map"]["characters"]
  end

  # Returns the "rules" entry under "map" of the system definition,
  # loading the definition on demand.
  def system_rules(system_code)
    load_system_definition(system_code)["map"]["rules"]
  end

  # Converts +string+ character by character using the character map of the
  # given system. Characters without a mapping are passed through unchanged.
  # The input string is never modified, so frozen strings are accepted.
  #
  # @param system_code [String] code of the conversion system to apply
  # @param string [String] text to transliterate
  # @return [String] a new string holding the transliterated text
  def transliterate(system_code, string)
    # TODO: also need to support regular expressions via system_rules(system_code), before system_char_map

    character_map = system_char_map(system_code)

    string.each_char.map { |char| character_map[char] || char }.join
  end
end
53
+
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: interscript
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - project_contibutors
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-11-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Interoperable script conversion systems
56
+ email:
57
+ executables:
58
+ - interscript
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - README.adoc
63
+ - bin/interscript
64
+ - lib/interscript.rb
65
+ - lib/interscript/version.rb
66
+ homepage: ''
67
+ licenses:
68
+ - MIT
69
+ metadata: {}
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: '0'
84
+ requirements: []
85
+ rubygems_version: 3.0.3
86
+ signing_key:
87
+ specification_version: 4
88
+ summary: Interoperable script conversion systems
89
+ test_files: []