interscript 0.1.0 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/bin/interscript +36 -17
  4. data/bin/rspec +29 -0
  5. data/bin/setup +8 -0
  6. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  7. data/lib/g2pwrapper.py +34 -0
  8. data/lib/interscript-opal.rb +2 -0
  9. data/lib/interscript.rb +138 -38
  10. data/lib/interscript/command.rb +28 -0
  11. data/lib/interscript/fs.rb +69 -0
  12. data/lib/interscript/mapping.rb +142 -0
  13. data/lib/interscript/opal.rb +23 -0
  14. data/lib/interscript/opal/maps.js.erb +7 -0
  15. data/lib/interscript/opal_map_translate.rb +12 -0
  16. data/lib/interscript/version.rb +1 -1
  17. data/lib/model-7 +0 -0
  18. data/lib/tha-pt-b-7 +0 -0
  19. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  20. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
  21. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  22. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  23. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  24. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  25. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  26. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  27. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  28. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  29. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  30. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  31. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +222 -0
  32. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  33. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  34. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  35. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  36. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  37. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  38. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +175 -0
  39. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
  40. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  41. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  42. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  43. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  44. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  45. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
  46. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  47. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
  48. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  49. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  50. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
  51. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  52. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  53. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  54. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  55. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  56. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  57. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  58. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  59. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  60. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  61. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +93 -0
  62. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
  63. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  64. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +163 -0
  65. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  66. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  67. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  68. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  69. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  70. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  71. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  72. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  73. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  74. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  75. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  76. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  77. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  78. data/maps/icao-bel-Cyrl-Latn-9303.yaml +141 -0
  79. data/maps/icao-bul-Cyrl-Latn-9303.yaml +122 -0
  80. data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
  81. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
  82. data/maps/icao-per-Arab-Latn-9303.yaml +104 -0
  83. data/maps/icao-rus-Cyrl-Latn-9303.yaml +118 -0
  84. data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
  85. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +120 -0
  86. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  87. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  88. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  89. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +272 -0
  90. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  91. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  92. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  93. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  94. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  95. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  96. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  97. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  98. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +110 -0
  99. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  100. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  101. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  102. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  103. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  104. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  105. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  106. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  107. data/maps/odni-mkd-cyrl-latn-2015.yaml +122 -0
  108. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  109. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  110. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  111. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  112. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  113. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  114. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +167 -0
  115. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  116. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  117. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  118. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  119. data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
  120. data/maps/ses-ara-arab-latn-1930.yaml +275 -0
  121. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  122. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  123. data/maps/un-ara-Arab-Latn-1971.yaml +127 -0
  124. data/maps/un-ara-Arab-Latn-1972.yaml +152 -0
  125. data/maps/un-ara-Arab-Latn-2017.yaml +383 -0
  126. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  127. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  128. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  129. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  130. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  131. data/maps/un-mon-Mong-Latn-2013.yaml +93 -0
  132. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  133. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  134. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  135. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  136. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  137. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  138. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  139. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  140. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  141. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  142. data/spec/interscript/mapping_spec.rb +42 -0
  143. data/spec/interscript_spec.rb +26 -0
  144. data/spec/spec_helper.rb +3 -0
  145. metadata +295 -11
@@ -0,0 +1,2 @@
1
+ require "opal"
2
+ require "interscript"
@@ -1,53 +1,153 @@
# frozen_string_literal: true

require "interscript/opal/maps" if RUBY_ENGINE == "opal"
require "interscript/mapping"

# Transliteration entry point.
#
# At load time the module extends itself with one backend: `Opal` when running
# under the Opal engine, `Fs` otherwise. The backend supplies `sub_replace`,
# `mkregexp`, `external_processing` and the `ALPHA_REGEXP` constant that the
# methods below rely on.
module Interscript
  class InvalidSystemError < StandardError; end
  class ExternalProcessNotRecognizedError < StandardError; end
  class ExternalProcessUnavailableError < StandardError; end

  if RUBY_ENGINE == "opal"
    require "interscript/opal"
    extend Opal
  else
    require "interscript/fs"
    extend Fs
  end

  class << self
    # Transliterates +string+ with the mapping identified by +system_code+.
    #
    # Steps, in order: chained systems, backend external processing,
    # longest-match dictionary replacement, rules, character replacement,
    # postrules, separator/title-case clean-up, Unicode normalisation.
    #
    # @param system_code [String] name of the mapping to apply
    # @param string [String, nil] text to transliterate; never modified in place
    # @param maps [Hash] cache of loaded mappings keyed by system code; any
    #   mapping loaded during the call is stored into it
    # @return [String, nil] the transliterated text, or nil when +string+ is nil
    def transliterate(system_code, string, maps = {})
      maps[system_code] = Interscript::Mapping.for(system_code) unless maps.key?(system_code)
      mapping = maps[system_code]

      # First, apply chained transliteration as specified in the list `chain`.
      mapping.chain.each do |chained_code|
        string = transliterate(chained_code, string, maps)
      end

      string = external_processing(mapping, string)

      # Work on a private copy: a backend's sub_replace may edit its argument
      # in place, which would otherwise alter (or fail on a frozen) caller string.
      string &&= string.dup

      output = replace_dictionary_words(mapping, string)
      return if output.nil?

      output = apply_rules(mapping.rules, output)
      output = replace_characters(mapping, output)
      output = apply_rules(mapping.postrules, output)
      output = finalize_case_and_separators(mapping, output)

      output.unicode_normalize
    end

    private

    # Replaces dictionary entries in +string+, always taking the longest entry
    # that starts at the current position (found by walking the trie).
    def replace_dictionary_words(mapping, string)
      trie = mapping.dictionary_trie
      dictmap = mapping.dictionary_hash

      pos = 0
      while pos < string.to_s.size
        longest = ""
        len = 0

        # Extend the candidate while the trie still has words with this prefix.
        while (pos + len < string.to_s.size) && trie.partial_word?(string[pos..pos + len])
          candidate = string[pos..pos + len]
          longest = candidate if trie.word?(candidate)
          len += 1
        end

        if longest.empty?
          pos += 1
        else
          repl = dictmap[longest]
          string = sub_replace(string, pos, longest.length, repl)
          # Skip over the replacement so it is not matched again.
          pos += repl.length
        end
      end

      string
    end

    # Applies each rule's "pattern" => "result" substitution to +text+ in order.
    def apply_rules(rules, text)
      rules.reduce(text) do |acc, rule|
        acc.gsub(mkregexp(rule["pattern"]), rule["result"])
      end
    end

    # Replaces every occurrence of each character-map key in +text+.
    #
    # NOTE(review): keys are interpolated into a Regexp unescaped, so a key
    # containing regexp metacharacters is treated as a pattern, and a target
    # that itself contains its key would loop forever. Confirm against the maps.
    def replace_characters(mapping, text)
      separator = mapping.character_separator || ""
      keep_case = !mapping.downcase

      mapping.characters_hash.each do |source, target|
        # If more than one target is given, choose the first one. This is done
        # before upcasing so an Array target cannot hit `Array#upcase`.
        target = target[0] if target.is_a?(Array)
        pattern = /#{source}/

        while (match = text.match(pattern))
          pos = match.offset(0).first
          result = keep_case && up_case_around?(text, pos) ? target.upcase : target
          text = sub_replace(text, pos, match[0].size, add_separator(separator, pos, result))
        end
      end

      text
    end

    # Applies title-casing and removes character separators that directly
    # follow a word separator.
    def finalize_case_and_separators(mapping, text)
      separator = mapping.character_separator || ""
      word_separator = mapping.word_separator || ""
      title_case = mapping.title_case

      text = text.sub(/^(.)/, &:upcase) if title_case
      return text if word_separator.empty?

      text = text.gsub(/#{word_separator}#{separator}/u, word_separator)
      text = text.gsub(/#{word_separator}(.)/u, &:upcase) if title_case
      text
    end

    # Prefixes +result+ with +separator+ unless it lands at the very start.
    def add_separator(separator, pos, result)
      pos.zero? ? result : separator + result
    end

    # True when the character at +pos+ is not already lower case and the
    # nearest character found by scanning outwards on either side equals its
    # own upcased form.
    #
    # NOTE(review): the scans stop at the string boundaries even on a
    # non-alphabetic character, so a digit at index 0 counts as "upper case".
    def up_case_around?(string, pos)
      return false if string[pos] == string[pos].downcase

      alpha = Regexp.new(ALPHA_REGEXP)

      i = pos - 1
      i -= 1 while i.positive? && string[i] !~ alpha
      before = i >= 0 && i < pos ? string[i].to_s.strip : ""

      i = pos + 1
      i += 1 while i < string.size - 1 && string[i] !~ alpha
      after = i > pos ? string[i].to_s.strip : ""

      before_uc = !before.empty? && before == before.upcase
      after_uc = !after.empty? && after == after.upcase
      before_uc || after_uc
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'thor'
2
+ require 'interscript'
3
+ require 'json'
module Interscript
  # Command line interface
  class Command < Thor
    desc '<file>', 'Transliterate text'
    option :system, aliases: '-s', required: true, desc: 'Transliteration system'
    option :output, aliases: '-o', required: false, desc: 'Output file'
    option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json'

    # Transliterates the file at +input+ with the system given by --system.
    # Writes to the --output file when given, otherwise prints to stdout.
    # The --map JSON is parsed once and handed to both code paths.
    #
    # @param input [String] path of the file to transliterate
    def translit(input)
      maps = JSON.parse(options[:map])

      if options[:output]
        Interscript.transliterate_file(options[:system], input, options[:output], maps)
      else
        # File.read, not IO.read: IO.read would run a path that starts with
        # "|" as a shell command.
        puts Interscript.transliterate(options[:system], File.read(input), maps)
      end
    end

    desc 'list', 'Prints allowed transliteration systems'
    # Prints the base name of every YAML file in the maps directory, two
    # levels above this file's directory, in sorted order.
    def list
      pattern = File.expand_path('../../maps/*.yaml', __dir__)
      Dir[pattern].sort.each do |path|
        puts File.basename(path, '.yaml')
      end
    end
  end
end
@@ -0,0 +1,69 @@
require "pathname"

module Interscript
  # Backend mixed into Interscript when not running under Opal: string
  # splicing, file I/O and calls into external Python helpers.
  module Fs
    ALPHA_REGEXP = '[[:alpha:]]'

    # External process names accepted by #external_process, mapped to the
    # model name passed to g2pwrapper.transliterate.
    EXTERNAL_PROCESS_MODELS = {
      'sequitur.pythainlp_lexicon' => 'pythainlp_lexicon',
      'sequitur.wiktionary_phonemic' => 'wiktionary_phonemic'
    }.freeze

    # Returns a copy of +string+ with +size+ characters starting at +pos+
    # replaced by +repl+. The argument itself is left untouched.
    #
    # The start/length form is used deliberately: the range `pos..pos+size-1`
    # turns into `0..-1` (the whole string) for a zero-width replacement at
    # position 0.
    #
    # @param string [String]
    # @param pos [Integer] index of the first character to replace
    # @param size [Integer] number of characters to replace (may be 0)
    # @param repl [String] replacement text
    # @return [String]
    def sub_replace(string, pos, size, repl)
      replaced = string.dup
      replaced[pos, size] = repl
      replaced
    end

    # @return [Pathname] the directory two levels above this file's directory
    def root_path
      @root_path ||= Pathname.new(File.join(File.dirname(__dir__), ".."))
    end

    # Reads +input_file+, transliterates its content and writes the result
    # (followed by a newline via `puts`) to +output_file+.
    #
    # @return [String] +output_file+
    def transliterate_file(system_code, input_file, output_file, maps={})
      input = File.read(input_file)
      output = transliterate(system_code, input, maps)

      File.open(output_file, 'w') { |f| f.puts(output) }

      puts "Output written to: #{output_file}"
      output_file
    end

    # Imports the g2pwrapper Python module, retrying once with this project's
    # lib/ directory appended to Python's sys.path.
    #
    # NOTE(review): `pyimport`, `sys` and `g2pwrapper` are not defined in this
    # file — presumably provided by PyCall::Import; confirm where it is mixed in.
    def import_python_modules
      pyimport :g2pwrapper
    rescue StandardError
      pyimport :sys
      sys.path.append(root_path.to_s + "/lib/")
      pyimport :g2pwrapper
    end

    # Runs +string+ through the external process called +process_name+.
    #
    # The name is validated before anything is imported, so an unknown name is
    # reported as such instead of being masked by an import failure.
    #
    # @raise [ExternalProcessNotRecognizedError] if +process_name+ is unknown
    # @raise [ExternalProcessUnavailableError] if importing or calling the
    #   Python helper fails
    def external_process(process_name, string)
      model = EXTERNAL_PROCESS_MODELS.fetch(process_name) do
        raise ExternalProcessNotRecognizedError, "Unrecognized external process: #{process_name}"
      end

      begin
        import_python_modules
        g2pwrapper.transliterate(model, string)
      rescue StandardError => e
        raise ExternalProcessUnavailableError, e.message
      end
    end

    # Applies the mapping's optional segmentation step and then its optional
    # transcription step; returns +string+ unchanged when neither is set.
    def external_processing(mapping, string)
      # Segmentation
      string = external_process(mapping.segmentation, string) if mapping.segmentation

      # Transliteration/Transcription
      string = external_process(mapping.transcription, string) if mapping.transcription

      string
    end

    private

    # Builds a Unicode-aware Regexp from a pattern string.
    def mkregexp(regexpstring)
      /#{regexpstring}/u
    end
  end
end
@@ -0,0 +1,142 @@
1
+ require 'rambling-trie'
2
+ require 'yaml'
3
+ require 'json'
4
+
module Interscript
  # A transliteration system loaded from a YAML file (or, under Opal, from the
  # bundled `InterscriptMaps` JavaScript object), with inherited systems merged in.
  class Mapping
    attr_reader(
      :id,
      :url,
      :name,
      :notes,
      :rules,
      :tests,
      :language,
      :postrules,
      :characters,
      :description,
      :authority_id,
      :creation_date,
      :source_script,
      :destination_script,
      :chain,
      :character_separator,
      :word_separator,
      :title_case,
      :downcase,
      :dictionary,
      :characters_hash,
      :dictionary_hash,
      :segmentation,
      :transcription,
      :dictionary_trie
    )

    # @param system_code [String] mapping name; the file read is "<system_code>.yaml"
    # @param options [Hash]
    # @option options [Integer] :depth inheritance depth (internal recursion guard)
    # @option options [Pathname, String] :system_path directory holding the map
    #   files; defaults to "<root>/maps". The misnamed legacy key :system_code
    #   is still honoured for backward compatibility.
    def initialize(system_code, options = {})
      @system_code = system_code
      @depth = options.fetch(:depth, 0).to_i

      unless RUBY_ENGINE == 'opal'
        @system_path = options[:system_path] || options[:system_code] || default_path
      end

      load_and_serialize_system_mappings
    end

    # Convenience constructor; same arguments as #initialize.
    def self.for(system_code, options = {})
      new(system_code, options)
    end

    # Loads the raw mapping data and populates the readers. Does nothing once
    # the inheritance depth reaches 5, which bounds circular `inherit` chains.
    def load_and_serialize_system_mappings
      return if depth >= 5

      serialize_system_mappings(load_system_mappings)
    end

    private

    attr_reader :depth, :system_code, :system_path

    def system_code_file
      [system_code, "yaml"].join(".")
    end

    def default_path
      @default_path ||= Interscript.root_path.join("maps")
    end

    def load_system_mappings
      RUBY_ENGINE == 'opal' ? load_opal_mappings : load_fs_mappings
    end

    def load_opal_mappings
      JSON.parse(`InterscriptMaps[#{system_code}]`)
    end

    # @raise [Interscript::InvalidSystemError] when the YAML file does not exist
    def load_fs_mappings
      # File.join on to_s accepts both Pathname and plain String directories.
      YAML.load_file(File.join(system_path.to_s, system_code_file))
    rescue Errno::ENOENT
      raise Interscript::InvalidSystemError, "No system mappings found"
    end

    # Copies the parsed document into instance variables, then merges in
    # inherited systems and builds the lookup structures.
    def serialize_system_mappings(mappings)
      map = mappings["map"] || {}

      @id = mappings["id"]
      @url = mappings["url"]
      @name = mappings["name"]
      @notes = mappings["notes"]
      @tests = mappings["tests"] || []
      @language = mappings["language"]
      @description = mappings["description"]
      @authority_id = mappings["authority_id"]
      @creation_date = mappings["creation_date"]
      @source_script = mappings["source_script"]
      @destination_script = mappings["destination_script"]
      @chain = mappings["chain"] || []
      @character_separator = map["character_separator"]
      @word_separator = map["word_separator"]
      @title_case = map["title_case"] || false
      @downcase = map["downcase"] || false
      @rules = map["rules"] || []
      @postrules = map["postrules"] || []
      @characters = map["characters"] || {}
      @dictionary = map["dictionary"] || {}
      # "segementation" is the misspelt key the original code read; it is kept
      # as a fallback so maps written against it keep working.
      @segmentation = map["segmentation"] || map["segementation"]
      @transcription = map["transcription"]

      include_inherited_mappings(map["inherit"])
      build_hashes
      build_trie
    end

    # Merges each inherited system into this one: inherited rules/postrules run
    # first, and this mapping's characters/dictionary entries override
    # inherited ones with the same key.
    #
    # @param inherit [String, Array<String>, nil] value of the map's `inherit` key
    def include_inherited_mappings(inherit)
      Array(inherit).flatten.compact.each do |inherit_system|
        inherited = Mapping.for(inherit_system, depth: depth + 1, system_path: system_path)

        # A parent skipped by the depth guard has nil collections; compact/||
        # keep those nils out of the merged result.
        @rules = [inherited.rules, rules].flatten.compact
        @postrules = [inherited.postrules, postrules].flatten.compact
        @characters = (inherited.characters || {}).merge(characters)
        @dictionary = (inherited.dictionary || {}).merge(dictionary)
      end
    end

    # Orders both maps so that longer keys come first.
    def build_hashes
      @characters_hash = characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
      @dictionary_hash = dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
    end

    # Indexes the dictionary keys in a trie for prefix lookups.
    def build_trie
      @dictionary_trie = Rambling::Trie.create
      dictionary_trie.concat dictionary.keys
    end
  end
end
@@ -0,0 +1,23 @@
module Interscript
  # Backend mixed into Interscript when running under the Opal engine.
  module Opal
    ALPHA_REGEXP = '\p{L}'

    # Builds a Regexp from a pattern string. An inline "(?i)" marker is
    # stripped (together with any "(?-i)") and turned into the "i" flag.
    #
    # NOTE(review): the pattern is passed to Regexp.new wrapped in slashes with
    # the flags appended ("/.../ui"). Stock Regexp.new would treat those
    # slashes literally — presumably the Opal build in use interprets this
    # form; confirm before changing.
    def mkregexp(regexpstring)
      flags = 'u'
      if regexpstring.include?("(?i)")
        regexpstring = regexpstring.gsub("(?i)", "").gsub("(?-i)", "")
        flags = 'ui'
      end
      Regexp.new("/#{regexpstring}/#{flags}")
    end

    # Returns a new string with +size+ characters starting at +pos+ replaced
    # by +repl+.
    #
    # @param string [String]
    # @param pos [Integer] index of the first character to replace
    # @param size [Integer] number of characters to replace
    # @param repl [String] replacement text
    # @return [String]
    def sub_replace(string, pos, size, repl)
      # The slice is nil when pos + size runs past the end of the string;
      # treat that as an empty tail rather than failing on `+ nil`.
      tail = string[(pos + size)..-1] || ''
      string[0, pos] + repl + tail
    end

    # No external processing is performed by this backend; +string+ is
    # returned unchanged.
    def external_processing(mapping, string)
      string
    end
  end
end