interscript 0.1.0 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/bin/interscript +36 -17
  4. data/bin/rspec +29 -0
  5. data/bin/setup +8 -0
  6. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  7. data/lib/g2pwrapper.py +34 -0
  8. data/lib/interscript-opal.rb +2 -0
  9. data/lib/interscript.rb +138 -38
  10. data/lib/interscript/command.rb +28 -0
  11. data/lib/interscript/fs.rb +69 -0
  12. data/lib/interscript/mapping.rb +142 -0
  13. data/lib/interscript/opal.rb +23 -0
  14. data/lib/interscript/opal/maps.js.erb +7 -0
  15. data/lib/interscript/opal_map_translate.rb +12 -0
  16. data/lib/interscript/version.rb +1 -1
  17. data/lib/model-7 +0 -0
  18. data/lib/tha-pt-b-7 +0 -0
  19. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38912 -0
  20. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +141 -0
  21. data/maps/alalc-bel-cyrl-latn-1997.yaml +125 -0
  22. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  23. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +94 -0
  24. data/maps/alalc-ell-Grek-Latn-1997.yaml +625 -0
  25. data/maps/alalc-ell-Grek-Latn-2010.yaml +628 -0
  26. data/maps/alalc-kat-Geok-Latn-1997.yaml +112 -0
  27. data/maps/alalc-kat-Geor-Latn-1997.yaml +146 -0
  28. data/maps/alalc-kor-Hang-Latn-1997.yaml +94 -0
  29. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  30. data/maps/alalc-mkd-cyrl-latn-1997.yaml +114 -0
  31. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +222 -0
  32. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  33. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +114 -0
  34. data/maps/alalc-srp-cyrl-latn-2013.yaml +135 -0
  35. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +141 -0
  36. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  37. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  38. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +175 -0
  39. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +169 -0
  40. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +294 -0
  41. data/maps/bgn-kor-Hang-Latn-1943.yaml +31 -0
  42. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  43. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  44. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  45. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +108 -0
  46. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  47. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +184 -0
  48. data/maps/bgnpcgn-bel-cyrl-latn-1979.yaml +285 -0
  49. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +115 -0
  50. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +38 -0
  51. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +702 -0
  52. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +20 -0
  53. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  54. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +127 -0
  55. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +43 -0
  56. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  57. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  58. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  59. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +159 -0
  60. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  61. data/maps/bgnpcgn-per-Arab-Latn-1956.yaml +93 -0
  62. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +314 -0
  63. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +166 -0
  64. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +163 -0
  65. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +208 -0
  66. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  67. data/maps/by-bel-Cyrl-Latn-1998.yaml +168 -0
  68. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  69. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +685 -0
  70. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +681 -0
  71. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +20 -0
  72. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +32 -0
  73. data/maps/ggg-kat-Geor-Latn-2002.yaml +89 -0
  74. data/maps/gki-bel-cyrl-latn-1992.yaml +33 -0
  75. data/maps/gki-bel-cyrl-latn-2000.yaml +201 -0
  76. data/maps/gost-rus-cyrl-latn-16876-71-1983.yaml +186 -0
  77. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  78. data/maps/icao-bel-Cyrl-Latn-9303.yaml +141 -0
  79. data/maps/icao-bul-Cyrl-Latn-9303.yaml +122 -0
  80. data/maps/icao-heb-Hebr-Latn-9303.yaml +151 -0
  81. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +117 -0
  82. data/maps/icao-per-Arab-Latn-9303.yaml +104 -0
  83. data/maps/icao-rus-Cyrl-Latn-9303.yaml +118 -0
  84. data/maps/icao-srp-Cyrl-Latn-9303.yaml +117 -0
  85. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +120 -0
  86. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +610 -0
  87. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +41 -0
  88. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +62 -0
  89. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +272 -0
  90. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  91. data/maps/kp-kor-Hang-Latn-2002.yaml +901 -0
  92. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  93. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  94. data/maps/moct-kor-Hang-Latn-2000.yaml +803 -0
  95. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  96. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  97. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  98. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +110 -0
  99. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  100. data/maps/nil-kor-Hang-Hang-jamo.yaml +11193 -0
  101. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  102. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  103. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  104. data/maps/odni-kat-Geor-Latn-2015.yaml +88 -0
  105. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  106. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  107. data/maps/odni-mkd-cyrl-latn-2015.yaml +122 -0
  108. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  109. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  110. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  111. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  112. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  113. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  114. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +167 -0
  115. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  116. data/maps/royin-tha-Thai-Latn-1968.yaml +179 -0
  117. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  118. data/maps/royin-tha-Thai-Latn-1999.yaml +76 -0
  119. data/maps/sac-zho-Hans-Latn-1979.yaml +24759 -0
  120. data/maps/ses-ara-arab-latn-1930.yaml +275 -0
  121. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  122. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +193 -0
  123. data/maps/un-ara-Arab-Latn-1971.yaml +127 -0
  124. data/maps/un-ara-Arab-Latn-1972.yaml +152 -0
  125. data/maps/un-ara-Arab-Latn-2017.yaml +383 -0
  126. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  127. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  128. data/maps/un-ell-Grek-Latn-1987-tl.yaml +32 -0
  129. data/maps/un-ell-Grek-Latn-1987-ts.yaml +20 -0
  130. data/maps/un-ell-Grek-Latn-phonetic-1987.yaml +780 -0
  131. data/maps/un-mon-Mong-Latn-2013.yaml +93 -0
  132. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  133. data/maps/un-ukr-cyrl-latn-1998.yaml +30 -0
  134. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  135. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  136. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  137. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  138. data/maps/var-kor-Kore-Latn-mr-1939.yaml +37 -0
  139. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  140. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  141. data/maps/var-zho-Hani-Latn-1979.yaml +38908 -0
  142. data/spec/interscript/mapping_spec.rb +42 -0
  143. data/spec/interscript_spec.rb +26 -0
  144. data/spec/spec_helper.rb +3 -0
  145. metadata +295 -11
@@ -0,0 +1,2 @@
1
+ require "opal"
2
+ require "interscript"
@@ -1,53 +1,153 @@
1
- require 'yaml'
2
- require 'singleton'
1
+ # frozen_string_literal: true
3
2
 
4
- class Interscript
5
- include Singleton
6
-
7
- SYSTEM_DEFINITIONS_PATH = File.expand_path('../../maps', __FILE__)
3
+ require "interscript/opal/maps" if RUBY_ENGINE == "opal"
4
+ require "interscript/mapping"
8
5
 
9
- def initialize
10
- @systems = {}
11
- end
6
# Transliteration
#
# Entry point of the library: `Interscript.transliterate(system_code, string)`
# converts `string` according to the mapping identified by `system_code`.
# Platform-specific helpers (`sub_replace`, `mkregexp`, `external_processing`
# and the `ALPHA_REGEXP` constant) are mixed in from `Interscript::Opal` when
# running under Opal and from `Interscript::Fs` otherwise.
module Interscript

  # Raised by Mapping when no definition can be loaded for a system code.
  class InvalidSystemError < StandardError; end
  # Raised by Fs#external_process for an unknown external process name.
  class ExternalProcessNotRecognizedError < StandardError; end
  # Raised by Fs#external_process when the external process cannot be run.
  class ExternalProcessUnavailableError < StandardError; end

  if RUBY_ENGINE == 'opal'
    require "interscript/opal"
    extend Opal
  else
    require "interscript/fs"
    extend Fs
  end

  class << self

    # Transliterates +string+ with the system identified by +system_code+.
    #
    # Processing order: chained systems, external processing, dictionary
    # replacement (longest match via the trie), `rules`, `characters`,
    # `postrules`, then title-casing / word-separator clean-up and Unicode
    # normalization.
    #
    # @param system_code [String] identifier of the mapping to apply
    # @param string [String] text to convert; it is never modified in place
    # @param maps [Hash] cache of already-loaded mappings keyed by system
    #   code; mappings loaded during the call are stored into it
    # @return [String, nil] the converted text, or nil when the text is nil
    #   after external processing
    def transliterate(system_code, string, maps={})
      maps[system_code] = Interscript::Mapping.for(system_code) unless maps.has_key?(system_code)
      mapping = maps[system_code]

      # First, apply chained transliteration as specified in the list `chain`
      mapping.chain.each { |chained_code| string = transliterate(chained_code, string, maps) }

      # Then, apply the rest of the map
      separator = mapping.character_separator || ""
      word_separator = mapping.word_separator || ""
      title_case = mapping.title_case

      string = external_processing(mapping, string)

      # Work on a private, unfrozen copy: the filesystem flavour of
      # sub_replace edits its argument in place, which would otherwise alter
      # the caller's string or raise FrozenError on a frozen one.
      string = string.dup if string

      output = apply_dictionary(mapping, string)
      output = apply_rules(mapping.rules, output)
      output = apply_characters(mapping, separator, output)
      output = apply_rules(mapping.postrules, output)

      return unless output

      output = output.sub(/^(.)/, &:upcase) if title_case
      if word_separator != ''
        # Drop the character separator that was inserted right after a word separator.
        output = output.gsub(/#{word_separator}#{separator}/u, word_separator)
        output = output.gsub(/#{word_separator}(.)/u, &:upcase) if title_case
      end

      output.unicode_normalize
    end

    private

    # Replaces dictionary entries in +string+, scanning left to right and
    # always taking the longest entry that starts at the current position.
    def apply_dictionary(mapping, string)
      dictmap = mapping.dictionary_hash
      trie = mapping.dictionary_trie

      pos = 0
      while pos < string.to_s.size
        wordmatch = longest_dictionary_match(trie, string, pos)

        if wordmatch.empty?
          pos += 1
        else
          repl = dictmap[wordmatch]
          string = sub_replace(string, pos, wordmatch.length, repl)
          # Continue after the replacement so its text is not matched again.
          pos += repl.length
        end
      end

      string
    end

    # Returns the longest dictionary word starting at +pos+, or "" if none.
    # Growth stops as soon as the candidate is no longer a prefix of any word.
    def longest_dictionary_match(trie, string, pos)
      wordmatch = ""
      m = 0
      while pos + m < string.to_s.size
        candidate = string[pos..pos + m]
        break unless trie.partial_word?(candidate)

        wordmatch = candidate if trie.word?(candidate)
        m += 1
      end
      wordmatch
    end

    # Applies each rule (a hash with "pattern" and "result") in order using
    # gsub. A nil +output+ is passed through untouched.
    def apply_rules(rules, output)
      return output unless output

      rules.reduce(output) do |text, rule|
        text.gsub(mkregexp(rule["pattern"]), rule["result"])
      end
    end

    # Applies the character map, one key at a time in the order given by
    # `characters_hash`, replacing every occurrence of the key.
    #
    # NOTE(review): keys are interpolated into a regexp unescaped, and a value
    # that itself contains its key would make the inner loop run forever —
    # confirm the map files never do this.
    def apply_characters(mapping, separator, output)
      downcase = mapping.downcase

      mapping.characters_hash.each do |source, target|
        # if more than one, choose the first one (must happen before upcasing,
        # since Array has no #upcase)
        target = target.first if target.is_a?(Array)
        pattern = /#{source}/

        while (match = output&.match(pattern))
          pos = match.offset(0).first
          result = !downcase && up_case_around?(output, pos) ? target.upcase : target

          output = sub_replace(output, pos, match[0].size, add_separator(separator, pos, result))
        end
      end

      output
    end

    # Prefixes +result+ with the character separator, except at position 0.
    def add_separator(separator, pos, result)
      pos == 0 ? result : separator + result
    end

    # Decides whether the replacement for the character at +pos+ should be
    # upper-cased: the character itself must not equal its own downcase, and
    # the neighbour found on at least one side must be non-blank and equal to
    # its own upcase. The neighbour search skips characters that do not match
    # ALPHA_REGEXP but stops at the string boundaries regardless.
    def up_case_around?(string, pos)
      return false if string[pos] == string[pos].downcase

      alpha = Regexp.new(ALPHA_REGEXP)

      i = pos - 1
      i -= 1 while i.positive? && string[i] !~ alpha
      before = i >= 0 && i < pos ? string[i].to_s.strip : ''

      i = pos + 1
      i += 1 while i < string.size - 1 && string[i] !~ alpha
      after = i > pos ? string[i].to_s.strip : ''

      before_uc = !before.empty? && before == before.upcase
      after_uc = !after.empty? && after == after.upcase
      before_uc || after_uc
    end

  end
end
@@ -0,0 +1,28 @@
1
+ require 'thor'
2
+ require 'interscript'
3
+ require 'json'
4
module Interscript
  # Command line interface
  class Command < Thor
    desc '<file>', 'Transliterate text'
    option :system, aliases: '-s', required: true, desc: 'Transliteration system'
    option :output, aliases: '-o', required: false, desc: 'Output file'
    option :map, aliases: '-m', required: false, default: "{}", desc: 'Transliteration mapping json'

    # Transliterates the file at +input+ with the system given by --system.
    # With --output the result is written to that file; otherwise it is
    # printed to standard output. The --map JSON is parsed and handed to the
    # library as its `maps` argument on both paths.
    #
    # NOTE(review): Interscript.transliterate treats each `maps` entry as a
    # loaded mapping object — confirm what shape the --map JSON is expected
    # to have.
    def translit(input)
      maps = JSON.parse(options[:map])

      if options[:output]
        Interscript.transliterate_file(options[:system], input, options[:output], maps)
      else
        # File.read rather than IO.read: IO.read treats an argument starting
        # with "|" as a command to run, and +input+ comes from the command line.
        puts Interscript.transliterate(options[:system], File.read(input), maps)
      end
    end

    desc 'list', 'Prints allowed transliteration systems'
    # Prints the base name (without ".yaml") of every map file found in the
    # maps directory, in sorted order.
    def list
      pattern = File.expand_path '../../maps/*.yaml', __dir__
      # Sort explicitly so the listing does not depend on directory order.
      Dir[pattern].sort.each do |path|
        puts File.basename(path, '.yaml')
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
require 'pathname'

module Interscript
  # Helpers for the non-Opal (regular Ruby) runtime. The module is meant to be
  # mixed in with `extend`, so these become singleton methods of the receiver.
  module Fs
    # Regexp source matching a single alphabetic character.
    ALPHA_REGEXP = '[[:alpha:]]'

    # External process names accepted by #external_process, mapped to the
    # model name passed to the Python wrapper.
    EXTERNAL_PROCESSES = {
      'sequitur.pythainlp_lexicon' => 'pythainlp_lexicon',
      'sequitur.wiktionary_phonemic' => 'wiktionary_phonemic'
    }.freeze

    # Returns a copy of +string+ in which the +size+ characters starting at
    # +pos+ are replaced by +repl+. The receiver string is left untouched, so
    # frozen strings and strings owned by the caller are safe to pass.
    #
    # @param string [String] source text
    # @param pos [Integer] index of the first character to replace
    # @param size [Integer] number of characters to replace (0 inserts)
    # @param repl [String] replacement text
    # @return [String] new string with the replacement applied
    def sub_replace(string, pos, size, repl)
      # The tail is nil when the span runs past the end of the string.
      string[0, pos] + repl + string[(pos + size)..-1].to_s
    end

    # Directory two levels above this file's directory, memoized.
    #
    # @return [Pathname]
    def root_path
      @root_path ||= Pathname.new(File.join(File.dirname(__dir__), ".."))
    end

    # Reads +input_file+, transliterates its content with +system_code+ and
    # writes the result to +output_file+, then reports the path on stdout.
    #
    # @param maps [Hash] mapping cache forwarded to `transliterate`
    # @return [String] +output_file+
    def transliterate_file(system_code, input_file, output_file, maps={})
      input = File.read(input_file)
      output = transliterate(system_code, input, maps)

      File.open(output_file, 'w') do |f|
        f.puts(output)
      end

      puts "Output written to: #{output_file}"
      output_file
    end

    # Imports the `g2pwrapper` Python module; on failure, appends this
    # project's lib directory to Python's `sys.path` and tries once more.
    #
    # NOTE(review): `pyimport` is not defined in this file — presumably it is
    # supplied by a Python bridge set up elsewhere; confirm.
    def import_python_modules
      pyimport :g2pwrapper
    rescue StandardError
      pyimport :sys
      sys.path.append(root_path.to_s + "/lib/")
      pyimport :g2pwrapper
    end

    # Runs +string+ through the external process called +process_name+.
    #
    # @raise [ExternalProcessNotRecognizedError] if +process_name+ is unknown
    # @raise [ExternalProcessUnavailableError] if importing or calling the
    #   Python wrapper fails
    def external_process(process_name, string)
      # Validate the name first, outside the rescue below, so an unknown name
      # is not reported as an unavailable process.
      model = EXTERNAL_PROCESSES.fetch(process_name) do
        raise ExternalProcessNotRecognizedError, "Unknown external process: #{process_name}"
      end

      begin
        import_python_modules
        g2pwrapper.transliterate(model, string)
      rescue StandardError
        raise ExternalProcessUnavailableError
      end
    end

    # Applies the mapping's optional external steps to +string+:
    # segmentation first, then transcription. Steps that are not configured
    # are skipped.
    def external_processing(mapping, string)
      # Segmentation
      string = external_process(mapping.segmentation, string) if mapping.segmentation

      # Transliteration/Transcription
      string = external_process(mapping.transcription, string) if mapping.transcription

      string
    end

    private

    # Compiles a rule pattern given as regexp source text.
    def mkregexp(regexpstring)
      /#{regexpstring}/u
    end

  end
end
@@ -0,0 +1,142 @@
1
+ require 'rambling-trie'
2
+ require 'yaml'
3
+ require 'json'
4
+
5
module Interscript

  # One transliteration system definition, loaded from a YAML map file (or
  # from the `InterscriptMaps` JavaScript object under Opal) and exposed
  # through read-only attributes. Systems listed under `map.inherit` are
  # loaded recursively and merged in, with this system's own entries taking
  # precedence for `characters` and `dictionary`.
  class Mapping
    # Inheritance is followed at most this many levels deep; a mapping
    # requested at this depth is left empty instead of being loaded, which
    # also terminates inheritance cycles.
    MAX_INHERIT_DEPTH = 5

    attr_reader(
      :id,
      :url,
      :name,
      :notes,
      :rules,
      :tests,
      :language,
      :postrules,
      :characters,
      :description,
      :authority_id,
      :creation_date,
      :source_script,
      :destination_script,
      :chain,
      :character_separator,
      :word_separator,
      :title_case,
      :downcase,
      :dictionary,
      :characters_hash,
      :dictionary_hash,
      :segmentation,
      :transcription,
      :dictionary_trie
    )

    # @param system_code [String] name of the system; on the filesystem the
    #   definition is read from "<system_code>.yaml"
    # @param options [Hash]
    # @option options [Integer] :depth current inheritance depth (default 0)
    # @option options [String, Pathname] :system_path directory holding the
    #   map files; the historical key :system_code is still accepted for the
    #   same purpose. Defaults to "maps" under Interscript.root_path.
    # @raise [Interscript::InvalidSystemError] if the map file does not exist
    def initialize(system_code, options = {})
      @system_code = system_code
      @depth = options.fetch(:depth, 0).to_i

      unless RUBY_ENGINE == 'opal'
        @system_path = options.fetch(:system_path) do
          options.fetch(:system_code) { default_path }
        end
      end

      load_and_serialize_system_mappings
    end

    # Convenience constructor; same arguments as .new.
    def self.for(system_code, options = {})
      new(system_code, options)
    end

    # Loads the raw definition and populates every attribute. Once the depth
    # cap is reached nothing is loaded and all attributes get their empty
    # defaults, so callers merging an inherited mapping never see nil.
    def load_and_serialize_system_mappings
      mappings = depth >= MAX_INHERIT_DEPTH ? {} : load_system_mappings
      serialize_system_mappings(mappings)
    end

    private

    attr_reader :depth, :system_code, :system_path

    def system_code_file
      [system_code, "yaml"].join(".")
    end

    def default_path
      @default_path ||= Interscript.root_path.join("maps")
    end

    def load_system_mappings
      if RUBY_ENGINE == 'opal'
        load_opal_mappings
      else
        load_fs_mappings
      end
    end

    # Opal only: the backticks embed JavaScript that reads the definition
    # from the global `InterscriptMaps` object.
    def load_opal_mappings
      JSON.parse(`InterscriptMaps[#{system_code}]`)
    end

    # Reads the YAML definition from disk. File.join is used so that the
    # directory may be given as either a String or a Pathname. An empty file
    # is treated as an empty definition.
    def load_fs_mappings
      YAML.load_file(File.join(system_path.to_s, system_code_file)) || {}
    rescue Errno::ENOENT
      raise Interscript::InvalidSystemError.new("No system mappings found")
    end

    # Copies the parsed definition into instance variables, applying defaults
    # for anything missing, then merges inherited systems and builds the
    # derived lookup structures.
    def serialize_system_mappings(mappings)
      # A definition without a "map" section behaves like an empty map.
      map = mappings["map"] || {}

      @id = mappings.fetch("id", nil)
      @url = mappings.fetch("url", nil)
      @name = mappings.fetch("name", nil)
      @notes = mappings.fetch("notes", nil)
      @tests = mappings.fetch("tests", [])
      @language = mappings.fetch("language", nil)
      @description = mappings.fetch("description", nil)
      @authority_id = mappings.fetch("authority_id", nil)
      @creation_date = mappings.fetch("creation_date", nil)
      @source_script = mappings.fetch("source_script", nil)
      @destination_script = mappings.fetch("destination_script", nil)
      @chain = mappings.fetch("chain", [])
      @character_separator = map["character_separator"]
      @word_separator = map["word_separator"]
      @title_case = map["title_case"] || false
      @downcase = map["downcase"] || false
      @rules = map["rules"] || []
      @postrules = map["postrules"] || []
      @characters = map["characters"] || {}
      @dictionary = map["dictionary"] || {}
      # "segementation" is the misspelled key this reader used previously; it
      # is still honoured so existing map files keep working.
      @segmentation = map["segmentation"] || map["segementation"]
      @transcription = map["transcription"]

      include_inherited_mappings(mappings)
      build_hashes
      build_trie
    end

    # Merges every system named under `map.inherit` (a single name or a
    # list). Inherited rules and postrules run before this system's own;
    # for characters and dictionary this system's entries win on conflict.
    def include_inherited_mappings(mappings)
      inherit_systems = Array((mappings["map"] || {})["inherit"]).flatten

      inherit_systems.each do |inherit_system|
        next unless inherit_system

        inherited_mapping = Mapping.for(inherit_system, depth: depth + 1)

        @rules = [inherited_mapping.rules, rules].flatten.compact
        @postrules = [inherited_mapping.postrules, postrules].flatten.compact
        @characters = (inherited_mapping.characters || {}).merge(characters)
        @dictionary = (inherited_mapping.dictionary || {}).merge(dictionary)
      end
    end

    # Orders both maps so that longer keys come first.
    def build_hashes
      @characters_hash = characters&.sort_by { |k, _v| k.size }&.reverse&.to_h
      @dictionary_hash = dictionary&.sort_by { |k, _v| k.size }&.reverse&.to_h
    end

    # Builds a trie holding all dictionary keys.
    def build_trie
      @dictionary_trie = Rambling::Trie.create
      dictionary_trie.concat dictionary.keys
    end
  end
end
@@ -0,0 +1,23 @@
1
module Interscript
  # Helpers for the Opal (JavaScript) runtime. The module is meant to be mixed
  # in with `extend`, mirroring the method set of Interscript::Fs. Note that
  # inside the Interscript namespace the name `Opal` refers to this module.
  module Opal
    # Regexp source matching a single letter.
    ALPHA_REGEXP = '\p{L}'

    # Compiles a rule pattern given as regexp source text.
    #
    # An inline "(?i)" switch anywhere in the pattern makes the whole regexp
    # case-insensitive; the "(?i)" / "(?-i)" markers themselves are removed
    # from the source before compiling.
    #
    # The source and the options are passed to Regexp.new separately:
    # wrapping the source in "/.../flags" would make the slashes and flag
    # letters part of the pattern text.
    #
    # NOTE(review): the previous code tried to request a "u" flag; if the
    # target JavaScript engine needs it for patterns such as \p{L}, that has
    # to be arranged separately — confirm against the Opal version in use.
    #
    # @param regexpstring [String] regexp source
    # @return [Regexp]
    def mkregexp(regexpstring)
      return Regexp.new(regexpstring) unless regexpstring.include? "(?i)"

      stripped = regexpstring.gsub("(?i)", "").gsub("(?-i)", "")
      Regexp.new(stripped, Regexp::IGNORECASE)
    end

    # Returns a copy of +string+ in which the +size+ characters starting at
    # +pos+ are replaced by +repl+; +string+ itself is not modified.
    def sub_replace(string, pos, size, repl)
      # The tail is nil when the span runs past the end of the string.
      string[0, pos] + repl + string[(pos + size)..-1].to_s
    end

    # No external processing happens on this platform; +string+ is returned
    # unchanged and +mapping+ is ignored.
    def external_processing(mapping, string)
      string
    end

  end
end