bidi 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1e84e59022f13c923a1a5a9d89f3ccda0eb8fd00
4
+ data.tar.gz: 12fb80c3da23660d578c4748e8677e8eca1c7221
5
+ SHA512:
6
+ metadata.gz: 3e52bd9c42d30666cda2214177025d8a4bfe2de716d4c61d4dbd63672fb46ecf87fdcaa8096d2c03fdc555b16087adc876d23630cb7a1b0e79595887625136b2
7
+ data.tar.gz: 50cf772702417cdb69e1e2dcb819f1c80b5b86d4ef43b93f16b0b0ebfc232ce2b1f05af4d6ed4c9038d5044ecffe922841df4a2e91c65b1964de0f1caf5b1da0
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2014 Amit Yaron <amit@phpandmore.net>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
@@ -0,0 +1,57 @@
1
+ # Ruby BiDi
2
+
3
+ Ruby gem to help working with bidirectional (left-to-right and right-to-left) text.
4
+
5
+ ### Install
6
+
7
+ ```shell
8
+ gem install bidi
9
+ ```
10
+
11
+ ### Use
12
+
13
+ Require the `bidi` module and use `to_visual`:
14
+
15
+ ```ruby
16
+ require "bidi"
17
+
18
+ bidi = Bidi.new
19
+ bidi_string = bidi.to_visual "משפט עם עברית ו-English. מספרים: 12345 (וגם כל מיני סימני פיסוק) וגם סימן קריאה!"
20
+ ```
21
+
22
+ When rendering right-to-left text, some writers require reversing the string before passing it to them. [Prawn](https://github.com/prawnpdf/prawn) is one such example. The `render_visual` function does this for you:
23
+
24
+ ```ruby
25
+ require "prawn"
26
+ require "bidi"
27
+
28
+ Prawn::Document.generate("hello.pdf") do
29
+ self.text_direction = :rtl
30
+
31
+ bidi = Bidi.new
32
+ text bidi.render_visual "משפט עם עברית ו-English. מספרים: 12345 (וגם כל מיני סימני פיסוק) וגם סימן קריאה!"
33
+ end
34
+
35
+ ```
36
+
37
+ ### License
38
+
39
+ Copyright (c) 2014 Amit Yaron <<amit@phpandmore.net>>
40
+
41
+ Permission is hereby granted, free of charge, to any person obtaining a copy
42
+ of this software and associated documentation files (the "Software"), to deal
43
+ in the Software without restriction, including without limitation the rights
44
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
45
+ copies of the Software, and to permit persons to whom the Software is
46
+ furnished to do so, subject to the following conditions:
47
+
48
+ The above copyright notice and this permission notice shall be included in
49
+ all copies or substantial portions of the Software.
50
+
51
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
52
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
53
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
54
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
55
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
56
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
57
+ THE SOFTWARE.
@@ -0,0 +1,36 @@
1
+ Bidirectional Text
2
+ ==================
3
+ This package contains a function that converts your input logical UTF-8 string into a visual string according to the Bidi algorithm
4
+ found in http://www.unicode.org/reports/tr9/
5
+
6
+ Requirements:
7
+ * Ruby 1.9
8
+ * The Ruby library 'weakref'
9
+
10
+ The conversion function is found in "bidi.rb"
11
+
12
+ To use the conversion function:
13
+ 1. Define an object of class 'Bidi'. We'll call this object bidi.
14
+ 2. Call 'bidi.to_visual <your string> <default paragraph direction>'
15
+ The values for default paragraph direction:
16
+ * 'R' or 'RTL' - Right to Left text.
17
+ * 'L' or 'LTR' - Left to right text.
18
+ * other values or omitted - the default for each paragraph.
19
+
20
+ Constants:
21
+ * Bidi.RLE - Right to left embedding.
22
+ * Bidi.LRE - Left to right embedding.
23
+ * Bidi.RLO - Right to left override.
24
+ * Bidi.LRO - Left to right override.
25
+ * Bidi.PDF - Pop Directional Formatting.
26
+ * Bidi.RLM - Right to left mark.
27
+ * Bidi.LRM - Left to right mark.
28
+
29
+ To run a script that calls 'bidi.to_visual', type
30
+ ruby -Ku <script name.rb>
31
+
32
+ 'K' stands for Kanji, characters commonly used in Japan and in China; the 'u' that follows selects UTF-8. This causes Ruby to interpret the extended character set in the script as UTF-8, and prevents the embarrassing error message 'invalid multibyte char (US-ASCII)'.
33
+
34
+
35
+ "bidi.rb" also contains a method named "to_utf8_char", which extends the Integer class. You can use it to define additional UTF-8 characters.
36
+
@@ -0,0 +1,2 @@
1
+ require "bidi/bidi"
2
+
@@ -0,0 +1,771 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'bidi/weakhashmap'
4
+
5
class Integer
  # Encodes this integer, taken as a Unicode code point, as UTF-8.
  #
  # Returns a one-character String tagged with the UTF-8 encoding.
  # Raises RangeError when the value is negative or greater than U+10FFFF
  # (the last code point UTF-8 can represent).
  def to_utf8_char
    # The comma matters: `raise RangeError "..."` is parsed as a call to a
    # method named RangeError and ends in NoMethodError instead.
    if self < 0 || self > 0x10ffff
      raise RangeError, "Value #{self} is out of range for UTF8 Char"
    end

    # The 'U' pack directive performs the UTF-8 encoding of a code point.
    [self].pack('U')
  end
end
38
+
39
+ $weakHashMap = WeakHashMap.new # process-wide cache: code point => UnicodeData record fields (filled by Bidi#retrieve_rec)
40
+ $mirrorMap = WeakHashMap.new # process-wide cache: code point => [mirrored code point] (filled by Bidi#search_mirrored_value)
41
+
42
# Raised by Bidi#to_visual when the input is not well-formed UTF-8.
#
# Derives from StandardError so that a plain `rescue` catches it; code that
# rescues StringError (or Exception) explicitly keeps working.
class StringError < StandardError
  # byte        - String listing the offending byte value(s).
  # afterString - String holding the input decoded successfully before the
  #               offending bytes.
  def initialize byte, afterString
    @byte = byte
    @afterString = afterString
    super(message)
  end

  # Returns the human-readable description of the error. Reads the instance
  # variables; the bare names `byte`/`afterString` are undefined here.
  def message
    "Unexpected byte(s): #{@byte} after '#{@afterString}'"
  end
end
51
+
52
+ class Bidi
53
+ # constants
54
# Directional formatting characters. Each reader returns the character as a
# one-character UTF-8 String.
class << self
  # U+202B, right-to-left embedding.
  def RLE
    0x202b.to_utf8_char
  end

  # U+202A, left-to-right embedding.
  def LRE
    0x202a.to_utf8_char
  end

  # U+202E, right-to-left override.
  def RLO
    0x202e.to_utf8_char
  end

  # U+202D, left-to-right override.
  def LRO
    0x202d.to_utf8_char
  end

  # U+200E, left-to-right mark.
  def LRM
    0x200e.to_utf8_char
  end

  # U+200F, right-to-left mark.
  def RLM
    0x200f.to_utf8_char
  end

  # U+202C, pop directional formatting.
  def PDF
    0x202c.to_utf8_char
  end
end
81
+
82
# A paragraph under construction: a Hash holding the paragraph embedding
# 'level' and the list of its 'characters'.
class ParagraphType < Hash
  # default_direction - 'R'/'RTL' (level 1) or 'L'/'LTR' (level 0), compared
  #                     case-insensitively; anything else, or nil, leaves
  #                     the level at -1 ("not decided yet").
  def initialize default_direction=nil
    requested = default_direction ? default_direction.upcase : nil
    known_levels = { 'R' => 1, 'RTL' => 1, 'L' => 0, 'LTR' => 0 }
    self['level'] = known_levels.fetch(requested, -1)
    self['characters'] = []
  end
end
96
+
97
# One character of a paragraph: a Hash with the code point ('value'), its
# bidirectional class ('bidiType') and its mirrored flag ('mirroredInd').
class UtfChar < Hash
  def initialize value, bidiType, mirroredInd
    store 'value', value
    store 'bidiType', bidiType
    store 'mirroredInd', mirroredInd
  end

  # True when the current bidiType is one of the neutral classes
  # (B, S, WS or ON).
  def is_neutral
    %w[B S WS ON].include? self['bidiType']
  end
end
109
+
110
+ CHAR_START=1 # to_visual decoder state: the next byte starts a new character
111
+ CHAR_END=2 # to_visual decoder state: the next byte is the last byte of the character
112
+ CHAR_BEFORE_LAST=3 # to_visual decoder state: two more bytes of the character follow
113
+ CHAR_SECOND_OF_FOUR=4 # to_visual decoder state: three more bytes of a four-byte character follow
114
+
115
# Opens the data files used for character lookups (UnicodeData.idx,
# UnicodeData.txt, BidiMirroring.dat) and derives the record counts of the
# two fixed-width files from their sizes.
#
# The files stay open for the lifetime of the object and are closed by a
# finalizer.
def initialize
  @@idx_record_len=7
  spec = Gem.loaded_specs["bidi"]
  @dataPath = if spec
    spec.full_gem_path + "/lib/data/"
  else
    # Not loaded through RubyGems (e.g. via -I): look next to the sources.
    # NOTE(review): assumes this file lives in lib/bidi/ - confirm.
    File.expand_path("../data", File.dirname(__FILE__)) + "/"
  end
  # The index and mirroring files hold fixed-width binary records, so they
  # are opened in binary mode.
  @idxFile = File.open(@dataPath + "UnicodeData.idx", "rb")
  @dataFile = File.open(@dataPath + "UnicodeData.txt", "r")
  @mirrorFile = File.open(@dataPath + "BidiMirroring.dat", "rb")
  # The finalizer proc is built in a class method: a proc created here would
  # capture self and keep the object from ever being garbage collected.
  ObjectSpace.define_finalizer(self, self.class.file_closer([@idxFile, @dataFile, @mirrorFile]))
  @num_of_indexes = @idxFile.stat.size / @@idx_record_len
  @mirror_record_len=6
  @num_of_mirror_chars=@mirrorFile.stat.size / @mirror_record_len
end # initialize

# Internal helper for initialize: returns a proc that closes the given files.
def self.file_closer files
  proc { files.each { |file| file.close unless file.closed? } }
end
126
+
127
# Looks up the UnicodeData record of a code point.
#
# The index file consists of fixed-width records: a 3-byte big-endian code
# point followed by a 4-byte big-endian offset into the data file. The index
# is binary-searched, then the line at that offset is read and split on ';'.
#
# key - Integer code point.
#
# Returns the Array of record fields (cached in $weakHashMap), or nil when
# the code point has no index entry.
def retrieve_rec key
  cached = $weakHashMap[key]
  return cached if cached

  to_int = lambda { |bytes| bytes.each_byte.inject(0) { |acc, b| (acc << 8) | b } }

  bottom = 0
  # Last valid record index. Starting at @num_of_indexes would let the search
  # probe one record past the end of the file, where read returns nil.
  top = @num_of_indexes - 1
  while top >= bottom
    middle = (top + bottom) / 2
    @idxFile.pos = middle * @@idx_record_len
    code_bytes = @idxFile.read 3
    break if code_bytes.nil? || code_bytes.length < 3 # truncated index file
    code = to_int.call(code_bytes)
    if code == key # Found - read the record
      offset_bytes = @idxFile.read 4
      break if offset_bytes.nil? || offset_bytes.length < 4
      @dataFile.pos = to_int.call(offset_bytes)
      fields = @dataFile.readline.split ';', -1
      $weakHashMap[key] = fields
      return fields
    end
    if key < code
      top = middle - 1
    else
      bottom = middle + 1
    end
  end
  nil
end
165
+
166
+ # Run = run of characters of the same level
167
# Stores in par['runs'] one Hash per maximal stretch of characters sharing a
# level. Each run has 'sor'/'eor' (start index, one-past-end index) and
# 'sorType'/'eorType' ('R' for an odd level, 'L' for an even one). The last
# run takes its 'eorType' from the paragraph level. Characters without a
# level are skipped when looking for boundaries.
def split_into_runs par
  chars = par['characters']
  runs = par['runs'] = []
  direction = lambda { |level| level.odd? ? 'R' : 'L' }

  run_start = 0
  run_level = par['level']
  chars.each_with_index do |char, index|
    level = char['level']
    next if !level || level == run_level

    runs.push('sor' => run_start,
              'sorType' => direction.call(chars[run_start]['level']),
              'eor' => index,
              'eorType' => direction.call(level))
    run_start = index
    run_level = level
  end

  runs.push('sor' => run_start,
            'sorType' => direction.call(chars[run_start]['level']),
            'eor' => chars.length,
            'eorType' => direction.call(par['level']))
end
195
+
196
+ # Determine the direction ('L', 'R') of the nonspacing mark
197
+ # and a little bit of European Number handling
198
# Walks the run once. A nonspacing mark (NSM) takes the type of the most
# recent L/R/AL character (or the run's 'sorType' when there is none yet)
# and is tagged with 'origType' = 'NSM'. A European number (EN) whose most
# recent such character is AL becomes an Arabic number (AN).
def resolve_nsm par, run
  last_strong = run['sorType']
  first = run['sor']
  last = run['eor'] - 1
  chars = par['characters']
  first.upto(last) do |pos|
    char = chars[pos]
    type = char['bidiType']
    if type == 'NSM'
      char['bidiType'] = last_strong
      char['origType'] = 'NSM'
    elsif type == 'L' || type == 'R' || type == 'AL'
      last_strong = type
    elsif type == 'EN' && last_strong == 'AL'
      char['bidiType'] = 'AN'
    end
  end
end
215
+
216
+ # Change the AL bidiType to R
217
# Rewrites every 'AL' character inside the run as 'R'.
def change_AL_to_R par, run
  chars = par['characters']
  run['sor'].upto(run['eor'] - 1) do |pos|
    char = chars[pos]
    char['bidiType'] = 'R' if char['bidiType'] == 'AL'
  end
end
225
+
226
+ # 'ES' between two 'EN's' is change to EN
227
+ # 'CS' between two numbers of the same type is changed to that
228
+ # type.
229
# Resolves the separators of a run in place, left to right:
#   * ES or CS with EN on both sides becomes EN,
#   * CS with AN on both sides becomes AN,
#   * every other ES/CS becomes ON.
# Neighbours outside the run count as missing.
def handle_cs_and_es par, run
  chars = par['characters']
  first = run['sor']
  last = run['eor'] - 1
  first.upto(last) do |pos|
    type = chars[pos]['bidiType']
    next unless type == 'ES' || type == 'CS'

    left = pos > first ? chars[pos - 1]['bidiType'] : nil
    right = pos < last ? chars[pos + 1]['bidiType'] : nil
    chars[pos]['bidiType'] =
      if left == 'EN' && right == 'EN'
        'EN'
      elsif type == 'CS' && left == 'AN' && right == 'AN'
        'AN'
      else
        'ON'
      end
  end
end
257
+
258
# Resolves European terminators (ET) inside a run.
#
# Every maximal sequence made only of EN and ET characters is rewritten as a
# whole: to 'EN' when it contains at least one EN, otherwise to 'ON'.
# A sequence that reaches the end of the run is resolved as well; without
# that final flush a trailing "100%" would keep its ET unresolved.
def handle_en_et_sequences par, run
  chars = par['characters']
  first = run['sor']
  last = run['eor'] - 1

  seq_start = nil  # index where the current EN/ET sequence began, if any
  has_en = false   # whether the current sequence contains an EN

  flush = lambda do |seq_end|
    resolved = has_en ? 'EN' : 'ON'
    seq_start.upto(seq_end) { |pos| chars[pos]['bidiType'] = resolved }
    seq_start = nil
    has_en = false
  end

  first.upto(last) do |ind|
    type = chars[ind]['bidiType']
    if type == 'EN' || type == 'ET'
      seq_start ||= ind
      has_en = true if type == 'EN'
    elsif seq_start
      flush.call(ind - 1)
    end
  end
  flush.call(last) if seq_start
end
307
+
308
# Resolves the neutral characters (B, S, WS, ON) of a run.
#
# Every maximal sequence of neutrals is examined together with the direction
# on each side: R, AN and EN count as 'R', L counts as 'L', the start of the
# run counts as run['sorType'] and the end as run['eorType']. When both
# sides agree the neutrals take that direction; otherwise each neutral takes
# the direction of its own level ('R' when odd, 'L' when even). A neighbour
# with any other type provides no direction, so the neutrals fall back to
# their level.
#
# The previous version tested `ind=eor_m1` (an assignment, always true), so
# sequences longer than one neutral and neutrals at the end of a run were
# resolved wrongly or not at all.
def resolve_neutral_types par, run
  chars = par['characters']
  first = run['sor']
  last = run['eor'] - 1

  direction_of = lambda do |type|
    case type
    when 'R', 'AN', 'EN' then 'R'
    when 'L' then 'L'
    end
  end

  before = run['sorType']
  ind = first
  while ind <= last
    unless chars[ind].is_neutral
      before = direction_of.call(chars[ind]['bidiType'])
      ind += 1
      next
    end

    # Extend the sequence over all directly following neutrals.
    seq_end = ind
    seq_end += 1 while seq_end < last && chars[seq_end + 1].is_neutral

    after = seq_end < last ? direction_of.call(chars[seq_end + 1]['bidiType']) : run['eorType']
    agreed = (before && before == after) ? before : nil
    ind.upto(seq_end) do |pos|
      chars[pos]['bidiType'] = agreed || (chars[pos]['level'].odd? ? 'R' : 'L')
    end
    ind = seq_end + 1
  end
end
359
+
360
+ # Change each character's level according to its embedding level
361
+ # and bidiType.
362
# Raises each character's level according to its resolved type:
#   L      - one level up when the level is odd,
#   R      - one level up when the level is even,
#   AN, EN - one level up when odd, two levels up when even.
# Line feed (0x0A) and carriage return (0x0D) are then forced to level 0.
def resolve_implicit_levels par
  par['characters'].each do |char|
    base = char['level']
    bump =
      case char['bidiType']
      when 'L' then base.odd? ? 1 : 0
      when 'R' then base.even? ? 1 : 0
      when 'AN', 'EN' then base.odd? ? 1 : 2
      else 0
      end
    char['level'] = base + bump unless bump.zero?
    char['level'] = 0 if [0x0A, 0x0D].include?(char['value'])
  end
end
377
+
378
+ # Reset the embedding level of paragraph and segment separators
379
+ # to the paragraph level. Do the same with spaces preceding them
380
# Scans the paragraph from its last character to its first. B and S
# characters get the paragraph level, and so does every WS character that is
# followed only by whitespace up to the next B/S or the end of the paragraph.
def reset_separator_levels par
  base_level = par['level']
  # True while nothing but whitespace has been seen since the last separator
  # (or since the end of the paragraph).
  only_whitespace_since = true
  par['characters'].reverse_each do |char|
    type = char['bidiType']
    if type == 'B' || type == 'S'
      only_whitespace_since = true
      char['level'] = base_level
    elsif type == 'WS'
      char['level'] = base_level if only_whitespace_since
    else
      only_whitespace_since = false
    end
  end
end
396
+
397
+
398
# Runs the weak-type and neutral-type resolution steps on every run of the
# paragraph and then, once for the whole paragraph, resolves the implicit
# levels and resets the separator levels.
#
# The paragraph-wide steps must run after the loop: executed once per run
# they raised EN/AN levels repeatedly and changed character levels before
# later runs had been resolved.
def resolve_weak_types par
  par['runs'].each do |run|
    resolve_nsm par, run
    change_AL_to_R par, run
    handle_cs_and_es par, run
    handle_en_et_sequences par, run
    resolve_neutral_types par, run
  end
  par.delete 'runs' # the runs are only needed by the per-run steps
  resolve_implicit_levels par
  reset_separator_levels par
end
411
+
412
+ #
413
+ # Reverse odd levels (i.e. levels of characters written right-to-left
414
+ #
415
# Reorders the paragraph's characters for display.
#
# For every level in use the first and last index carrying that level are
# recorded. Levels are then visited from the highest down to the lowest odd
# one; on the way each level's span is widened to cover the span of the level
# visited before it. rearrange_level is called for the lowest odd level and
# for every level whose parity differs from the next lower level in use.
# Nothing happens when no character has an odd level.
def reverse_rtl_chars par
  spans = {}
  par['characters'].each_with_index do |char, pos|
    level = char['level']
    span = (spans[level] ||= { 'start' => pos })
    span['end'] = pos
  end

  lowest_odd = spans.keys.select { |level| level.odd? }.min
  return unless lowest_odd

  current = spans.keys.max
  loop do
    # Next lower level that is actually in use (only searched above the
    # lowest odd level, which is known to exist).
    below = current - 1
    below -= 1 while current > lowest_odd && !spans[below]

    span = spans[current]
    if below >= lowest_odd
      lower = spans[below]
      lower['start'] = span['start'] if span['start'] < lower['start']
      lower['end'] = span['end'] if span['end'] > lower['end']
    end

    rearrange_level par, current, span if current == lowest_odd || below.odd? != current.odd?

    break if current == lowest_odd
    current = below
  end
end
458
+
459
+
460
# Processes one paragraph in place: assigns explicit embedding levels,
# removes the formatting characters, then resolves the types and reorders
# the characters.
#
# An undecided paragraph level (-1) becomes 0. RLE/LRE/RLO/LRO push the
# current state and move to the next odd (R...) or even (L...) level, unless
# that would exceed 61, in which case the code is only counted. PDF undoes
# the most recent counted code, or else pops the most recent pushed state. A
# PDF with nothing to pop is ignored (it used to crash on the empty stack).
# Every other character except BN receives the current level and, inside an
# override, the override direction as its type.
def handle_paragraph par
  par['level'] = 0 if par['level'] == -1
  embedding_level = par['level']
  override_status = nil
  level_stack = []
  invalid_level_changes = 0

  par['characters'].each do |char|
    bidi_type = char['bidiType']
    case bidi_type
    when 'RLE', 'LRE', 'RLO', 'LRO'
      rtl = bidi_type.start_with?('R')
      # Smallest level above the current one with the wanted parity
      # (odd for R..., even for L...).
      next_level = embedding_level + (embedding_level.odd? == rtl ? 2 : 1)
      if next_level <= 61
        level_stack.push('level' => embedding_level, 'override_status' => override_status)
        embedding_level = next_level
        # Overrides force 'R'/'L' on the enclosed text; embeddings clear it.
        override_status = bidi_type.end_with?('O') ? bidi_type[0, 1] : nil
      else
        invalid_level_changes += 1
      end
    when 'PDF' # End of embedding/override
      if invalid_level_changes > 0
        invalid_level_changes -= 1
      elsif !level_stack.empty?
        saved = level_stack.pop
        embedding_level = saved['level']
        override_status = saved['override_status']
      end
    else
      if bidi_type != 'BN'
        char['level'] = embedding_level
        char['bidiType'] = override_status if override_status
      end
    end
  end

  removable = %w[RLE LRE RLO LRO PDF BN]
  par['characters'].delete_if { |char| removable.include?(char['bidiType']) }

  split_into_runs par
  resolve_weak_types par
  reverse_rtl_chars par
end # function
551
+
552
+
553
# Groups the code points in @valueArray into paragraphs.
#
# Line feeds and carriage returns stay with the paragraph they end; the first
# other character after them opens a new paragraph. Each remaining code point
# is looked up with retrieve_rec to obtain its bidi type (field 4) and its
# mirrored flag (field 9). While a paragraph's level is still undecided (-1),
# the first R/AL character sets it to 1 and the first L character to 0.
#
# Returns the Array of ParagraphType objects.
def to_paragraphs default_direction=nil
  paragraphs = []
  current = nil
  after_newline = false

  @valueArray.each do |code|
    is_newline = (code == 0x0A || code == 0x0D)

    if current.nil? || (after_newline && !is_newline)
      current = ParagraphType.new default_direction
      paragraphs.push current
      after_newline = false
    end

    if is_newline
      # Add new lines to the current paragraph
      current['characters'].push UtfChar.new(code, nil, 'N')
      after_newline = true
      next
    end

    record = retrieve_rec code
    type = record ? record[4] : nil
    mirrored = record ? record[9] : nil
    current['characters'].push UtfChar.new(code, type, mirrored)

    next unless current['level'] == -1
    if type == 'R' || type == 'AL'
      current['level'] = 1
    elsif type == 'L'
      current['level'] = 0
    end
  end

  paragraphs
end
591
+
592
# Binary-searches the mirroring file for a code point.
#
# The file consists of fixed-width records of @mirror_record_len bytes: a
# 3-byte big-endian code point followed by the 3-byte big-endian code point
# of its mirror image.
#
# key - Integer code point.
#
# Returns the mirrored code point (also cached in $mirrorMap), or key itself
# when the file has no entry for it.
def search_mirrored_value key
  to_int = lambda { |bytes| bytes.each_byte.inject(0) { |acc, b| (acc << 8) | b } }

  bottom = 0
  # Last valid record index. Starting at @num_of_mirror_chars would let the
  # search probe one record past the end of the file, where read returns nil.
  top = @num_of_mirror_chars - 1
  while top >= bottom
    middle = (top + bottom) / 2
    @mirrorFile.pos = middle * @mirror_record_len
    code_bytes = @mirrorFile.read 3
    break if code_bytes.nil? || code_bytes.length < 3 # truncated file
    code = to_int.call(code_bytes)
    if key == code
      mirror_bytes = @mirrorFile.read 3
      break if mirror_bytes.nil? || mirror_bytes.length < 3
      mirrored = to_int.call(mirror_bytes)
      $mirrorMap[key] = [mirrored]
      return mirrored
    end
    if key < code
      top = middle - 1
    else
      bottom = middle + 1
    end
  end
  key
end
623
+
624
# Returns the mirrored code point for char['value'], taken from the
# $mirrorMap cache when present and from search_mirrored_value otherwise.
def get_mirrored_value char
  code = char['value']
  cached = $mirrorMap[code]
  cached ? cached[0] : search_mirrored_value(code)
end
630
+
631
+ #
632
+ # to_visual - the function that converts a UTF-8 string
633
+ # to visual.
634
+ #
635
+ # i_string - the input string.
636
+ # default_direction - each paragraph's default direction.
637
+ # values:
638
+ # 'R', 'RTL' - right to left text.
639
+ # 'L', 'LTR' - left to right text.
640
+ # Not set, other values - default behaviour.
641
+ #
642
# Converts a logical UTF-8 string to its visual order.
#
# The bytes are decoded into code points (kept in @valueArray), split into
# paragraphs, each paragraph is processed by handle_paragraph, and the
# characters are then re-encoded in their new order. Characters flagged as
# mirrored that end up on an odd level are replaced by their mirror image.
# Nonspacing marks resolved to 'R' are emitted after the next character
# that is not such a mark.
#
# Raises StringError when the input is not well-formed UTF-8: a bad lead
# byte, a bad continuation byte, or a multi-byte sequence cut off at the end
# of the string (which used to be dropped silently).
def to_visual i_string, default_direction=nil
  @valueArray = [] # Array of values
  state = CHAR_START
  charVal = 0
  handledString = '' # input decoded so far, for error messages
  charForError = ''  # raw bytes of the character being decoded
  byteList = ''      # decimal values of those bytes, for error messages

  i_string.each_byte do |byte|
    charForError += byte.chr
    if state == CHAR_START
      byteList = byte.to_s
      if byte & 0x80 == 0 # regular ASCII
        charVal = byte
      elsif byte & 0xE0 == 0xC0 # Begins with 110b - two bytes
        charVal = byte & 0x1F
        state = CHAR_END
      elsif byte & 0xF0 == 0xE0 # Begins with 1110b - three bytes
        charVal = byte & 0x0F
        state = CHAR_BEFORE_LAST
      elsif byte & 0xF8 == 0xF0 # Begins with 11110b - four bytes
        charVal = byte & 0x07
        state = CHAR_SECOND_OF_FOUR
      else
        raise StringError.new(byteList, handledString)
      end
    else
      byteList += ', ' + byte.to_s
      # Every continuation byte must begin with 10b
      raise StringError.new(byteList, handledString) if byte & 0xC0 != 0x80
      charVal = (charVal << 6) | (byte & 0x3F)
      state = case state
              when CHAR_SECOND_OF_FOUR then CHAR_BEFORE_LAST
              when CHAR_BEFORE_LAST then CHAR_END
              else CHAR_START
              end
    end

    next unless state == CHAR_START # character still incomplete
    @valueArray.push charVal
    handledString = handledString + charForError
    charForError = ''
  end
  raise StringError.new(byteList, handledString) unless state == CHAR_START

  # First step - split the text into paragraphs
  paragraphs = to_paragraphs default_direction
  paragraphs.each do |par|
    handle_paragraph par
  end

  # Now, make a string
  ret_value = ''
  paragraphs.each do |par|
    nsm_stack = []
    par['characters'].each do |char|
      char['value'] = get_mirrored_value char if char['mirroredInd'] == 'Y' && char['level'].odd?

      if char['origType'] == 'NSM' && char['bidiType'] == 'R'
        nsm_stack.push char['value']
      else
        ret_value += char['value'].to_utf8_char if char['bidiType'] == 'R'
        ret_value += nsm_stack.pop.to_utf8_char until nsm_stack.empty?
        ret_value += char['value'].to_utf8_char if char['bidiType'] != 'R'
      end
    end
    ret_value += nsm_stack.pop.to_utf8_char until nsm_stack.empty?
  end

  ret_value
end
733
+
734
+ # Helper function to reverse the string for us before rendering.
735
# Returns the to_visual result with its characters reversed, for renderers
# that expect right-to-left text back to front.
def render_visual i_string, default_direction=nil
  visual = to_visual i_string, default_direction
  visual.reverse!
end
738
+
739
# Reverses, in place, every maximal stretch of characters whose level is at
# least lvl, looking only at the indices hsh_cur['start']..hsh_cur['end'].
# A stretch still open at hsh_cur['end'] is closed there.
def rearrange_level par, lvl, hsh_cur
  chars = par['characters']
  limit = hsh_cur['end'] + 1 # one past the last index to examine
  run_from = nil             # start of the stretch currently being collected

  hsh_cur['start'].upto(limit) do |pos|
    char = chars[pos]
    in_run = char && char['level'] >= lvl
    run_from = pos if run_from.nil? && in_run

    # Keep collecting until the stretch ends or the limit is reached.
    next unless run_from && (pos == limit || !in_run)

    chars[run_from...pos] = chars[run_from...pos].reverse
    run_from = nil
  end
end
770
+ end
771
+