id_pack 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,352 @@
1
+ # Based on work by Oleksandr Korniienko
2
module IdPack
  # Packs an array of UUID strings into a compact string over a caller-supplied
  # alphabet, and unpacks such a string again.
  #
  # Two encodings are produced by {#alphanum_compress}:
  #
  # * plain - a single 0 flag bit followed by the 128 bits of every UUID in
  #   the given order;
  # * delta - the UUIDs are sorted by value and only the difference to the
  #   previous UUID is stored. The last alphabet character is reserved as the
  #   stream header and as the delimiter that terminates short deltas.
  #
  # Symbols are variable length: with an alphabet whose usable size is not a
  # power of two, the first +lowhi+ characters carry +pow - 1+ bits and the
  # remaining characters carry +pow+ bits (see {#alphanum_to_array}).
  class UuidPacker
    # Length of a UUID in bits.
    UUID_BIT_LENGTH = 128

    # Length of a UUID in hexadecimal digits (without dashes).
    UUID_HEX_LENGTH = UUID_BIT_LENGTH / 4

    # Number of bits needed to represent +num+ (0 for 0, 3 for 6, ...).
    #
    # @param num [Integer] non-negative integer
    # @return [Integer] bit length of +num+
    # @raise [ArgumentError] if +num+ is negative (the shift-based loop this
    #   replaces never terminated for negative input)
    def bin_pow(num)
      raise ArgumentError, "num must not be negative (got #{num})" if num < 0

      num.bit_length
    end

    # Builds the symbol table for an alphabet.
    #
    # The first element is a header +[lowhi, delimiter, pow]+ where +pow+ is the
    # maximum number of bits per character, +lowhi+ is the number of characters
    # that carry one bit less (stored as -1 when it is zero, so the header can
    # never be matched by a code lookup) and +delimiter+ is the reserved last
    # character (or '' when +del+ is false). Every following element is
    # +[code, character, bit_width]+.
    #
    # @param alphanum_string [String] the alphabet, one character per symbol
    # @param del [Boolean] true to reserve the last character as delimiter
    # @return [Array<Array>] header followed by one entry per usable character
    # @raise [ArgumentError] if no usable character is left
    def alphanum_to_array(alphanum_string, del)
      usable = alphanum_string.length
      usable -= 1 if del
      raise ArgumentError, 'alphabet has no usable characters' if usable < 1

      pow = bin_pow(usable - 1)
      lowhi = 2**pow - usable
      delimiter = del ? alphanum_string[usable] : ''

      table = [[lowhi.zero? ? -1 : lowhi, delimiter, pow]]
      usable.times do |index|
        char = alphanum_string[index]
        table.push(index < lowhi ? [index, char, pow - 1] : [lowhi + index, char, pow])
      end
      table
    end

    # Compresses an array of UUID strings.
    #
    # The plain encoding is always computed. Unless +order+ is true, the delta
    # encoding is computed as well and returned when it is strictly shorter.
    # Note that the delta encoding yields the UUIDs sorted by value on
    # decompression.
    #
    # @param arr [Array<String>] UUIDs as hex strings, dashes optional
    # @param alphanum_string [String] alphabet used for the output
    # @param order [Boolean] true to keep the original order (plain encoding)
    # @return [String] the compressed representation
    # @raise [ArgumentError] if the alphabet has fewer than two characters
    def alphanum_compress(arr, alphanum_string, order)
      check_alphabet(alphanum_string)

      plain = compress_plain(arr, alphanum_string)
      return plain if order

      delta = compress_delta(arr, alphanum_string)
      delta && delta.length < plain.length ? delta : plain
    end

    # Decompresses a string produced by {#alphanum_compress}.
    #
    # The top bit of the first character's code tells which encoding was used.
    #
    # @param str [String] compressed string
    # @param alphanum_string [String] alphabet that was used for compression
    # @return [Array<String>] lowercase UUIDs in 8-4-4-4-12 form
    # @raise [ArgumentError] if the alphabet has fewer than two characters, if
    #   +str+ contains a character outside the alphabet, or if a decoded value
    #   does not fit into 128 bits
    def alphanum_decompress(str, alphanum_string)
      check_alphabet(alphanum_string)
      return [] if str.empty?

      plain_table = alphanum_to_array(alphanum_string, false)
      code, _char, width = symbol_entry(plain_table, str[0])

      if (code & (2**(width - 1))) != 0
        decompress_delta(str, alphanum_to_array(alphanum_string, true))
      else
        decompress_plain(str, plain_table)
      end
    end

    private

    # Rejects alphabets that cannot encode a single bit per character.
    def check_alphabet(alphanum_string)
      return if alphanum_string.length >= 2

      raise ArgumentError, 'alphabet must contain at least two characters'
    end

    # Parses a UUID string (with or without dashes) as a hexadecimal integer.
    def uuid_to_int(uuid)
      uuid.delete('-').to_i(16)
    end

    # Formats an integer as a zero-padded, dash-separated lowercase UUID.
    def int_to_uuid(value)
      hex = value.to_s(16)
      if hex.length > UUID_HEX_LENGTH
        raise ArgumentError, 'decoded value does not fit into 128 bits'
      end

      hex = hex.rjust(UUID_HEX_LENGTH, '0')
      [hex[0..7], hex[8..11], hex[12..15], hex[16..19], hex[20..31]].join('-')
    end

    # Returns the lowest +width+ bits of +value+ in reversed bit order. A marker
    # bit is added above them so leading zeros survive the string round trip.
    def reverse_bits(value, width)
      marker = 2**width
      ((value & (marker - 1)) + marker).to_s(2).reverse.to_i(2) >> 1
    end

    # Finds the table entry of +char+ or fails with a descriptive error.
    def symbol_entry(table, char)
      entry = table.rassoc(char)
      if entry.nil?
        raise ArgumentError, "character #{char.inspect} is not part of the alphabet"
      end

      entry
    end

    # Emits characters to +out+ while at least +pow+ bits are pending and
    # returns the remaining +[bits, pending]+.
    def drain_symbols(bits, pending, table, out)
      lowhi, _delimiter, pow = table[0]

      until pending < pow
        # Try the short width first; codes at or above lowhi need the full one.
        width = pow - 1
        width += 1 if reverse_bits(bits, width) >= lowhi

        pending -= width
        out << table.assoc(reverse_bits(bits, width))[1]
        bits >>= width
      end

      [bits, pending]
    end

    # Character for the last +pending+ bits, padded with zero bits up to a
    # complete symbol.
    def tail_symbol(bits, pending, table)
      lowhi, _delimiter, pow = table[0]

      code = reverse_bits(bits, pending)
      code <<= pow - pending - 1
      code <<= 1 if code >= lowhi
      table.assoc(code)[1]
    end

    # Plain encoding: a 0 flag bit, then every UUID in the given order.
    def compress_plain(arr, alphanum_string)
      table = alphanum_to_array(alphanum_string, false)
      out = ''.dup
      bits = 0
      pending = 1 # the leading flag bit (0 => no delta)

      arr.each do |uuid|
        bits += uuid_to_int(uuid) << pending
        pending += UUID_BIT_LENGTH
        bits, pending = drain_symbols(bits, pending, table, out)
      end

      out << tail_symbol(bits, pending, table) if pending > 0
      out
    end

    # Delta encoding, or nil when the alphabet is too small for it (the usable
    # characters must carry more than one bit).
    def compress_delta(arr, alphanum_string)
      table = alphanum_to_array(alphanum_string, true)
      _lowhi, delimiter, pow = table[0]
      return nil unless pow > 1

      # The header is the last alphabet character, whose plain code is all
      # ones; it is the same character that serves as delimiter.
      out = delimiter.dup
      previous = 0

      # Sort by numeric value: sorting the strings may disagree with the
      # numeric order (mixed case, optional dashes) and yield negative deltas.
      arr.map { |uuid| uuid_to_int(uuid) }.sort.each do |value|
        delta = value - previous
        previous = value

        # Deltas that could reach 128 bits once padded are written in full and
        # need no delimiter.
        length = bin_pow(delta)
        length = UUID_BIT_LENGTH if length >= UUID_BIT_LENGTH - pow

        bits, pending = drain_symbols(delta, length, table, out)
        out << tail_symbol(bits, pending, table) if pending > 0
        out << delimiter if length < UUID_BIT_LENGTH
      end

      out
    end

    # Decodes the plain encoding.
    def decompress_plain(str, table)
      uuid_mask = 2**UUID_BIT_LENGTH - 1
      result = []
      bits = 0
      pending = 0

      str.each_char.with_index do |char, index|
        code, _char, width = symbol_entry(table, char)
        bits += reverse_bits(code, width) << pending
        pending += width

        # Drop the leading flag bit carried by the first character.
        if index.zero?
          bits >>= 1
          pending -= 1
        end

        next if pending < UUID_BIT_LENGTH

        pending -= UUID_BIT_LENGTH
        result.push(int_to_uuid(bits & uuid_mask))
        bits >>= UUID_BIT_LENGTH
      end

      result
    end

    # Decodes the delta encoding; +table+ is the delimiter-aware symbol table.
    def decompress_delta(str, table)
      delimiter = table[0][1]
      result = []
      previous = 0
      bits = 0
      pending = 0

      # Start after the header character and run one step past the end so a
      # full-length delta that ends the string is still flushed.
      index = 1
      while index <= str.length
        if str[index] == delimiter || pending >= UUID_BIT_LENGTH
          # A full-length delta has no delimiter: revisit the current character.
          index -= 1 if pending >= UUID_BIT_LENGTH

          previous += bits
          result.push(int_to_uuid(previous))
          bits = 0
          pending = 0
        elsif index < str.length
          code, _char, width = symbol_entry(table, str[index])
          bits += reverse_bits(code, width) << pending
          # Advance by the width of the symbol itself, not by a comparison on
          # the bit-reversed value.
          pending += width
        end

        index += 1
      end

      result
    end
  end
end
@@ -0,0 +1,3 @@
1
module IdPack
  # Version of the id_pack gem; frozen so the constant string cannot be mutated.
  VERSION = '0.1.0'.freeze
end
data/lib/id_pack.rb ADDED
@@ -0,0 +1,6 @@
1
+ require "id_pack/version"
2
+
3
+ require 'id_pack/lz_string'
4
+ require 'id_pack/uuid_packer'
5
+ require 'id_pack/id_packer'
6
+ require 'id_pack/engine' if defined? Rails