id_pack 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,352 @@
1
+ # Based on work by Oleksandr Korniienko
2
module IdPack
  # Packs a list of UUIDs into a short string over a caller-supplied alphabet
  # and unpacks such strings again.
  #
  # Every usable alphabet character stands for a bit pattern. When the number
  # of usable characters is not a power of two, the first `lowhi` characters
  # carry `pow - 1` bits and the remaining ones carry `pow` bits.
  #
  # Two encodings are produced by #alphanum_compress:
  # * plain: a leading 0 bit followed by the 128 bits of every UUID, in the
  #   order given;
  # * delta: a leading delimiter character followed by the differences
  #   between the numerically sorted UUIDs. The last alphabet character is
  #   reserved as delimiter and terminates every difference that is encoded
  #   with fewer than 128 bits.
  class UuidPacker
    # Number of bits in one UUID.
    UUID_BIT_LENGTH = 128
    private_constant :UUID_BIT_LENGTH

    # Counts the bits needed to represent a non-negative integer.
    #
    # @param num [Integer] non-negative number
    # @return [Integer] number of significant bits (0 for 0)
    # @raise [ArgumentError] if +num+ is negative (the shift-based loop this
    #   replaces never terminated for negative numbers)
    def bin_pow(num)
      raise ArgumentError, "num must be non-negative, got #{num}" if num < 0

      num.bit_length
    end

    # Builds the code table for an alphabet.
    #
    # @param alphanum_string [String] characters available for encoding
    # @param del [Boolean] true to reserve the last character as delimiter
    # @return [Array<Array>] the first element is the header
    #   `[lowhi, delimiter, pow]` (+lowhi+ is stored as -1 when it is zero,
    #   +delimiter+ is '' when +del+ is false); every following element is
    #   `[code, character, bit_width]` for one usable character
    def alphanum_to_array(alphanum_string, del)
      el = alphanum_string.length
      # a delimited alphabet cannot use its last character for data
      el -= 1 if del

      # maximum number of bits coded by one character
      pow = bin_pow(el - 1)
      # how many characters code one bit less
      lowhi = 2**pow - el
      delimiter = del ? alphanum_string[el] : ''

      alphanum_array = [[lowhi.zero? ? -1 : lowhi, delimiter, pow]]
      el.times do |char_item|
        alphanum_array.push(
          if char_item < lowhi
            [char_item, alphanum_string[char_item], pow - 1]
          else
            [lowhi + char_item, alphanum_string[char_item], pow]
          end
        )
      end
      alphanum_array
    end

    # Compresses an array of UUIDs into a string over the given alphabet.
    #
    # @param arr [Array<String>] UUIDs as hexadecimal strings, with or without
    #   '-' separators
    # @param alphanum_string [String] alphabet used for the packed string
    # @param order [Boolean] true to keep the order of +arr+, which rules out
    #   the delta encoding
    # @return [String] the delta encoding when it is allowed and strictly
    #   shorter, otherwise the plain encoding
    def alphanum_compress(arr, alphanum_string, order)
      values = arr.map { |item| uuid_to_i(item) }

      # plain encoding: the first bit is 0, then every UUID contributes
      # exactly 128 bits to one continuous bit stream
      table = alphanum_to_array(alphanum_string, false)
      lowhi, _, pow = table.first
      nresult = ''.dup
      achr = 0
      rest = 1
      values.each do |value|
        achr += value << rest
        rest += UUID_BIT_LENGTH
        achr, rest = emit_symbols(nresult, table, achr, rest, pow, lowhi)
      end
      emit_tail(nresult, table, achr, rest, pow, lowhi)
      return nresult if order

      # delta encoding: the header is the last alphabet character, whose
      # plain code consists of ones only, so its first bit is 1
      dresult = table.last[1].dup
      table = alphanum_to_array(alphanum_string, true)
      lowhi, delimiter, pow = table.first
      # an alphabet this small leaves no room for a delimiter
      return nresult unless pow > 1

      prev = 0
      # sort by numeric value: sorting the strings would misorder mixed-case
      # hexadecimal input and yield negative deltas
      values.sort.each do |value|
        delta = value - prev
        prev = value
        binlog = bin_pow(delta)
        # deltas close to full length are written as whole 128-bit values
        binlog = UUID_BIT_LENGTH if binlog >= UUID_BIT_LENGTH - pow

        achr, rest = emit_symbols(dresult, table, delta, binlog, pow, lowhi)
        emit_tail(dresult, table, achr, rest, pow, lowhi)

        # shorter values are terminated by the delimiter
        dresult << delimiter if binlog < UUID_BIT_LENGTH
      end

      dresult.length < nresult.length ? dresult : nresult
    end

    # Decompresses a string produced by #alphanum_compress.
    #
    # @param str [String] packed string
    # @param alphanum_string [String] alphabet that was used for packing
    # @return [Array<String>] lowercase UUIDs in 8-4-4-4-12 form; an empty
    #   array for an empty +str+
    def alphanum_decompress(str, alphanum_string)
      return [] if str.empty?

      table = alphanum_to_array(alphanum_string, false)
      header = table.rassoc(str[0])

      # the first bit of the first symbol tells which encoding was used
      if (header[0] & (2**(header[2] - 1))).zero?
        decode_plain(str, table)
      else
        decode_delta(str, alphanum_string)
      end
    end

    private

    # Converts a UUID string (with or without '-') to its integer value.
    def uuid_to_i(uuid)
      uuid.delete('-').to_i(16)
    end

    # Formats an integer as a zero-padded, lowercase 8-4-4-4-12 UUID string.
    def format_uuid(value)
      hex = value.to_s(16)
      if hex.length > UUID_BIT_LENGTH / 4
        raise ArgumentError, "value does not fit into #{UUID_BIT_LENGTH} bits"
      end

      hex = hex.rjust(UUID_BIT_LENGTH / 4, '0')
      [hex[0..7], hex[8..11], hex[12..15], hex[16..19], hex[20..31]].join('-')
    end

    # Returns the lowest +width+ bits of +value+ in reversed bit order.
    def reverse_bits(value, width)
      # the extra top bit keeps leading zeros through the string round trip
      ((value & (2**width - 1)) + 2**width).to_s(2).reverse.to_i(2) >> 1
    end

    # Appends symbols to +out+ while at least +pow+ bits are pending.
    # Returns the remaining bits and their count as `[achr, rest]`.
    def emit_symbols(out, table, achr, rest, pow, lowhi)
      until rest < pow
        # try the short code first, fall back to the full width
        width = pow - 1
        width += 1 if reverse_bits(achr, width) >= lowhi
        out << table.assoc(reverse_bits(achr, width))[1]
        achr >>= width
        rest -= width
      end
      [achr, rest]
    end

    # Appends one zero-padded symbol for the last +rest+ pending bits, if any.
    def emit_tail(out, table, achr, rest, pow, lowhi)
      return unless rest > 0

      code = reverse_bits(achr, rest) << (pow - rest - 1)
      code <<= 1 if code >= lowhi
      out << table.assoc(code)[1]
    end

    # Looks up +char+ in +table+ and returns `[bits, width]`: the bits the
    # character contributes to the stream and how many of them there are.
    def symbol_bits(table, char)
      entry = table.rassoc(char)
      [reverse_bits(entry[0], entry[2]), entry[2]]
    end

    # Decodes a delta-encoded string; str[0] is the header character.
    def decode_delta(str, alphanum_string)
      table = alphanum_to_array(alphanum_string, true)
      delimiter = table.first[1]
      result = []
      prev = 0
      achr = 0
      rest = 0
      item = 1

      # runs one step past the last symbol so that a trailing full-length
      # value is flushed
      while item <= str.length
        if str[item] == delimiter || rest >= UUID_BIT_LENGTH
          # a full-length value has no delimiter: look at this symbol again
          item -= 1 if rest >= UUID_BIT_LENGTH

          prev += achr
          result.push(format_uuid(prev))
          achr = 0
          rest = 0
        elsif item < str.length
          bits, width = symbol_bits(table, str[item])
          achr += bits << rest
          # count the width of the symbol itself; deriving it from the
          # reversed bits miscounts short symbols
          rest += width
        end
        item += 1
      end
      result
    end

    # Decodes a plain (non-delta) string using the undelimited code table.
    def decode_plain(str, table)
      result = []
      achr = 0
      rest = 0

      str.each_char.with_index do |char, index|
        bits, width = symbol_bits(table, char)
        achr += bits << rest
        rest += width

        # drop the leading encoding-marker bit
        if index.zero?
          achr >>= 1
          rest -= 1
        end

        next if rest < UUID_BIT_LENGTH

        rest -= UUID_BIT_LENGTH
        result.push(format_uuid(achr & (2**UUID_BIT_LENGTH - 1)))
        achr >>= UUID_BIT_LENGTH
      end
      result
    end
  end
end
@@ -0,0 +1,3 @@
1
module IdPack
  # Version of the id_pack gem.
  VERSION = "0.1.0".freeze
end
data/lib/id_pack.rb ADDED
@@ -0,0 +1,6 @@
1
+ require "id_pack/version"
2
+
3
+ require 'id_pack/lz_string'
4
+ require 'id_pack/uuid_packer'
5
+ require 'id_pack/id_packer'
6
+ require 'id_pack/engine' if defined? Rails