id_pack 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +35 -0
- data/LICENSE +21 -0
- data/README.md +39 -0
- data/Rakefile +6 -0
- data/app/assets/javascripts/lib/id-packer.js +296 -0
- data/app/assets/javascripts/lib/lz-string.js +511 -0
- data/app/assets/javascripts/lib/uuid-packer.js +372 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/id_pack.gemspec +37 -0
- data/lib/id_pack/engine.rb +3 -0
- data/lib/id_pack/id_packer.rb +395 -0
- data/lib/id_pack/lz_string.rb +579 -0
- data/lib/id_pack/uuid_packer.rb +352 -0
- data/lib/id_pack/version.rb +3 -0
- data/lib/id_pack.rb +6 -0
- data/vendor/assets/javascripts/require.js +2145 -0
- metadata +108 -0
@@ -0,0 +1,352 @@
|
|
1
|
+
# Based on work by Oleksandr Korniienko
|
2
|
+
module IdPack
  # Packs an array of UUID strings into a compact string over a caller-supplied
  # alphabet and unpacks such strings again.
  #
  # Two encodings are produced and the shorter one wins:
  #
  # * plain - all UUIDs are concatenated into one bit stream (input order is
  #   kept). The very first bit of the stream is 0.
  # * delta - UUIDs are sorted and every UUID is stored as the difference to
  #   its predecessor, terminated by a delimiter character when the
  #   difference needs fewer symbols than a whole UUID. The stream starts
  #   with the last alphabet character, whose plain code is all ones, so the
  #   very first bit is 1.
  #
  # Symbols use a prefix code: when the alphabet size is not a power of two,
  # the first +lowhi+ characters carry one bit less than the others.
  class UuidPacker
    # Length of a UUID in bits.
    UUID_BIT_LENGTH = 128

    # Number of bits needed to represent +num+ (0 for 0).
    #
    # @param num [Integer] non-negative integer
    # @return [Integer]
    # @raise [ArgumentError] if +num+ is negative (the bit count is undefined
    #   and a shift-until-zero loop would never terminate)
    def bin_pow(num)
      raise ArgumentError, "num must not be negative: #{num}" if num < 0

      num.bit_length
    end

    # Builds the code table for an alphabet.
    #
    # The first element is a header: <tt>[lowhi, delimiter, pow]</tt> where
    # +pow+ is the maximum number of bits per character, +lowhi+ is the number
    # of characters that are one bit shorter (stored as -1 instead of 0 so the
    # header can never be mistaken for the entry with code 0) and +delimiter+
    # is the reserved last character (or '' when +del+ is false).
    # Every following element is <tt>[code, character, bit_width]</tt>.
    #
    # @param alphanum_string [String] the alphabet, at least two characters
    # @param del [Boolean] reserve the last character as delimiter
    # @return [Array<Array>]
    # @raise [ArgumentError] if the alphabet has fewer than two characters
    def alphanum_to_array(alphanum_string, del)
      el = alphanum_string.length
      raise ArgumentError, 'alphabet must contain at least two characters' if el < 2

      # a reserved delimiter is not available for coding
      el -= 1 if del
      delimiter = del ? alphanum_string[el] : ''

      pow = bin_pow(el - 1)
      lowhi = 2**pow - el

      table = [[lowhi.zero? ? -1 : lowhi, delimiter, pow]]
      el.times do |index|
        if index < lowhi
          table.push [index, alphanum_string[index], pow - 1]
        else
          table.push [lowhi + index, alphanum_string[index], pow]
        end
      end
      table
    end

    # Compresses an array of UUID strings.
    #
    # @param arr [Array<String>] UUIDs as hexadecimal strings, dashes optional
    # @param alphanum_string [String] alphabet used for the output
    # @param order [Boolean] true to force the order-preserving plain encoding
    # @return [String] the shorter of the plain and delta encodings (always
    #   the plain one when +order+ is true or the alphabet is too small for
    #   delta coding)
    def alphanum_compress(arr, alphanum_string, order)
      values = arr.map { |uuid| uuid.delete('-').to_i(16) }

      plain = compress_plain(values, alphanum_to_array(alphanum_string, false))
      return plain if order

      # sort numerically so that every delta is non-negative regardless of
      # letter case or dash placement in the input strings
      delta = compress_delta(values.sort, alphanum_to_array(alphanum_string, true))
      delta && delta.length < plain.length ? delta : plain
    end

    # Decompresses a string produced by #alphanum_compress.
    #
    # @param str [String] compressed string
    # @param alphanum_string [String] alphabet that was used for compression
    # @return [Array<String>] lower-case UUIDs in 8-4-4-4-12 form; sorted
    #   ascending when the delta encoding was used, input order otherwise
    def alphanum_decompress(str, alphanum_string)
      return [] if str.nil? || str.empty?

      plain_table = alphanum_to_array(alphanum_string, false)

      # the most significant bit of the first symbol tells the encoding
      first_code, _, first_width = plain_table.rassoc(str[0])
      if (first_code & (2**(first_width - 1))) != 0
        decompress_delta(str, alphanum_to_array(alphanum_string, true))
      else
        decompress_plain(str, plain_table)
      end
    end

    private

    # Reverses the lowest +width+ bits of +value+.
    def reverse_bits(value, width)
      marker = 2**width
      # the marker bit keeps leading zeros alive through the string round trip
      ((value & (marker - 1)) + marker).to_s(2).reverse.to_i(2) >> 1
    end

    # Emits symbols for the low bits of +bits+ while at least +pow+ bits are
    # pending; returns the remaining bits and their count.
    def drain_symbols(bits, count, table, symbols)
      lowhi, _, pow = table.first
      until count < pow
        # try the short code first, fall back to the full width
        width = pow - 1
        width += 1 if reverse_bits(bits, width) >= lowhi

        symbols.push table.assoc(reverse_bits(bits, width))[1]
        bits >>= width
        count -= width
      end
      [bits, count]
    end

    # Symbol for the last +count+ (< pow) pending bits, zero padded.
    def tail_symbol(bits, count, table)
      lowhi, _, pow = table.first
      code = reverse_bits(bits, count)
      code <<= pow - count - 1
      code <<= 1 if code >= lowhi
      table.assoc(code)[1]
    end

    # Plain encoding: one continuous bit stream, header bit 0 first.
    def compress_plain(values, table)
      symbols = []
      bits = 0
      count = 1 # the header bit (0 => no delta)
      values.each do |value|
        bits += value << count
        count += UUID_BIT_LENGTH
        bits, count = drain_symbols(bits, count, table, symbols)
      end
      symbols.push tail_symbol(bits, count, table) if count > 0
      symbols.join
    end

    # Delta encoding of numerically sorted values; nil when the alphabet is
    # too small (at most one bit per symbol once the delimiter is reserved).
    def compress_delta(sorted_values, table)
      _lowhi, delimiter, pow = table.first
      return nil unless pow > 1

      # leading delimiter marks the stream as delta encoded
      symbols = [delimiter]
      previous = 0
      sorted_values.each do |value|
        delta = value - previous
        previous = value

        # nearly full-size deltas are written as whole UUIDs without delimiter
        width = bin_pow(delta)
        width = UUID_BIT_LENGTH if width >= UUID_BIT_LENGTH - pow

        bits, count = drain_symbols(delta, width, table, symbols)
        symbols.push tail_symbol(bits, count, table) if count > 0
        symbols.push delimiter if width < UUID_BIT_LENGTH
      end
      symbols.join
    end

    # Decodes a delta encoded string (first character is the header).
    def decompress_delta(str, table)
      _lowhi, delimiter, = table.first
      result = []
      previous = 0
      bits = 0
      count = 0
      index = 1

      # runs one position past the end to flush a trailing whole-size UUID
      while index <= str.length
        if str[index] == delimiter || count >= UUID_BIT_LENGTH
          # a whole-size UUID has no delimiter: revisit the current symbol
          index -= 1 if count >= UUID_BIT_LENGTH

          previous += bits
          result.push format_uuid(previous)
          bits = 0
          count = 0
        elsif index < str.length
          code, _, width = table.rassoc(str[index])
          bits += reverse_bits(code, width) << count
          # advance by the symbol's own width taken from the table
          count += width
        end
        index += 1
      end
      result
    end

    # Decodes a plain (non-delta) encoded string.
    def decompress_plain(str, table)
      result = []
      bits = 0
      count = 0
      str.each_char.with_index do |char, index|
        code, _, width = table.rassoc(char)
        bits += reverse_bits(code, width) << count
        count += width

        if index.zero?
          # drop the header bit
          bits >>= 1
          count -= 1
        end

        next if count < UUID_BIT_LENGTH

        count -= UUID_BIT_LENGTH
        result.push format_uuid(bits & (2**UUID_BIT_LENGTH - 1))
        bits >>= UUID_BIT_LENGTH
      end
      result
    end

    # Formats an integer as a zero padded, dash separated UUID string.
    def format_uuid(value)
      hex = value.to_s(16)
      hex = '0' * (UUID_BIT_LENGTH / 4 - hex.length) + hex
      [hex[0..7], hex[8..11], hex[12..15], hex[16..19], hex[20..31]].join('-')
    end
  end
end
|