id_pack 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,395 @@
1
module IdPack

  # Encodes a collection of integer ids into a compact string and decodes such
  # a string back into the sorted integer array.
  #
  # Usage:
  #   encode:
  #     a usual use case of encode is to provide the server with object ids
  #     that have already been fetched and hence we don't need their data to
  #     be returned
  #
  #     Example:
  #
  #       IdPack::IdPacker.new.encode([5, 6, 21, 23, 25]) # => "_F~C_P.V"
  #
  #   decode:
  #     mainly used by the server to convert the compressed string back into
  #     the integer array
  #
  #     Example:
  #
  #       IdPack::IdPacker.new.decode("_F~C_P.V") # => [5, 6, 21, 23, 25]
  #
  # Encoded format: a sequence of tokens, each made of a one-character prefix
  # followed by a number written in the output charset (base 63 by default):
  #   "_" + n  => skip forward by n (distance from the previous id to the next one)
  #   "~" + n  => n consecutive ids
  #   "." + n  => a bitmap; n written in binary marks present ids with "1"
  #
  # Ids are expected to be non-negative integers.
  class IdPacker

    # Raised internally when an encoded string contains a character that is
    # not part of the charset; `decode` rescues it and returns [].
    class InvalidEncodedCharException < StandardError; end

    SPACES_PREFIX = '_'.freeze
    BINARY_PREFIX = '.'.freeze
    RANGE_PREFIX = '~'.freeze
    WINDOW_SIZE = 10
    EXCLUDE_NIL = true
    ENCODED_NUMBER_CHARS = ((('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a).join + '-').freeze

    # Encodes ids into the compressed string format.
    #
    #   [5, 6, 21, 23, 25]
    #   => "_F~C_P.V"
    #
    # array          - collection of integer ids (nil is treated as empty).
    # window_size    - maximum span of ids packed into one bitmap token; a
    #                  run of at least this many consecutive ids is always
    #                  emitted as a range token.
    # exclude_nil    - when truthy, nil entries are dropped before encoding;
    #                  when falsy, a nil entry makes sorting raise ArgumentError.
    # output_charset - digits used to write numbers; it must not contain any
    #                  of the prefix characters, and the same charset has to
    #                  be given to `decode`.
    #
    # Returns the encoded String ("" for empty input).
    def encode(array, window_size = WINDOW_SIZE, exclude_nil = EXCLUDE_NIL, output_charset = ENCODED_NUMBER_CHARS)
      numbers = Array(array)
      numbers = numbers.compact if exclude_nil
      ranges = convert_numbers_to_ranges(numbers.uniq.sort)

      encoded = ''.dup
      pending = []        # short ranges waiting to be emitted as a single token
      window_start = nil  # first id of the pending group
      prev_end = 0

      ranges.each do |range|
        spaces = range.begin - prev_end
        prev_end = range.end

        unless pending.empty?
          span = range.end - window_start + 1

          if span < window_size
            # still fits in the current window, keep collecting
            pending << range
            next
          end

          if span == window_size
            # the window is exactly full: emit it including the current range
            pending << range
            encoded << encode_pending_ranges(pending, output_charset)
            pending = []
            next
          end

          # current range does not fit: emit what was collected so far
          encoded << encode_pending_ranges(pending, output_charset)
          pending = []
        end

        # a negative distance can only occur for a negative first id, which
        # the format cannot express; the spaces token is skipped in that case
        encoded << SPACES_PREFIX << encode_decimal_number(spaces, output_charset) if spaces >= 0

        if range.size >= window_size
          encoded << RANGE_PREFIX << encode_decimal_number(range.size, output_charset)
        else
          pending << range
          window_start = range.begin
        end
      end

      encoded << encode_pending_ranges(pending, output_charset) unless pending.empty?
      encoded
    end

    # Decodes a compressed string back into the sorted id array.
    #
    #   "_F~C_P.V"
    #   => [5, 6, 21, 23, 25]
    #
    # encoded_caches - String produced by `encode` (nil is treated as "").
    # input_charset  - charset the string was encoded with.
    #
    # Returns an Array of Integers; [] when the string contains a character
    # that is neither a prefix nor part of the charset.
    def decode(encoded_caches, input_charset = ENCODED_NUMBER_CHARS)
      prefixes = [SPACES_PREFIX, BINARY_PREFIX, RANGE_PREFIX]
      ids = []
      start_id = 0
      current_prefix = nil
      encoded_number = ''.dup

      encoded_caches.to_s.each_char do |c|
        unless prefixes.include?(c)
          encoded_number << c
          next
        end

        # a new token starts here, so the previous one is complete
        if current_prefix
          ids_to_include, end_id = convert_encoded_number_to_ids(current_prefix, encoded_number, start_id, input_charset)
          ids.concat(ids_to_include)
          # a spaces token counts from the last id itself, the other tokens
          # start right after it
          start_id = end_id + (c == SPACES_PREFIX ? 0 : 1)
        end

        current_prefix = c
        encoded_number = ''.dup
      end

      if current_prefix
        ids_to_include, = convert_encoded_number_to_ids(current_prefix, encoded_number, start_id, input_charset)
        ids.concat(ids_to_include)
      end

      ids
    rescue InvalidEncodedCharException
      # corrupted encoded_caches, assume nothing cached
      []
    end

    # Builds a sync string from a map of id => synced_at.
    #
    # Input: id_synced_at:
    #   {
    #     1 => synced_at_1_timestamp,
    #     2 => synced_at_2_timestamp,
    #     10 => synced_at_10_timestamp, ...
    #   }
    #
    # Output (comma separated, every field compressed with LZString):
    #   min_last_synced_at,
    #   "encoded_0",diff_last_synced_at_0,
    #   "encoded_1",diff_last_synced_at_1, ...
    #
    # Ids sharing a timestamp are grouped. Integer-like ids are joined with
    # ","; otherwise the ids are concatenated with "-" removed (whether a
    # group is treated as integer-like is decided by its first id only).
    def encode_sync_str(id_synced_at)
      min_synced_at = id_synced_at.values.min
      parts = [LZString.compress_to_encoded_uri_component(min_synced_at.to_s)]

      id_synced_at.group_by { |_id, synced_at| synced_at }.each do |synced_at, pairs|
        ids = pairs.map do |id, _synced_at|
          id_str = id.to_s
          # keep ids that survive an integer round trip as integers
          id_str.to_i.to_s == id_str ? id_str.to_i : id_str
        end

        joined_ids = ids.first.is_a?(String) ? ids.join('').delete('-') : ids.join(',')

        encoded_ids = LZString.compress_to_encoded_uri_component(joined_ids)
        encoded_diff = LZString.compress_to_encoded_uri_component((synced_at - min_synced_at).to_s)

        parts << "#{encoded_ids},#{encoded_diff}"
      end

      parts.join(',')
    end

    # Parses a sync string (see `encode_sync_str`) back into a map of
    # id => last_synced_at.
    #
    # sync_str       - String in the format
    #                  min_last_synced_at,"encoded_0",diff_0,"encoded_1",diff_1,...
    # base_timestamp - value added to every decoded timestamp.
    #
    # When an id appears in several groups the largest timestamp wins.
    # Returns {} when the string cannot be parsed.
    def decode_sync_str(sync_str, base_timestamp = 0)
      sync_str = sync_str.encode('UTF-8', 'UTF-8', invalid: :replace)

      encoded_min_last_synced_at, *encoded_fields = sync_str.split(',')
      min_last_synced_at = LZString.decompress_from_encoded_uri_component(encoded_min_last_synced_at).to_i

      # the remaining fields come in (ids, diff) pairs
      encoded_fields.each_slice(2).each_with_object({}) do |(encoded_caches, encoded_diff_last_synced_at), synced_at_map|
        primary_keys_str = LZString.decompress_from_encoded_uri_component(encoded_caches)
        primary_keys = primary_keys_str.split(',')

        if primary_keys.first.to_i.to_s == primary_keys.first
          primary_keys.map!(&:to_i)
        else
          # not integers: read the payload as 32-character chunks and
          # re-insert the dashes in the 8-4-4-4-12 layout
          primary_keys = primary_keys_str.scan(/.{32}/).map do |uuid_str|
            [uuid_str[0, 8], uuid_str[8, 4], uuid_str[12, 4], uuid_str[16, 4], uuid_str[20, 12]].join('-')
          end
        end

        diff_last_synced_at = LZString.decompress_from_encoded_uri_component(encoded_diff_last_synced_at).to_i
        last_synced_at = min_last_synced_at + diff_last_synced_at + base_timestamp

        primary_keys.each do |key|
          synced_at_map[key] = [synced_at_map[key], last_synced_at].compact.max
        end
      end
    rescue
      # invalid sync_str, return empty map
      {}
    end


    private

    # Collapses runs of consecutive numbers into ranges.
    #
    #   [1,2,3,6,7,8]
    #   => [1..3, 6..8]
    def convert_numbers_to_ranges(numbers)
      numbers.each_with_object([]) do |number, ranges|
        last = ranges.last

        if last && last.end + 1 == number
          ranges[-1] = last.begin..number
        else
          ranges << (number..number)
        end
      end
    end

    # Renders ranges as a bitmap string: "1" for ids inside a range, "0" for
    # the gaps between consecutive ranges.
    #
    #   [1..3, 6..8]
    #   => "11100111"
    def convert_ranges_to_binary_number(ranges)
      bits = ''.dup
      previous = nil

      ranges.each do |range|
        bits << '0' * (range.begin - previous.end - 1) if previous
        bits << '1' * (range.end - range.begin + 1)
        previous = range
      end

      bits
    end

    #   "10101"
    #   => 21
    def convert_binary_number_to_decimal_number(binary_number)
      binary_number.to_i(2)
    end

    # Emits the token for a group of collected ranges: a range token when the
    # group is a single range, a bitmap token otherwise.
    def encode_pending_ranges(ranges, output_charset)
      if ranges.length == 1
        RANGE_PREFIX + encode_decimal_number(ranges.first.size, output_charset)
      else
        binary_number = convert_ranges_to_binary_number(ranges)
        decimal_number = convert_binary_number_to_decimal_number(binary_number)
        BINARY_PREFIX + encode_decimal_number(decimal_number, output_charset)
      end
    end

    # Writes a non-negative integer using output_charset as digits.
    #
    #   5
    #   => "F"
    #
    # Returns nil when decimal_number is not a non-negative Integer.
    def encode_decimal_number(decimal_number, output_charset = ENCODED_NUMBER_CHARS)
      return nil unless decimal_number.is_a?(Integer) && decimal_number >= 0

      base = output_charset.length
      encoded_number = ''.dup
      quotient = decimal_number

      # runs at least once so that 0 is written as the first charset digit
      loop do
        quotient, remainder = quotient.divmod(base)
        encoded_number.prepend(output_charset[remainder])
        break if quotient.zero?
      end

      encoded_number
    end
    alias_method :encode_integer, :encode_decimal_number

    #   21
    #   => "10101"
    #
    # 0 is rendered as "" (no bits), not "0".
    def convert_decimal_number_to_binary_number(decimal_number)
      decimal_number.zero? ? '' : decimal_number.to_s(2)
    end

    # Reads a number written with input_charset digits.
    #
    #   "F"
    #   => 5
    #
    # Raises InvalidEncodedCharException on a character outside the charset.
    def convert_encoded_number_to_decimal_number(encoded_number, input_charset = ENCODED_NUMBER_CHARS)
      base = input_charset.length

      encoded_number.each_char.inject(0) do |decimal_number, c|
        char_index = input_charset.index(c)

        # current char not found in chars, implies corrupted encoded_caches
        raise InvalidEncodedCharException if char_index.nil?

        decimal_number * base + char_index
      end
    end
    alias_method :decode_integer, :convert_encoded_number_to_decimal_number

    # Expands one token into the ids it contains.
    #
    #   encoded_string_prefix, encoded_number, start_id
    #   => [ids_to_include, end_id]
    #
    #   "_", "E", 1
    #   => [[], 4]
    #
    #   "~", "C", 5
    #   => [[5, 6], 6]
    #
    #   "_", "O", 7
    #   => [[], 20]
    #
    #   ".", "V", 21
    #   => [[21, 23, 25], 25]
    def convert_encoded_number_to_ids(encoded_string_prefix, encoded_number, start_id, input_charset = ENCODED_NUMBER_CHARS)
      decimal_number = convert_encoded_number_to_decimal_number(encoded_number, input_charset)

      case encoded_string_prefix
      when SPACES_PREFIX
        [[], start_id + decimal_number - 1]
      when BINARY_PREFIX
        binary_number = convert_decimal_number_to_binary_number(decimal_number)
        ids = []
        binary_number.each_char.each_with_index do |bit, offset|
          ids << start_id + offset if bit == '1'
        end
        [ids, start_id + binary_number.length - 1]
      when RANGE_PREFIX
        end_id = start_id + decimal_number - 1
        [(start_id..end_id).to_a, end_id]
      else
        [[], nil]
      end
    end

  end

end