id_pack 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,395 @@
1
module IdPack

  # Compresses an array of integer ids into a short string and expands such a
  # string back into the array.
  #
  # The two main entry points are the instance methods #encode and #decode.
  #
  # Usage:
  #   encode:
  #     a usual use case of encode is to provide the server with object ids
  #     that have already been fetched and hence we don't need their data to
  #     be returned
  #
  #     Example:
  #
  #       IdPack::IdPacker.new.encode([5, 6, 21, 23, 25]) # => "_F~C_P.V"
  #
  #   decode:
  #     mainly used by the server to convert the compressed string back into
  #     the integer array
  #
  #     Example:
  #
  #       IdPack::IdPacker.new.decode("_F~C_P.V") # => [5, 6, 21, 23, 25]
  #
  # Encoded format: a sequence of tokens, each a one-character prefix followed
  # by a number written in the output charset (most significant digit first):
  #
  #   "_N"  advance the current position by N ids, none of which are included
  #   "~N"  N consecutive ids starting at the current position are included
  #   ".N"  N, written in binary, is a bitmap starting at the current position;
  #         each "1" bit marks an included id
  class IdPacker

    # Raised internally when an encoded number contains a character that is not
    # part of the charset; #decode rescues it and returns [].
    class InvalidEncodedCharException < StandardError; end

    SPACES_PREFIX = '_'.freeze
    BINARY_PREFIX = '.'.freeze
    RANGE_PREFIX = '~'.freeze
    WINDOW_SIZE = 10
    EXCLUDE_NIL = true
    ENCODED_NUMBER_CHARS = ((('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a).join + '-').freeze

    # Encodes an array of integers into the compressed string format.
    #
    #   [5, 6, 21, 23, 25]
    #   => "_F~C_P.V"
    #
    # array          - integers to encode; duplicates and ordering are ignored.
    # window_size    - a run of at least this many consecutive ids is written
    #                  as a "~" token; shorter neighbouring runs are packed into
    #                  one "." bitmap token spanning at most this many ids.
    # exclude_nil    - when true (the default) nil entries are dropped before
    #                  encoding. When false the array is used as given and must
    #                  not contain nil, which cannot be sorted with integers.
    # output_charset - digits used to write numbers; pass the same charset to
    #                  #decode. It should not contain any of the prefix
    #                  characters.
    #
    # Returns the encoded String ("" for an empty array).
    def encode(array, window_size = WINDOW_SIZE, exclude_nil = EXCLUDE_NIL, output_charset = ENCODED_NUMBER_CHARS)
      numbers = exclude_nil ? array.compact : array
      ranges = convert_numbers_to_ranges(numbers.uniq.sort)

      encoded = ''.dup
      pending = []        # short ranges waiting to be written as a single token
      window_start = nil  # first id of the pending group
      prev_end = 0

      ranges.each do |range|
        gap = range.begin - prev_end
        prev_end = range.end

        unless pending.empty?
          span = range.end - window_start + 1

          if span < window_size
            # Still fits in the current window: keep collecting.
            pending << range
            next
          elsif span == window_size
            # Fills the window exactly: emit the whole group as one bitmap.
            pending << range
            encoded << encode_pending_ranges(pending, output_charset)
            pending = []
            next
          end

          # Does not fit: close the current group and start over with `range`.
          encoded << encode_pending_ranges(pending, output_charset)
          pending = []
        end

        # A negative gap cannot be written as a number, so it is skipped.
        encoded << "#{SPACES_PREFIX}#{encode_decimal_number(gap, output_charset)}" if gap >= 0

        if range.size >= window_size
          encoded << "#{RANGE_PREFIX}#{encode_decimal_number(range.size, output_charset)}"
        else
          pending = [range]
          window_start = range.begin
        end
      end

      encoded << encode_pending_ranges(pending, output_charset) unless pending.empty?
      encoded
    end

    # Decodes a compressed string back into the sorted array of integers.
    #
    #   "_F~C_P.V"
    #   => [5, 6, 21, 23, 25]
    #
    # encoded_caches - String produced by #encode; nil is treated as "nothing
    #                  encoded".
    # output_charset - charset the string was encoded with.
    #
    # Returns an Array of Integers. Returns [] when the input contains a
    # character that is neither a prefix nor part of the charset.
    def decode(encoded_caches, output_charset = ENCODED_NUMBER_CHARS)
      return [] if encoded_caches.nil?

      prefixes = [SPACES_PREFIX, BINARY_PREFIX, RANGE_PREFIX]
      ids = []
      start_id = 0
      current_prefix = nil
      encoded_number = ''

      encoded_caches.each_char do |c|
        if prefixes.include?(c)
          if current_prefix
            token_ids, end_id = convert_encoded_number_to_ids(current_prefix, encoded_number, start_id, output_charset)
            ids.concat(token_ids)
            # A following "_" token counts its gap from the last id consumed,
            # any other token starts on the id right after it.
            start_id = end_id + (c == SPACES_PREFIX ? 0 : 1)
          end
          current_prefix = c
          encoded_number = ''
        else
          # Characters seen before the first prefix are collected here and
          # discarded when that prefix arrives.
          encoded_number += c
        end
      end

      if current_prefix
        token_ids, = convert_encoded_number_to_ids(current_prefix, encoded_number, start_id, output_charset)
        ids.concat(token_ids)
      end

      ids
    rescue InvalidEncodedCharException
      # corrupted encoded_caches, assume nothing cached
      []
    end

    # Builds a sync string from a map of id => synced_at.
    #
    # Input: id_synced_at:
    #   {
    #     1 => synced_at_1_timestamp,
    #     2 => synced_at_2_timestamp,
    #     10 => synced_at_10_timestamp, ...
    #   }
    #
    # Expected output of sync_str:
    #   min_last_synced_at,\
    #   "encoded_0",diff_last_synced_at_0,\
    #   "encoded_1",diff_last_synced_at_1,\
    #   "encoded_2",diff_last_synced_at_2, ...
    #
    # Every field is compressed with LZString.compress_to_encoded_uri_component.
    # NOTE(review): LZString is not loaded by this file; it must be made
    # available by the caller's environment.
    def encode_sync_str(id_synced_at)
      min_synced_at = id_synced_at.values.min
      encoded_min_synced_at = LZString.compress_to_encoded_uri_component(min_synced_at.to_s)

      grouped_synced_at = id_synced_at.group_by { |_id, synced_at| synced_at }

      grouped_synced_at.inject([encoded_min_synced_at]) do |sync_str_arr, (synced_at, ids_group)|
        ids = ids_group.map do |id, _synced_at|
          raw_id = id.to_s
          # Keys that are plain integers stay integers; anything else is kept
          # as a string.
          raw_id.to_i.to_s == raw_id ? raw_id.to_i : raw_id
        end

        # String ids are concatenated with their dashes removed (decode_sync_str
        # cuts them back into 32-character pieces); integer ids are
        # comma-separated.
        joined_ids = ids.first.is_a?(String) ? ids.join('').delete('-') : ids.join(',')

        encoded_indices = LZString.compress_to_encoded_uri_component(joined_ids)
        diff_synced_at = synced_at - min_synced_at
        encoded_diff_synced_at = LZString.compress_to_encoded_uri_component(diff_synced_at.to_s)

        sync_str_arr << "#{encoded_indices},#{encoded_diff_synced_at}"
      end.join(',')
    end

    # Parses a sync string back into a map of id => last_synced_at.
    #
    # format of sync_str:
    #   min_last_synced_at,
    #   "encoded_0", diff_last_requested_at_0,
    #   "encoded_1", diff_last_requested_at_1,
    #   "encoded_2", diff_last_requested_at_2, ...
    #
    # base_timestamp is added to every decoded timestamp. When an id appears in
    # several groups the largest timestamp wins.
    #
    # Returns a Hash; any error while parsing yields an empty Hash.
    def decode_sync_str(sync_str, base_timestamp = 0)
      # Presumably meant to replace invalid byte sequences before splitting.
      sync_str = sync_str.encode('UTF-8', 'UTF-8', :invalid => :replace)

      encoded_min_last_synced_at, *encoded_ranges = sync_str.split(',')
      min_last_synced_at = LZString.decompress_from_encoded_uri_component(encoded_min_last_synced_at).to_i

      # After the leading minimum, fields come in (ids, diff) pairs.
      encoded_ranges.each_slice(2).each_with_object({}) do |(encoded_caches, encoded_diff_last_synced_at), synced_at_map|
        primary_keys_str = LZString.decompress_from_encoded_uri_component(encoded_caches)
        primary_keys = primary_keys_str.split(',')

        if primary_keys.first.to_i.to_s == primary_keys.first
          primary_keys.map!(&:to_i)
        else
          # Not integers: treat the payload as dash-less 32-character ids and
          # restore the 8-4-4-4-12 dashed layout.
          primary_keys = primary_keys_str.scan(/.{32}/).map do |uuid_str|
            [uuid_str[0, 8], uuid_str[8, 4], uuid_str[12, 4], uuid_str[16, 4], uuid_str[20, 12]].join('-')
          end
        end

        diff_last_synced_at = LZString.decompress_from_encoded_uri_component(encoded_diff_last_synced_at).to_i
        last_synced_at = min_last_synced_at + diff_last_synced_at + base_timestamp

        primary_keys.each do |key|
          synced_at_map[key] = [synced_at_map[key], last_synced_at].compact.max
        end
      end
    rescue StandardError
      # invalid sync_str, return empty map
      {}
    end


    private

    # Collapses a sorted list of numbers into ranges of consecutive values.
    #
    #   [1,2,3,6,7,8]
    #   => [1..3, 6..8]
    def convert_numbers_to_ranges(numbers)
      numbers.each_with_object([]) do |number, ranges|
        last = ranges.last

        if last && last.end + 1 == number
          ranges[-1] = (last.begin..number)
        else
          ranges << (number..number)
        end
      end
    end

    # Writes ascending, non-overlapping ranges as a bitmap string that starts
    # at the first range's first id.
    #
    #   [1..3, 6..8]
    #   => "11100111"
    def convert_ranges_to_binary_number(ranges)
      bits = ''.dup
      prev_end = nil

      ranges.each do |range|
        bits << '0' * (range.begin - prev_end - 1) if prev_end
        bits << '1' * (range.end - range.begin + 1)
        prev_end = range.end
      end

      bits
    end

    # Parses a string of binary digits.
    #
    #   "10101"
    #   => 21
    def convert_binary_number_to_decimal_number(binary_number)
      binary_number.to_i(2)
    end

    # Writes a non-negative integer in base output_charset.length using
    # output_charset as the digits.
    #
    #   5
    #   => "F"
    #
    # Returns nil when decimal_number is not a non-negative Integer.
    def encode_decimal_number(decimal_number, output_charset = ENCODED_NUMBER_CHARS)
      return nil unless decimal_number.is_a?(Integer) && decimal_number >= 0

      base = output_charset.length
      encoded_number = ''.dup
      quotient = decimal_number

      # Runs at least once so that 0 is written as the first charset character.
      loop do
        quotient, remainder = quotient.divmod(base)
        encoded_number.prepend(output_charset[remainder])
        break if quotient.zero?
      end

      encoded_number
    end
    alias_method :encode_integer, :encode_decimal_number

    # Writes an integer as binary digits; 0 becomes the empty string.
    #
    #   21
    #   => "10101"
    def convert_decimal_number_to_binary_number(decimal_number)
      decimal_number.zero? ? '' : decimal_number.to_s(2)
    end

    # Reads a number written with output_charset as digits (most significant
    # first). An empty string reads as 0.
    #
    #   "F"
    #   => 5
    #
    # Raises InvalidEncodedCharException when a character is not in the charset.
    def convert_encoded_number_to_decimal_number(encoded_number, output_charset = ENCODED_NUMBER_CHARS)
      base = output_charset.length

      encoded_number.each_char.inject(0) do |decimal_number, c|
        char_index = output_charset.index(c)

        # current char not found in chars, implies corrupted encoded_caches
        raise InvalidEncodedCharException if char_index.nil?

        decimal_number * base + char_index
      end
    end
    alias_method :decode_integer, :convert_encoded_number_to_decimal_number

    # Expands one token into the ids it contributes and the last id it covers.
    #
    #   encoded_string_prefix, encoded_number, start_id
    #   => [ids_to_include, end_id]
    #
    #   "_", "E", 1
    #   => [[], 4]
    #
    #   "~", "C", 5
    #   => [[5, 6], 6]
    #
    #   "_", "O", 7
    #   => [[], 20]
    #
    #   ".", "V", 21
    #   => [[21, 23, 25], 25]
    #
    # end_id is nil when the prefix is not one of the three known prefixes.
    def convert_encoded_number_to_ids(encoded_string_prefix, encoded_number, start_id, output_charset = ENCODED_NUMBER_CHARS)
      ids = []
      end_id = nil

      case encoded_string_prefix
      when SPACES_PREFIX
        decimal_number = convert_encoded_number_to_decimal_number(encoded_number, output_charset)
        end_id = start_id + decimal_number - 1
      when BINARY_PREFIX
        decimal_number = convert_encoded_number_to_decimal_number(encoded_number, output_charset)
        bits = convert_decimal_number_to_binary_number(decimal_number)
        bits.each_char.with_index do |bit, offset|
          ids << (start_id + offset) if bit == '1'
        end
        end_id = start_id + bits.length - 1
      when RANGE_PREFIX
        decimal_number = convert_encoded_number_to_decimal_number(encoded_number, output_charset)
        end_id = start_id + decimal_number - 1
        ids = (start_id..end_id).to_a
      end

      [ids, end_id]
    end

    # Writes a group of short ranges as one token: a "~" token when the group
    # is a single range, otherwise a "." bitmap token covering all of them.
    def encode_pending_ranges(ranges, output_charset)
      if ranges.length == 1
        "#{RANGE_PREFIX}#{encode_decimal_number(ranges.first.size, output_charset)}"
      else
        bits = convert_ranges_to_binary_number(ranges)
        decimal_number = convert_binary_number_to_decimal_number(bits)
        "#{BINARY_PREFIX}#{encode_decimal_number(decimal_number, output_charset)}"
      end
    end

  end

end