encoded_id 1.0.0.rc4 → 1.0.0.rc5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0844f07baeecd48a70bf0b2564dfb9f02998b002279b44bf75bd0172ccc265be'
4
- data.tar.gz: 0bcd81de8a4ea7e034097a92dff9ba82478d92f87f7820dda872a9f35b5294d3
3
+ metadata.gz: b93ebfafa557cd45d9158fa850a474cb3f56de3255b650aeed11bbc131b67ed2
4
+ data.tar.gz: a3a9feb5004a637c6190ea3d969e03de6066d3b94e7b9b54d8be1c639b8b17af
5
5
  SHA512:
6
- metadata.gz: 4dcf3c99891f8a3a195d188656d1c45c9471760dfbc715fe994fac4c4145a086d9573b663c5a4a79894c71c4e3a15bf68ff0d30b20e44b8fe661c68f1707c065
7
- data.tar.gz: 8edb3675d565ff7be88441fcdb2a55783415f170fcd8d9102c6bb0fff8cc6fa0d22f1af19e72dd5558dedb37ba2af23c29fd6171d12da2032a0e2ce187bedeca
6
+ metadata.gz: 3f06797187a84df0d6436868d82d9d096220e88e9087bad13a78cc766e09681ffdd1db73daa7f9af90288e988b0e4034881b1fc0f7c9129bc6b64a9b8e332589
7
+ data.tar.gz: 764f505dfb1209bb358784906cfc648bdfee6cf58d79fb5872105d1234ff2839d072b3e899176f4e765f69caccabc00d9d105c9bf484ce2ccf170fdc594c7f44
@@ -0,0 +1,9 @@
1
+ # Make sure RUBY_VERSION matches the Ruby version in .ruby-version or gemspec
2
+ ARG RUBY_VERSION=3.4.2
3
+ FROM ghcr.io/rails/devcontainer/images/ruby:$RUBY_VERSION
4
+
5
+ USER vscode
6
+
7
+ # Ensure binding is always 0.0.0.0
8
+ # Binds the server to all IP addresses of the container, so it can be accessed from outside the container.
9
+ ENV BINDING="0.0.0.0"
@@ -0,0 +1,8 @@
1
+ name: "encoded_id"
2
+
3
+ services:
4
+ encoded-id-dev-env:
5
+ container_name: encoded-id-dev-env
6
+ build:
7
+ context: ..
8
+ dockerfile: .devcontainer/Dockerfile
@@ -0,0 +1,8 @@
1
+ {
2
+ "name": "Encoded ID Gem Development",
3
+ "dockerComposeFile": "compose.yml",
4
+ "service": "encoded-id-dev-env",
5
+ "postCreateCommand": "bundle install",
6
+ "postStartCommand": "bundle exec rake test",
7
+ "remoteUser": "vscode"
8
+ }
data/.standard.yml ADDED
@@ -0,0 +1,2 @@
1
+ parallel: true # default: false
2
+ ruby_version: 3.3
data/CHANGELOG.md CHANGED
@@ -4,11 +4,18 @@
4
4
 
5
5
  ### Breaking changes
6
6
 
7
- - `ReversibleId` now no longer downcases the encodedid input string by default on decode, ie the `decode` option `downcase` is now `false`. In a future release the `downcase` option will be removed.
7
+ - `ReversibleId` no longer downcases the encoded ID input string by default on decode, i.e. the `decode` option `downcase` now defaults to `false`. In a future release the `downcase` option will be removed. Generation of the encoded ID is 1.5 times faster and uses less memory.
8
8
 
9
- ## [1.0.0.rc4] - unreleased
9
+ ## [1.0.0] - unreleased
10
+
11
+ ## [1.0.0.rc5] - 2025-04-09
12
+
13
+ - `encoded_id` now uses its own implementation of `hashids` which is more efficient and has a smaller memory footprint. This massively reduces the GC churn in high-throughput applications. This is an implementation based on the original `hashids` gem but with many optimisations and improvements. Functionally it is identical to the original `hashids` gem.
14
+
15
+ ## [1.0.0.rc4] - 2024-04-29
10
16
 
11
17
  - Add an optional `max_inputs_per_id` argument to `ReversibleId`, thanks to [@avcwisesa](https://github.com/avcwisesa)
18
+ - The option `split_with:` can also now be set to nil to disable splitting of the encoded ID string
12
19
 
13
20
  ## [1.0.0.rc3] - 2023-10-23
14
21
 
data/Gemfile CHANGED
@@ -5,15 +5,15 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in encoded_id.gemspec
6
6
  gemspec
7
7
 
8
- gem "rake", "~> 13.0"
8
+ gem "rake"
9
9
 
10
10
  gem "minitest"
11
11
 
12
- gem "standard", "~> 1.30"
12
+ gem "standard"
13
13
 
14
- gem "rbs"
15
-
16
- gem "steep"
14
+ # gem "rbs"
15
+ #
16
+ # gem "steep"
17
17
 
18
18
  gem "simplecov"
19
19
 
@@ -22,3 +22,15 @@ gem "benchmark-ips"
22
22
  gem "benchmark-memory"
23
23
 
24
24
  gem "fuzzbert"
25
+
26
+ gem "singed"
27
+
28
+ gem "memory_profiler"
29
+
30
+ gem "hashids" # For benchmarking against
31
+
32
+ gem "base64"
33
+
34
+ # gem "pf2", require: false, github: "osyoyu/pf2", branch: "main"
35
+
36
+ # gem "vernier", require: false
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2022 Stephen Ierodiaconou
3
+ Copyright (c) 2022-2024 Stephen Ierodiaconou
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -33,7 +33,7 @@ coder.decode("z2j7-Odmw") # (note the capital 'o' instead of zero)
33
33
 
34
34
  ## Features
35
35
 
36
- * 🔄 encoded IDs are reversible (uses with https://hashids.org))
36
+ * 🔄 encoded IDs are reversible (uses Hashids; the old site is here: https://github.com/hashids/hashids.github.io)
37
37
  * 👥 supports multiple IDs encoded in one encoded string (eg `7aq6-0zqw` decodes to `[78, 45]`)
38
38
  * 🔡 supports custom alphabets for the encoded string (at least 16 characters needed)
39
39
  - by default uses a variation of the Crockford reduced character set (https://www.crockford.com/base32.html)
@@ -51,9 +51,12 @@ I aim for 100% test coverage and have fuzz tested quite extensively. But please
51
51
 
52
52
  * support for encoding of hex strings (eg UUIDs), including multiple IDs encoded in one string
53
53
 
54
- ### Coming soon
54
+ ### Performance and benchmarking
55
+
56
+ This gem uses a custom HashId implementation that is significantly faster and more memory-efficient than the original `hashids` gem.
57
+
58
+ For detailed benchmarks and performance metrics, see the [Custom HashId Implementation](#custom-hashid-implementation) section at the end of this README.
55
59
 
56
- Performance improvements and benchmarking!
57
60
 
58
61
  ### Rails support `encoded_id-rails`
59
62
 
@@ -208,6 +211,8 @@ For readability, the encoded string can be split into groups of characters.
208
211
 
209
212
  `split_with`: specifies the separator to use between the groups. Default is `-`.
210
213
 
214
+ Set either to `nil` to disable splitting.
215
+
211
216
  ### `hex_digit_encoding_group_size`
212
217
 
213
218
  **Experimental**
@@ -333,5 +338,47 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/steveg
333
338
 
334
339
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
335
340
 
341
+ ## Custom HashId Implementation
342
+
343
+ Internally, `encoded_id` uses its own HashId implementation (`EncodedId::HashId`) instead of the original `hashids` gem. This custom implementation was created to improve both performance and memory usage.
344
+
345
+ Recent benchmarks show significant improvements:
346
+
347
+ ### Performance Comparison
348
+
349
+ ```
350
+ | Test | Hashids (i/s) | EncodedId::HashId (i/s) | Speedup |
351
+ | ------------------------- | ------------ | --------------------- | ------- |
352
+ | #encode - 1 ID | 131,000.979 | 197,586.231 | 1.51x |
353
+ | #decode - 1 ID | 65,791.334 | 92,425.571 | 1.40x |
354
+ | #encode - 10 IDs | 13,773.355 | 20,669.715 | 1.50x |
355
+ | #decode - 10 IDs | 6,911.872 | 9,990.078 | 1.45x |
356
+ | #encode w YJIT - 1 ID | 265,764.969 | 877,551.362 | 3.30x |
357
+ | #decode w YJIT - 1 ID | 130,154.837 | 348,000.817 | 2.67x |
358
+ | #encode w YJIT - 10 IDs | 27,966.457 | 100,461.237 | 3.59x |
359
+ | #decode w YJIT - 10 IDs | 14,187.346 | 43,974.011 | 3.10x |
360
+ | #encode w YJIT - 1000 IDs | 268.140 | 1,077.855 | 4.02x |
361
+ | #decode w YJIT - 1000 IDs | 136.217 | 464.579 | 3.41x |
362
+ ```
363
+
364
+ With YJIT enabled, the performance improvements are even more significant, with up to 4x faster operation for large inputs.
365
+
366
+ ### Memory Usage Comparison
367
+
368
+ ```
369
+ | Test | Implementation | Allocated Memory | Allocated Objects | Memory Reduction |
370
+ | ------------------- | ---------------- | ---------------- | ----------------- | ---------------- |
371
+ | encode small input | Hashids | 7.28 KB | 120 | - |
372
+ | | EncodedId::HashId | 920 B | 6 | 87.66% |
373
+ | encode large input | Hashids | 403.36 KB | 5998 | - |
374
+ | | EncodedId::HashId | 8.36 KB | 104 | 97.93% |
375
+ | decode large input | Hashids | 366.88 KB | 5761 | - |
376
+ | | EncodedId::HashId | 14.63 KB | 264 | 96.01% |
377
+ ```
378
+
379
+ The memory usage improvements are dramatic, with up to 98% reduction in memory allocation for large inputs.
380
+
381
+ Run `bin/are_we_fast_yet` and `bin/memory_profile` in your environment to see the current performance difference.
382
+
336
383
  ## keywords
337
384
  hash ID, friendly ID, obfuscate ID, rails, ActiveRecord, model, slug, vanity URL, friendly URL
data/Rakefile CHANGED
@@ -9,6 +9,12 @@ Rake::TestTask.new(:test) do |t|
9
9
  t.test_files = FileList["test/**/test_*.rb"]
10
10
  end
11
11
 
12
- require "standard/rake"
13
-
14
12
  task default: %i[test standard]
13
+
14
+ task :compile_ext do # Rebuilds the C extension in ext/encoded_id: clean, regenerate the Makefile, then make.
15
+ puts "Compiling extension"
16
+ `cd ext/encoded_id && make clean` # backticks: command output is captured and discarded, and the exit status is not checked
17
+ `cd ext/encoded_id && ruby extconf.rb` # regenerates the Makefile via extconf.rb
18
+ `cd ext/encoded_id && make`
19
+ puts "Done"
20
+ end
@@ -0,0 +1,3 @@
1
+ require "mkmf" # Ruby's standard Makefile generator for C extensions
2
+
3
+ create_makefile "encoded_id/extension" # generates the Makefile for the extension target "encoded_id/extension"
@@ -0,0 +1,123 @@
1
+ #include "ruby/ruby.h"
2
+ #include "hashids.h"
3
+
4
+ void wrapped_hashids_free(void* data) /* dfree callback registered in wrapped_hashids_type: releases the wrapped hashids_t. */
5
+ {
6
+ hashids_free(data); /* forwards the wrapped pointer to hashids_free() */
7
+ }
8
+
9
+ size_t wrapped_hashids_size(const void* data) /* dsize callback registered in wrapped_hashids_type: reports the size of the wrapped struct. */
10
+ {
11
+ return sizeof(hashids_t); /* `data` is ignored; only the struct itself is counted, not anything it points to */
12
+ }
13
+
14
+ static const rb_data_type_t wrapped_hashids_type = { /* TypedData descriptor used to wrap and unwrap the hashids_t behind each Ruby object */
15
+ .wrap_struct_name = "hashids_t",
16
+ .function = {
17
+ .dmark = NULL, /* no mark callback is registered */
18
+ .dfree = wrapped_hashids_free, /* releases the wrapped hashids_t */
19
+ .dsize = wrapped_hashids_size, /* reports sizeof(hashids_t) */
20
+ },
21
+ .data = NULL,
22
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY, /* NOTE(review): per the Ruby C API docs this allows dfree to run immediately during GC instead of being deferred - confirm hashids_free is safe in that context */
23
+ };
24
+
25
+ VALUE hashids_alloc(VALUE self) /* Allocation function registered for HashIdC in Init_extension: wraps a newly initialised hashids_t in a Ruby object. */
26
+ {
27
+ hashids_t *data = hashids_init("salt!"); /* NOTE(review): the salt is hard-coded and the result is wrapped without a NULL check - confirm hashids_init cannot fail here */
28
+ return TypedData_Wrap_Struct(self, &wrapped_hashids_type, data); /* `self` is passed as the class argument of TypedData_Wrap_Struct */
29
+ }
30
+
31
+ //VALUE rb_hashids_m_initialize(VALUE self, VALUE val)
32
+ //{
33
+ // return self;
34
+ //}
35
+
36
+
37
+ static VALUE rb_hash_id_c_encode(VALUE self, VALUE ids) { /* HashIdC#encode: takes a Ruby Array of Fixnums and returns the String produced by hashids_encode. Check_Type raises TypeError for a non-Array argument or a non-Fixnum element. */
38
+ Check_Type(ids, T_ARRAY);
39
+
40
+ long length = RARRAY_LEN(ids);
41
+
42
+ unsigned long long* inputs = ALLOC_N(unsigned long long, length); /* C copy of the ids, released with ruby_xfree below */
43
+
44
+ for (long i = 0; i < length; i++) {
45
+ VALUE rb_element = rb_ary_entry(ids, i);
46
+ Check_Type(rb_element, T_FIXNUM); /* NOTE(review): if this raises, the ruby_xfree(inputs) below is skipped and the buffer leaks - consider validating before allocating */
47
+ inputs[i] = NUM2ULL(rb_element);
48
+ }
49
+
50
+ hashids_t* hashids;
51
+
52
+ TypedData_Get_Struct(self, hashids_t, &wrapped_hashids_type, hashids);
53
+
54
+ size_t bytes_encoded;
55
+
56
+ size_t bytes_needed;
57
+ bytes_needed = hashids_estimate_encoded_size(hashids, (size_t)length, inputs); /* size the buffer from the real element count and the array itself; sizeof(&inputs) / sizeof(unsigned long long) was always 1 and &inputs was a pointer-to-pointer, so the buffer was too small for hashids_encode */
58
+ char *hash = ALLOC_N(char, bytes_needed);
59
+
60
+ // unsigned long long numbers[] = {1ull, 2ull, 3ull, 4ull, 5ull};
61
+
62
+ // printf("length: %ld\n", length);
63
+ // printf("inputs[0]: %llu\n", inputs[0]);
64
+ // printf("inputs[1]: %llu\n", inputs[1]);
65
+ // printf("inputs[2]: %llu\n", inputs[2]);
66
+ // printf("inputs[3]: %llu\n", inputs[3]);
67
+ // printf("inputs[4]: %llu\n", inputs[4]);
68
+ //
69
+ // printf("hashids: %p\n", hashids);
70
+ // printf("hashids->alphabet: %s\n", hashids->alphabet);
71
+ // printf("hashids->salt: %s\n", hashids->salt);
72
+ // printf("hashids->min_hash_length: %lu\n", hashids->min_hash_length);
73
+ // printf("numbers: %p\n", numbers);
74
+ // printf("numbers[0]: %llu\n", numbers[0]);
75
+ // printf("numbers[1]: %llu\n", numbers[1]);
76
+ // printf("numbers[2]: %llu\n", numbers[2]);
77
+ // printf("numbers[3]: %llu\n", numbers[3]);
78
+ // printf("numbers[4]: %llu\n", numbers[4]);
79
+ //
80
+ // printf("sizeof(*inputs) / sizeof(unsigned long long): %lu\n", sizeof(*inputs) / sizeof(unsigned long long));
81
+ // printf("sizeof(numbers) / sizeof(unsigned long long): %lu\n", sizeof(numbers) / sizeof(unsigned long long));
82
+ // bytes_encoded = hashids_encode(hashids, hash, sizeof(numbers) / sizeof(unsigned long long), numbers);
83
+ bytes_encoded = hashids_encode(hashids, hash, length, inputs);
84
+
85
+ ruby_xfree(inputs);
86
+ VALUE return_value = rb_str_new2(hash); /* copies up to the first NUL; presumably hashids_encode NUL-terminates the buffer - TODO confirm, bytes_encoded is otherwise unused */
87
+ ruby_xfree(hash); /* safe to release: the Ruby String holds its own copy */
88
+ return return_value;
89
+ }
90
+
91
+ static VALUE rb_hash_id_c_decode(VALUE self, VALUE str) { /* HashIdC#decode: takes an encoded String and returns a Ruby Array of the numbers accepted by hashids_decode_safe. Check_Type raises TypeError for a non-String argument. */
92
+ Check_Type(str, T_STRING);
93
+
94
+ hashids_t* hashids;
95
+
96
+ TypedData_Get_Struct(self, hashids_t, &wrapped_hashids_type, hashids);
97
+
98
+ size_t numbers_count = hashids_numbers_count(hashids, StringValueCStr(str)); /* StringValueCStr guarantees a NUL-terminated C string (and raises on embedded NULs), which RSTRING_PTR does not */
99
+
100
+ unsigned long long* numbers = ALLOC_N(unsigned long long, numbers_count); /* scratch buffer, released with ruby_xfree below */
101
+
102
+ numbers_count = hashids_decode_safe(hashids, StringValueCStr(str), numbers, numbers_count); /* keep the count actually returned by the safe decode, so a rejected hash yields no numbers instead of unverified ones */
103
+
104
+ VALUE rb_numbers = rb_ary_new_capa(numbers_count);
105
+
106
+ for (size_t i = 0; i < numbers_count; i++) {
107
+ rb_ary_push(rb_numbers, ULL2NUM(numbers[i]));
108
+ }
109
+
110
+ ruby_xfree(numbers);
111
+ return rb_numbers;
112
+ }
113
+
114
+ void Init_extension(void) { /* Extension entry point: defines EncodedId::HashIdC with its allocator and its encode/decode methods. */
115
+ VALUE EncodedId = rb_define_module("EncodedId");
116
+ VALUE HashIdC = rb_define_class_under(EncodedId, "HashIdC", rb_cObject);
117
+
118
+ rb_define_alloc_func(HashIdC, hashids_alloc); /* each new HashIdC wraps a hashids_t created by hashids_alloc */
119
+ // rb_define_method(HashIdC, "initialize", rb_hashids_m_initialize, 1);
120
+
121
+ rb_define_method(HashIdC, "encode", rb_hash_id_c_encode, 1); /* one argument: the ids array */
122
+ rb_define_method(HashIdC, "decode", rb_hash_id_c_decode, 1); /* one argument: the encoded string */
123
+ }