random-rails 0.1.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -0
- data/CHANGELOG.md +74 -0
- data/Gemfile.lock +132 -0
- data/README.md +113 -15
- data/Rakefile +1 -1
- data/lib/random-rails/adapters/active_record/base.rb +130 -3
- data/lib/random-rails/configuration.rb +30 -0
- data/lib/random-rails/version.rb +1 -1
- data/lib/random-rails.rb +1 -0
- data/random-rails.gemspec +62 -0
- metadata +31 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 353a1c5d2a49d22d293032ea70253cb1bf711173c01fab5f6d528a0c3ed2661a
|
4
|
+
data.tar.gz: 864bb5abd310a796055a5d3a0f3648ad01a2115f8e1eedd9b8be09819b430f72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a939b36e74e9bb6904544089ad7f16ab3adb546755b0736ff56e4fdbd47f39213b8914b3c77f25316dd76f8df4b17baed3f7a8b822702aa96234c86621f9867
|
7
|
+
data.tar.gz: ddb9675cbca82dd3602272c991c194aef7034e564760cec346e0e3563bad0309d9f0d6e06165d5dcd0e5848063205429ef32ddb2189843dac475d9882cb4ccd8
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,77 @@
|
|
1
|
+
## [1.0.2] - 2025-10-06
|
2
|
+
|
3
|
+
### Maintenance Release - Code Quality & CI Improvements
|
4
|
+
|
5
|
+
#### Fixed
|
6
|
+
|
7
|
+
- **Floating-point precision bug**: Replaced direct float equality comparison (`precision == 1.0`) with epsilon-based comparison (`(precision - 1.0).abs < Float::EPSILON`) to avoid floating-point precision issues
|
8
|
+
- **Code formatting**: Improved code consistency and readability with proper parentheses around ternary operators and aligned variable assignments
|
9
|
+
|
10
|
+
#### Added
|
11
|
+
|
12
|
+
- **Enhanced test matrix**: Added Ruby 3.2.9, 3.3.9, and Rails 8.0.3 to CI test matrix for better compatibility testing
|
13
|
+
- **Improved .gitignore**: Added missing `*.gem` pattern to prevent accidental gem file commits
|
14
|
+
|
15
|
+
#### Changed
|
16
|
+
|
17
|
+
- **Code style improvements**:
|
18
|
+
- Added parentheses around ternary operator conditions for better readability
|
19
|
+
- Aligned variable assignments for consistent formatting
|
20
|
+
- Updated string quotations to use double quotes consistently in specs
|
21
|
+
- Removed unused `cache_key` variable
|
22
|
+
- Added missing blank lines for better code organization
|
23
|
+
|
24
|
+
#### Technical Improvements
|
25
|
+
|
26
|
+
- Better floating-point comparison to prevent precision-related bugs
|
27
|
+
- More robust code style following Ruby best practices
|
28
|
+
- Enhanced CI coverage with additional Ruby and Rails versions
|
29
|
+
|
30
|
+
## [1.0.1] - 2025-09-30
|
31
|
+
|
32
|
+
### Major Release - Multi-Database Support & Performance Optimizations
|
33
|
+
|
34
|
+
#### Added
|
35
|
+
|
36
|
+
- **Multi-database support**: PostgreSQL, MySQL, and SQLite
|
37
|
+
- **Intelligent strategy selection**: Automatically chooses the best sampling method based on database type and table size
|
38
|
+
- **Multiple sampling strategies**:
|
39
|
+
- `TABLESAMPLE BERNOULLI` for PostgreSQL (ultra-fast on large tables)
|
40
|
+
- Efficient offset-based sampling for all databases
|
41
|
+
- Traditional `ORDER BY RANDOM()` as fallback
|
42
|
+
- **Configuration system**: Global configuration for sampling strategies, thresholds, and caching
|
43
|
+
- **Performance optimizations**:
|
44
|
+
- Fast table size estimation using database-specific methods
|
45
|
+
- Configurable table size caching
|
46
|
+
- Optimized multiple record retrieval
|
47
|
+
- **Enhanced API**: Support for `count` parameter and strategy selection
|
48
|
+
- **Comprehensive test suite**: Full coverage for all database adapters and strategies
|
49
|
+
|
50
|
+
#### Changed
|
51
|
+
|
52
|
+
- **Breaking**: Changed API from `random(precision:)` to `random(count:, strategy:, precision:)`
|
53
|
+
- **Breaking**: Now requires explicit count parameter for multiple records
|
54
|
+
- Improved error handling for edge cases (empty tables, connection failures)
|
55
|
+
- Enhanced SQL generation for better performance
|
56
|
+
|
57
|
+
#### Performance Improvements
|
58
|
+
|
59
|
+
- Up to 32x faster than `ORDER BY RANDOM()` on large tables
|
60
|
+
- Consistent ~2-3ms performance regardless of table size
|
61
|
+
- Intelligent strategy selection reduces query time dramatically
|
62
|
+
|
63
|
+
#### Examples
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
# Before (slow on large tables)
|
67
|
+
User.order('RANDOM()').first # ~171s on 1M records
|
68
|
+
|
69
|
+
# After (consistently fast)
|
70
|
+
User.random # ~5ms on 1M records
|
71
|
+
User.random(count: 5) # Multiple records
|
72
|
+
User.random(strategy: :tablesample, precision: 2.0) # Custom strategy
|
73
|
+
```
|
74
|
+
|
1
75
|
## [0.1.0] - 2023-07-13
|
2
76
|
|
3
77
|
- Initial release
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
random-rails (1.0.2)
|
5
|
+
activerecord (>= 4.0, < 8.1)
|
6
|
+
activesupport (>= 6.1.5, < 8.1)
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: https://rubygems.org/
|
10
|
+
specs:
|
11
|
+
activemodel (7.0.6)
|
12
|
+
activesupport (= 7.0.6)
|
13
|
+
activerecord (7.0.6)
|
14
|
+
activemodel (= 7.0.6)
|
15
|
+
activesupport (= 7.0.6)
|
16
|
+
activesupport (7.0.6)
|
17
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
18
|
+
i18n (>= 1.6, < 2)
|
19
|
+
minitest (>= 5.1)
|
20
|
+
tzinfo (~> 2.0)
|
21
|
+
ast (2.4.2)
|
22
|
+
coderay (1.1.3)
|
23
|
+
concurrent-ruby (1.2.2)
|
24
|
+
diff-lcs (1.5.0)
|
25
|
+
docile (1.4.0)
|
26
|
+
faker (3.2.0)
|
27
|
+
i18n (>= 1.8.11, < 2)
|
28
|
+
i18n (1.14.1)
|
29
|
+
concurrent-ruby (~> 1.0)
|
30
|
+
json (2.6.3)
|
31
|
+
language_server-protocol (3.17.0.3)
|
32
|
+
lint_roller (1.1.0)
|
33
|
+
method_source (1.0.0)
|
34
|
+
mini_portile2 (2.8.9)
|
35
|
+
minitest (5.18.1)
|
36
|
+
mysql2 (0.5.5)
|
37
|
+
parallel (1.23.0)
|
38
|
+
parser (3.2.2.3)
|
39
|
+
ast (~> 2.4.1)
|
40
|
+
racc
|
41
|
+
pg (1.5.3)
|
42
|
+
pry (0.14.2)
|
43
|
+
coderay (~> 1.1)
|
44
|
+
method_source (~> 1.0)
|
45
|
+
racc (1.7.1)
|
46
|
+
rainbow (3.1.1)
|
47
|
+
rake (13.0.6)
|
48
|
+
regexp_parser (2.8.1)
|
49
|
+
rexml (3.2.5)
|
50
|
+
rspec (3.12.0)
|
51
|
+
rspec-core (~> 3.12.0)
|
52
|
+
rspec-expectations (~> 3.12.0)
|
53
|
+
rspec-mocks (~> 3.12.0)
|
54
|
+
rspec-core (3.12.2)
|
55
|
+
rspec-support (~> 3.12.0)
|
56
|
+
rspec-expectations (3.12.3)
|
57
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
58
|
+
rspec-support (~> 3.12.0)
|
59
|
+
rspec-mocks (3.12.6)
|
60
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
61
|
+
rspec-support (~> 3.12.0)
|
62
|
+
rspec-support (3.12.1)
|
63
|
+
rubocop (1.52.1)
|
64
|
+
json (~> 2.3)
|
65
|
+
parallel (~> 1.10)
|
66
|
+
parser (>= 3.2.2.3)
|
67
|
+
rainbow (>= 2.2.2, < 4.0)
|
68
|
+
regexp_parser (>= 1.8, < 3.0)
|
69
|
+
rexml (>= 3.2.5, < 4.0)
|
70
|
+
rubocop-ast (>= 1.28.0, < 2.0)
|
71
|
+
ruby-progressbar (~> 1.7)
|
72
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
73
|
+
rubocop-ast (1.29.0)
|
74
|
+
parser (>= 3.2.1.0)
|
75
|
+
rubocop-capybara (2.18.0)
|
76
|
+
rubocop (~> 1.41)
|
77
|
+
rubocop-factory_bot (2.23.1)
|
78
|
+
rubocop (~> 1.33)
|
79
|
+
rubocop-performance (1.18.0)
|
80
|
+
rubocop (>= 1.7.0, < 2.0)
|
81
|
+
rubocop-ast (>= 0.4.0)
|
82
|
+
rubocop-rspec (2.22.0)
|
83
|
+
rubocop (~> 1.33)
|
84
|
+
rubocop-capybara (~> 2.17)
|
85
|
+
rubocop-factory_bot (~> 2.22)
|
86
|
+
ruby-progressbar (1.13.0)
|
87
|
+
simplecov (0.22.0)
|
88
|
+
docile (~> 1.1)
|
89
|
+
simplecov-html (~> 0.11)
|
90
|
+
simplecov_json_formatter (~> 0.1)
|
91
|
+
simplecov-html (0.12.3)
|
92
|
+
simplecov_json_formatter (0.1.4)
|
93
|
+
sqlite3 (1.6.3)
|
94
|
+
mini_portile2 (~> 2.8.0)
|
95
|
+
standard (1.30.1)
|
96
|
+
language_server-protocol (~> 3.17.0.2)
|
97
|
+
lint_roller (~> 1.0)
|
98
|
+
rubocop (~> 1.52.0)
|
99
|
+
standard-custom (~> 1.0.0)
|
100
|
+
standard-performance (~> 1.1.0)
|
101
|
+
standard-custom (1.0.2)
|
102
|
+
lint_roller (~> 1.0)
|
103
|
+
rubocop (~> 1.50)
|
104
|
+
standard-performance (1.1.1)
|
105
|
+
lint_roller (~> 1.1)
|
106
|
+
rubocop-performance (~> 1.18.0)
|
107
|
+
tzinfo (2.0.6)
|
108
|
+
concurrent-ruby (~> 1.0)
|
109
|
+
unicode-display_width (2.4.2)
|
110
|
+
|
111
|
+
PLATFORMS
|
112
|
+
arm64-darwin-21
|
113
|
+
arm64-darwin-23
|
114
|
+
x86_64-linux
|
115
|
+
|
116
|
+
DEPENDENCIES
|
117
|
+
faker (~> 3.2)
|
118
|
+
mysql2 (~> 0.5.5)
|
119
|
+
pg (~> 1.5)
|
120
|
+
pry (~> 0.14.2)
|
121
|
+
rake (~> 13.0)
|
122
|
+
random-rails!
|
123
|
+
rspec (~> 3.0)
|
124
|
+
rubocop (~> 1.52)
|
125
|
+
rubocop-performance (~> 1.18)
|
126
|
+
rubocop-rspec (~> 2.22)
|
127
|
+
simplecov (~> 0.22.0)
|
128
|
+
sqlite3 (~> 1.6)
|
129
|
+
standard (~> 1.3)
|
130
|
+
|
131
|
+
BUNDLED WITH
|
132
|
+
2.4.13
|
data/README.md
CHANGED
@@ -1,44 +1,138 @@
|
|
1
|
-
|
1
|
+
[](https://github.com/the-rubies-way/random-rails/actions/workflows/linter.yml)
|
2
|
+
[](https://github.com/the-rubies-way/random-rails/actions/workflows/test.yml)
|
3
|
+
[](https://opensource-heroes.com/r/the-rubies-way/random-rails)
|
2
4
|
|
3
|
-
|
5
|
+
# RandomRails
|
6
|
+
|
7
|
+
🚀 The most performant way to get random records from ActiveRecord. Supports **PostgreSQL**, **MySQL**, and **SQLite** with intelligent strategy selection to replace slow `ORDER BY RANDOM()` queries.
|
8
|
+
|
9
|
+
## Why RandomRails?
|
10
|
+
|
11
|
+
Traditional `ORDER BY RANDOM()` queries become extremely slow on large tables because they require sorting the entire dataset. RandomRails solves this by using:
|
12
|
+
|
13
|
+
- **TABLESAMPLE BERNOULLI** for PostgreSQL (ultra-fast on large tables)
|
14
|
+
- **Efficient offset-based sampling** for all databases
|
15
|
+
- **Intelligent strategy selection** based on table size and database type
|
16
|
+
- **Configurable sampling methods** for different use cases
|
17
|
+
|
18
|
+
## Performance Comparison
|
19
|
+
|
20
|
+
Real-world benchmark results comparing RandomRails with traditional methods (10 iterations each):
|
21
|
+
|
22
|
+
| Sample Size | `ORDER BY RANDOM()` | `User.random()` | `User.sample()` | Performance Gain |
|
23
|
+
| --------------- | --------------------- | ----------------- | ----------------- | ----------------------- |
|
24
|
+
| 1,000 users | 3.8359s | **0.2157s** | 347.1409s | **17.79x faster** |
|
25
|
+
| 10,000 users | 6.1273s | **2.7313s** | 369.7583s | **2.24x faster** |
|
26
|
+
| 100,000 users | 31.578s | **3.6968s** | 369.4334s | **8.54x faster** |
|
27
|
+
| 1,000,000 users | 171.497s | **5.3441s** | 373.6102s | **32.09x faster** |
|
28
|
+
|
29
|
+
**Key Takeaways:**
|
30
|
+
|
31
|
+
- RandomRails consistently outperforms `ORDER BY RANDOM()` by 2-32x
|
32
|
+
- Performance advantage increases dramatically with table size
|
33
|
+
- Traditional `User.sample()` method performs poorly at scale
|
4
34
|
|
5
35
|
## Installation
|
6
36
|
|
7
|
-
|
37
|
+
Add to your Gemfile:
|
8
38
|
|
9
|
-
```
|
10
|
-
|
39
|
+
```ruby
|
40
|
+
gem "random-rails"
|
11
41
|
```
|
12
42
|
|
13
|
-
|
43
|
+
Or install directly:
|
14
44
|
|
15
45
|
```bash
|
16
46
|
gem install random-rails
|
17
47
|
```
|
18
48
|
|
19
|
-
##
|
49
|
+
## Examples
|
50
|
+
|
51
|
+
### Basic Usage
|
20
52
|
|
21
|
-
|
53
|
+
Get a single random record:
|
22
54
|
|
23
55
|
```ruby
|
24
56
|
User.random
|
25
|
-
# =>
|
57
|
+
# => #<User id: 42, name: "John", ...>
|
58
|
+
```
|
59
|
+
|
60
|
+
Get multiple random records:
|
61
|
+
|
62
|
+
```ruby
|
63
|
+
User.random(count: 5)
|
64
|
+
# => [#<User id: 1, ...>, #<User id: 15, ...>, ...]
|
65
|
+
```
|
66
|
+
|
67
|
+
Chain with other ActiveRecord methods:
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
User.where(active: true).random(count: 3)
|
71
|
+
# => [#<User id: 8, active: true, ...>, ...]
|
26
72
|
```
|
27
73
|
|
28
|
-
|
74
|
+
### Advanced Usage
|
75
|
+
|
76
|
+
#### Sampling Strategies
|
77
|
+
|
78
|
+
RandomRails provides multiple sampling strategies:
|
29
79
|
|
30
80
|
```ruby
|
31
|
-
|
32
|
-
|
81
|
+
# Auto-select best strategy (default)
|
82
|
+
User.random(strategy: :auto)
|
83
|
+
|
84
|
+
# Force TABLESAMPLE (PostgreSQL only)
|
85
|
+
User.random(strategy: :tablesample, precision: 1.0)
|
86
|
+
|
87
|
+
# Use efficient offset-based sampling
|
88
|
+
User.random(strategy: :offset)
|
89
|
+
|
90
|
+
# Fallback to ORDER BY RANDOM()
|
91
|
+
User.random(strategy: :order_by)
|
33
92
|
```
|
34
93
|
|
35
|
-
|
94
|
+
#### Configuration
|
95
|
+
|
96
|
+
Configure RandomRails globally:
|
36
97
|
|
37
98
|
```ruby
|
38
|
-
|
39
|
-
|
99
|
+
# config/initializers/random_rails.rb
|
100
|
+
RandomRails.configure do |config|
|
101
|
+
config.default_strategy = :auto # Default sampling strategy
|
102
|
+
config.tablesample_threshold = 10_000 # Use TABLESAMPLE for tables larger than this
|
103
|
+
config.cache_table_sizes = true # Cache table size estimates
|
104
|
+
config.precision = 1.0 # Default TABLESAMPLE precision
|
105
|
+
end
|
40
106
|
```
|
41
107
|
|
108
|
+
#### Database-Specific Features
|
109
|
+
|
110
|
+
##### PostgreSQL
|
111
|
+
|
112
|
+
- Uses `TABLESAMPLE BERNOULLI` for large tables (> 10k records by default)
|
113
|
+
- Falls back to offset method for smaller tables
|
114
|
+
- Fast table size estimation using `pg_class`
|
115
|
+
|
116
|
+
##### MySQL
|
117
|
+
|
118
|
+
- Uses efficient offset-based sampling
|
119
|
+
- Table size estimation via `information_schema`
|
120
|
+
- Fallback to `ORDER BY RAND()` when needed
|
121
|
+
|
122
|
+
##### SQLite
|
123
|
+
|
124
|
+
- Offset-based sampling for optimal performance
|
125
|
+
- Graceful handling of table size estimation
|
126
|
+
- Compatible with in-memory databases
|
127
|
+
|
128
|
+
## Benchmarks
|
129
|
+
|
130
|
+
RandomRails automatically selects the best strategy for your database and table size. The benchmarks above demonstrate real-world performance improvements across different table sizes, with RandomRails consistently delivering superior performance through intelligent strategy selection:
|
131
|
+
|
132
|
+
- **Small tables**: Uses efficient offset-based sampling
|
133
|
+
- **Large tables (PostgreSQL)**: Leverages `TABLESAMPLE BERNOULLI` for optimal performance
|
134
|
+
- **All databases**: Falls back to optimized methods when needed
|
135
|
+
|
42
136
|
## Development
|
43
137
|
|
44
138
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -56,3 +150,7 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
56
150
|
## Code of Conduct
|
57
151
|
|
58
152
|
Everyone interacting in the ActiveRecord::Random project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/the-rubies-way/random-rails/blob/master/CODE_OF_CONDUCT.md).
|
153
|
+
|
154
|
+
## Thanks for your support!
|
155
|
+
|
156
|
+
[`<img width="100" alt="RailsJazz" src="https://avatars.githubusercontent.com/u/104008706?s=200">`](https://github.com/railsjazz)
|
data/Rakefile
CHANGED
data/lib/random-rails/adapters/active_record/base.rb
CHANGED
@@ -2,9 +2,136 @@ module RandomRails
|
|
2
2
|
module Adapters
|
3
3
|
module ActiveRecord
|
4
4
|
module Base
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
# Main method to get random records efficiently
|
6
|
+
#
|
7
|
+
# @param count [Integer] Number of random records to return (default: 1)
|
8
|
+
# @param strategy [Symbol] Sampling strategy (:auto, :tablesample, :offset, :order_by)
|
9
|
+
# @param precision [Float] For TABLESAMPLE, percentage of table to sample (default: 1.0)
|
10
|
+
# @return [ActiveRecord::Base, Array<ActiveRecord::Base>, ActiveRecord::Relation] Single object for count=1, relation otherwise
|
11
|
+
def random(count: 1, strategy: :auto, precision: 1.0)
|
12
|
+
strategy = determine_strategy(strategy)
|
13
|
+
|
14
|
+
relation = case strategy
|
15
|
+
when :tablesample
|
16
|
+
tablesample_random(precision: precision, count: count)
|
17
|
+
when :offset
|
18
|
+
offset_random(count: count)
|
19
|
+
when :order_by
|
20
|
+
order_by_random(count: count)
|
21
|
+
else
|
22
|
+
# Fallback to offset method
|
23
|
+
offset_random(count: count)
|
24
|
+
end
|
25
|
+
|
26
|
+
# Return single object for count=1, relation for count>1
|
27
|
+
(count == 1) ? relation.take : relation
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
# Determine the best strategy based on database adapter and table size
|
33
|
+
#
|
34
|
+
# @param requested_strategy [Symbol] The strategy requested by the user
|
35
|
+
# @return [Symbol] The strategy to use
|
36
|
+
def determine_strategy(requested_strategy)
|
37
|
+
requested_strategy = RandomRails.configuration.default_strategy if requested_strategy == :auto
|
38
|
+
|
39
|
+
return requested_strategy unless requested_strategy == :auto
|
40
|
+
|
41
|
+
adapter_name = connection.adapter_name.downcase
|
42
|
+
|
43
|
+
case adapter_name
|
44
|
+
when "postgresql"
|
45
|
+
# PostgreSQL supports TABLESAMPLE for large tables, offset for smaller ones
|
46
|
+
estimated_count = estimate_table_size
|
47
|
+
|
48
|
+
(estimated_count > RandomRails.configuration.tablesample_threshold) ? :tablesample : :offset
|
49
|
+
when "mysql", "mysql2"
|
50
|
+
# MySQL doesn't have TABLESAMPLE, use offset method
|
51
|
+
:offset
|
52
|
+
when "sqlite"
|
53
|
+
# SQLite doesn't have TABLESAMPLE, use offset method
|
54
|
+
:offset
|
55
|
+
else
|
56
|
+
# Unknown adapter, use safest method
|
57
|
+
:order_by
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# TABLESAMPLE method (PostgreSQL only)
|
62
|
+
def tablesample_random(precision:, count:)
|
63
|
+
if connection.adapter_name.downcase == "postgresql"
|
64
|
+
# Use configured precision if not specified
|
65
|
+
# using Float::EPSILON to avoid floating point precision issues
|
66
|
+
precision = RandomRails.configuration.precision if (precision - 1.0).abs < Float::EPSILON
|
67
|
+
|
68
|
+
from("#{table_name} TABLESAMPLE BERNOULLI(#{precision})").limit(count)
|
69
|
+
else
|
70
|
+
# Fallback for non-PostgreSQL databases
|
71
|
+
offset_random(count: count)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# Efficient offset-based random sampling
|
76
|
+
def offset_random(count:)
|
77
|
+
total_count = estimate_table_size
|
78
|
+
|
79
|
+
return limit(count) if total_count == 0
|
80
|
+
|
81
|
+
# Generate random offset, ensuring we always have an offset clause
|
82
|
+
max_offset = [total_count - count, 0].max
|
83
|
+
random_offset = (max_offset > 0) ? rand(max_offset + 1) : 0
|
84
|
+
|
85
|
+
# Always apply offset, even if it's 0, to ensure consistent SQL structure
|
86
|
+
offset(random_offset).limit(count)
|
87
|
+
end
|
88
|
+
|
89
|
+
# Traditional ORDER BY RANDOM() method (fallback)
|
90
|
+
def order_by_random(count:)
|
91
|
+
case connection.adapter_name.downcase
|
92
|
+
when "postgresql"
|
93
|
+
order("RANDOM()").limit(count)
|
94
|
+
when "mysql", "mysql2"
|
95
|
+
order("RAND()").limit(count)
|
96
|
+
when "sqlite"
|
97
|
+
order("RANDOM()").limit(count)
|
98
|
+
else
|
99
|
+
order("RANDOM()").limit(count)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
# Estimate table size efficiently
|
104
|
+
def estimate_table_size
|
105
|
+
if RandomRails.configuration.cache_table_sizes && @estimated_count
|
106
|
+
return @estimated_count
|
107
|
+
end
|
108
|
+
|
109
|
+
estimated_count = begin
|
110
|
+
case connection.adapter_name.downcase
|
111
|
+
when "postgresql", "pg"
|
112
|
+
# Use pg_class for fast estimate
|
113
|
+
sql = "SELECT reltuples::INTEGER FROM pg_class WHERE relname = '#{table_name}'"
|
114
|
+
result = connection.execute(sql).first
|
115
|
+
|
116
|
+
result ? result["reltuples"].to_i : count
|
117
|
+
when "mysql", "mysql2"
|
118
|
+
# Use information_schema for fast estimate
|
119
|
+
sql = "SELECT table_rows FROM information_schema.tables WHERE table_name = '#{table_name}'"
|
120
|
+
result = connection.execute(sql).first
|
121
|
+
|
122
|
+
result ? result[0].to_i : count
|
123
|
+
else
|
124
|
+
# Fallback to actual count for SQLite and others
|
125
|
+
count
|
126
|
+
end
|
127
|
+
rescue
|
128
|
+
# If estimation fails, use actual count
|
129
|
+
count
|
130
|
+
end
|
131
|
+
|
132
|
+
@estimated_count = estimated_count if RandomRails.configuration.cache_table_sizes
|
133
|
+
|
134
|
+
estimated_count
|
8
135
|
end
|
9
136
|
end
|
10
137
|
end
|
data/lib/random-rails/configuration.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module RandomRails
|
4
|
+
class Configuration
|
5
|
+
attr_accessor :default_strategy, :tablesample_threshold, :cache_table_sizes, :precision
|
6
|
+
|
7
|
+
def initialize
|
8
|
+
@default_strategy = :auto
|
9
|
+
@tablesample_threshold = 10_000 # Use TABLESAMPLE for tables larger than this
|
10
|
+
@cache_table_sizes = true # Cache table size estimates
|
11
|
+
@precision = 1.0 # Default precision for TABLESAMPLE
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
class << self
|
16
|
+
attr_writer :configuration
|
17
|
+
|
18
|
+
def configuration
|
19
|
+
@configuration ||= Configuration.new
|
20
|
+
end
|
21
|
+
|
22
|
+
def configure
|
23
|
+
yield(configuration)
|
24
|
+
end
|
25
|
+
|
26
|
+
def reset_configuration!
|
27
|
+
@configuration = Configuration.new
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
data/lib/random-rails/version.rb
CHANGED
data/lib/random-rails.rb
CHANGED
data/random-rails.gemspec
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
$:.push File.expand_path("../lib", __FILE__)
|
4
|
+
require "random-rails/version"
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "random-rails"
|
8
|
+
spec.version = RandomRails::VERSION
|
9
|
+
spec.authors = ["loqimean"]
|
10
|
+
spec.email = ["vanuha277@gmail.com"]
|
11
|
+
|
12
|
+
spec.summary = "Awesome gem to get random records from database."
|
13
|
+
spec.description = "The easiest way to get random records from database with best performance that you ever seen."
|
14
|
+
spec.homepage = "https://github.com/the-rubies-way/random-rails"
|
15
|
+
spec.license = "MIT"
|
16
|
+
spec.required_ruby_version = ">= 2.6.0"
|
17
|
+
|
18
|
+
spec.metadata["source_code_uri"] = spec.homepage
|
19
|
+
spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/master/CHANGELOG.md"
|
20
|
+
|
21
|
+
# Specify which files should be added to the gem when it is released.
|
22
|
+
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
23
|
+
spec.files = Dir.chdir(__dir__) do
|
24
|
+
`git ls-files -z`.split("\x0").reject do |f|
|
25
|
+
(File.expand_path(f) == __FILE__) || f.start_with?("bin/", "test/", "spec/", "features/", ".git", ".circleci", "appveyor")
|
26
|
+
end
|
27
|
+
end
|
28
|
+
spec.bindir = "exe"
|
29
|
+
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
30
|
+
spec.require_paths = ["lib"]
|
31
|
+
|
32
|
+
# **********************************
|
33
|
+
# * Dependencies
|
34
|
+
spec.add_runtime_dependency "activerecord", ">= 4.0", "< 8.1"
|
35
|
+
spec.add_runtime_dependency "activesupport", ">= 6.1.5", "< 8.1"
|
36
|
+
|
37
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
38
|
+
spec.add_development_dependency "rubocop-rspec", "~> 2.22"
|
39
|
+
spec.add_development_dependency "standard", "~> 1.3"
|
40
|
+
spec.add_development_dependency "rubocop", "~> 1.52"
|
41
|
+
spec.add_development_dependency "rubocop-performance", "~> 1.18"
|
42
|
+
spec.add_development_dependency "simplecov", "~> 0.22.0"
|
43
|
+
spec.add_development_dependency "pry", "~> 0.14.2"
|
44
|
+
spec.add_development_dependency "sqlite3", "~> 1.6"
|
45
|
+
spec.add_development_dependency "pg", "~> 1.5"
|
46
|
+
spec.add_development_dependency "mysql2", "~> 0.5.5"
|
47
|
+
spec.add_development_dependency "faker", "~> 3.2"
|
48
|
+
|
49
|
+
spec.post_install_message = <<~MSG
|
50
|
+
|
51
|
+
===================================================================
|
52
|
+
Thanks for installing random-rails!
|
53
|
+
|
54
|
+
=> For usage examples and documentation, please visit:
|
55
|
+
https://github.com/the-rubies-way/random-rails#examples
|
56
|
+
|
57
|
+
=> If you find this gem useful, please consider starring the repository:
|
58
|
+
https://github.com/the-rubies-way/random-rails
|
59
|
+
===================================================================
|
60
|
+
|
61
|
+
MSG
|
62
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: random-rails
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- loqimean
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -17,6 +17,9 @@ dependencies:
|
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '4.0'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '8.1'
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -24,6 +27,9 @@ dependencies:
|
|
24
27
|
- - ">="
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '4.0'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '8.1'
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: activesupport
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -31,6 +37,9 @@ dependencies:
|
|
31
37
|
- - ">="
|
32
38
|
- !ruby/object:Gem::Version
|
33
39
|
version: 6.1.5
|
40
|
+
- - "<"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '8.1'
|
34
43
|
type: :runtime
|
35
44
|
prerelease: false
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -38,6 +47,9 @@ dependencies:
|
|
38
47
|
- - ">="
|
39
48
|
- !ruby/object:Gem::Version
|
40
49
|
version: 6.1.5
|
50
|
+
- - "<"
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '8.1'
|
41
53
|
- !ruby/object:Gem::Dependency
|
42
54
|
name: rspec
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
@@ -208,22 +220,35 @@ files:
|
|
208
220
|
- CHANGELOG.md
|
209
221
|
- CODE_OF_CONDUCT.md
|
210
222
|
- Gemfile
|
223
|
+
- Gemfile.lock
|
211
224
|
- LICENSE.txt
|
212
225
|
- README.md
|
213
226
|
- Rakefile
|
214
227
|
- lib/random-rails.rb
|
215
228
|
- lib/random-rails/active_record.rb
|
216
229
|
- lib/random-rails/adapters/active_record/base.rb
|
230
|
+
- lib/random-rails/configuration.rb
|
217
231
|
- lib/random-rails/version.rb
|
232
|
+
- random-rails.gemspec
|
218
233
|
- sig/random-rails/random.rbs
|
219
234
|
homepage: https://github.com/the-rubies-way/random-rails
|
220
235
|
licenses:
|
221
236
|
- MIT
|
222
237
|
metadata:
|
223
|
-
homepage_uri: https://github.com/the-rubies-way/random-rails
|
224
238
|
source_code_uri: https://github.com/the-rubies-way/random-rails
|
225
239
|
changelog_uri: https://github.com/the-rubies-way/random-rails/blob/master/CHANGELOG.md
|
226
|
-
post_install_message:
|
240
|
+
post_install_message: |2+
|
241
|
+
|
242
|
+
===================================================================
|
243
|
+
Thanks for installing random-rails!
|
244
|
+
|
245
|
+
=> For usage examples and documentation, please visit:
|
246
|
+
https://github.com/the-rubies-way/random-rails#examples
|
247
|
+
|
248
|
+
=> If you find this gem useful, please consider starring the repository:
|
249
|
+
https://github.com/the-rubies-way/random-rails
|
250
|
+
===================================================================
|
251
|
+
|
227
252
|
rdoc_options: []
|
228
253
|
require_paths:
|
229
254
|
- lib
|
@@ -238,8 +263,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
238
263
|
- !ruby/object:Gem::Version
|
239
264
|
version: '0'
|
240
265
|
requirements: []
|
241
|
-
rubygems_version: 3.
|
266
|
+
rubygems_version: 3.5.16
|
242
267
|
signing_key:
|
243
268
|
specification_version: 4
|
244
269
|
summary: Awesome gem to get random records from database.
|
245
270
|
test_files: []
|
271
|
+
...
|