stream_sampler 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +17 -0
- data/Rakefile +28 -0
- data/lib/stream_sampler.rb +54 -0
- data/lib/stream_sampler/version.rb +3 -0
- data/lib/tasks/stream_sampler_tasks.rake +4 -0
- data/test/dummy/README.rdoc +28 -0
- data/test/dummy/Rakefile +6 -0
- data/test/dummy/app/assets/javascripts/application.js +13 -0
- data/test/dummy/app/assets/stylesheets/application.css +15 -0
- data/test/dummy/app/controllers/application_controller.rb +5 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/models/user.rb +2 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/bin/bundle +3 -0
- data/test/dummy/bin/rails +4 -0
- data/test/dummy/bin/rake +4 -0
- data/test/dummy/bin/setup +29 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +26 -0
- data/test/dummy/config/boot.rb +5 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +41 -0
- data/test/dummy/config/environments/production.rb +79 -0
- data/test/dummy/config/environments/test.rb +42 -0
- data/test/dummy/config/initializers/assets.rb +11 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/cookies_serializer.rb +3 -0
- data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
- data/test/dummy/config/initializers/inflections.rb +16 -0
- data/test/dummy/config/initializers/mime_types.rb +4 -0
- data/test/dummy/config/initializers/session_store.rb +3 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +23 -0
- data/test/dummy/config/routes.rb +56 -0
- data/test/dummy/config/secrets.yml +22 -0
- data/test/dummy/db/development.sqlite3 +0 -0
- data/test/dummy/db/migrate/20150305003414_create_users.rb +9 -0
- data/test/dummy/db/schema.rb +22 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/development.log +25 -0
- data/test/dummy/log/test.log +3211 -0
- data/test/dummy/public/404.html +67 -0
- data/test/dummy/public/422.html +67 -0
- data/test/dummy/public/500.html +66 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/test/fixtures/users.yml +7 -0
- data/test/dummy/test/models/user_test.rb +7 -0
- data/test/stream_sampler_test.rb +43 -0
- data/test/stream_sampler_test.rb~ +6 -0
- data/test/test_helper.rb +18 -0
- metadata +186 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fa3f2e1ac252b5b185e0de3eb2c90c6bdc8ee329
|
4
|
+
data.tar.gz: 42754cb69ff7f64f94db0baee3ff6107f0183df3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2917f114780d53b800297c67f5623afb2366f050d5634abde5136246d240f804c44e96f014c11e9557e90465f97851200c746d20fc44a0c043cb350b681368f9
|
7
|
+
data.tar.gz: 4141109d8eb7ded587e58ec642493ec850ec2b12637e59a080e7ff5b1809bcd5851c7ce27375b3c15c9b7d544e376e91560103fb447731fa2293cd5d137f16d6
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2015 Tim Robertson
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# StreamSampler - a gem to add stream sampling to Ruby classes.
|
2
|
+
|
3
|
+
This gem adds stream sampling (aka reservoir sampling) to Ruby. To use,
|
4
|
+
add the gem to your Gemfile (or require it explicitly), and call the
|
5
|
+
`StreamSampler.reservoir_sample` method:
|
6
|
+
|
7
|
+
require 'stream_sampler`
|
8
|
+
# takes a 10 item sample from a stream of items:
|
9
|
+
items = (1..1000).to_a
|
10
|
+
StreamSampler.reservoir_sample(items, 10)
|
11
|
+
|
12
|
+
As a special case, if ActiveRecord and ActiveSupport are defined, the stream
|
13
|
+
sampling methods will be added as class methods on `ActiveRecord::Base`, so you
|
14
|
+
can do things like this:
|
15
|
+
|
16
|
+
require 'stream_sampler'
|
17
|
+
User.where(age: (18..65)).reservoir_sample(10)
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
begin
|
2
|
+
require 'bundler/setup'
|
3
|
+
rescue LoadError
|
4
|
+
puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
|
5
|
+
end
|
6
|
+
|
7
|
+
require 'rdoc/task'
|
8
|
+
|
9
|
+
RDoc::Task.new(:rdoc) do |rdoc|
|
10
|
+
rdoc.rdoc_dir = 'rdoc'
|
11
|
+
rdoc.title = 'StreamSampler'
|
12
|
+
rdoc.options << '--line-numbers'
|
13
|
+
rdoc.rdoc_files.include('README.rdoc')
|
14
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
15
|
+
end
|
16
|
+
|
17
|
+
Bundler::GemHelper.install_tasks
|
18
|
+
|
19
|
+
require 'rake/testtask'
|
20
|
+
|
21
|
+
Rake::TestTask.new(:test) do |t|
|
22
|
+
t.libs << 'lib'
|
23
|
+
t.libs << 'test'
|
24
|
+
t.pattern = 'test/**/*_test.rb'
|
25
|
+
t.verbose = false
|
26
|
+
end
|
27
|
+
|
28
|
+
task default: :test
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module StreamSampler
|
2
|
+
##
|
3
|
+
# Uses Vitter's reservoir sampling algorithm to take n items
|
4
|
+
# without replacement, from a iterable of items of unknown,
|
5
|
+
# arbitrary length.
|
6
|
+
#
|
7
|
+
# The algorithm is O(n) in the length of the iterable. The original
|
8
|
+
# paper is at:
|
9
|
+
#
|
10
|
+
# http://www.cs.umd.edu/~samir/498/vitter.pdf
|
11
|
+
#
|
12
|
+
def self.reservoir_sample(iterable, n, iteration_method = :each)
|
13
|
+
out = []
|
14
|
+
return out if n.to_i == 0
|
15
|
+
raise ArgumentError, "n must be >= 0" unless n > 0
|
16
|
+
|
17
|
+
iterable.send(iteration_method).with_index do |i, idx|
|
18
|
+
if idx < n
|
19
|
+
out << i
|
20
|
+
else
|
21
|
+
j = rand(idx)
|
22
|
+
out[j] = i if j < n
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
out
|
27
|
+
end
|
28
|
+
|
29
|
+
if defined?(ActiveSupport)
|
30
|
+
##
|
31
|
+
# A Concern to add stream sampling methods to ActiveRecord classes,
|
32
|
+
# so that you can call the StreamSampler methods in a cleaner way:
|
33
|
+
#
|
34
|
+
# YourModel.reservoir_sample(10)
|
35
|
+
#
|
36
|
+
# as opposed to:
|
37
|
+
#
|
38
|
+
# StreamSampler.reservoir_sample(YourModel, 10, :find_each)
|
39
|
+
#
|
40
|
+
module ActsAsSamplable
|
41
|
+
extend ActiveSupport::Concern
|
42
|
+
|
43
|
+
module ClassMethods
|
44
|
+
def reservoir_sample(n)
|
45
|
+
StreamSampler.reservoir_sample(self, n, :find_each)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
if defined?(ActiveRecord::Base) && defined?(StreamSampler::ActsAsSamplable)
|
53
|
+
ActiveRecord::Base.send(:include, StreamSampler::ActsAsSamplable)
|
54
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
== README
|
2
|
+
|
3
|
+
This README would normally document whatever steps are necessary to get the
|
4
|
+
application up and running.
|
5
|
+
|
6
|
+
Things you may want to cover:
|
7
|
+
|
8
|
+
* Ruby version
|
9
|
+
|
10
|
+
* System dependencies
|
11
|
+
|
12
|
+
* Configuration
|
13
|
+
|
14
|
+
* Database creation
|
15
|
+
|
16
|
+
* Database initialization
|
17
|
+
|
18
|
+
* How to run the test suite
|
19
|
+
|
20
|
+
* Services (job queues, cache servers, search engines, etc.)
|
21
|
+
|
22
|
+
* Deployment instructions
|
23
|
+
|
24
|
+
* ...
|
25
|
+
|
26
|
+
|
27
|
+
Please feel free to use a different markup language if you do not plan to run
|
28
|
+
<tt>rake doc:app</tt>.
|
data/test/dummy/Rakefile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
// This is a manifest file that'll be compiled into application.js, which will include all the files
|
2
|
+
// listed below.
|
3
|
+
//
|
4
|
+
// Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
|
5
|
+
// or any plugin's vendor/assets/javascripts directory can be referenced here using a relative path.
|
6
|
+
//
|
7
|
+
// It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
|
8
|
+
// compiled file.
|
9
|
+
//
|
10
|
+
// Read Sprockets README (https://github.com/sstephenson/sprockets#sprockets-directives) for details
|
11
|
+
// about supported directives.
|
12
|
+
//
|
13
|
+
//= require_tree .
|
@@ -0,0 +1,15 @@
|
|
1
|
+
/*
|
2
|
+
* This is a manifest file that'll be compiled into application.css, which will include all the files
|
3
|
+
* listed below.
|
4
|
+
*
|
5
|
+
* Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
|
6
|
+
* or any plugin's vendor/assets/stylesheets directory can be referenced here using a relative path.
|
7
|
+
*
|
8
|
+
* You're free to add application-wide styles to this file and they'll appear at the bottom of the
|
9
|
+
* compiled file so the styles you add here take precedence over styles defined in any styles
|
10
|
+
* defined in the other CSS/SCSS files in this directory. It is generally better to create a new
|
11
|
+
* file per style scope.
|
12
|
+
*
|
13
|
+
*= require_tree .
|
14
|
+
*= require_self
|
15
|
+
*/
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title>Dummy</title>
|
5
|
+
<%= stylesheet_link_tag 'application', media: 'all', 'data-turbolinks-track' => true %>
|
6
|
+
<%= javascript_include_tag 'application', 'data-turbolinks-track' => true %>
|
7
|
+
<%= csrf_meta_tags %>
|
8
|
+
</head>
|
9
|
+
<body>
|
10
|
+
|
11
|
+
<%= yield %>
|
12
|
+
|
13
|
+
</body>
|
14
|
+
</html>
|
data/test/dummy/bin/rake
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
# path to your application root.
|
5
|
+
APP_ROOT = Pathname.new File.expand_path('../../', __FILE__)
|
6
|
+
|
7
|
+
Dir.chdir APP_ROOT do
|
8
|
+
# This script is a starting point to setup your application.
|
9
|
+
# Add necessary setup steps to this file:
|
10
|
+
|
11
|
+
puts "== Installing dependencies =="
|
12
|
+
system "gem install bundler --conservative"
|
13
|
+
system "bundle check || bundle install"
|
14
|
+
|
15
|
+
# puts "\n== Copying sample files =="
|
16
|
+
# unless File.exist?("config/database.yml")
|
17
|
+
# system "cp config/database.yml.sample config/database.yml"
|
18
|
+
# end
|
19
|
+
|
20
|
+
puts "\n== Preparing database =="
|
21
|
+
system "bin/rake db:setup"
|
22
|
+
|
23
|
+
puts "\n== Removing old logs and tempfiles =="
|
24
|
+
system "rm -f log/*"
|
25
|
+
system "rm -rf tmp/cache"
|
26
|
+
|
27
|
+
puts "\n== Restarting application server =="
|
28
|
+
system "touch tmp/restart.txt"
|
29
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require File.expand_path('../boot', __FILE__)
|
2
|
+
|
3
|
+
require 'rails/all'
|
4
|
+
|
5
|
+
Bundler.require(*Rails.groups)
|
6
|
+
require "stream_sampler"
|
7
|
+
|
8
|
+
module Dummy
|
9
|
+
class Application < Rails::Application
|
10
|
+
# Settings in config/environments/* take precedence over those specified here.
|
11
|
+
# Application configuration should go into files in config/initializers
|
12
|
+
# -- all .rb files in that directory are automatically loaded.
|
13
|
+
|
14
|
+
# Set Time.zone default to the specified zone and make Active Record auto-convert to this zone.
|
15
|
+
# Run "rake -D time" for a list of tasks for finding time zone names. Default is UTC.
|
16
|
+
# config.time_zone = 'Central Time (US & Canada)'
|
17
|
+
|
18
|
+
# The default locale is :en and all translations from config/locales/*.rb,yml are auto loaded.
|
19
|
+
# config.i18n.load_path += Dir[Rails.root.join('my', 'locales', '*.{rb,yml}').to_s]
|
20
|
+
# config.i18n.default_locale = :de
|
21
|
+
|
22
|
+
# Do not swallow errors in after_commit/after_rollback callbacks.
|
23
|
+
config.active_record.raise_in_transactional_callbacks = true
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# SQLite version 3.x
|
2
|
+
# gem install sqlite3
|
3
|
+
#
|
4
|
+
# Ensure the SQLite 3 gem is defined in your Gemfile
|
5
|
+
# gem 'sqlite3'
|
6
|
+
#
|
7
|
+
default: &default
|
8
|
+
adapter: sqlite3
|
9
|
+
pool: 5
|
10
|
+
timeout: 5000
|
11
|
+
|
12
|
+
development:
|
13
|
+
<<: *default
|
14
|
+
database: db/development.sqlite3
|
15
|
+
|
16
|
+
# Warning: The database defined as "test" will be erased and
|
17
|
+
# re-generated from your development database when you run "rake".
|
18
|
+
# Do not set this db to the same as development or production.
|
19
|
+
test:
|
20
|
+
<<: *default
|
21
|
+
database: db/test.sqlite3
|
22
|
+
|
23
|
+
production:
|
24
|
+
<<: *default
|
25
|
+
database: db/production.sqlite3
|
@@ -0,0 +1,41 @@
|
|
1
|
+
Rails.application.configure do
|
2
|
+
# Settings specified here will take precedence over those in config/application.rb.
|
3
|
+
|
4
|
+
# In the development environment your application's code is reloaded on
|
5
|
+
# every request. This slows down response time but is perfect for development
|
6
|
+
# since you don't have to restart the web server when you make code changes.
|
7
|
+
config.cache_classes = false
|
8
|
+
|
9
|
+
# Do not eager load code on boot.
|
10
|
+
config.eager_load = false
|
11
|
+
|
12
|
+
# Show full error reports and disable caching.
|
13
|
+
config.consider_all_requests_local = true
|
14
|
+
config.action_controller.perform_caching = false
|
15
|
+
|
16
|
+
# Don't care if the mailer can't send.
|
17
|
+
config.action_mailer.raise_delivery_errors = false
|
18
|
+
|
19
|
+
# Print deprecation notices to the Rails logger.
|
20
|
+
config.active_support.deprecation = :log
|
21
|
+
|
22
|
+
# Raise an error on page load if there are pending migrations.
|
23
|
+
config.active_record.migration_error = :page_load
|
24
|
+
|
25
|
+
# Debug mode disables concatenation and preprocessing of assets.
|
26
|
+
# This option may cause significant delays in view rendering with a large
|
27
|
+
# number of complex assets.
|
28
|
+
config.assets.debug = true
|
29
|
+
|
30
|
+
# Asset digests allow you to set far-future HTTP expiration dates on all assets,
|
31
|
+
# yet still be able to expire them through the digest params.
|
32
|
+
config.assets.digest = true
|
33
|
+
|
34
|
+
# Adds additional error checking when serving assets at runtime.
|
35
|
+
# Checks for improperly declared sprockets dependencies.
|
36
|
+
# Raises helpful error messages.
|
37
|
+
config.assets.raise_runtime_errors = true
|
38
|
+
|
39
|
+
# Raises error for missing translations
|
40
|
+
# config.action_view.raise_on_missing_translations = true
|
41
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
Rails.application.configure do
|
2
|
+
# Settings specified here will take precedence over those in config/application.rb.
|
3
|
+
|
4
|
+
# Code is not reloaded between requests.
|
5
|
+
config.cache_classes = true
|
6
|
+
|
7
|
+
# Eager load code on boot. This eager loads most of Rails and
|
8
|
+
# your application in memory, allowing both threaded web servers
|
9
|
+
# and those relying on copy on write to perform better.
|
10
|
+
# Rake tasks automatically ignore this option for performance.
|
11
|
+
config.eager_load = true
|
12
|
+
|
13
|
+
# Full error reports are disabled and caching is turned on.
|
14
|
+
config.consider_all_requests_local = false
|
15
|
+
config.action_controller.perform_caching = true
|
16
|
+
|
17
|
+
# Enable Rack::Cache to put a simple HTTP cache in front of your application
|
18
|
+
# Add `rack-cache` to your Gemfile before enabling this.
|
19
|
+
# For large-scale production use, consider using a caching reverse proxy like
|
20
|
+
# NGINX, varnish or squid.
|
21
|
+
# config.action_dispatch.rack_cache = true
|
22
|
+
|
23
|
+
# Disable serving static files from the `/public` folder by default since
|
24
|
+
# Apache or NGINX already handles this.
|
25
|
+
config.serve_static_files = ENV['RAILS_SERVE_STATIC_FILES'].present?
|
26
|
+
|
27
|
+
# Compress JavaScripts and CSS.
|
28
|
+
config.assets.js_compressor = :uglifier
|
29
|
+
# config.assets.css_compressor = :sass
|
30
|
+
|
31
|
+
# Do not fallback to assets pipeline if a precompiled asset is missed.
|
32
|
+
config.assets.compile = false
|
33
|
+
|
34
|
+
# Asset digests allow you to set far-future HTTP expiration dates on all assets,
|
35
|
+
# yet still be able to expire them through the digest params.
|
36
|
+
config.assets.digest = true
|
37
|
+
|
38
|
+
# `config.assets.precompile` and `config.assets.version` have moved to config/initializers/assets.rb
|
39
|
+
|
40
|
+
# Specifies the header that your server uses for sending files.
|
41
|
+
# config.action_dispatch.x_sendfile_header = 'X-Sendfile' # for Apache
|
42
|
+
# config.action_dispatch.x_sendfile_header = 'X-Accel-Redirect' # for NGINX
|
43
|
+
|
44
|
+
# Force all access to the app over SSL, use Strict-Transport-Security, and use secure cookies.
|
45
|
+
# config.force_ssl = true
|
46
|
+
|
47
|
+
# Use the lowest log level to ensure availability of diagnostic information
|
48
|
+
# when problems arise.
|
49
|
+
config.log_level = :debug
|
50
|
+
|
51
|
+
# Prepend all log lines with the following tags.
|
52
|
+
# config.log_tags = [ :subdomain, :uuid ]
|
53
|
+
|
54
|
+
# Use a different logger for distributed setups.
|
55
|
+
# config.logger = ActiveSupport::TaggedLogging.new(SyslogLogger.new)
|
56
|
+
|
57
|
+
# Use a different cache store in production.
|
58
|
+
# config.cache_store = :mem_cache_store
|
59
|
+
|
60
|
+
# Enable serving of images, stylesheets, and JavaScripts from an asset server.
|
61
|
+
# config.action_controller.asset_host = 'http://assets.example.com'
|
62
|
+
|
63
|
+
# Ignore bad email addresses and do not raise email delivery errors.
|
64
|
+
# Set this to true and configure the email server for immediate delivery to raise delivery errors.
|
65
|
+
# config.action_mailer.raise_delivery_errors = false
|
66
|
+
|
67
|
+
# Enable locale fallbacks for I18n (makes lookups for any locale fall back to
|
68
|
+
# the I18n.default_locale when a translation cannot be found).
|
69
|
+
config.i18n.fallbacks = true
|
70
|
+
|
71
|
+
# Send deprecation notices to registered listeners.
|
72
|
+
config.active_support.deprecation = :notify
|
73
|
+
|
74
|
+
# Use default logging formatter so that PID and timestamp are not suppressed.
|
75
|
+
config.log_formatter = ::Logger::Formatter.new
|
76
|
+
|
77
|
+
# Do not dump schema after migrations.
|
78
|
+
config.active_record.dump_schema_after_migration = false
|
79
|
+
end
|