rubcask 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.standard.yml +3 -0
  3. data/Gemfile +20 -0
  4. data/Gemfile.lock +74 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +111 -0
  7. data/Rakefile +14 -0
  8. data/benchmark/benchmark_io.rb +49 -0
  9. data/benchmark/benchmark_server.rb +10 -0
  10. data/benchmark/benchmark_server_pipeline.rb +24 -0
  11. data/benchmark/benchmark_worker.rb +46 -0
  12. data/benchmark/op_times.rb +32 -0
  13. data/benchmark/profile.rb +15 -0
  14. data/benchmark/server_benchmark_helper.rb +138 -0
  15. data/example/server_runner.rb +15 -0
  16. data/lib/rubcask/bytes.rb +11 -0
  17. data/lib/rubcask/concurrency/fake_atomic_fixnum.rb +34 -0
  18. data/lib/rubcask/concurrency/fake_lock.rb +41 -0
  19. data/lib/rubcask/concurrency/fake_monitor_mixin.rb +21 -0
  20. data/lib/rubcask/config.rb +55 -0
  21. data/lib/rubcask/data_entry.rb +9 -0
  22. data/lib/rubcask/data_file.rb +91 -0
  23. data/lib/rubcask/directory.rb +437 -0
  24. data/lib/rubcask/expirable_entry.rb +9 -0
  25. data/lib/rubcask/hint_entry.rb +9 -0
  26. data/lib/rubcask/hint_file.rb +56 -0
  27. data/lib/rubcask/hinted_file.rb +148 -0
  28. data/lib/rubcask/keydir_entry.rb +9 -0
  29. data/lib/rubcask/merge_directory.rb +75 -0
  30. data/lib/rubcask/protocol.rb +74 -0
  31. data/lib/rubcask/server/abstract_server.rb +113 -0
  32. data/lib/rubcask/server/async.rb +78 -0
  33. data/lib/rubcask/server/client.rb +131 -0
  34. data/lib/rubcask/server/config.rb +31 -0
  35. data/lib/rubcask/server/pipeline.rb +49 -0
  36. data/lib/rubcask/server/runner/config.rb +43 -0
  37. data/lib/rubcask/server/runner.rb +107 -0
  38. data/lib/rubcask/server/threaded.rb +171 -0
  39. data/lib/rubcask/task/clean_directory.rb +19 -0
  40. data/lib/rubcask/tombstone.rb +40 -0
  41. data/lib/rubcask/version.rb +5 -0
  42. data/lib/rubcask/worker/direct_worker.rb +23 -0
  43. data/lib/rubcask/worker/factory.rb +42 -0
  44. data/lib/rubcask/worker/ractor_worker.rb +40 -0
  45. data/lib/rubcask/worker/thread_worker.rb +40 -0
  46. data/lib/rubcask.rb +19 -0
  47. metadata +102 -0
@@ -0,0 +1,148 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "forwardable"
4
+ require "tempfile"
5
+ require "fileutils"
6
+
7
+ module Rubcask
8
# HintedFile represents a DataFile together with its associated hint file.
# Reading and writing of the data itself is delegated to @data_file; this
# class additionally tracks whether the hint file on disk is still current.
class HintedFile
  extend Forwardable

  ID_REGEX = /(\d+)\.data$/
  HINT_EXTENSION_REGEX = /\.data$/

  def_delegators :@data_file, :seek, :[], :close, :flush, :each, :pos, :write_pos

  # @return [String] path of the file
  attr_reader :path

  # @return [Integer] id of the file
  attr_reader :id

  # @return [String] Path of the hint file associated with the data file
  attr_reader :hint_path

  # @param [String] file_path Path of the data file; it must end with "<digits>.data"
  # @param [Boolean] os_sync Should O_SYNC flag be used on the data file?
  #   The flag is only applied when ruby_sync is set as well.
  # @param [Boolean] read_only Should the data file be opened read-only?
  #   Ignored when the file does not exist yet.
  # @param [Boolean] ruby_sync Should ruby I/O buffers be bypassed?
  # @raise [ArgumentError] if file_path does not end with "<digits>.data"
  def initialize(file_path, os_sync: false, read_only: false, ruby_sync: false)
    id_match = ID_REGEX.match(file_path)
    unless id_match
      raise ArgumentError, "data file path must end with <id>.data: #{file_path.inspect}"
    end

    @id = id_match[1].to_i
    @hint_path = file_path.sub(HINT_EXTENSION_REGEX, ".hint")
    @path = file_path
    @read_only = read_only

    flags = (os_sync && ruby_sync) ? File::SYNC : 0
    if File.exist?(file_path)
      size = File.size(file_path)
      @dirty = false
      mode = read_only ? "rb" : "a+b"
    else
      # If file does not exist we ignore read_only as it does not make sense.
      # A brand new file starts dirty: there is no hint file to invalidate.
      size = 0
      @dirty = true
      mode = "a+b"
    end

    @data_file = DataFile.new(File.open(file_path, mode, flags: flags), size)
    @data_file.sync = true if ruby_sync
  end

  # Yields every key together with its KeydirEntry. The hint file is used
  # when it exists, otherwise the data file itself is scanned.
  # @yield [pair]
  # @yieldparam [Array(String, KeydirEntry)] pair the key and its keydir entry
  # @return [Enumerator] if no block given
  def each_keydir_entry(&block)
    return to_enum(__method__) unless block
    if has_hint_file?
      return each_hint_file_keydir_entry(&block)
    end
    each_data_file_keydir_entry(&block)
  end

  # Appends the entry to the end of the file.
  # The first append to a clean file removes the hint file, which would
  # otherwise no longer describe the data file.
  # @param [DataEntry] entry entry to append
  # @return [KeydirEntry]
  def append(entry)
    unless dirty?
      FileUtils.rm_f(hint_path)
      @dirty = true
    end
    write_entry = @data_file.append(entry)
    KeydirEntry.new(id, write_entry.value_size, write_entry.value_pos, entry.expire_timestamp)
  end

  # Creates a new hint file holding one hint per key (the last entry seen
  # for a key wins). The hints are written to a temporary file that is then
  # moved to hint_path; the temporary file is cleaned up even on failure.
  def save_hint_file
    tempfile = Tempfile.new("hint")
    begin
      # Hint records are binary data, so no newline translation may happen.
      tempfile.binmode

      hints = {}
      each_entry_with_span do |entry, start_pos, byte_size|
        hints[entry.key] = HintEntry.new(entry.expire_timestamp, entry.key, start_pos, byte_size)
      end

      hint_file = HintFile.new(tempfile)
      hints.each_value do |hint|
        hint_file.append(hint)
      end
      hint_file.close
      FileUtils.mv(tempfile.path, hint_path)
      @dirty = false
    ensure
      tempfile.close(true)
    end
  end

  # @return true if hint path exists
  def has_hint_file?
    File.exist?(hint_path)
  end

  # @return true if there were any appends to the data file
  def dirty?
    @dirty
  end

  private

  attr_reader :data_file

  # Walks the data file and yields each entry with the offset at which it
  # started and the number of bytes it occupied. Offsets are accumulated
  # from 0, i.e. iteration is presumed to start at the beginning of the file.
  def each_entry_with_span
    start_pos = 0
    @data_file.each do |entry|
      end_pos = @data_file.pos
      byte_size = end_pos - start_pos
      entry_start = start_pos
      start_pos = end_pos
      yield entry, entry_start, byte_size
    end
  end

  # Builds keydir entries by scanning the data file.
  def each_data_file_keydir_entry
    each_entry_with_span do |entry, start_pos, byte_size|
      yield [
        entry.key,
        KeydirEntry.new(id, byte_size, start_pos, entry.expire_timestamp)
      ]
    end
  end

  # Builds keydir entries from the records stored in the hint file.
  def each_hint_file_keydir_entry
    File.open(hint_path, "rb") do |file|
      HintFile.new(file).each do |entry|
        yield [
          entry.key,
          KeydirEntry.new(
            id, entry.value_size, entry.value_pos, entry.expire_timestamp
          )
        ]
      end
    end
  end
end
148
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "expirable_entry"
4
+
5
+ module Rubcask
6
# Record stored per key that points at an entry on disk.
# Members, in order: file_id (id of the data file), value_size, value_pos
# and expire_timestamp. ExpirableEntry is mixed in; presumably it supplies
# the expiry checks based on expire_timestamp (defined in expirable_entry.rb).
KeydirEntry = Struct.new(:file_id, :value_size, :value_pos, :expire_timestamp) { include ExpirableEntry }
9
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "hinted_file"
4
+
5
+ module Rubcask
6
# A temporary directory that is used during the merge operation.
# Entries are appended to an "active" data file; once that file reaches
# config.max_file_size its hint file is written and a new file is started.
# You probably should not use this class outside of this context.
# @see Rubcask::Directory
class MergeDirectory
  # @param [String] dir Directory in which the merged files are created
  # @param [#increment] max_id_ref Shared counter; every new data file is
  #   named after the value returned by its #increment
  # @param [Config] config Provides max_file_size and io_strategy
  def initialize(dir, max_id_ref:, config: Config.new)
    @dir = dir
    @config = config
    @max_id = max_id_ref

    @data_files = []

    create_new_file!
  end

  # Appends the entry to the active file and remembers a hint for it.
  # Starts a new file when the active one has reached the size limit.
  # @param [DataEntry] entry entry to append
  def append(entry)
    value_pos = active.write_pos
    active.append(entry)
    # The hint stores the length of the entry, not the offset at which it
    # ends, matching what HintedFile#save_hint_file writes.
    value_size = active.write_pos - value_pos
    @active_hints[entry.key] = HintEntry.new(entry.expire_timestamp, entry.key, value_pos, value_size)

    if active.write_pos >= config.max_file_size
      prepare_old_file!
      create_new_file!
    end
  end

  # Finishes the merge output: an untouched active file is removed,
  # otherwise it is closed and its hint file is written.
  def close
    if active.write_pos.zero?
      # Release the handle before removing the empty file.
      active.close
      File.delete(active.path)
    else
      prepare_old_file!
    end
  end

  private

  attr_reader :config

  # Closes the active data file and persists the hints collected for it.
  def prepare_old_file!
    active.close
    save_active_hint_file!
  end

  # Writes every collected hint to the active file's hint path.
  def save_active_hint_file!
    File.open(active.hint_path, "ab") do |io|
      hint_file = HintFile.new(io)
      @active_hints.each_value do |entry|
        hint_file.append(entry)
      end
    end
  end

  # @return [HintedFile] the file currently being appended to
  def active
    @data_files.last
  end

  # Resets the collected hints and opens "<next id>.data" as the new active file.
  def create_new_file!
    @active_hints = {}

    id = @max_id.increment
    file = HintedFile.new(
      File.join(@dir, "#{id}.data"),
      os_sync: false,
      read_only: false,
      ruby_sync: config.io_strategy != :ruby
    )
    @data_files << file
  end
end
75
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rubcask
4
# Rubcask protocol is inspired by Redis, but is even simpler implementation-wise.
# The format of a response is
# "#{message.bytesize}\r\n#{message}"
# eg: "2\r\nok"
# The format of a request is
# "#{body.bytesize}\r\n#{body}" where body is
# "#{method}\r\n#{first_argument.bytesize}\r\n#{first_argument}..."
# eg: "13\r\nget\r\n5\r\nlorem"
module Protocol
  # Success
  OK = "ok"

  # Represents no data
  NIL = "nil"

  # Error message
  ERROR = "error"

  PING = "ping"
  PONG = "pong"

  SEPARATOR = "\r\n"

  module_function

  # Returns the provided message prefixed with its length header.
  # The result is always a binary string: the message is appended as raw
  # bytes so that arguments in different encodings (e.g. a UTF-8 key and a
  # binary value) can be combined into one request without raising
  # Encoding::CompatibilityError.
  # @param [String] message Message to encode
  # @return [String]
  def encode_message(message)
    payload = message.b
    buffer = (+"").b
    buffer << payload.bytesize.to_s
    buffer << SEPARATOR
    buffer << payload
    buffer
  end

  # Builds a complete request: the method name followed by every argument
  # encoded with its own length header, all wrapped in an outer length header.
  # @param [String] method Name of the method
  # @param [Array<String>] args method arguments
  # @return [String]
  def create_call_message(method, *args)
    buffer = (+"").b
    buffer << method
    buffer << SEPARATOR
    args.each do |arg|
      buffer << encode_message(arg)
    end

    encode_message(buffer)
  end

  class << self
    private

    # @!macro [attach] generate_cached_message
    #   @method $1_message
    #   @note This method is autogenerated
    #   @return [String] Encoded "$1" message.
    def generate_cached_message(method)
      # Encode once at load time; the generated method returns the frozen copy.
      value = encode_message(const_get(method.upcase)).freeze
      define_method "#{method}_message" do
        value
      end
    end
  end

  generate_cached_message "ok"
  generate_cached_message "nil"
  generate_cached_message "error"
  generate_cached_message "ping"
  generate_cached_message "pong"
end
74
+ end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../bytes"
4
+ require_relative "../protocol"
5
+ require_relative "config"
6
+
7
+ module Rubcask
8
+ module Server
9
# Request handling shared by the Rubcask servers.
# Subclasses accept connections and pass each one to #client_loop; they are
# expected to set @dir (an object answering #[], #[]=, #set_with_ttl and
# #delete) and may set @logger to have command failures reported.
class AbstractServer
  BLOCK_SIZE = Rubcask::Bytes::KILOBYTE * 64
  MAX_READ_SIZE = BLOCK_SIZE * 128

  include Protocol

  attr_reader :dir

  private

  # Logger used to report failed commands (nil when the subclass set none).
  # Defined here because #execute_command! relies on it.
  attr_reader :logger

  # Serves one connection: reads length-prefixed requests until the peer
  # disconnects or sends a truncated request, answering each one in turn.
  # @param [#gets, #read, #write] conn the client connection
  def client_loop(conn)
    loop do
      length = conn.gets(Protocol::SEPARATOR)

      break unless length
      length = length.to_i

      command_body = read_command_body(conn, length)

      break unless command_body
      break if command_body.bytesize != length

      reader = StringIO.new(command_body)

      command = reader.gets(SEPARATOR)
      command&.chomp!(SEPARATOR)

      args = parse_args(reader)

      conn.write(execute_command!(command, args))
    end
  end

  # Runs a single command against @dir.
  # @param [String, nil] command name of the command
  # @param [Array<String>] args command arguments
  # @return [String] the encoded response; the error message is returned for
  #   unknown commands, a wrong number of arguments, a negative ttl, or when
  #   the command raises
  def execute_command!(command, args)
    case command
    when "ping"
      pong_message
    when "get"
      return error_message if args.size != 1

      val = @dir[args[0]]
      val ? encode_message(val) : nil_message
    when "set"
      return error_message if args.size != 2

      @dir[args[0]] = args[1]
      ok_message
    when "setex"
      return error_message if args.size != 3

      ttl = args[2].to_i
      return error_message if ttl.negative?

      @dir.set_with_ttl(args[0], args[1], ttl)
      ok_message
    when "del"
      return error_message if args.size != 1

      @dir.delete(args[0]) ? ok_message : nil_message
    else
      error_message
    end
  rescue => e
    # Reporting the failure must never raise itself, hence the nil-safe call.
    logger&.warn("Error #{e}")
    error_message
  end

  # Reads one length-prefixed word.
  # @return [String, nil] nil at the end of input; a zero length also ends
  #   the argument list, so empty words cannot be transmitted
  def parse_word(reader)
    length = reader.gets(SEPARATOR).to_i
    return nil if length.zero?
    reader.read(length)
  end

  # Reads exactly length bytes in chunks of at most MAX_READ_SIZE.
  # @return [String, nil] nil if the connection ended before all bytes arrived
  def read_command_body(conn, length)
    command_body = (+"").b
    size = 0

    while size < length
      val = conn.read([MAX_READ_SIZE, length - size].min)
      return nil if val.nil?
      size += val.bytesize
      command_body << val
    end

    command_body
  end

  # Collects words from the reader until #parse_word returns nil.
  # @return [Array<String>]
  def parse_args(reader)
    args = []

    while (word = parse_word(reader))
      args << word
    end

    args
  end
end
112
+ end
113
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "async/io"
4
+ require "async/io/trap"
5
+ require "async/io/stream"
6
+
7
+ require_relative "abstract_server"
8
+
9
+ module Rubcask
10
+ module Server
11
# Server speaking the Rubcask protocol on top of the "async-io" gem,
# which has to be available for this class to load.
class Async < AbstractServer
  # @param [Object] dir Store the commands are executed against
  # @param [Server::Config] config Supplies hostname, port and keepalive
  def initialize(dir, config: Server::Config.new)
    @dir = dir
    @config = config
    @hostname, @port = config.hostname, config.port
    @logger = Logger.new($stdout)
    @endpoint = ::Async::IO::Endpoint.tcp(@hostname, @port)
  end

  # Shuts down the server; does nothing if it was never started
  # @note You might want to use it inside signal trap
  def shutdown
    if @task
      Sync do
        @shutdown_condition.signal
        @task.wait
      end
    end
  end

  # Starts the server
  # @param [::Async::Condition, nil] on_start_condition The condition will be signalled after a successful bind
  def start(on_start_condition = nil)
    Async do
      @shutdown_condition = ::Async::Condition.new

      _, @task = @endpoint.bind do |server, task|
        server.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, true) if @config.keepalive

        define_close_routine(server, task)

        Console.logger.info(server) { "Accepting connections on #{server.local_address.inspect}" }

        server.listen(Socket::SOMAXCONN)
        on_start_condition&.signal

        server.accept_each do |conn|
          conn.binmode
          stream = ::Async::IO::Stream.new(conn)
          client_loop(stream)
        end
      end
    end
  end

  private

  # Spawns a child task that waits for the shutdown condition, then closes
  # the listening socket and stops the accepting task.
  def define_close_routine(server, task)
    task.async do |_subtask|
      @shutdown_condition.wait

      Console.logger.info(server) { "Shutting down connections on #{server.local_address.inspect}" }

      server.close

      task.stop
    end
  end

  # A single read is enough here: Async does the looping for us
  def read_command_body(conn, length)
    conn.read(length)
  end
end
77
+ end
78
+ end
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "socket"
4
+
5
+ require_relative "../protocol"
6
+ require_relative "pipeline"
7
+
8
+ module Rubcask
9
+ module Server
10
# Blocking TCP client for servers speaking the Rubcask protocol.
class Client
  # @!macro [new] raises_invalid_response
  #   @raise [InvalidResponseError] If the response is invalid

  class InvalidResponseError < Error; end

  include Protocol

  # yields a new client to the block
  # closes the client after the block is terminated
  # @param host [String] hostname of the server
  # @param port [Integer, String] port of the server
  # @yieldparam [Client] the running client
  def self.with_client(host, port)
    client = new(host, port)
    begin
      yield client
    ensure
      client.close
    end
  end

  # @param host [String] hostname of the server
  # @param port [Integer, String] port of the server
  def initialize(host, port)
    @socket = TCPSocket.new(host, port)
  end

  # Get value associated with the key
  # @param [String] key
  # @return [String] Binary string representing the value
  # @return [Protocol::NIL] If no data associated with the key
  # @macro raises_invalid_response
  def get(key)
    call_method("get", key)
  end

  # Set value associated with the key
  # @param [String] key
  # @param [String] value
  # @return [Protocol::OK] If set succeeded
  # @return [Protocol::ERROR] If failed to set the value
  # @macro raises_invalid_response
  def set(key, value)
    call_method("set", key, value)
  end

  # Remove value associated with the key
  # @param [String] key
  # @return [Protocol::OK] If delete succeeded
  # @return [Protocol::NIL] Otherwise
  # @macro raises_invalid_response
  def del(key)
    call_method("del", key)
  end

  # Ping the server
  # Use this method to check if server is running and responding
  # @return [Protocol::PONG]
  # @macro raises_invalid_response
  def ping
    call_method("ping")
  end

  # Set value associated with the key together with a time to live
  # @param [String] key
  # @param [String] value
  # @param [Integer, String] ttl Time to live; sent to the server as a string
  # @return [Protocol::OK] If set succeeded
  # @return [Protocol::ERROR] If failed to set the value
  # @macro raises_invalid_response
  def setex(key, value, ttl)
    call_method("setex", key, value, ttl.to_s)
  end

  # Run the block in the pipeline
  # @note pipeline execution IS NOT atomic
  # @note instance_eval is used so you can call methods directly instead of using block argument
  # @yield_param [Pipeline] pipeline
  # @return [Array<String>] List of responses to the executed methods
  # @macro raises_invalid_response
  def pipelined(&block)
    pipeline = Pipeline.new
    pipeline.instance_eval(&block)
    call(pipeline.out)
    pipeline.count.times.map { get_response }
  end

  # Close the client
  def close
    @socket.close
  end

  private

  # Sends a single command and waits for its response.
  def call_method(method, *args)
    call(create_call_message(method, *args))
    get_response
  end

  # Writes an already encoded message to the socket.
  def call(message)
    @socket.write(message)
  end

  # Reads one length-prefixed response from the socket.
  # @return [String] the response body
  # @raise [InvalidResponseError] if the connection ends before a complete
  #   response arrived or the length header is negative
  def get_response
    header = @socket.gets(Protocol::SEPARATOR)
    raise InvalidResponseError, "no response" if header.nil?

    length = header.to_i
    raise InvalidResponseError, "invalid response length" if length.negative?

    response = @socket.read(length)
    # IO#read(n) returns nil, not "", when the stream is at EOF and n > 0.
    if response.nil? || response.bytesize < length
      raise InvalidResponseError, "response too short"
    end
    response
  end
end
130
+ end
131
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rubcask
4
+ module Server
5
# Settings of a Rubcask server.
# @!attribute hostname
#   @return [String] Hostname of the server
# @!attribute port
#   @return [Integer] Port of the server
# @!attribute timeout
#   Timeout of the server
#
#   If the client does not send any messages for the provided number of seconds the connection with it is closed
#   @return [Integer]
# @!attribute keepalive
#   @return [boolean] Flag whether to set TCP's keepalive
Config = Struct.new(:hostname, :port, :timeout, :keepalive) do
  # Builds a config through the optional block and returns it frozen.
  def self.configure(&block)
    new(&block).freeze
  end

  # Fills in the defaults, then yields the config so they can be overridden.
  def initialize
    {hostname: "localhost", timeout: nil, keepalive: true, port: 8080}.each_pair do |member, default|
      self[member] = default
    end

    yield self if block_given?
  end
end
30
+ end
31
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rubcask
4
+ module Server
5
+ # @!macro [new] see_client
6
+ # @see Client#$0
7
+
8
# Pipeline represents a sequence of commands.
# Each command method appends the encoded request to #out and bumps #count,
# so the client can send everything at once and read #count responses.
# @note Pipeline execution IS NOT atomic.
# @see Client
class Pipeline
  include Protocol

  # @return [String] the encoded requests queued so far
  # @return [Integer] number of queued requests
  attr_reader :out, :count

  def initialize
    @out = (+"").b
    @count = 0
  end

  # @macro see_client
  def get(key)
    @out << create_call_message("get", key)
  end

  # @macro see_client
  def set(key, value)
    @out << create_call_message("set", key, value)
  end

  # Queues a set with a time to live, mirroring Client#setex
  # @macro see_client
  def setex(key, value, ttl)
    @out << create_call_message("setex", key, value, ttl.to_s)
  end

  # @macro see_client
  def del(key)
    @out << create_call_message("del", key)
  end

  # @macro see_client
  def ping
    @out << create_call_message("ping")
  end

  private

  # Counts the request before delegating the encoding to Protocol.
  def create_call_message(method, *args)
    @count += 1
    super
  end
end
48
+ end
49
+ end