carton_db 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +140 -0
- data/Rakefile +22 -0
- data/bin/console +15 -0
- data/bin/setup +9 -0
- data/carton_db.gemspec +27 -0
- data/lib/carton_db/datum.rb +147 -0
- data/lib/carton_db/escaping.rb +60 -0
- data/lib/carton_db/list_map_db/segment.rb +130 -0
- data/lib/carton_db/list_map_db/segment_group.rb +38 -0
- data/lib/carton_db/list_map_db.rb +312 -0
- data/lib/carton_db/version.rb +4 -0
- data/lib/carton_db.rb +8 -0
- data/tmp/.gitkeep +0 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: aecfa7225f91fad95b3a8eaa417a155248195aff
|
4
|
+
data.tar.gz: 7e64f92388321296900fa4774b7fec3e608fb388
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b0afb5be66793688c004e8eca157cee2e53ea2aacb8df21afa600fa93f616d9df25a99ecf905a183e7d1be0c461da3eaf60e60f92ec978793bca1add8f50042f
|
7
|
+
data.tar.gz: 2bceb896fbcf2a4ba811ce5548f90cfde7a451ca7417b58a2aa846681452f44b89564a5755c05af82f020d0b37e1612ef365ac5eff3fe00c480a5d2b4cda9e71
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.3.1
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 Steve Jorgensen
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
# CartonDb
|
2
|
+
|
3
|
+
A pure Ruby key/value data storage system where the values may
|
4
|
+
consist of simple data structures.
|
5
|
+
|
6
|
+
The primary goals of this library are simplicity of implementation
|
7
|
+
and reliable, predictable behavior when used as intended, along
|
8
|
+
with documentation making it reasonably clear what is intended.
|
9
|
+
|
10
|
+
## Uses
|
11
|
+
|
12
|
+
Uses for this gem seem pretty limited, but you might have a
|
13
|
+
purpose for it that the author has not thought about.
|
14
|
+
|
15
|
+
This is a formalization of a solution that was created to solve
|
16
|
+
a specific problem. The problem was adding a feature to a
|
17
|
+
Ruby program running on Heroku to collect data into a map of
|
18
|
+
sets of elements that would be too large to be effectively
|
19
|
+
handled in memory. The application didn't already have any use
|
20
|
+
for a relational database server, and I didn't want to add one
|
21
|
+
just for this requirement. A redis db with sufficient capacity
|
22
|
+
would have been expensive, and solutions such as SQLite are
|
23
|
+
specifically not supported by Heroku so people don't mistakenly
|
24
|
+
expect the data to be preserved. Ruby's `DBM` and `SDBM` proved
|
25
|
+
to be too unpredictable and flaky to be practical solutions.
|
26
|
+
|
27
|
+
Although this tool was initially developed to store transient
|
28
|
+
data for use within a single process invocation and then
|
29
|
+
discarded, it is also quite well suited for long term data
|
30
|
+
storage on a system that preserves filesystem data over time.
|
31
|
+
|
32
|
+
## Installation
|
33
|
+
|
34
|
+
Add this line to your application's Gemfile:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
gem 'carton_db'
|
38
|
+
```
|
39
|
+
|
40
|
+
And then execute:
|
41
|
+
|
42
|
+
$ bundle
|
43
|
+
|
44
|
+
Or install it yourself as:
|
45
|
+
|
46
|
+
$ gem install carton_db
|
47
|
+
|
48
|
+
## Characteristics
|
49
|
+
|
50
|
+
Each database has a name which is the path of a directory in the
|
51
|
+
filesystem containing the files that store the data.
|
52
|
+
|
53
|
+
A database is accessed through an instance of a database class.
|
54
|
+
|
55
|
+
An instance of a database class maintains no state in memory
|
56
|
+
between calls to its methods except for the database name.
|
57
|
+
|
58
|
+
An empty directory is a valid empty database.
|
59
|
+
|
60
|
+
Concurrent reads from a database are supported and safe.
|
61
|
+
|
62
|
+
Writing to a database concurrently with reads or writes by
|
63
|
+
other processes or threads is not supported, and the results of
|
64
|
+
attempting to do that are unpredictable.
|
65
|
+
|
66
|
+
Initializing a new database class instance creates its directory
|
67
|
+
in the filesystem if it does not already exist. The parent of the
|
68
|
+
database directory is expected to already exist, and an error
|
69
|
+
will occur if it doesn't.
|
70
|
+
|
71
|
+
The database structure is designed to effectively handle up to
|
72
|
+
several million elements with entries containing up to 1 or 2
|
73
|
+
thousand elements each.
|
74
|
+
|
75
|
+
The speed of database operations is relatively good, but this is
|
76
|
+
not a high performance database management system. See the
|
77
|
+
code documentation in the classes for more details about the
|
78
|
+
performance of particular database operations.
|
79
|
+
|
80
|
+
## Usage
|
81
|
+
|
82
|
+
Currently, this gem includes only one kind of database, which is
|
83
|
+
implemented by the `CartonDb::ListMapDb` class. It is a map of
|
84
|
+
lists where each entry has a string for a key and a list of 0
|
85
|
+
or more string elements as content.
|
86
|
+
|
87
|
+
The name of the database is the path of a directory in the
|
88
|
+
filesystem that either already exists or shall be created as
|
89
|
+
a container for the stored data.
|
90
|
+
|
91
|
+
Example:
|
92
|
+
|
93
|
+
require 'carton_db'
|
94
|
+
|
95
|
+
db = CartonDb::ListMapDb.new('/tmp/my_list_map')
|
96
|
+
|
97
|
+
db['Some Key'] = ['element 1', 'element 2']
|
98
|
+
|
99
|
+
db['Another Key'] = []
|
100
|
+
|
101
|
+
db.append_element 'Yet Another', 'abc'
|
102
|
+
db.append_element 'Yet Another', 'def'
|
103
|
+
|
104
|
+
p db.count
|
105
|
+
# 3
|
106
|
+
|
107
|
+
p db['Some Key']
|
108
|
+
# ["element 1", "element 2"]
|
109
|
+
|
110
|
+
p db['Another Key']
|
111
|
+
# []
|
112
|
+
|
113
|
+
p db['Yet Another']
|
114
|
+
# ["abc", "def"]
|
115
|
+
|
116
|
+
p db['Something Else']
|
117
|
+
# nil
|
118
|
+
|
119
|
+
## Development
|
120
|
+
|
121
|
+
After checking out the repo, run `bin/setup` to install dependencies.
|
122
|
+
Then, run `rake spec` to run the tests. You can also run `bin/console`
|
123
|
+
for an interactive prompt that will allow you to experiment.
|
124
|
+
|
125
|
+
To install this gem onto your local machine, run `bundle exec rake
|
126
|
+
install`. To release a new version, update the version number in
|
127
|
+
`version.rb`, and then run `bundle exec rake release`, which will
|
128
|
+
create a git tag for the version, push git commits and tags, and push
|
129
|
+
the `.gem` file to [rubygems.org](https://rubygems.org).
|
130
|
+
|
131
|
+
## Contributing
|
132
|
+
|
133
|
+
Bug reports and pull requests are welcome on GitHub at
|
134
|
+
https://github.com/[USERNAME]/carton_db.
|
135
|
+
|
136
|
+
|
137
|
+
## License
|
138
|
+
|
139
|
+
The gem is available as open source under the terms of the
|
140
|
+
[MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
require "rspec/core/rake_task"
|
4
|
+
|
5
|
+
# RSpec tasks, grouped under the "spec" namespace.
namespace :spec do
  desc 'Run all RSpec code examples'
  RSpec::Core::RakeTask.new(:all)

  # Tasks that include or exclude examples by the "slow" tag.
  {
    fast: ['Run RSpec code examples except those tagged as slow', '--tag ~slow'],
    slow: ['Run RSpec code examples that are tagged as slow', '--tag slow']
  }.each do |task_name, (description, tag_option)|
    desc description
    RSpec::Core::RakeTask.new(task_name) { |t| t.rspec_opts = tag_option }
  end
end

# Running plain `rake` runs the complete example suite.
task default: 'spec:all'
|
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: UTF-8 -*-
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "carton_db"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require "irb"
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/carton_db.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
# Make the gem's own lib directory loadable so the version constant
# can be read while the specification is being built.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'carton_db/version'

Gem::Specification.new do |spec|
  spec.name     = "carton_db"
  spec.version  = CartonDb::VERSION
  spec.authors  = ["Steve Jorgensen"]
  spec.email    = ["stevej@stevej.name"]

  spec.summary  = "A pure Ruby key/value data storage system where the" \
                  " values may consist of simple data structures."
  spec.homepage = "https://github.com/stevecj/carton_db.js"
  spec.license  = "MIT"

  # Package every git-tracked file except tests, specs and features.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  {
    "bundler" => "~> 1.14",
    "rake"    => "~> 10.0",
    "rspec"   => "~> 3.0"
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'digest'
|
2
|
+
|
3
|
+
module CartonDb

  # Wrappers around a key or element string that expose it both in
  # plain form and in escaped form, plus a shared placeholder datum
  # that stands for "no value".
  module Datum

    # Builds a datum from plain (unescaped) text.
    #
    # @param plain_text [String, nil] The plain text to wrap.
    # @param auto_placeholder [Boolean] When true, a nil plain_text
    #   produces the shared placeholder datum instead of an error.
    # @return [Datum::Base]
    # @raise [ArgumentError] if plain_text is nil and
    #   auto_placeholder is false.
    def self.for_plain(plain_text, auto_placeholder: false)
      if auto_placeholder && plain_text.nil?
        Datum::Placeholder
      else
        Datum::ForPlain.new(plain_text)
      end
    end

    # Builds a datum from already-escaped text.
    #
    # @param escaped_text [String, nil] The escaped text to wrap.
    # @param auto_placeholder [Boolean] When true, a nil
    #   escaped_text produces the shared placeholder datum instead
    #   of an error.
    # @return [Datum::Base]
    # @raise [ArgumentError] if escaped_text is nil and
    #   auto_placeholder is false.
    def self.for_escaped(escaped_text, auto_placeholder: false)
      if auto_placeholder && escaped_text.nil?
        Datum::Placeholder
      else
        Datum::ForEscaped.new(escaped_text)
      end
    end

    # @return [Datum::PlaceholderClass] The shared placeholder datum.
    def self.placeholder
      Datum::Placeholder
    end

    # Abstract interface shared by all datum kinds.
    class Base
      # @return [String, nil] The plain (unescaped) text.
      def plain
        raise NotImplementedError, "Subclass responsibility."
      end

      # @return [String, nil] The escaped text.
      def escaped
        raise NotImplementedError, "Subclass responsibility."
      end

      # @return [Boolean] Whether this datum is the placeholder.
      def placeholder?
        raise NotImplementedError, "Subclass responsibility."
      end

      # The raw MD5 digest of the plain text, computed once and then
      # memoized. A placeholder has no storage hashcode.
      #
      # @return [String, nil]
      def storage_hashcode
        return nil if placeholder?
        @storage_hashcode ||= Digest::MD5.digest(plain)
      end

      def eql?(other)
        raise NotImplementedError, "Subclass responsibility."
      end

      alias == eql?

      def hash
        raise NotImplementedError, "Subclass responsibility."
      end
    end

    # A datum constructed from plain text; the escaped form is
    # computed on first use.
    class ForPlain < Datum::Base
      attr_reader :plain

      # @param plain [String] The plain text.
      # @raise [ArgumentError] if plain is nil.
      def initialize(plain)
        if plain.nil?
          # The comma is required: without it Ruby parses this as a
          # call to a method named ArgumentError and raises
          # NoMethodError instead.
          raise ArgumentError, "A non-nil 'plain' value is required."
        end
        @plain = plain
      end

      def escaped
        @escaped ||= CartonDb::Escaping.escape(@plain)
      end

      def placeholder?
        false
      end

      # Two data are equal when their escaped forms are equal. When
      # both are ForPlain with equal plain text, the answer is known
      # without computing the escaped form.
      def eql?(other)
        return false unless other.is_a?(Datum::Base)
        return true if other.class == self.class && @plain == other.plain
        escaped == other.escaped
      end

      alias == eql?

      def hash
        escaped.hash
      end
    end

    # A datum constructed from escaped text; the plain form is
    # computed on first use.
    class ForEscaped < Datum::Base
      attr_reader :escaped

      # @param escaped [String] The escaped text.
      # @raise [ArgumentError] if escaped is nil.
      def initialize(escaped)
        if escaped.nil?
          # See ForPlain#initialize for why the comma matters.
          raise ArgumentError, "A non-nil 'escaped' value is required."
        end
        @escaped = escaped
      end

      def plain
        @plain ||= CartonDb::Escaping.unescape(@escaped)
      end

      def placeholder?
        false
      end

      # Two data are equal when their escaped forms are equal.
      def eql?(other)
        return false unless other.is_a?(Datum::Base)
        escaped == other.escaped
      end

      alias == eql?

      def hash
        escaped.hash
      end
    end

    # The datum kind that represents the absence of a value. Its
    # plain and escaped forms are both nil, and it is equal only to
    # other placeholder data.
    class PlaceholderClass < Datum::Base
      def plain
        nil
      end

      def escaped
        nil
      end

      def placeholder?
        true
      end

      def eql?(other)
        return false unless other.is_a?(Datum::Base)
        other.placeholder?
      end

      alias == eql?

      def hash
        PlaceholderClass.hash
      end
    end

    # The single shared placeholder instance.
    Placeholder = PlaceholderClass.new

  end

end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module CartonDb

  # Converts strings to and from a form in which ASCII control
  # characters, DEL and backslashes are written as backslash escape
  # sequences.
  module Escaping

    # Control characters that have a dedicated short escape form.
    short_forms = {
      0x07 => '\a', 0x08 => '\b', 0x09 => '\t', 0x0A => '\n',
      0x0B => '\v', 0x0C => '\f', 0x0D => '\r'
    }

    # Every character code that gets escaped, other than backslash.
    control_codes = (0x00..0x1F).to_a << 0x7F

    # Maps each escapable character to its escape sequence. Codes
    # without a short form use an upper-case two-digit hex escape.
    ESCAPING_MAP = control_codes.each_with_object({}) { |code, map|
      char = code.chr(Encoding::UTF_8).freeze
      map[char] = (short_forms[code] || format('\x%02X', code)).freeze
    }.merge("\\".freeze => "\\\\".freeze).freeze

    # Maps each escape sequence back to its original character.
    UNESCAPING_MAP = ESCAPING_MAP.invert.freeze

    # @param value [String] Plain text.
    # @return [String] A copy of value with each escapable character
    #   replaced by its escape sequence.
    def self.escape(value)
      value.gsub(/[\x00-\x1F\x7F\\]/, ESCAPING_MAP)
    end

    # @param esc [String] Escaped text.
    # @return [String] A copy of esc with each recognized escape
    #   sequence replaced by the character it stands for.
    def self.unescape(esc)
      esc.gsub(/\\(?:\\|x[01][0-9A-F]|x7F|[abtnvfr])/, UNESCAPING_MAP)
    end

  end

end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
module CartonDb
  class ListMapDb

    # One text file inside a segment group directory. Each line of
    # the file holds an escaped key, optionally followed by a tab
    # and an escaped element.
    class Segment

      # Matches segment file names such as "42.txt". Anchored to the
      # whole name rather than to a line within it.
      SEGMENT_FILENAME_PATTERN = /\A\d{1,3}[.]txt\z/

      # Returns the segment selected by the given hashcode. The low
      # 7 bits of the last byte choose the segment file; the rest of
      # the hashcode is passed on to choose the segment group.
      #
      # @param db_name [String] The database directory path.
      # @param hashcode [String] The storage hashcode of a key.
      # @return [Segment]
      def self.in_db_for_hashcode(db_name, hashcode)
        seg_hash_part = hashcode[-1]
        seg_num = seg_hash_part.bytes[0] & 127

        group_hashcode = hashcode[0..-2]
        seg_group = ListMapDb::SegmentGroup.
          in_db_for_hashcode(db_name, group_hashcode)

        new(seg_group, "#{seg_num}.txt")
      end

      # Yields a Segment for every segment file found in every
      # segment group of the database.
      #
      # @param db_name [String] The database directory path.
      # @yieldparam segment [Segment]
      def self.each_in_db(db_name)
        ListMapDb::SegmentGroup.each_in_db db_name do |seg_group|
          Dir.entries(seg_group.directory_path).each do |de|
            next unless de =~ SEGMENT_FILENAME_PATTERN
            yield new(seg_group, de)
          end
        end
      end

      # Deletes every segment file in every segment group of the
      # database. The group directories themselves are left in place.
      #
      # @param db_name [String] The database directory path.
      def self.clear_all_in_db(db_name)
        ListMapDb::SegmentGroup.each_in_db db_name do |seg_group|
          dir = seg_group.directory_path
          filenames = Dir.entries(dir).
            grep(SEGMENT_FILENAME_PATTERN).
            map { |de| File.join(dir, de) }
          # Pass the list itself rather than splatting it: a group
          # directory with no segment files (e.g. one that was
          # already cleared) would otherwise call rm with no
          # arguments and raise ArgumentError.
          FileUtils.rm filenames
        end
      end

      attr_accessor :segment_group, :segment_filename
      private :segment_group=, :segment_filename=

      # @param segment_group [SegmentGroup] The group whose
      #   directory contains the file.
      # @param segment_filename [String] The file's base name.
      def initialize(segment_group, segment_filename)
        self.segment_group = segment_group
        self.segment_filename = segment_filename
      end

      # @return [String] The path of the segment file.
      def filename
        File.join(segment_group.directory_path, segment_filename)
      end

      # Truthy when the file exists and is not zero-length. The
      # underlying stat is cached, so the answer reflects the file
      # as it was when this instance first checked.
      def content?
        stat && ! stat.zero?
      end

      # The negation of #content?.
      def empty?
        ! content?
      end

      # Yields each distinct key in the file along with the list of
      # all of its elements. The whole file is read and grouped
      # before the first entry is yielded.
      #
      # @yieldparam key [String] The plain key.
      # @yieldparam content [Array<String>] The plain elements.
      def each_entry
        entries = nil
        each_entry_element_line do |key_d, elem_d, _line|
          entries ||= {}
          content = entries[key_d] ||= []
          content << elem_d.plain unless elem_d.placeholder?
        end
        return unless entries
        entries.each do |key_d, content|
          yield key_d.plain, content
        end
      end

      # Yields the parsed key and element of every line in the file,
      # together with the raw line. A line without a tab yields a
      # placeholder as its element. Does nothing when the segment is
      # empty.
      #
      # @yieldparam key_d [Datum::Base] The key datum.
      # @yieldparam element_d [Datum::Base] The element datum, or
      #   the placeholder.
      # @yieldparam line [String] The unmodified line.
      def each_entry_element_line
        return if empty?
        each_line do |line|
          # Remove only the line terminator. Stripping all
          # surrounding whitespace would alter keys and elements
          # that begin or end with spaces, lose empty-string
          # elements, and mis-parse lines whose key is empty.
          esc_key, esc_element = line.chomp.split("\t", 2)
          # A blank line splits to nothing; treat it as an empty key
          # with no element.
          key_d = CartonDb::Datum.for_escaped(esc_key || '')
          element_d = CartonDb::Datum.for_escaped(
            esc_element, auto_placeholder: true)
          yield key_d, element_d, line
        end
      end

      # Opens the file for appending, creating its directory first
      # if needed, and yields the IO.
      def open_append
        touch_dir
        File.open filename, 'a', **FILE_ENCODING_OPTS do |io|
          yield io
        end
      end

      # Opens the file for writing from scratch, creating its
      # directory first if needed, and yields the IO.
      def open_overwrite
        touch_dir
        File.open filename, 'w', **FILE_ENCODING_OPTS do |io|
          yield io
        end
      end

      private

      # The File::Stat of the file, or nil when it is not a regular
      # file. Computed once per instance, including the nil result.
      def stat
        return @stat if defined? @stat
        return @stat = nil unless File.file?(filename)
        return @stat = File.stat(filename)
      end

      def open_read
        File.open filename, 'r', **FILE_ENCODING_OPTS do |io|
          yield io
        end
      end

      # Creates the directory that holds the file if it is missing.
      def touch_dir
        dir = File.dirname(filename)
        return if File.directory?(dir)
        FileUtils.mkdir dir
      end

      def each_line
        open_read do |io|
          io.each_line do |line|
            yield line
          end
        end
      end

    end

  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
module CartonDb
  class ListMapDb

    # A numbered subdirectory of a database directory.
    class SegmentGroup

      attr_accessor :db_name, :name_part
      private :db_name=, :name_part=

      # Returns the group selected by the low 7 bits of the first
      # byte of the hashcode's last character.
      #
      # @param db_name [String] The database directory path.
      # @param hashcode [String] The hashcode used for selection.
      # @return [SegmentGroup]
      def self.in_db_for_hashcode(db_name, hashcode)
        last_char = hashcode[-1]
        new(db_name, (last_char.bytes.first & 127).to_s)
      end

      # Yields a SegmentGroup for each directory in the database
      # whose name consists of 1 to 3 digits.
      #
      # @param db_name [String] The database directory path.
      # @yieldparam seg_group [SegmentGroup]
      def self.each_in_db(db_name)
        Dir.entries(db_name).grep(/^\d{1,3}$/).each do |entry|
          group = new(db_name, entry)
          yield group if File.directory?(group.directory_path)
        end
      end

      # @param db_name [String] The database directory path.
      # @param name_part [String] The subdirectory name.
      def initialize(db_name, name_part)
        self.db_name = db_name
        self.name_part = name_part
      end

      # @return [String] The path of the group's directory, computed
      #   once and then reused.
      def directory_path
        @directory_path ||= File.join(db_name, name_part)
      end
    end

  end
end
|
@@ -0,0 +1,312 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
require 'forwardable'
|
3
|
+
require 'fileutils'
|
4
|
+
require 'carton_db/list_map_db/segment'
|
5
|
+
require 'carton_db/list_map_db/segment_group'
|
6
|
+
|
7
|
+
module CartonDb

  # A map with string keys and lists of strings as contents.
  #
  # This is suitable for storing a total number of elements as
  # large as the low millions, with each entry containing a
  # number of elements in the hundreds or low thousands.
  class ListMapDb
    extend Forwardable
    include Enumerable

    FILE_ENCODING_OPTS = {
      internal_encoding: Encoding::UTF_8,
      external_encoding: Encoding::UTF_8
    }.freeze

    # Initializes an instance that interacts with the database
    # identified by the given name, which is the full path to a
    # directory in the filesystem.
    #
    # The directory for the database will be created if it does
    # not already exist.
    #
    # This is a very fast operation.
    #
    # @param name [String] The full path of the directory in the
    #   filesystem in which the data is stored or will be stored.
    def initialize(name)
      self.name = name
      FileUtils.mkdir name unless File.directory?(name)
    end

    # Creates a new entry or replaces the contents of the
    # existing entry identified by the given key.
    #
    # This is a fairly fast operation, but can be somewhat
    # slower in a large database. Note that appending and
    # concatenating may be faster than assignment.
    #
    # @param key [String] The key identifying the entry.
    # @param content [Array<String>] An array or other
    #   enumerable collection of 0 or more list element string
    #   values to be stored.
    def []=(key, content)
      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)
      if segment.empty?
        # Nothing to replace, so appending is sufficient.
        concat_elements key_d.plain, content
      else
        replace_entry_in_file segment, key_d, content
      end
    end

    # Returns the content of the entry identified by the given
    # key or nil if no such entry exists.
    #
    # This operation is fast, but may be slower for a larger
    # database.
    #
    # @param key [String] The key identifying the entry.
    # @return [Array<String>] if a matching entry exists.
    # @return [nil] if no matching entry exists.
    def [](key)
      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)

      ary = nil
      segment.each_entry_element_line do |kd, ed, _line|
        next ary unless kd == key_d
        ary ||= []
        ary << ed.plain unless ed.placeholder?
      end
      ary
    end

    # Returns true if an entry exists for the given key.
    #
    # This scans the segment file that would hold the key, stopping
    # at the first matching line.
    #
    # @param key [String] The key identifying the entry.
    # @return [Boolean]
    def key?(key)
      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)

      segment.each_entry_element_line do |kd, _ed, _line|
        # Must be a comparison. An assignment here is always truthy
        # and reports every key as present.
        return true if kd == key_d
      end
      false
    end

    # Returns true if the map has no entries.
    #
    # This is a fairly fast operation.
    #
    # @return [Boolean]
    def empty?
      ListMapDb::Segment.each_in_db name do |segment|
        return false unless segment.empty?
      end
      true
    end

    # Returns the number of entries in the map.
    #
    # This operation scans the entire database to count the keys,
    # so it can be a slow operation if the database is large.
    #
    # @return [Integer]
    def count
      key_count = 0
      ListMapDb::Segment.each_in_db name do |segment|
        next if segment.empty?
        # The segment for a key is derived from the key itself, so
        # distinct keys can be counted one segment at a time. A
        # plain Hash serves as the set of keys seen, which avoids
        # depending on the 'set' library being loaded.
        segment_keys = {}
        segment.each_entry_element_line do |kd, _ed, _line|
          segment_keys[kd] = true
        end
        key_count += segment_keys.size
      end
      key_count
    end

    # Creates an entry with an empty list as its content if no
    # entry exists for the given key. Has no effect on the content
    # of the entry if it already exists.
    #
    # Using :small optimization (the default), the operation may
    # be slow for a large database since it checks for the
    # existence of the key before marking its existence.
    #
    # Using :fast optimization, the operation will always be
    # fast, but it will add a mark for the existence of the key
    # even if that is redundant, thus adding to the size of the
    # stored data.
    #
    # @param key [String] The key identifying the entry.
    # @param optimization [:small, :fast] The optimization mode.
    # @raise [ArgumentError] if optimization is neither :small nor
    #   :fast.
    def touch(key, optimization: :small)
      if optimization != :small && optimization != :fast
        raise ArgumentError, "Invalid optimization value. Must be :small or :fast"
      end

      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)

      if optimization == :small && segment.content?
        segment.each_entry_element_line do |kd, _ed, _line|
          return if kd == key_d
        end
      end

      # A line holding only the key marks the entry's existence.
      segment.open_append do |io|
        io << key_d.escaped << "\n"
      end
    end

    # Removes all entries from the database, leaving it empty.
    #
    # This operation can be somewhat slow for a large database.
    #
    def clear
      ListMapDb::Segment.clear_all_in_db name
    end

    # Yields each entry in the database as a key/array pair.
    #
    # This operation can take a lot of total time for a large
    # database, but yields entries pretty rapidly regardless
    # of database size.
    #
    # @yieldparam key [String] The key of the entry.
    # @yieldparam array [Array<String>] The elements of the list
    #   entry's content.
    # @return [Enumerator] if no block is given.
    def each
      return enum_for(:each) unless block_given?
      ListMapDb::Segment.each_in_db name do |segment|
        segment.each_entry do |key, content|
          yield key, content
        end
      end
    end

    # Removes an entry from the database. Has no effect if the
    # entry already does not exist.
    #
    # This operation is fast, but may be slower for a larger
    # database.
    #
    # @param key [String] The key identifying the entry to be
    #   deleted.
    def delete(key)
      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)
      return if segment.empty?

      rewrite_segment_without segment, key_d
    end

    # Appends an element string to the content of an entry.
    # If the entry does not already exist, then one is
    # created with a list containing the given element as its
    # content.
    #
    # Since this will only append text to a file within the
    # database, it is a very fast operation.
    #
    # @param key [String] The key identifying the entry.
    # @param element [String] The element to be appended to the
    #   content of the entry.
    def append_element(key, element)
      key_d = CartonDb::Datum.for_plain(key)
      element_d = CartonDb::Datum.for_plain(element)
      segment = segment_containing(key_d)
      FileUtils.mkpath File.dirname(segment.filename)
      segment.open_append do |io|
        io << "#{key_d.escaped}\t#{element_d.escaped}\n"
      end
    end

    # Appends any number of element strings to the content of an
    # entry. If the entry does not already exist, then one is
    # created with the given list as its content.
    #
    # Appending an empty or nil collection is equivalent to
    # invoking `db.touch key, optimization: :small`.
    #
    # When appending a non-empty collection, this is a fast
    # operation since it only appends text to an existing file.
    #
    # @param key [String] The key identifying the entry.
    # @param elements [Array<String>] An array or other
    #   enumerable collection of elements to append.
    def concat_elements(key, elements)
      if empty_collection?(elements)
        touch key, optimization: :small
      else
        concat_any_elements key, elements
      end
    end

    # Appends any number of element strings to the content of an
    # entry. An empty elements collection writes no lines, so a new
    # entry is not created if one did not exist previously.
    #
    # This is a fast operation since it only appends text to an
    # existing file.
    #
    # @param key [String] The key identifying the entry.
    # @param elements [Array<String>] An array or other
    #   enumerable collection of elements to append.
    def concat_any_elements(key, elements)
      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)
      segment.open_append do |io|
        elements.each do |element|
          element_d = CartonDb::Datum.for_plain(element)
          io << "#{key_d.escaped}\t#{element_d.escaped}\n"
        end
      end
    end

    private

    attr_accessor :name

    # Rewrites the segment so that the entry for key_d holds
    # exactly the given content. A nil or empty content leaves a
    # key-only line that marks the entry as existing but empty.
    def replace_entry_in_file(segment, key_d, content)
      rewrite_segment_without segment, key_d do |io|
        element_count = 0
        (content || []).each do |element|
          element_d = CartonDb::Datum.for_plain(element)
          element_count += 1
          io << "#{key_d.escaped}\t#{element_d.escaped}\n"
        end
        io << key_d.escaped << "\n" if element_count.zero?
      end
    end

    # Copies every line of the segment that does not belong to
    # key_d into a sibling ".new" file, yields that file's IO (if a
    # block is given) so replacement lines can be added, and then
    # swaps the new file into the segment's place.
    def rewrite_segment_without(segment, key_d)
      new_segment = ListMapDb::Segment.new(
        segment.segment_group, "#{segment.segment_filename}.new")
      new_segment.open_overwrite do |io|
        segment.each_entry_element_line do |kd, _ed, line|
          io << line unless kd == key_d
        end
        yield io if block_given?
      end
      File.unlink segment.filename
      File.rename new_segment.filename, segment.filename
    end

    # Returns the segment in which lines for the given key datum
    # are stored, chosen by the key's storage hashcode.
    def segment_containing(key)
      ListMapDb::Segment.in_db_for_hashcode(
        name, key.storage_hashcode
      )
    end

    # True for nil and for any collection that yields no items.
    def empty_collection?(collection)
      return true if collection.nil?
      occupied = collection.any? { true }
      ! occupied
    end

  end

end
|
data/lib/carton_db.rb
ADDED
data/tmp/.gitkeep
ADDED
File without changes
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: carton_db
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Steve Jorgensen
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-05-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.14'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.14'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description:
|
56
|
+
email:
|
57
|
+
- stevej@stevej.name
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".rspec"
|
64
|
+
- ".ruby-version"
|
65
|
+
- ".travis.yml"
|
66
|
+
- Gemfile
|
67
|
+
- LICENSE.txt
|
68
|
+
- README.md
|
69
|
+
- Rakefile
|
70
|
+
- bin/console
|
71
|
+
- bin/setup
|
72
|
+
- carton_db.gemspec
|
73
|
+
- lib/carton_db.rb
|
74
|
+
- lib/carton_db/datum.rb
|
75
|
+
- lib/carton_db/escaping.rb
|
76
|
+
- lib/carton_db/list_map_db.rb
|
77
|
+
- lib/carton_db/list_map_db/segment.rb
|
78
|
+
- lib/carton_db/list_map_db/segment_group.rb
|
79
|
+
- lib/carton_db/version.rb
|
80
|
+
- tmp/.gitkeep
|
81
|
+
homepage: https://github.com/stevecj/carton_db.js
|
82
|
+
licenses:
|
83
|
+
- MIT
|
84
|
+
metadata: {}
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options: []
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 2.6.10
|
102
|
+
signing_key:
|
103
|
+
specification_version: 4
|
104
|
+
summary: A pure Ruby key/value data storage system where the values may consist of
|
105
|
+
simple data structures.
|
106
|
+
test_files: []
|