carton_db 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +140 -0
- data/Rakefile +22 -0
- data/bin/console +15 -0
- data/bin/setup +9 -0
- data/carton_db.gemspec +27 -0
- data/lib/carton_db/datum.rb +147 -0
- data/lib/carton_db/escaping.rb +60 -0
- data/lib/carton_db/list_map_db/segment.rb +130 -0
- data/lib/carton_db/list_map_db/segment_group.rb +38 -0
- data/lib/carton_db/list_map_db.rb +312 -0
- data/lib/carton_db/version.rb +4 -0
- data/lib/carton_db.rb +8 -0
- data/tmp/.gitkeep +0 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: aecfa7225f91fad95b3a8eaa417a155248195aff
|
4
|
+
data.tar.gz: 7e64f92388321296900fa4774b7fec3e608fb388
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b0afb5be66793688c004e8eca157cee2e53ea2aacb8df21afa600fa93f616d9df25a99ecf905a183e7d1be0c461da3eaf60e60f92ec978793bca1add8f50042f
|
7
|
+
data.tar.gz: 2bceb896fbcf2a4ba811ce5548f90cfde7a451ca7417b58a2aa846681452f44b89564a5755c05af82f020d0b37e1612ef365ac5eff3fe00c480a5d2b4cda9e71
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.3.1
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 Steve Jorgensen
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
# CartonDb
|
2
|
+
|
3
|
+
A pure Ruby key/value data storage system where the values may
|
4
|
+
consist of simple data structures.
|
5
|
+
|
6
|
+
The primary goals of this library are simplicity of implementation
|
7
|
+
and reliable, predictable behavior when used as intended, along
|
8
|
+
with documentation making it reasonably clear what is intended.
|
9
|
+
|
10
|
+
## Uses
|
11
|
+
|
12
|
+
Uses for this gem seem pretty limited, but you might have a
|
13
|
+
purpose for it that the author has not thought about.
|
14
|
+
|
15
|
+
This is a formalization of a solution that was created to solve
|
16
|
+
a specific problem. The problem was adding a feature to a
|
17
|
+
Ruby program running on Heroku to collect data into a map of
|
18
|
+
sets of elements that would be too large to be effectively
|
19
|
+
handled in memory. The application didn't already have any use
|
20
|
+
for a relational database server, and I didn't want to add one
|
21
|
+
just for this requirement. A redis db with sufficient capacity
|
22
|
+
would have been expensive, and solutions such as SQLite are
|
23
|
+
specifically not supported by Heroku so people don't mistakenly
|
24
|
+
expect the data to be preserved. Ruby's `DBM` and `SDBM` proved
|
25
|
+
to be too unpredictable and flaky to be practical solutions.
|
26
|
+
|
27
|
+
Although this tool was initially developed to store transient
|
28
|
+
data for use within a single process invocation and then
|
29
|
+
discarded, it is also quite well suited for long term data
|
30
|
+
storage on a system that preserves filesystem data over time.
|
31
|
+
|
32
|
+
## Installation
|
33
|
+
|
34
|
+
Add this line to your application's Gemfile:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
gem 'carton_db'
|
38
|
+
```
|
39
|
+
|
40
|
+
And then execute:
|
41
|
+
|
42
|
+
$ bundle
|
43
|
+
|
44
|
+
Or install it yourself as:
|
45
|
+
|
46
|
+
$ gem install carton_db
|
47
|
+
|
48
|
+
## Characteristics
|
49
|
+
|
50
|
+
Each database has a name which is the path of a directory in the
|
51
|
+
filesystem containing the files that store the data.
|
52
|
+
|
53
|
+
A database is accessed through an instance of a database class.
|
54
|
+
|
55
|
+
An instance of a database class maintains no state in memory
|
56
|
+
between calls to its methods except for the database name.
|
57
|
+
|
58
|
+
An empty directory is a valid empty database.
|
59
|
+
|
60
|
+
Concurrent reads from a database are supported and safe.
|
61
|
+
|
62
|
+
Writing to a database concurrently with reads or writes by
|
63
|
+
other processes or threads is not supported, and the results of
|
64
|
+
attempting to do that are unpredictable.
|
65
|
+
|
66
|
+
Initializing a new database class instance creates its directory
|
67
|
+
in the filesystem if it does not already exist. The parent of the
|
68
|
+
database directory is expected to already exist, and an error
|
69
|
+
will occur if it doesn't.
|
70
|
+
|
71
|
+
The database structure is designed to effectively handle up to
|
72
|
+
several million elements with entries containing up to 1 or 2
|
73
|
+
thousand elements each.
|
74
|
+
|
75
|
+
The speed of database operations is relatively good, but this is
|
76
|
+
not a high performance database management system. See the
|
77
|
+
code documentation in the classes for more details about the
|
78
|
+
performance of particular database operations.
|
79
|
+
|
80
|
+
## Usage
|
81
|
+
|
82
|
+
Currently, this gem includes only one kind of database, which is
|
83
|
+
implemented by the `CartonDb::ListMapDb` class. It is a map of
|
84
|
+
lists where each entry has a string for a key and a list of 0
|
85
|
+
or more string elements as content.
|
86
|
+
|
87
|
+
The name of the database is the path of a directory in the
|
88
|
+
filesystem that either already exists or shall be created as
|
89
|
+
a container for the stored data.
|
90
|
+
|
91
|
+
Example:
|
92
|
+
|
93
|
+
require 'carton_db'
|
94
|
+
|
95
|
+
db = CartonDb::ListMapDb.new('/tmp/my_list_map')
|
96
|
+
|
97
|
+
db['Some Key'] = ['element 1', 'element 2']
|
98
|
+
|
99
|
+
db['Another Key'] = []
|
100
|
+
|
101
|
+
db.append_element 'Yet Another', 'abc'
|
102
|
+
db.append_element 'Yet Another', 'def'
|
103
|
+
|
104
|
+
p db.count
|
105
|
+
# 3
|
106
|
+
|
107
|
+
p db['Some Key']
|
108
|
+
# ["element 1", "element 2"]
|
109
|
+
|
110
|
+
p db['Another Key']
|
111
|
+
# []
|
112
|
+
|
113
|
+
p db['Yet Another']
|
114
|
+
# ["abc", "def"]
|
115
|
+
|
116
|
+
p db['Something Else']
|
117
|
+
# nil
|
118
|
+
|
119
|
+
## Development
|
120
|
+
|
121
|
+
After checking out the repo, run `bin/setup` to install dependencies.
|
122
|
+
Then, run `rake spec` to run the tests. You can also run `bin/console`
|
123
|
+
for an interactive prompt that will allow you to experiment.
|
124
|
+
|
125
|
+
To install this gem onto your local machine, run `bundle exec rake
|
126
|
+
install`. To release a new version, update the version number in
|
127
|
+
`version.rb`, and then run `bundle exec rake release`, which will
|
128
|
+
create a git tag for the version, push git commits and tags, and push
|
129
|
+
the `.gem` file to [rubygems.org](https://rubygems.org).
|
130
|
+
|
131
|
+
## Contributing
|
132
|
+
|
133
|
+
Bug reports and pull requests are welcome on GitHub at
|
134
|
+
https://github.com/[USERNAME]/carton_db.
|
135
|
+
|
136
|
+
|
137
|
+
## License
|
138
|
+
|
139
|
+
The gem is available as open source under the terms of the
|
140
|
+
[MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
require "rspec/core/rake_task"
|
4
|
+
|
5
|
+
namespace :spec do
|
6
|
+
|
7
|
+
desc 'Run all RSpec code examples'
|
8
|
+
RSpec::Core::RakeTask.new(:all)
|
9
|
+
|
10
|
+
desc 'Run RSpec code examples except those tagged as slow'
|
11
|
+
RSpec::Core::RakeTask.new(:fast) do |t|
|
12
|
+
t.rspec_opts = '--tag ~slow'
|
13
|
+
end
|
14
|
+
|
15
|
+
desc 'Run RSpec code examples that are tagged as slow'
|
16
|
+
RSpec::Core::RakeTask.new(:slow) do |t|
|
17
|
+
t.rspec_opts = '--tag slow'
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
task :default => 'spec:all'
|
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: UTF-8 -*-
|
3
|
+
|
4
|
+
require "bundler/setup"
|
5
|
+
require "carton_db"
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require "irb"
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/carton_db.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'carton_db/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "carton_db"
|
8
|
+
spec.version = CartonDb::VERSION
|
9
|
+
spec.authors = ["Steve Jorgensen"]
|
10
|
+
spec.email = ["stevej@stevej.name"]
|
11
|
+
|
12
|
+
spec.summary = "A pure Ruby key/value data storage system where the" \
|
13
|
+
" values may consist of simple data structures."
|
14
|
+
spec.homepage = "https://github.com/stevecj/carton_db.js"
|
15
|
+
spec.license = "MIT"
|
16
|
+
|
17
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
18
|
+
f.match(%r{^(test|spec|features)/})
|
19
|
+
end
|
20
|
+
spec.bindir = "exe"
|
21
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
|
+
spec.require_paths = ["lib"]
|
23
|
+
|
24
|
+
spec.add_development_dependency "bundler", "~> 1.14"
|
25
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
26
|
+
spec.add_development_dependency "rspec", "~> 3.0"
|
27
|
+
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'digest'
|
2
|
+
|
3
|
+
module CartonDb

  # Wrappers around a piece of key or element text that expose both its
  # plain form and its escaped (storage) form, converting lazily between
  # the two. A shared Placeholder instance stands for "no value".
  module Datum

    # Builds a datum from plain (unescaped) text.
    #
    # @param plain_text [String, nil] The plain text.
    # @param auto_placeholder [Boolean] When true, a nil plain_text
    #   yields the shared Placeholder instead of raising.
    # @return [Datum::Base]
    # @raise [ArgumentError] if plain_text is nil and auto_placeholder
    #   is false.
    def self.for_plain(plain_text, auto_placeholder: false)
      return Datum::Placeholder if auto_placeholder && plain_text.nil?

      Datum::ForPlain.new(plain_text)
    end

    # Builds a datum from already-escaped text.
    #
    # @param escaped_text [String, nil] The escaped text.
    # @param auto_placeholder [Boolean] When true, a nil escaped_text
    #   yields the shared Placeholder instead of raising.
    # @return [Datum::Base]
    # @raise [ArgumentError] if escaped_text is nil and auto_placeholder
    #   is false.
    def self.for_escaped(escaped_text, auto_placeholder: false)
      return Datum::Placeholder if auto_placeholder && escaped_text.nil?

      Datum::ForEscaped.new(escaped_text)
    end

    # @return [Datum::PlaceholderClass] the shared placeholder instance.
    def self.placeholder
      Datum::Placeholder
    end

    # Abstract base. Subclasses supply plain, escaped, placeholder?,
    # eql?/== and hash.
    class Base
      def plain
        raise NotImplementedError, "Subclass responsibility."
      end

      def escaped
        raise NotImplementedError, "Subclass responsibility."
      end

      def placeholder?
        raise NotImplementedError, "Subclass responsibility."
      end

      # MD5 digest (raw bytes) of the plain text, memoized.
      #
      # @return [String, nil] nil for a placeholder.
      def storage_hashcode
        return nil if placeholder?
        @storage_hashcode ||= Digest::MD5.digest(plain)
      end

      def eql?(other)
        raise NotImplementedError, "Subclass responsibility."
      end

      alias == eql?

      def hash
        raise NotImplementedError, "Subclass responsibility."
      end
    end

    # A datum constructed from plain text; the escaped form is computed
    # on first use.
    class ForPlain < Datum::Base
      attr_reader :plain

      # @param plain [String] The plain text.
      # @raise [ArgumentError] if plain is nil.
      def initialize(plain)
        if plain.nil?
          # The comma matters: without it Ruby parses this as a call to
          # a method named ArgumentError and raises NoMethodError.
          raise ArgumentError, "A non-nil 'plain' value is required."
        end
        @plain = plain
      end

      def escaped
        @escaped ||= CartonDb::Escaping.escape(@plain)
      end

      def placeholder?
        false
      end

      # Equal to any other datum with the same escaped text. The first
      # check is a shortcut that avoids escaping when both sides
      # already hold equal plain text.
      def eql?(other)
        return false unless other.is_a?(Datum::Base)
        return true if other.class == self.class && @plain == other.plain
        escaped == other.escaped
      end

      alias == eql?

      def hash
        escaped.hash
      end
    end

    # A datum constructed from escaped text; the plain form is computed
    # on first use.
    class ForEscaped < Datum::Base
      attr_reader :escaped

      # @param escaped [String] The escaped text.
      # @raise [ArgumentError] if escaped is nil.
      def initialize(escaped)
        if escaped.nil?
          raise ArgumentError, "A non-nil 'escaped' value is required."
        end
        @escaped = escaped
      end

      def plain
        @plain ||= CartonDb::Escaping.unescape(@escaped)
      end

      def placeholder?
        false
      end

      # Equal to any other datum with the same escaped text.
      def eql?(other)
        return false unless other.is_a?(Datum::Base)
        escaped == other.escaped
      end

      alias == eql?

      def hash
        escaped.hash
      end
    end

    # The "no value" datum: plain and escaped are both nil, and it is
    # equal only to other placeholders.
    class PlaceholderClass < Datum::Base
      def plain
        nil
      end

      def escaped
        nil
      end

      def placeholder?
        true
      end

      def eql?(other)
        return false unless other.is_a?(Datum::Base)
        other.placeholder?
      end

      alias == eql?

      def hash
        PlaceholderClass.hash
      end
    end

    Placeholder = PlaceholderClass.new

  end

end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module CartonDb

  # Converts between plain text and the escaped text stored in segment
  # files. Escaping replaces every ASCII control character (including
  # tab and newline), DEL and backslash with a backslash sequence.
  module Escaping

    ESCAPING_MAP = {
      "\u0000".freeze => '\x00'.freeze,
      "\u0001".freeze => '\x01'.freeze,
      "\u0002".freeze => '\x02'.freeze,
      "\u0003".freeze => '\x03'.freeze,
      "\u0004".freeze => '\x04'.freeze,
      "\u0005".freeze => '\x05'.freeze,
      "\u0006".freeze => '\x06'.freeze,
      "\u0007".freeze => '\a'.freeze,
      "\u0008".freeze => '\b'.freeze,
      "\u0009".freeze => '\t'.freeze,
      "\u000A".freeze => '\n'.freeze,
      "\u000B".freeze => '\v'.freeze,
      "\u000C".freeze => '\f'.freeze,
      "\u000D".freeze => '\r'.freeze,
      "\u000E".freeze => '\x0E'.freeze,
      "\u000F".freeze => '\x0F'.freeze,
      "\u0010".freeze => '\x10'.freeze,
      "\u0011".freeze => '\x11'.freeze,
      "\u0012".freeze => '\x12'.freeze,
      "\u0013".freeze => '\x13'.freeze,
      "\u0014".freeze => '\x14'.freeze,
      "\u0015".freeze => '\x15'.freeze,
      "\u0016".freeze => '\x16'.freeze,
      "\u0017".freeze => '\x17'.freeze,
      "\u0018".freeze => '\x18'.freeze,
      "\u0019".freeze => '\x19'.freeze,
      "\u001A".freeze => '\x1A'.freeze,
      "\u001B".freeze => '\x1B'.freeze,
      "\u001C".freeze => '\x1C'.freeze,
      "\u001D".freeze => '\x1D'.freeze,
      "\u001E".freeze => '\x1E'.freeze,
      "\u001F".freeze => '\x1F'.freeze,
      "\u007F".freeze => '\x7F'.freeze,
      "\\".freeze => "\\\\".freeze,
    }.freeze

    # Inverse of ESCAPING_MAP, plus the hex spelling of every escaped
    # control character. The unescape pattern accepts hex forms such as
    # '\x07' even though escape emits '\a' for that character; without
    # the extra entries gsub would find no replacement for them and
    # silently delete the sequence.
    UNESCAPING_MAP = ESCAPING_MAP.invert.tap { |map|
      ESCAPING_MAP.each_key do |char|
        next if char == "\\"
        map[format('\x%02X', char.ord).freeze] ||= char
      end
    }.freeze

    # @param value [String] Plain text.
    # @return [String] A copy of value with control characters, DEL and
    #   backslashes replaced by their escape sequences.
    def self.escape(value)
      value.gsub(
        /[\x00-\x1F\x7F\\]/,
        ESCAPING_MAP
      )
    end

    # @param esc [String] Escaped text.
    # @return [String] A copy of esc with every recognized escape
    #   sequence replaced by the character it stands for.
    def self.unescape(esc)
      esc.gsub(
        /\\(?:\\|x[01][0-9A-F]|x7F|[abtnvfr])/,
        UNESCAPING_MAP
      )
    end

  end

end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
module CartonDb
  class ListMapDb

    # One segment file of a list-map database. Each line of the file
    # is either "escaped_key<TAB>escaped_element" or just "escaped_key"
    # (marking that the key exists without adding an element).
    class Segment

      # Whole-string match for segment file names such as "42.txt".
      SEGMENT_FILENAME_PATTERN = /\A\d{1,3}[.]txt\z/

      # Locates the segment for a storage hashcode. The low 7 bits of
      # the last byte pick the segment file; the remaining bytes are
      # handed to SegmentGroup to pick the group.
      #
      # @param db_name [String] The database directory path.
      # @param hashcode [String] The key's storage hashcode bytes.
      # @return [Segment]
      def self.in_db_for_hashcode(db_name, hashcode)
        seg_hash_part = hashcode[-1]
        seg_num = seg_hash_part.bytes[0] & 127

        group_hashcode = hashcode[0..-2]
        seg_group = ListMapDb::SegmentGroup.
          in_db_for_hashcode(db_name, group_hashcode)

        new(seg_group, "#{seg_num}.txt")
      end

      # Yields a Segment for every segment file found in every segment
      # group of the database.
      #
      # @param db_name [String] The database directory path.
      # @yieldparam segment [Segment]
      def self.each_in_db(db_name)
        ListMapDb::SegmentGroup.each_in_db db_name do |seg_group|
          Dir.entries(seg_group.directory_path).each do |de|
            next unless de =~ SEGMENT_FILENAME_PATTERN
            yield new(seg_group, de)
          end
        end
      end

      # Deletes every segment file in every segment group of the
      # database. Group directories themselves are left in place.
      #
      # @param db_name [String] The database directory path.
      def self.clear_all_in_db(db_name)
        ListMapDb::SegmentGroup.each_in_db db_name do |seg_group|
          filenames = Dir.entries(seg_group.directory_path).
            select { |de| de =~ SEGMENT_FILENAME_PATTERN }.
            map { |de| File.join(seg_group.directory_path, de) }
          # Pass the array itself: splatting an empty array would call
          # FileUtils.rm with no arguments and raise ArgumentError.
          FileUtils.rm filenames
        end
      end

      attr_accessor :segment_group, :segment_filename
      private :segment_group=, :segment_filename=

      # @param segment_group [SegmentGroup] The group that owns the file.
      # @param segment_filename [String] The file's base name.
      def initialize(segment_group, segment_filename)
        self.segment_group = segment_group
        self.segment_filename = segment_filename
      end

      # @return [String] Path of the segment file.
      def filename
        File.join(segment_group.directory_path, segment_filename)
      end

      # True when the segment file exists and is not zero-length.
      # The underlying stat is memoized per instance.
      def content?
        stat && ! stat.zero?
      end

      def empty?
        ! content?
      end

      # Yields each distinct key in the segment with the array of its
      # element strings.
      #
      # @yieldparam key [String] The plain key.
      # @yieldparam content [Array<String>] The plain elements.
      def each_entry
        entries = nil
        each_entry_element_line do |key_d, elem_d, _line|
          entries ||= {}
          content = entries[key_d] ||= []
          content << elem_d.plain unless elem_d.placeholder?
        end
        return unless entries
        entries.each do |key_d, content|
          yield key_d.plain, content
        end
      end

      # Yields the key datum, element datum and raw line for every
      # line in the segment file. The element datum is a placeholder
      # for key-only lines.
      #
      # @yieldparam key_d [CartonDb::Datum::Base]
      # @yieldparam element_d [CartonDb::Datum::Base]
      # @yieldparam line [String] The raw line, including terminator.
      def each_entry_element_line
        return if empty?
        each_line do |line|
          # Only the line terminator is removed. Stripping all
          # whitespace would corrupt keys/elements that start or end
          # with spaces and would turn an empty-string element
          # ("key\t\n") into a key-only line.
          esc_key, esc_element = line.chomp.split("\t", 2)
          # An empty key is stored as an empty field, which split
          # reports as no fields at all.
          esc_key ||= ''
          key_d = CartonDb::Datum.for_escaped(esc_key)
          element_d = CartonDb::Datum.for_escaped(
            esc_element, auto_placeholder: true)
          yield key_d, element_d, line
        end
      end

      # Opens the segment file for appending, creating its directory
      # first if needed, and yields the IO.
      def open_append
        touch_dir
        File.open filename, 'a', **FILE_ENCODING_OPTS do |io|
          yield io
        end
      end

      # Opens the segment file for writing (truncating any existing
      # content), creating its directory first if needed, and yields
      # the IO.
      def open_overwrite
        touch_dir
        File.open filename, 'w', **FILE_ENCODING_OPTS do |io|
          yield io
        end
      end

      private

      # Memoized File::Stat of the segment file, or nil when the file
      # does not exist. `defined?` is used so a nil result is cached too.
      def stat
        return @stat if defined? @stat
        return @stat = nil unless File.file?(filename)
        @stat = File.stat(filename)
      end

      def open_read
        File.open filename, 'r', **FILE_ENCODING_OPTS do |io|
          yield io
        end
      end

      # Creates the segment's directory if it does not exist yet.
      def touch_dir
        dir = File.dirname(filename)
        return if File.directory?(dir)
        FileUtils.mkdir dir
      end

      def each_line
        open_read do |io|
          io.each_line do |line|
            yield line
          end
        end
      end

    end

  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
module CartonDb
  class ListMapDb

    # A subdirectory of the database directory that holds segment
    # files. Group directories are named with a 1-3 digit number.
    class SegmentGroup

      # Whole-string match for group directory names such as "42".
      # \A and \z are used because ^ and $ match at line boundaries and
      # would accept names containing embedded newlines.
      GROUP_NAME_PATTERN = /\A\d{1,3}\z/

      # Locates the group for a group hashcode, using the low 7 bits
      # of its last byte as the group number.
      #
      # @param db_name [String] The database directory path.
      # @param hashcode [String] The group hashcode bytes.
      # @return [SegmentGroup]
      def self.in_db_for_hashcode(db_name, hashcode)
        group_hash_part = hashcode[-1]
        group_num = group_hash_part.bytes[0] & 127
        new(db_name, group_num.to_s)
      end

      # Yields a SegmentGroup for every numerically named directory
      # directly inside the database directory.
      #
      # @param db_name [String] The database directory path.
      # @yieldparam seg_group [SegmentGroup]
      def self.each_in_db(db_name)
        Dir.entries(db_name).each do |de|
          next unless de =~ GROUP_NAME_PATTERN
          seg_group = new(db_name, de)
          next unless File.directory?(seg_group.directory_path)
          yield seg_group
        end
      end

      attr_accessor :db_name, :name_part
      private :db_name=, :name_part=

      # @param db_name [String] The database directory path.
      # @param name_part [String] The group directory's base name.
      def initialize(db_name, name_part)
        self.db_name = db_name
        self.name_part = name_part
      end

      # @return [String] Path of the group directory (memoized).
      def directory_path
        @directory_path ||= File.join(db_name, name_part)
      end
    end

  end
end
|
@@ -0,0 +1,312 @@
|
|
1
|
+
# -*- coding: UTF-8 -*-
|
2
|
+
require 'forwardable'
|
3
|
+
require 'fileutils'
|
4
|
+
require 'carton_db/list_map_db/segment'
|
5
|
+
require 'carton_db/list_map_db/segment_group'
|
6
|
+
|
7
|
+
module CartonDb

  # A map with string keys and lists of strings as contents.
  #
  # This is suitable for storing a total number of elements as
  # large as the low millions, with each entry containing a
  # number of elements in the hundreds or low thousands.
  class ListMapDb
    extend Forwardable
    include Enumerable

    FILE_ENCODING_OPTS = {
      internal_encoding: Encoding::UTF_8,
      external_encoding: Encoding::UTF_8
    }.freeze

    # Initializes an instance that interacts with the database
    # identified by the given name, which is the full path to a
    # directory in the filesystem.
    #
    # The directory for the database will be created if it does
    # not already exist.
    #
    # This is a very fast operation.
    #
    # @param name [String] The full path of the directory in the
    #   filesystem in which the data is stored or will be stored.
    def initialize(name)
      self.name = name
      FileUtils.mkdir name unless File.directory?(name)
    end

    # Creates a new entry or replaces the contents of the
    # existing entry identified by the given key.
    #
    # This is a fairly fast operation, but can be somewhat
    # slower in a large database. Note that appending and
    # concatenating may be faster than assignment.
    #
    # @param key [String] The key identifying the entry.
    # @param content [Array<String>] An array or other
    #   enumerable collection of 0 or more list element string
    #   values to be stored.
    def []=(key, content)
      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)
      if segment.empty?
        # Nothing to rewrite, so the entry can simply be appended.
        concat_elements key_d.plain, content
      else
        replace_entry_in_file segment, key_d, content
      end
    end

    # Returns the content of the entry identified by the given
    # key or nil if no such entry exists.
    #
    # This operation is fast, but may be slower for a larger
    # database.
    #
    # @param key [String] The key identifying the entry.
    # @return [Array<String>] if a matching entry exists.
    # @return [nil] if no matching entry exists.
    def [](key)
      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)

      ary = nil
      segment.each_entry_element_line do |kd, ed, _line|
        next unless kd == key_d
        ary ||= []
        ary << ed.plain unless ed.placeholder?
      end
      ary
    end

    # Returns true if an entry exists for the given key.
    #
    # @param key [String] The key identifying the entry.
    # @return [Boolean]
    def key?(key)
      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)

      segment.each_entry_element_line do |kd, _ed, _line|
        # Must be a comparison; an assignment here would report every
        # key as present whenever its segment has any content.
        return true if kd == key_d
      end
      false
    end

    # Returns true if the map has no entries.
    #
    # This is a fairly fast operation.
    #
    # @return [Boolean]
    def empty?
      ListMapDb::Segment.each_in_db name do |segment|
        return false unless segment.empty?
      end
      true
    end

    # Returns the number of entries in the map.
    #
    # This operation scans the entire database to count the keys,
    # so it can be a slow operation if the database is large.
    #
    # @return [Integer]
    def count
      key_count = 0
      # A plain Hash serves as the set of distinct key datums seen in
      # one segment, so no `require 'set'` is needed.
      seen_key_datums = {}
      ListMapDb::Segment.each_in_db name do |segment|
        next if segment.empty?
        seen_key_datums.clear
        segment.each_entry_element_line do |kd, _ed, _line|
          seen_key_datums[kd] = true
        end
        key_count += seen_key_datums.length
      end
      key_count
    end

    # Creates an entry with an empty list as its content if no
    # entry exists for the given key. Has no effect on the content
    # of the entry if it already exists.
    #
    # Using :small optimization (the default), the operation may
    # be slow for a large database since it checks for the
    # existence of the key before marking its existence.
    #
    # Using :fast optimization, the operation will always be
    # fast, but it will add a mark for the existence of the key
    # even if that is redundant, thus adding to the size of the
    # stored data.
    #
    # @param key [String] The key identifying the entry.
    # @param optimization [:small, :fast] The optimization mode.
    # @raise [ArgumentError] if optimization is neither :small nor
    #   :fast.
    def touch(key, optimization: :small)
      if optimization != :small && optimization != :fast
        raise ArgumentError, "Invalid optimization value. Must be :small or :fast"
      end

      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)

      if optimization == :small && segment.content?
        segment.each_entry_element_line do |kd, _ed, _line|
          return if kd == key_d
        end
      end

      # A line holding only the key marks the entry as existing.
      segment.open_append do |io|
        io << key_d.escaped << "\n"
      end
    end

    # Removes all entries from the database, leaving it empty.
    #
    # This operation can be somewhat slow for a large database.
    #
    def clear
      ListMapDb::Segment.clear_all_in_db name
    end

    # Yields each entry in the database as a key/array pair.
    #
    # This operation can take a lot of total time for a large
    # database, but yields entries pretty rapidly regardless
    # of database size.
    #
    # @yieldparam key [String] The key of the entry.
    # @yieldparam array [Array<String>] The elements of the list
    #   entry's content.
    # @return [Enumerator] if no block is given.
    def each
      return enum_for(:each) unless block_given?

      ListMapDb::Segment.each_in_db name do |segment|
        segment.each_entry do |key, content|
          yield key, content
        end
      end
    end

    # Removes an entry from the database. Has no effect if the
    # entry already does not exist.
    #
    # This operation is fast, but may be slower for a larger
    # database.
    #
    # @param key [String] The key identifying the entry to be
    #   deleted.
    def delete(key)
      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)
      return if segment.empty?

      # Copy every line belonging to other keys into a ".new" sibling
      # file, then swap it into place.
      new_segment = ListMapDb::Segment.new(
        segment.segment_group, "#{segment.segment_filename}.new")
      new_segment.open_overwrite do |io|
        segment.each_entry_element_line do |kd, _ed, line|
          io << line unless kd == key_d
        end
      end

      File.unlink segment.filename
      File.rename new_segment.filename, segment.filename
    end

    # Appends an element string to the content of an entry.
    # If the entry does not already exist, then one is
    # created with a list containing the given element as its
    # content.
    #
    # Since this will only append text to a file within the
    # database, it is a very fast operation.
    #
    # @param key [String] The key identifying the entry.
    # @param element [String] The element to be appended to the
    #   content of the entry.
    def append_element(key, element)
      key_d = CartonDb::Datum.for_plain(key)
      element_d = CartonDb::Datum.for_plain(element)
      segment = segment_containing(key_d)
      FileUtils.mkpath File.dirname(segment.filename)
      segment.open_append do |io|
        io << "#{key_d.escaped}\t#{element_d.escaped}\n"
      end
    end

    # Appends any number of element strings to the content of an
    # entry. If the entry does not already exist, then one is
    # created with the given list as its content.
    #
    # Appending an empty or nil collection is equivalent to
    # invoking `db.touch key, optimization: :small`.
    #
    # When appending a non-empty collection, this is a fast
    # operation since it only appends text to an existing file.
    #
    # @param key [String] The key identifying the entry.
    # @param elements [Array<String>] An array or other
    #   enumerable collection of elements to append.
    def concat_elements(key, elements)
      if empty_collection?(elements)
        touch key, optimization: :small
      else
        concat_any_elements key, elements
      end
    end

    # Appends any number of element strings to the content of an
    # entry. If the given elements collection is empty or nil,
    # then no entry is created if one did not exist previously.
    #
    # This is a fast operation since it only appends text to a
    # file.
    #
    # @param key [String] The key identifying the entry.
    # @param elements [Array<String>, nil] An array or other
    #   enumerable collection of elements to append.
    def concat_any_elements(key, elements)
      # nil is documented as acceptable; without this guard it would
      # fail with NoMethodError on `each`.
      return if elements.nil?

      key_d = CartonDb::Datum.for_plain(key)
      segment = segment_containing(key_d)
      segment.open_append do |io|
        elements.each do |element|
          element_d = CartonDb::Datum.for_plain(element)
          io << "#{key_d.escaped}\t#{element_d.escaped}\n"
        end
      end
    end

    private

    attr_accessor :name

    # Rewrites the segment file so that the entry for key_d holds
    # exactly the given content, leaving other entries untouched.
    def replace_entry_in_file(segment, key_d, content)
      new_segment = ListMapDb::Segment.new(
        segment.segment_group, "#{segment.segment_filename}.new")
      new_segment.open_overwrite do |nf_io|
        segment.each_entry_element_line do |kd, _ed, line|
          nf_io.print line unless kd == key_d
        end
        element_count = 0
        # Treat nil like an empty collection, matching what []= does
        # when the segment is empty.
        (content || []).each do |element|
          element_d = CartonDb::Datum.for_plain(element)
          element_count += 1
          nf_io.puts "#{key_d.escaped}\t#{element_d.escaped}"
        end
        if element_count.zero?
          # Key-only line so the now-empty entry still exists.
          nf_io.puts key_d.escaped
        end
      end
      File.unlink segment.filename
      File.rename new_segment.filename, segment.filename
    end

    # @param key [CartonDb::Datum::Base] The key datum.
    # @return [ListMapDb::Segment] The segment selected by the key's
    #   storage hashcode.
    def segment_containing(key)
      ListMapDb::Segment.in_db_for_hashcode(
        name, key.storage_hashcode
      )
    end

    # True for nil or for a collection that yields no elements.
    # `any? { true }` is used so that nil/false elements still count
    # as occupying the collection.
    def empty_collection?(collection)
      return true if collection.nil?
      occupied = collection.any? { true }
      ! occupied
    end

  end

end
|
data/lib/carton_db.rb
ADDED
data/tmp/.gitkeep
ADDED
File without changes
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: carton_db
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Steve Jorgensen
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-05-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.14'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.14'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description:
|
56
|
+
email:
|
57
|
+
- stevej@stevej.name
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".rspec"
|
64
|
+
- ".ruby-version"
|
65
|
+
- ".travis.yml"
|
66
|
+
- Gemfile
|
67
|
+
- LICENSE.txt
|
68
|
+
- README.md
|
69
|
+
- Rakefile
|
70
|
+
- bin/console
|
71
|
+
- bin/setup
|
72
|
+
- carton_db.gemspec
|
73
|
+
- lib/carton_db.rb
|
74
|
+
- lib/carton_db/datum.rb
|
75
|
+
- lib/carton_db/escaping.rb
|
76
|
+
- lib/carton_db/list_map_db.rb
|
77
|
+
- lib/carton_db/list_map_db/segment.rb
|
78
|
+
- lib/carton_db/list_map_db/segment_group.rb
|
79
|
+
- lib/carton_db/version.rb
|
80
|
+
- tmp/.gitkeep
|
81
|
+
homepage: https://github.com/stevecj/carton_db.js
|
82
|
+
licenses:
|
83
|
+
- MIT
|
84
|
+
metadata: {}
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options: []
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 2.6.10
|
102
|
+
signing_key:
|
103
|
+
specification_version: 4
|
104
|
+
summary: A pure Ruby key/value data storage system where the values may consist of
|
105
|
+
simple data structures.
|
106
|
+
test_files: []
|