anansi 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +63 -0
- data/LICENSE +21 -0
- data/README.md +56 -0
- data/lib/anansi/version.rb +5 -0
- data/lib/anansi.rb +210 -0
- data/sorbet/config +3 -0
- data/sorbet/rbi/gems/ast@2.4.2.rbi +584 -0
- data/sorbet/rbi/gems/diff-lcs@1.5.0.rbi +8 -0
- data/sorbet/rbi/gems/netrc@0.11.0.rbi +161 -0
- data/sorbet/rbi/gems/parallel@1.22.1.rbi +277 -0
- data/sorbet/rbi/gems/parser@3.1.3.0.rbi +6208 -0
- data/sorbet/rbi/gems/rbi@0.0.16.rbi +3008 -0
- data/sorbet/rbi/gems/spoom@1.1.13.rbi +2371 -0
- data/sorbet/rbi/gems/sqlite3@1.5.3.rbi +1600 -0
- data/sorbet/rbi/gems/tapioca@0.10.3.rbi +2979 -0
- data/sorbet/rbi/gems/thor@1.2.1.rbi +3919 -0
- data/sorbet/rbi/gems/unparser@0.6.5.rbi +8 -0
- data/sorbet/rbi/gems/webrick@1.7.0.rbi +2527 -0
- data/sorbet/rbi/gems/yard-sorbet@0.7.0.rbi +389 -0
- data/sorbet/rbi/gems/yard@0.9.28.rbi +16666 -0
- data/sorbet/tapioca/config.yml +13 -0
- data/sorbet/tapioca/require.rb +4 -0
- metadata +72 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5485f4a8cdebb188f7216267be755f55ab898d5259b8e0cc0d14926779a9c4e0
|
4
|
+
data.tar.gz: 8c63b03367addfdef4d41bf62e835b358384612b7c63e2413fa03b4b943740f2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bfe9cb9fb95cbf7dc6a9edbfdf29542bcd2604c6788071f1c229435daf58620341cc1393017238c03280f8d17d92e0e64fe589624ebca7d50887d81796dad250
|
7
|
+
data.tar.gz: 70a253f8c9077f3603522b503ab3171132d8989a127b1641d629cf7003e8fa45131e86d2ff1d0520d6638e1bcc92fd277dc753b5134638ebf5a47ce1a95cdcce
|
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source "https://rubygems.org"
|
4
|
+
|
5
|
+
# Specify your gem's dependencies in anansi.gemspec
|
6
|
+
gemspec
|
7
|
+
|
8
|
+
gem 'sorbet', group: :development
|
9
|
+
gem 'sorbet-runtime'
|
10
|
+
gem 'sqlite3'
|
11
|
+
gem 'tapioca', require: false, group: :development
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
anansi (0.0.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
ast (2.4.2)
|
10
|
+
diff-lcs (1.5.0)
|
11
|
+
netrc (0.11.0)
|
12
|
+
parallel (1.22.1)
|
13
|
+
parser (3.1.3.0)
|
14
|
+
ast (~> 2.4.1)
|
15
|
+
rbi (0.0.16)
|
16
|
+
ast
|
17
|
+
parser (>= 2.6.4.0)
|
18
|
+
sorbet-runtime (>= 0.5.9204)
|
19
|
+
unparser
|
20
|
+
sorbet (0.5.10568)
|
21
|
+
sorbet-static (= 0.5.10568)
|
22
|
+
sorbet-runtime (0.5.10568)
|
23
|
+
sorbet-static (0.5.10568-universal-darwin-21)
|
24
|
+
sorbet-static-and-runtime (0.5.10568)
|
25
|
+
sorbet (= 0.5.10568)
|
26
|
+
sorbet-runtime (= 0.5.10568)
|
27
|
+
spoom (1.1.13)
|
28
|
+
sorbet (>= 0.5.9204)
|
29
|
+
sorbet-runtime (>= 0.5.9204)
|
30
|
+
thor (>= 0.19.2)
|
31
|
+
sqlite3 (1.5.3-arm64-darwin)
|
32
|
+
tapioca (0.10.3)
|
33
|
+
bundler (>= 1.17.3)
|
34
|
+
netrc (>= 0.11.0)
|
35
|
+
parallel (>= 1.21.0)
|
36
|
+
rbi (~> 0.0.0, >= 0.0.16)
|
37
|
+
sorbet-static-and-runtime (>= 0.5.9892)
|
38
|
+
spoom (~> 1.1.0, >= 1.1.11)
|
39
|
+
thor (>= 1.2.0)
|
40
|
+
yard-sorbet
|
41
|
+
thor (1.2.1)
|
42
|
+
unparser (0.6.5)
|
43
|
+
diff-lcs (~> 1.3)
|
44
|
+
parser (>= 3.1.0)
|
45
|
+
webrick (1.7.0)
|
46
|
+
yard (0.9.28)
|
47
|
+
webrick (~> 1.7.0)
|
48
|
+
yard-sorbet (0.7.0)
|
49
|
+
sorbet-runtime (>= 0.5)
|
50
|
+
yard (>= 0.9)
|
51
|
+
|
52
|
+
PLATFORMS
|
53
|
+
arm64-darwin-21
|
54
|
+
|
55
|
+
DEPENDENCIES
|
56
|
+
anansi!
|
57
|
+
sorbet
|
58
|
+
sorbet-runtime
|
59
|
+
sqlite3
|
60
|
+
tapioca
|
61
|
+
|
62
|
+
BUNDLED WITH
|
63
|
+
2.3.22
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2023 Sutro Labs
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Anansi - A hybrid Ruby Set using memory and disk (via SQLite3) for large tasks
|
2
|
+
|
3
|
+
> He lifted the pot over his head and threw it on the ground. The pot crashed on the ground and the wisdom blew far and wide all over the earth. And this is how wisdom came to the world (or your disk).
|
4
|
+
|
5
|
+
**- Kiren Babal** ([Anansi and the Wisdom Pot](https://www.differenttruths.com/literature/short-story/anansi-and-the-wisdom-pot/))
|
6
|
+
|
7
|
+
## Why?
|
8
|
+
|
9
|
+
A Ruby Set in memory isn't great for huge tasks. But a disk-based Set is too slow to use for everything. We need the best of both worlds.
|
10
|
+
|
11
|
+
## What?
|
12
|
+
|
13
|
+
Data structures that use constant memory by spilling to disk after crossing a size threshold.
|
14
|
+
|
15
|
+
Currently the only supported data structure is `AppendSet`.
|
16
|
+
|
17
|
+
## Installation
|
18
|
+
|
19
|
+
Install the gem and add to the application's Gemfile by executing:
|
20
|
+
|
21
|
+
$ bundle add anansi
|
22
|
+
|
23
|
+
If bundler is not being used to manage dependencies, install the gem by executing:
|
24
|
+
|
25
|
+
$ gem install anansi
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
Add items to an `AppendSet`:
|
29
|
+
```ruby
|
30
|
+
append_set = Anansi::AppendSet.new
|
31
|
+
append_set.add(['foo', 'bar', 'buzz'])
|
32
|
+
```
|
33
|
+
|
34
|
+
Check if an item exists in an `AppendSet`:
|
35
|
+
```ruby
|
36
|
+
append_set.include? 'foo'
|
37
|
+
```
|
38
|
+
|
39
|
+
Get the size of an `AppendSet`:
|
40
|
+
```ruby
|
41
|
+
append_set.size
|
42
|
+
```
|
43
|
+
|
44
|
+
(If you need other data structures, stay tuned and [watch our org](https://github.com/sutrolabs).)
|
45
|
+
|
46
|
+
## License
|
47
|
+
|
48
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
49
|
+
|
50
|
+
Feedback
|
51
|
+
--------
|
52
|
+
[Source code available on GitHub](https://github.com/sutrolabs/anansi). Feedback and pull requests are greatly appreciated. Let us know if we can improve this.
|
53
|
+
|
54
|
+
From
|
55
|
+
-----------
|
56
|
+
:wave: The folks at [Census](http://getcensus.com) originally put this together. Have data? We'll sync your data warehouse with your CRM and the customer success apps critical to your team.
|
data/lib/anansi.rb
ADDED
@@ -0,0 +1,210 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# typed: strict
|
4
|
+
|
5
|
+
require "set"
require "tempfile"

require "sqlite3"
require "sorbet-runtime"
|
7
|
+
|
8
|
+
module Anansi
|
9
|
+
# An append-only implementation of parts of Set from the stdlib that uses
|
10
|
+
# constant memory regardless of the set size by spilling data to disk (using
|
11
|
+
# SQLite) once the set size crosses some threshold.
|
12
|
+
class AppendSet
|
13
|
+
# Common interface shared by the in-memory (Set) and on-disk (SQLite)
# backends, and by AppendSet itself. Implementers report their size, answer
# membership queries and accept batches of new items.
module Store
  extend T::Sig
  extend T::Helpers
  extend T::Generic

  interface!

  # Type of the items held by the store.
  Item = type_member { { upper: Object } }

  # @return [Integer] number of items currently stored
  T::Sig::WithoutRuntime.sig { abstract.returns(Integer) }
  def size; end

  # @param item [Item] the item to look up
  # @return [Boolean] whether +item+ is present in the store
  T::Sig::WithoutRuntime.sig { abstract.params(item: Item).returns(T::Boolean) }
  def include?(item); end

  # @param items [Enumerable<Item>] items to add to the store
  # @return [void]
  T::Sig::WithoutRuntime.sig { abstract.params(items: T::Enumerable[Item]).void }
  def add(items); end
end
|
32
|
+
|
33
|
+
# Store implementation that keeps every item in a plain in-memory Set.
class SetStore
  extend T::Sig
  extend T::Generic

  Item = type_member { { upper: Object } }

  include Store

  # Starts out with an empty Set.
  T::Sig::WithoutRuntime.sig { void }
  def initialize
    @set = T.let(Set.new, T::Set[Item])
  end

  # Exposes the stored items. This is the underlying Set itself, not a copy.
  #
  # @return [Enumerable<Item>]
  T::Sig::WithoutRuntime.sig { returns(T::Enumerable[Item]) }
  def all
    @set
  end

  # @return [Integer] number of items in the Set
  T::Sig::WithoutRuntime.sig { override.returns(Integer) }
  def size
    @set.size
  end

  # @param item [Item] the item to look up
  # @return [Boolean] whether the Set contains +item+
  T::Sig::WithoutRuntime.sig { override.params(item: Item).returns(T::Boolean) }
  def include?(item)
    @set.include?(item)
  end

  # Adds each of +items+ to the Set, one at a time.
  #
  # @param items [Enumerable<Item>]
  # @return [void]
  T::Sig::WithoutRuntime.sig { override.params(items: T::Enumerable[Item]).void }
  def add(items)
    items.each do |element|
      @set << element
    end
  end
end
|
67
|
+
|
68
|
+
# Store implementation backed by a temporary SQLite database. Each item is
# serialized with Marshal, Base64-encoded and stored as the text primary key
# of a single +items+ table, so membership is decided by the serialized form.
class SQLiteStore
  extend T::Sig
  extend T::Generic

  Item = type_member { { upper: Object } }

  include Store

  # Creates the backing temp file and database, tunes SQLite for throwaway
  # data, creates the +items+ table and prepares the statements used by
  # +size+, +add+ and +include?+.
  T::Sig::WithoutRuntime.sig { void }
  def initialize
    # Both +@tempfile+ and +@sqlite+ get automatically cleaned up (closed,
    # deleted from disk) when this object goes out of scope. Worst-case, files
    # on disk should get reaped at process exit or dyno reboot.
    @tempfile = T.let(Tempfile.new(%w[AppendSetSQLiteStore .sqlite3]), Tempfile)

    # The file is unlinked as soon as the database has been opened so that the
    # OS is guaranteed to remove it from disk on process exit. In the original
    # author's testing, Ruby's guarantee of unlinking the file in the
    # finalizer didn't always seem to succeed. +unlink+ is safe here because
    # +@sqlite+ holds an open file handle: on a unix-like OS a file with an
    # open handle isn't actually deleted until that handle is closed (though
    # it does become invisible to commands like `ls`).
    @sqlite = T.let(SQLite3::Database.new(@tempfile), SQLite3::Database)
    @tempfile.unlink

    # This is a non-persistent db so make it go fast.
    # Partially based on https://github.com/avinassh/fast-sqlite3-inserts
    @sqlite.execute "pragma synchronous=off"
    @sqlite.execute "pragma journal_mode=off"
    @sqlite.execute "pragma locking_mode=exclusive"
    @sqlite.execute "pragma temp_store=memory"

    # The single table that will store data
    @sqlite.execute "create table items (item text primary key)"

    # Prepare our three statements
    @count_statement = T.let(@sqlite.prepare("select count(item) from items"), SQLite3::Statement)
    @add_statement =
      T.let(@sqlite.prepare("insert into items values (?) on conflict (item) do nothing"), SQLite3::Statement)
    @include_statement =
      T.let(@sqlite.prepare("select exists(select 1 from items where item = ?)"), SQLite3::Statement)
  end

  # @return [Integer] number of rows in the +items+ table
  T::Sig::WithoutRuntime.sig { override.returns(Integer) }
  def size
    @count_statement.execute.first.first
  end

  # @param item [Item] the item to look up
  # @return [Boolean] whether a row with the serialized form of +item+ exists
  T::Sig::WithoutRuntime.sig { override.params(item: Item).returns(T::Boolean) }
  def include?(item)
    @include_statement.execute(marshal(item)).first.first == 1
  end

  # Inserts every item inside a single transaction, which should improve
  # bulk insert performance. Items that are already present are skipped by
  # the +on conflict ... do nothing+ clause of the insert statement.
  #
  # If serializing or inserting an item raises, the rows inserted so far are
  # still committed and the error propagates. This mirrors SetStore#add, which
  # also keeps the items added before a failure.
  #
  # @param items [Enumerable<Item>]
  # @return [void]
  T::Sig::WithoutRuntime.sig { override.params(items: T::Enumerable[Item]).void }
  def add(items)
    @sqlite.execute "begin"
    begin
      items.each { |item| @add_statement.execute(marshal(item)) }
    ensure
      # Always close the transaction; otherwise an exception above would leave
      # it open and every later +add+ would fail on a nested "begin". We commit
      # rather than roll back because journal_mode=off leaves ROLLBACK
      # undefined in SQLite.
      @sqlite.execute "commit"
    end
  end

  private

  # Serializes +item+ into the text key stored in the database.
  #
  # Array#pack("m") produces the same output as Base64.encode64, without
  # depending on the base64 library (which this file never requires).
  #
  # @param item [Item]
  # @return [String] Base64 text of the Marshal dump of +item+
  T::Sig::WithoutRuntime.sig { params(item: Item).returns(String) }
  def marshal(item)
    [Marshal.dump(item)].pack("m")
  end
end
|
140
|
+
|
141
|
+
# AppendSet is itself a Store: it acts as a facade that forwards to either a
# SetStore or a SQLiteStore. The logic for spilling from memory to disk lives
# here too, along with an optimization for +any?+.
extend T::Sig
extend T::Generic
include Store
Item = type_member { { upper: Object } }

# Maximum number of items handed to the backing store in one go. The value is
# fairly arbitrary, but it seems to correlate well with efficient sqlite
# transaction performance, and it is granular enough for checking whether the
# set should spill.
ADD_BATCH_SIZE = 50_000

# Item count at which the set moves from memory to disk. Also fairly
# arbitrary: assuming each key takes ~200 bytes in memory (a conservative
# estimate), the set never burns more than about 100MB of RAM before going to
# disk. A byte threshold would be preferable to an item threshold, but there
# was no easy / cheap way to get that in Ruby.
SPILL_THRESHOLD = 500_000
|
159
|
+
|
160
|
+
# Creates an empty set that starts out backed by an in-memory SetStore and
# has not yet spilled to disk.
T::Sig::WithoutRuntime.sig { void }
def initialize
  @spilled = T.let(false, T::Boolean)
  @store = T.let(SetStore[Item].new, Store[Item])
end
|
165
|
+
|
166
|
+
# Reports whether the set holds at least one item.
#
# @return [Boolean] true when the set is non-empty
T::Sig::WithoutRuntime.sig { returns(T::Boolean) }
def any?
  # We must be non-empty if we've spilled to SQLite, so avoid an expensive
  # call to the DB to check.
  return true if @spilled

  # Compare the size instead of calling Enumerable#any? on the items: without
  # a block, any? tests element truthiness, so a set holding only nil or
  # false would wrongly be reported as empty.
  @store.size.positive?
end
|
177
|
+
|
178
|
+
# Number of items in the set, as reported by whichever store currently backs
# it.
#
# @return [Integer]
T::Sig::WithoutRuntime.sig { override.returns(Integer) }
def size
  @store.size
end
|
182
|
+
|
183
|
+
# Membership test, answered by whichever store currently backs the set.
#
# @param item [Item] the item to look up
# @return [Boolean] whether +item+ has been added
T::Sig::WithoutRuntime.sig { override.params(item: Item).returns(T::Boolean) }
def include?(item)
  @store.include?(item)
end
|
187
|
+
|
188
|
+
# Adds +items+ to the set in slices of at most ADD_BATCH_SIZE, checking after
# each slice whether the set should spill to disk.
#
# @param items [Enumerable<Item>] items to add
# @return [void]
T::Sig::WithoutRuntime.sig { override.params(items: T::Enumerable[Item]).void }
def add(items)
  items.each_slice(ADD_BATCH_SIZE) do |batch|
    @store.add(batch)
    spill_if_needed!
  end
end
|
195
|
+
|
196
|
+
private
|
197
|
+
|
198
|
+
# Moves the set from memory to disk once it holds SPILL_THRESHOLD items or
# more. Does nothing if the set has already spilled or is still below the
# threshold.
#
# The new store only replaces the old one after every item has been copied,
# so a failure during the copy leaves the in-memory store in place.
T::Sig::WithoutRuntime.sig { void }
def spill_if_needed!
  return if @spilled || @store.size < SPILL_THRESHOLD

  in_memory = T.cast(@store, SetStore[Item])
  # Instantiate with the type argument, matching how SetStore is created.
  on_disk = SQLiteStore[Item].new
  on_disk.add(in_memory.all)
  @store = on_disk
  @spilled = true
end
|
209
|
+
end
|
210
|
+
end
|
data/sorbet/config
ADDED