jor 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Gemfile +2 -0
- data/LICENCE +22 -0
- data/README.md +319 -0
- data/Rakefile +10 -0
- data/config.ru +2 -0
- data/jor.gemspec +29 -0
- data/lib/jor.rb +14 -0
- data/lib/jor/collection.rb +449 -0
- data/lib/jor/doc.rb +62 -0
- data/lib/jor/errors.rb +96 -0
- data/lib/jor/server.rb +52 -0
- data/lib/jor/storage.rb +88 -0
- data/lib/jor/version.rb +3 -0
- data/test/test_helper.rb +11 -0
- data/test/test_helpers/fixtures.rb +71 -0
- data/test/unit/collection_test.rb +884 -0
- data/test/unit/doc_test.rb +139 -0
- data/test/unit/server_test.rb +113 -0
- data/test/unit/storage_test.rb +171 -0
- data/test/unit/test_case.rb +21 -0
- metadata +170 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENCE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2013 3scale networks S.L.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,319 @@
|
|
1
|
+
|
2
|
+
# JOR
|
3
|
+
|
4
|
+
## Description
|
5
|
+
|
6
|
+
**JOR** is the acronym for **J**SON **o**ver **R**edis.
|
7
|
+
|
8
|
+
This project provides persistent storage for objects that can be serialized in JSON.
|
9
|
+
|
10
|
+
JOR uses Redis as the backend data-store, furthermore, it provides a MongoDB-like query language for fast retrieval that is heavily inspired by the API of MongoDB.
|
11
|
+
|
12
|
+
Why not use MongoDB right away? There is no good reason :) JOR is leaner but only offers a fraction of the functionality. I would not consider JOR for the backend
|
13
|
+
of any large system but go straight to MongoDB. However, if your project is small enough, or it has to be embedded, or if you are already using Redis for some other task you might
|
14
|
+
find it interesting.
|
15
|
+
|
16
|
+
JOR aims to only offer:
|
17
|
+
|
18
|
+
* CRUD for objects serialized as JSON documents, and
|
19
|
+
* a JSON-based query language to find the documents matching the constraints of the query document
|
20
|
+
|
21
|
+
For instance, the document (in this case a ruby Hash)
|
22
|
+
|
23
|
+
doc = {
|
24
|
+
"_id" => 42,
|
25
|
+
"name" => "bon menjar",
|
26
|
+
"stars" => 3,
|
27
|
+
"cuisine" => ["asian", "japanese"],
|
28
|
+
"address" => {
|
29
|
+
"address" => "Main St 100",
|
30
|
+
"city" => "Ann Arbor",
|
31
|
+
"zipcode" => "08104"
|
32
|
+
},
|
33
|
+
"description" => "very long description that we might not want to index",
|
34
|
+
"wines" => [
|
35
|
+
{
|
36
|
+
"name" => "wine1",
|
37
|
+
"year" => 1998,
|
38
|
+
"type" => ["garnatxa", "merlot"]
|
39
|
+
},
|
40
|
+
{
|
41
|
+
"name" => "wine2",
|
42
|
+
"year" => 2009,
|
43
|
+
"type" => ["syrah", "merlot"]
|
44
|
+
}
|
45
|
+
]
|
46
|
+
}
|
47
|
+
|
48
|
+
|
49
|
+
can be inserted like this:
|
50
|
+
|
51
|
+
jor.create_collection("restaurants")
|
52
|
+
jor.restaurants.insert(doc)
|
53
|
+
|
54
|
+
and retrieved by using a query (that is also a document). For instance:
|
55
|
+
|
56
|
+
jor.restaurants.find({})
|
57
|
+
|
58
|
+
will return all documents in the `restaurants` collection.
|
59
|
+
|
60
|
+
jor.restaurants.find({"_id" => 42})
|
61
|
+
|
62
|
+
will return all documents whose `_id` is 42. The query document can be arbitrarily complex:
|
63
|
+
|
64
|
+
jor.restaurants.find({
|
65
|
+
"stars" => {"$gte" => 3},
|
66
|
+
"wines" => {"type" =>
|
67
|
+
{"$in" => ["garnatxa", "syrah"]}}
|
68
|
+
})
|
69
|
+
|
70
|
+
the `find` will return restaurants with 3 or more stars that also have wines of type
|
71
|
+
garnatxa or syrah.
|
72
|
+
|
73
|
+
|
74
|
+
## Getting Started
|
75
|
+
|
76
|
+
### Installation
|
77
|
+
|
78
|
+
From rubygems
|
79
|
+
|
80
|
+
gem install jor
|
81
|
+
|
82
|
+
From source
|
83
|
+
|
84
|
+
git clone https://github.com/solso/jor.git
|
85
|
+
gem build jor.gemspec
|
86
|
+
gem install jor-*.gem
|
87
|
+
|
88
|
+
|
89
|
+
### Initialize
|
90
|
+
|
91
|
+
You can pass your own redis handler on instantiation…
|
92
|
+
|
93
|
+
require 'jor'
|
94
|
+
|
95
|
+
redis = Redis.new(:driver => :hiredis)
|
96
|
+
jor = JOR::Storage.new(redis)
|
97
|
+
|
98
|
+
if you don't, JOR will create a redis connection against redis' default (localhost, port 6379).
|
99
|
+
|
100
|
+
We advise using `hiredis` to improve performance.
|
101
|
+
|
102
|
+
JOR is not thread-safe at the current version.
|
103
|
+
|
104
|
+
## How to
|
105
|
+
|
106
|
+
### Collections
|
107
|
+
|
108
|
+
JOR allows you to have multiple **collections**. A collection is a container of documents that are meant to be grouped together. The operations `insert`, `delete` and `find` are scoped by a collection.
|
109
|
+
|
110
|
+
To create a collection:
|
111
|
+
|
112
|
+
jor.create_collection("restaurants")
|
113
|
+
|
114
|
+
Number of documents in collection:
|
115
|
+
|
116
|
+
jor.restaurants.count()
|
117
|
+
|
118
|
+
Id of the last document inserted in the collection, zero if empty
|
119
|
+
|
120
|
+
jor.restaurants.last_id()
|
121
|
+
|
122
|
+
Delete a collection with all its documents:
|
123
|
+
|
124
|
+
jor.destroy_collection("restaurants")
|
125
|
+
|
126
|
+
Delete all collections:
|
127
|
+
|
128
|
+
jor.destroy_all()
|
129
|
+
|
130
|
+
Collections can be created to have **auto_incremental** ids
|
131
|
+
|
132
|
+
jor.create_collection("events", :auto_increment => true)
|
133
|
+
|
134
|
+
A collection is either auto_incremental or not, cannot be both types at the same time. The default type is not auto-incremental.
|
135
|
+
|
136
|
+
Auto-incremental collections expect documents without the field `_id`, which will be assigned automatically upon insertion.
|
137
|
+
|
138
|
+
### Insert
|
139
|
+
|
140
|
+
To insert documents to a collection just do `insert`. The parameters can be either a `Hash` (will be stored as JSON encoded string), or an `Array` of `Hash` objects
|
141
|
+
|
142
|
+
jor.restaurants.insert(doc)
|
143
|
+
jor.restaurants.insert([doc1, doc2, …, docN])
|
144
|
+
|
145
|
+
There are only marginal benefits to using bulk insertion; it's mostly for convenience.
|
146
|
+
|
147
|
+
Every document stored in JOR has a field called **`_id`** that is unique within a collection. Trying to insert a document with an `_id` that already exists will raise an exception.
|
148
|
+
|
149
|
+
The `_id` must be a natural number (>=0), remember that you only need to define the field `_id` when dealing with collections that are not auto-incremental (the default case).
|
150
|
+
|
151
|
+
By the way, field names cannot start with `'` or with `$`. These characters are reserved.
|
152
|
+
|
153
|
+
There are two other special fields:
|
154
|
+
|
155
|
+
* `_created_at`: it is set when inserting a document, and should never change again. The field is indexed by default.
|
156
|
+
* `_updated_at`: it is set every time the document is updated. Also indexed by default.
|
157
|
+
|
158
|
+
Both times are Unix epoch times in seconds, stored as floats (the decimal part carries the sub-second precision).
|
159
|
+
|
160
|
+
#### Options:
|
161
|
+
|
162
|
+
`:excluded_fields_to_index`
|
163
|
+
|
164
|
+
If you know that you will never search by a field you might want to exclude it from the indexes. By default all fields are indexed. Adding fields to be excluded improves the efficiency of the insert.
|
165
|
+
|
166
|
+
For instance, if you want to exclude the field `description` from the index:
|
167
|
+
|
168
|
+
jor.restaurants.insert(doc,
|
169
|
+
{:excluded_fields_to_index => {"description" => true}})
|
170
|
+
|
171
|
+
Excluding fields is something to consider because the performance of the insert is linear in the number of fields of the document, O(#fields). An excluded field will not affect the content of the document, it will just make it not "findable".
|
172
|
+
|
173
|
+
We advise excluding any field that is a string that does not serve as a symbol or tag, since string fields can only be found by an exact match.
|
174
|
+
|
175
|
+
You can also exclude fields that are objects, for instance, if you do not want to index the types of wines:
|
176
|
+
|
177
|
+
jor.restaurants.insert(doc,
|
178
|
+
{:excluded_fields_to_index => {"wines" => {"type" => true}}})
|
179
|
+
|
180
|
+
Exclusion is per document based, it will only affect the document being inserted.
|
181
|
+
|
182
|
+
The field `_id` cannot be excluded.
|
183
|
+
|
184
|
+
Note that if you exclude a field from the index you will not be able to use that field on `find` operations. Search is only done over indexed fields. Unless explicitly stated all fields of the documents are indexed.
|
185
|
+
|
186
|
+
|
187
|
+
### Find
|
188
|
+
|
189
|
+
To retrieve the stored documents you only need to define a query document (also a Hash). The interface is inspired by the MongoDB query language, so if you are familiar with it, it will be straightforward.
|
190
|
+
|
191
|
+
jor.restaurants.find({})
|
192
|
+
|
193
|
+
will find all restaurants in the collection. The query document is all `{}`.
|
194
|
+
|
195
|
+
The query document `{"_id" => 42}` will only return one (or zero) restaurant documents. The one whose field `_id` has value 42.
|
196
|
+
|
197
|
+
The query document is a subset of the original stored document. For the fields defined, it will match the value. For those who are not defined, it will act as a wildcard.
|
198
|
+
|
199
|
+
Some `operators` are also available:
|
200
|
+
|
201
|
+
* For comparisons:
|
202
|
+
* **$gt**: greater than (>)
|
203
|
+
* **$gte**: greater than or equal (>=)
|
204
|
+
* **$lt**: lower than (<)
|
205
|
+
* **$lte**: lower than or equal (<=)
|
206
|
+
|
207
|
+
* For sets:
|
208
|
+
* **$in**: the value must be in the defined set
|
209
|
+
* **$all**: all values must be in the defined set
|
210
|
+
* **$not**: the value must not be in the defined set
|
211
|
+
|
212
|
+
The syntax to use the `operators` also follows a hash
|
213
|
+
|
214
|
+
jor.restaurants.find({
|
215
|
+
"stars" => {"$gte" => 3},
|
216
|
+
"wines" => {
|
217
|
+
"year" => 2008,
|
218
|
+
"type" => {
|
219
|
+
"$all" => ["garnatxa", "syrah"]
|
220
|
+
}
|
221
|
+
}
|
222
|
+
})
|
223
|
+
|
224
|
+
The query document will return all documents that match all 3 conditions:
|
225
|
+
|
226
|
+
The field `stars` must be greater than or equal to 3, they have at least one `wine` that is from `year` 2008 and the `type` of wine contains both garnatxa and syrah (turns out that wines can be mixed)
|
227
|
+
|
228
|
+
The following `find` returns all documents whose `_id` is on the list
|
229
|
+
|
230
|
+
jor.restaurants.find({"_id" => {"$in" => [1, 3, 5, 7, 11]}})
|
231
|
+
|
232
|
+
The result of the `find` is an Array of the documents (as Hash objects). The documents are returned by ascending `_id`.
|
233
|
+
|
234
|
+
The following `find` returns all documents except those whose `name` is "el bulli" or "can roca"
|
235
|
+
|
236
|
+
jor.restaurants.find({"name" => {"$not" => ["el bulli", "can roca"]}})
|
237
|
+
|
238
|
+
|
239
|
+
|
240
|
+
#### Options:
|
241
|
+
|
242
|
+
`find` accepts the following options that you can override:
|
243
|
+
|
244
|
+
* `:max_documents`, the maximum number of documents to be returned, by default 1000.
|
245
|
+
* `:only_ids`, return only the ids instead of the Hash, by default false. This is useful for joins.
|
246
|
+
* `:raw`, returns the documents as JSON encoded strings, which saves JOR from doing the final parsing of the JSON encoded strings. By default false.
|
247
|
+
* `:reversed`, returns the documents sorted by descending `_id`. Default is false.
|
248
|
+
|
249
|
+
|
250
|
+
### Delete
|
251
|
+
|
252
|
+
Deleting a document is basically like doing a `find` with the exception that all documents that meet the query document will be deleted.
|
253
|
+
|
254
|
+
jor.restaurants.delete({"_id" => 42})
|
255
|
+
|
256
|
+
Deletes the document with `_id` 42 (only one document by definition).
|
257
|
+
|
258
|
+
jor.restaurants.delete({"address" => {"zipcode" => "08104"}})
|
259
|
+
|
260
|
+
Deletes any document that the `zipcode` on its `address` is "08104".
|
261
|
+
|
262
|
+
|
263
|
+
### Update
|
264
|
+
|
265
|
+
Updating a document is also doing a `find` and doing a deep merge of the documents found and the source doc. For instance,
|
266
|
+
|
267
|
+
jor.restaurants.update({"_id" => 42}, {"address" => {"zipcode" => "08105"}})
|
268
|
+
|
269
|
+
Updates (or adds, if it did not exist) the `address` => `zipcode` of the document with `_id` 42.
|
270
|
+
|
271
|
+
jor.restaurants.update({"address" => {"zipcode" => "08105"}} , {"address" => {"zipcode" => "08106"}})
|
272
|
+
|
273
|
+
Updates all documents with `zipcode` "08105" to "08106". Updates are __not__ limited to a single document. The update applies
|
274
|
+
to all the documents that match the first argument of the `update` operation.
|
275
|
+
|
276
|
+
Indexes are managed in the same way as in an `insert` operation, so you can use `:excluded_fields_to_index` as an option.
|
277
|
+
|
278
|
+
If the update is a removal of a field, you must do it like this:
|
279
|
+
|
280
|
+
jor.restaurants.update({"_id" => 42}, {"address" => nil})
|
281
|
+
|
282
|
+
Note that this will remove all the fields that hang from `address`, whether it is a value, array or a hash.
|
283
|
+
|
284
|
+
### Misc Operations
|
285
|
+
|
286
|
+
You can find which document fields are indexed by doing,
|
287
|
+
|
288
|
+
jor.restaurants.indexes(doc_id)
|
289
|
+
|
290
|
+
This operation will return all fields that are indexed, but not all the indexes there are. Numeric fields, for instance, have two indexes.
|
291
|
+
|
292
|
+
Also, you can get sysadmin related info,
|
293
|
+
|
294
|
+
jor.info()
|
295
|
+
|
296
|
+
|
297
|
+
## Benchmarks
|
298
|
+
|
299
|
+
The thing is quite fast (thanks to Redis).
|
300
|
+
|
301
|
+
With a commodity laptop (macbook air) we can get between 300~400 documents inserts per second for the restaurant example used in this README.
|
302
|
+
|
303
|
+
The complexity of the `insert` and `find` operations depends on the number of fields of the document and the query document respectively. For the case of the restaurant document there are 16 fields.
|
304
|
+
|
305
|
+
Real benchmarks are more than welcomed.
|
306
|
+
|
307
|
+
## To Do
|
308
|
+
|
309
|
+
* normalize indexed strings (downcase, trimmed, something else) so that at least the == on a string is case insensitive. Tokenizing a string is easy to do, but can affect performance O(#fields + #words_on_string_fields). Perhaps as an option.
|
310
|
+
|
311
|
+
## Contribute
|
312
|
+
|
313
|
+
Fork the [project](http://github.com/solso/jor/) and send pull requests.
|
314
|
+
|
315
|
+
|
316
|
+
|
317
|
+
|
318
|
+
|
319
|
+
|
data/Rakefile
ADDED
data/config.ru
ADDED
data/jor.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib/', __FILE__)
|
3
|
+
$:.unshift lib unless $:.include?(lib)
|
4
|
+
|
5
|
+
require 'jor/version'
|
6
|
+
|
7
|
+
## Gem specification for JOR (JSON over Redis).
Gem::Specification.new do |gem|
  gem.name          = "jor"

  gem.authors       = ["Josep M. Pujol"]
  gem.email         = 'josep@3scale.net'

  gem.description   = %q{JSON on top of Redis}
  gem.summary       = %q{Storage engine for JSON documents using Redis. It allows fast find operations (index) by any field of the JSON document (ala MongoDB)}
  ## project page, as referenced from the README
  gem.homepage      = "http://github.com/solso/jor"

  ## `git ls-files` prints one path per line, so split on the input record
  ## separator ($/, a newline). The previous $\ is the *output* separator,
  ## which is nil by default and made split fall back to any whitespace.
  gem.files         = `git ls-files`.split($/)
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
  gem.version       = JOR::VERSION

  gem.add_dependency 'hiredis', '0.4.5'
  gem.add_dependency 'redis', '3.0.3'
  gem.add_dependency 'rake'
  gem.add_dependency 'rack', '1.5.2'
  gem.add_dependency 'rack-test'
  gem.add_dependency 'thin'
end
|
29
|
+
|
data/lib/jor.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'hiredis'
|
3
|
+
require 'redis'
|
4
|
+
require 'rack'
|
5
|
+
|
6
|
+
require File.dirname(__FILE__) + "/jor/errors"
|
7
|
+
require File.dirname(__FILE__) + "/jor/storage"
|
8
|
+
require File.dirname(__FILE__) + "/jor/collection"
|
9
|
+
require File.dirname(__FILE__) + "/jor/doc"
|
10
|
+
require File.dirname(__FILE__) + "/jor/server"
|
11
|
+
require File.dirname(__FILE__) + "/jor/version"
|
12
|
+
|
13
|
+
module JOR
|
14
|
+
end
|
@@ -0,0 +1,449 @@
|
|
1
|
+
module JOR
|
2
|
+
class Collection
|
3
|
+
|
4
|
+
## Defaults for the options accepted by `insert`, `update` and `find`.
## Caller-supplied options are merged on top of these.
##
## The hash (and the nested one) is frozen: the merge is shallow, so the
## nested default would otherwise be one mutable object shared by every call.
DEFAULT_OPTIONS = {
  :max_documents => 1000,
  :raw => false,
  :only_ids => false,
  :reversed => false,
  :excluded_fields_to_index => {}.freeze
}.freeze
|
11
|
+
|
12
|
+
## Builds a handle on one named collection.
##
## storage        - object whose `redis` method gives the connection to use.
## name           - collection name; it is embedded in every Redis key built
##                  by this class.
## auto_increment - when true, `insert` assigns the `_id` itself
##                  (defaults to false).
def initialize(storage, name, auto_increment = false)
  @storage, @name, @auto_increment = storage, name, auto_increment
end
|
17
|
+
|
18
|
+
## Name of the collection, as given to the constructor.
def name; @name; end
|
21
|
+
|
22
|
+
## Redis connection, borrowed from the storage object this collection
## was created with.
def redis; @storage.redis; end
|
25
|
+
|
26
|
+
## Storage object this collection was created with.
def storage; @storage; end
|
29
|
+
|
30
|
+
## Whether `insert` assigns the `_id` of new documents itself.
## Returns the flag exactly as it was given to the constructor.
def auto_increment?; @auto_increment; end
|
33
|
+
|
34
|
+
## Inserts one document, or an Array of documents, into the collection.
##
## docs    - a Hash or an Array of Hash objects. Each Hash is modified in
##           place: `_id` is assigned for auto-increment collections and
##           `_created_at` is set unless already present.
## options - Hash with String or Symbol keys; `:excluded_fields_to_index`
##           names the fields that must not be indexed for these documents.
##
## Returns `docs` exactly as it was passed in.
##
## Raises NotInCollection when the collection has no name,
## DocumentDoesNotNeedId / DocumentNeedsId when `_id` does not fit the
## collection type, InvalidDocumentId for a non-numeric or negative `_id`,
## and DocumentIdAlreadyExists when the `_id` is taken (including when
## another client takes it while the insert is in flight).
def insert(docs, options = {})
  raise NotInCollection.new unless name
  opt = merge_and_symbolize_options(options)
  excluded = opt[:excluded_fields_to_index]

  docs_list = docs.is_a?(Array) ? docs : [docs]

  docs_list.each do |doc|
    if auto_increment?
      raise DocumentDoesNotNeedId.new(name) unless doc["_id"].nil?
      doc["_id"] = next_id()
    else
      raise DocumentNeedsId.new(name) if doc["_id"].nil?
    end

    doc["_created_at"] ||= Time.now.to_f

    encd = JSON::generate(doc)
    paths = Doc.paths("!",doc)
    id = doc["_id"]

    raise InvalidDocumentId.new(id) if !id.is_a?(Numeric) || id < 0

    if !excluded.nil? && excluded.size>0
      paths = Doc.difference(paths, Doc.paths("!",excluded))
    end

    ## optimistic locking: the transaction below is discarded by redis if
    ## the document key changes after this point
    redis.watch(doc_key(id))

    unless redis.get(doc_key(id)).nil?
      ## nothing will be written, release the watch before bailing out
      redis.unwatch
      raise DocumentIdAlreadyExists.new(id, name)
    end

    res = redis.multi do
      redis.set(doc_key(id),encd)
      redis.zadd(doc_sset_key(),id,id)
      paths.each do |path|
        add_index(path,id)
      end
    end

    ## a nil result means the watched key was touched and the transaction
    ## was aborted, i.e. someone else created the document in the meantime
    raise DocumentIdAlreadyExists.new(id, name) if res.nil?
  end
  docs
end
|
81
|
+
|
82
|
+
## Deletes every document matching the query document `doc`.
##
## The matching ids are looked up with `find` (ids only, no limit) and each
## one is removed together with its indexes.
##
## Returns the number of ids that matched.
## Raises NotInCollection when the collection has no name.
def delete(doc)
  raise NotInCollection.new unless name
  matching = find(doc, {:only_ids => true, :max_documents => -1})
  matching.each { |doc_id| delete_by_id(doc_id) }
  matching.size
end
|
90
|
+
|
91
|
+
## Deep-merges `doc_source` into every document matching `doc_dest`.
##
## doc_dest   - query document selecting the documents to update.
## doc_source - Hash with the new values; a nil value removes the field and
##              everything hanging from it. Modified in place: `_id` is
##              dropped and `_updated_at` is set.
## options    - Hash with String or Symbol keys; `:excluded_fields_to_index`
##              names the updated fields that must not be indexed.
##
## Returns an Array with the updated documents.
## Raises NotInCollection when the collection has no name.
def update(doc_dest, doc_source, options = {})
  raise NotInCollection.new unless name
  doc_source.delete("_id") unless doc_source["_id"].nil?
  opt = merge_and_symbolize_options(options)

  doc_source["_updated_at"] = Time.now.to_f
  paths_all = Doc.paths("!",doc_source)

  excluded = opt[:excluded_fields_to_index]
  excluded_paths = (excluded.nil? || excluded.size == 0) ? [] : Doc.paths("!",excluded)
  paths_to_index = Doc.difference(paths_all, excluded_paths)

  find(doc_dest, {:max_documents => -1}).map do |doc|
    id = doc["_id"]

    ## indexes of the document that refer to a field touched by the update.
    ## Each index is selected at most once, even if several paths hit it.
    stale = redis.smembers(idx_set_key(id)).select do |index_doc|
      indexed_path = get_path_to_from_index(index_doc)
      paths_all.any? do |path|
        ## same field, or (when the field is being removed) any field nested
        ## below it: that must be a prefix match, not a match anywhere
        indexed_path == path["path_to"] ||
          (path["obj"].kind_of?(NilClass) && indexed_path.start_with?("#{path["path_to"]}/"))
      end
    end

    redis.pipelined do
      stale.each do |index|
        remove_index(index, id)
        redis.srem(idx_set_key(id),index)
      end
    end

    ## now, the indexes that refer to the changed fields are gone
    new_doc = Doc.deep_merge(doc, doc_source)
    encd = JSON::generate(new_doc)

    redis.multi do
      redis.set(doc_key(new_doc["_id"]),encd)
      ## note that it's not paths_all but only the ones who are not excluded
      paths_to_index.each do |path|
        add_index(path,new_doc["_id"])
      end
    end

    new_doc
  end
end
|
159
|
+
|
160
|
+
## Number of documents in the collection, i.e. the cardinality of the
## sorted set that holds the document ids.
##
## Raises NotInCollection when the collection has no name.
def count
  return redis.zcard(doc_sset_key()) if name
  raise NotInCollection.new
end
|
164
|
+
|
165
|
+
## Returns the documents matching the query document `doc`.
##
## doc     - query Hash. `{}` matches everything; a Hash whose only key is
##           `_id` with a plain value is a direct lookup; anything else is
##           resolved through the indexes, all conditions being ANDed.
## options - Hash with String or Symbol keys:
##           :max_documents - upper bound on the results; negative means no
##                            limit and 0 means no results.
##           :only_ids      - return the ids instead of the documents.
##           :raw           - return the JSON strings without parsing them.
##           :reversed      - descending instead of ascending `_id`.
##
## Returns an Array of Hash (or of ids / JSON strings, see options), ordered
## by `_id`.
## Raises NotInCollection when the collection has no name.
def find(doc, options = {})
  raise NotInCollection.new unless name
  opt = merge_and_symbolize_options(options)
  limit = opt[:max_documents]

  ## a limit of zero can never return anything (it used to be turned into
  ## the range 0..-1, which is "everything")
  return [] if limit == 0

  ## a query on nothing but a plain _id is a direct lookup
  if !doc["_id"].nil? && !doc["_id"].kind_of?(Hash) && doc.size==1
    ids = [doc["_id"]]
    return [] if opt[:only_ids]==true && redis.get(doc_key(ids.first)).nil?
  elsif doc == {}
    ## redis ranges are inclusive, -1 stands for "until the end"
    last = limit > 0 ? limit-1 : -1
    if opt[:reversed]==true
      ids = redis.zrevrange(doc_sset_key(),0,last)
    else
      ids = redis.zrange(doc_sset_key(),0,last)
    end
    ids = ids.map(&:to_i)
  else
    ## for now, consider all logical and
    ids = nil
    Doc.paths("!",doc).each do |path|
      matches = fetch_ids_by_index(path)
      ids = ids.nil? ? matches : ids & matches
    end

    ## the indexes hand back the ids as strings, in string or score order:
    ## sort them numerically so that the result is ordered by _id
    ids = (ids || []).map(&:to_i).sort
    ids.reverse! if opt[:reversed]
  end

  return [] if ids.nil? || ids.empty?

  ## return only up to max_documents, if max_documents is negative
  ## return them all (they have already been reversed)
  ids = ids.first(limit) if limit > 0

  ## return only the ids, it saves fetching the JSON string from
  ## redis and decoding it
  return ids if opt[:only_ids]==true

  results = redis.pipelined do
    ids.each do |id|
      redis.get(doc_key(id))
    end
  end

  ## ids without a stored document come back as nil
  results = results.compact

  ## return the results JSON encoded (raw), many times you do not need the
  ## object but only the JSON string
  return results if opt[:raw]==true

  results.map! { |item| JSON::parse(item) }
end
|
233
|
+
|
234
|
+
## Id of the last document of the collection, 0 when there is none.
##
## Auto-increment collections read the id counter; the other ones take the
## highest member of the sorted set of document ids.
def last_id
  unless auto_increment?
    newest = redis.zrevrange(doc_sset_key(),0,0)
    return (newest.nil? || newest.size==0) ? 0 : newest.first.to_i
  end

  counter = redis.get("#{Storage::NAMESPACE}/#{name}/next_id")
  counter.nil? ? 0 : counter.to_i
end
|
245
|
+
|
246
|
+
## Increments the id counter of the collection by one and returns whatever
## redis answers to the INCRBY.
def next_id
  counter_key = "#{Storage::NAMESPACE}/#{name}/next_id"
  redis.incrby(counter_key, 1)
end
|
249
|
+
|
250
|
+
## Lists the indexed fields of the document `id`.
##
## Every member of the document's index set is taken apart: what follows
## the first "!" is split on "/", the next to last segment is the type, the
## segments before it are the path, and the last segment (minus what
## follows its last "_") is the value. Entries that end up with an empty
## path are left out.
##
## Returns an Array of Hash with the keys "path", "obj" and "value".
def indexes(id)
  redis.smembers(idx_set_key(id)).map do |entry|
    chunks = entry.split("!")
    segments = chunks[1..chunks.size].join("!").split("/")

    type = segments[segments.size-2]
    path = segments[0..segments.size-3].join("/")
    next if path == ""

    tokens = segments[segments.size-1].split("_")
    ## returns the value of a Numeric as string, to cast to float or int, cannot
    ## be derived from type because it Numeric
    {"path" => path, "obj" => type, "value" => tokens[0..tokens.size-2].join("_")}
  end.compact
end
|
272
|
+
|
273
|
+
protected
|
274
|
+
|
275
|
+
## Turns the keys of `options` into symbols and lays the result over
## DEFAULT_OPTIONS, so that String and Symbol keys are both accepted.
##
## Returns a new Hash; `options` is left untouched.
def merge_and_symbolize_options(options = {})
  symbolized = {}
  options.each { |key, value| symbolized[key.to_sym] = value }
  DEFAULT_OPTIONS.merge(symbolized)
end
|
278
|
+
|
279
|
+
## Ids of the documents matching the query document `doc`.
##
## A query that carries an `_id` answers with that id alone; otherwise the
## ids found for each path of the query are intersected (logical and).
def find_docs(doc)
  return [doc["_id"]] unless doc["_id"].nil?

  per_path = Doc.paths("!",doc).map { |path| fetch_ids_by_index(path) }
  per_path.inject { |common, ids| common & ids } || []
end
|
296
|
+
|
297
|
+
## Finds the family of selectors that `sel` belongs to.
##
## sel - Array with the selectors of one condition (e.g. ["$gte", "$lt"]).
##
## Returns the key of the first entry of Storage::SELECTORS whose set
## contains every selector of `sel`.
## Raises IncompatibleSelectors when no single family holds them all.
def check_selectors(sel)
  Storage::SELECTORS.each do |type, set|
    return type if sel.all? { |s| set.member?(s) }
  end
  ## this used to reference an undefined local (`selectors`), which raised a
  ## NameError instead of the intended exception
  raise IncompatibleSelectors.new(sel)
end
|
307
|
+
|
308
|
+
## Class under which `obj` is indexed: the first of String, Numeric,
## TrueClass and FalseClass that `obj` is a kind of.
##
## Raises TypeNotSupported for anything else.
def find_type(obj)
  supported = [String, Numeric, TrueClass, FalseClass].find { |klass| obj.kind_of?(klass) }
  raise TypeNotSupported.new(obj.class) if supported.nil?
  supported
end
|
314
|
+
|
315
|
+
## Ids (as redis returns them) of the documents that satisfy one path of a
## query document.
##
## path - Hash with "path_to" (the field path), "obj" (the value, or a Hash
##        of selectors) and "selector" (true when "obj" holds selectors).
##
## Comparison selectors are answered with a score range on the sorted set of
## the field, set selectors by combining the sets of the listed values, and
## plain values with the set of that value. A nil value matches nothing.
##
## Raises TypeNotSupported for values that cannot be indexed.
def fetch_ids_by_index(path)
  if path["selector"]==true
    case check_selectors(path["obj"].keys)
    when :compare
      key = idx_key(path["path_to"], find_type(path["obj"].values.first))

      ## the exclusive bound, when given, wins over the inclusive one
      lower = "-inf"
      lower = path["obj"]["$gte"] unless path["obj"]["$gte"].nil?
      lower = "(#{path["obj"]["$gt"]}" unless path["obj"]["$gt"].nil?

      upper = "+inf"
      upper = path["obj"]["$lte"] unless path["obj"]["$lte"].nil?
      upper = "(#{path["obj"]["$lt"]}" unless path["obj"]["$lt"].nil?

      ##ZRANGEBYSCORE zset (5 (10 : 5 < x < 10
      return redis.zrangebyscore(key,lower,upper)
    when :sets
      if path["obj"]["$in"]
        ## union of the documents holding any of the values
        union = path["obj"]["$in"].inject([]) do |acc, item|
          acc | redis.smembers(idx_key(path["path_to"], find_type(item), item))
        end
        return union.sort
      elsif path["obj"]["$all"]
        ## intersection of the documents holding each of the values
        common = []
        path["obj"]["$all"].each do |item|
          members = redis.smembers(idx_key(path["path_to"], find_type(item), item))
          common = common.size==0 ? members : common & members
          return [] if (common.nil? || common.size==0)
        end
        return common.sort
      elsif !path["obj"]["$not"].nil?
        ## every document but the ones holding any of the values
        unwanted = path["obj"]["$not"]
        unwanted = [unwanted] unless unwanted.kind_of?(Array)
        banned = unwanted.inject([]) do |acc, item|
          acc | redis.smembers(idx_key(path["path_to"], find_type(item), item))
        end
        return (redis.zrange(doc_sset_key(),0,-1) - banned).sort
      end
    end
  end

  value = path["obj"]
  [String, Numeric, TrueClass, FalseClass].each do |type|
    return redis.smembers(idx_key(path["path_to"], type, value)).sort if value.kind_of?(type)
  end
  value.kind_of?(NilClass) ? [] : (raise TypeNotSupported.new(value.class))
end
|
371
|
+
|
372
|
+
## Removes the document `id` together with everything that refers to it:
## its entries in the indexes, its index set, its id in the sorted set of
## documents and the document itself. All of it goes in one pipeline.
##
## Returns what the pipeline returns.
def delete_by_id(id)
  index_set = idx_set_key(id)
  entries = redis.smembers(index_set)

  redis.pipelined do
    entries.each { |entry| remove_index(entry, id) }
    redis.del(index_set)
    redis.zrem(doc_sset_key(),id)
    redis.del(doc_key(id))
  end
end
|
383
|
+
|
384
|
+
## Takes the document `id` out of one index.
##
## index - an entry of the document's index set: the redis key of the index
##         followed by "_srem" (the index is a set) or "_zrem" (a sorted set).
##
## Raises UnknownIndex when the entry ends in neither of them.
def remove_index(index, id)
  parts = index.split("_")
  key = parts[0..parts.size-2].join("_")

  case parts.last
  when "srem" then redis.srem(key, id)
  when "zrem" then redis.zrem(key, id)
  else raise UnknownIndex.new(index,id)
  end
end
|
395
|
+
|
396
|
+
## Indexes the value of one path of the document `id`.
##
## path - Hash with "path_to" (the field path) and "obj" (its value).
##
## The id is added to the set of documents holding that value, and the key
## of that set is remembered in the document's index set with the suffix
## "_srem". Numeric values also go, scored by the value, into the sorted set
## of the field, remembered with the suffix "_zrem".
##
## Returns true; a nil value is accepted and indexes nothing.
## Raises TypeNotSupported for any other kind of value.
def add_index(path, id)
  value = path["obj"]
  type = [String, Numeric, TrueClass, FalseClass].find { |klass| value.kind_of?(klass) }

  if type.nil?
    return true if value.kind_of?(NilClass)
    raise TypeNotSupported.new(value.class)
  end

  value_key = idx_key(path["path_to"], type, value)
  redis.sadd(value_key, id)
  redis.sadd(idx_set_key(id), "#{value_key}_srem")

  if type==Numeric
    range_key = idx_key(path["path_to"], type)
    redis.zadd(range_key, value, id)
    redis.sadd(idx_set_key(id), "#{range_key}_zrem")
  end

  true
end
|
412
|
+
|
413
|
+
## Extracts the field path out of an entry of a document's index set.
##
## index - the entry, i.e. the redis key of an index plus its suffix.
##
## The path starts right after the first "/!" (the "!" included) and ends
## before the type segment: the first occurrence of "/String" or, failing
## that, of "/Numeric", "/TrueClass" or "/FalseClass", in that order.
##
## this traverses all indexes, Numeric fields have two actual indexes that do not
## appear on the indexes method
##
## Raises CouldNotFindPathToFromIndex when either marker is missing.
def get_path_to_from_index(index)
  marker_pos = index.index("/!")

  type_pos = ["/String", "/Numeric", "/TrueClass", "/FalseClass"].
    map { |type| index.index(type) }.compact.first

  ## check before doing any arithmetic on the positions: a missing marker
  ## used to blow up with a NoMethodError on nil instead
  raise CouldNotFindPathToFromIndex.new(index) if marker_pos.nil? || type_pos.nil?

  index[(marker_pos + 1)..(type_pos - 1)]
end
|
429
|
+
|
430
|
+
## Redis key of an index of the collection.
##
## path_to - path of the indexed field.
## type    - class the value is indexed under.
## obj     - the indexed value; when nil the key stops at the type.
def idx_key(path_to, type, obj = nil)
  base = "#{Storage::NAMESPACE}/#{name}/idx/#{path_to}/#{type}"
  obj.nil? ? base : "#{base}/#{obj}"
end
|
435
|
+
|
436
|
+
## Redis key of the set that lists the indexes of the document `id`.
def idx_set_key(id)
  format("%s/%s/sidx/%s", Storage::NAMESPACE, name, id)
end
|
439
|
+
|
440
|
+
## Redis key under which the JSON of the document `id` is stored.
def doc_key(id)
  format("%s/%s/docs/%s", Storage::NAMESPACE, name, id)
end
|
443
|
+
|
444
|
+
## Redis key of the sorted set that holds the ids of the collection.
def doc_sset_key()
  format("%s/%s/ssdocs", Storage::NAMESPACE, name)
end
|
447
|
+
|
448
|
+
end
|
449
|
+
end
|