command_search 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/command_search/aliaser.rb +46 -0
- data/lib/command_search/command_dealiaser.rb +44 -0
- data/lib/command_search/lexer.rb +104 -0
- data/lib/command_search/memory.rb +102 -0
- data/lib/command_search/mongoer.rb +264 -0
- data/lib/command_search/optimizer.rb +84 -0
- data/lib/command_search/parser.rb +85 -0
- data/lib/command_search.rb +35 -0
- metadata +51 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 217fdaec3f53844fbda942d9b8f9cb6422cfd422
|
4
|
+
data.tar.gz: ffbc227be18fa8fcd947dd42889838719921cbed
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ec86ff8eccc714a7ac0aae2a36f78c6a36bad5fcfb87fd61dbfbb8544b27f008f787604a3e50241c309ce8335177097e78c311487873baee09043a5d3243fa4e
|
7
|
+
data.tar.gz: be520d824f7f9ef919089da78adfdde811a2e1e072b791cf5ec1fa4f462d09b538d7272e7417083992305ee4e4dbffa15a42efb3c11deadd5b7b4cfb1a796f93
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module CommandSearch
  # Rewrites a raw query string by substituting aliases before it is lexed.
  module Aliaser
    module_function

    # Builds a case-insensitive regex that matches +str+ literally, but only
    # when it is preceded by a line start, whitespace, "|", "(" or "-", and
    # followed by a line end, whitespace, "|" or ")".
    def build_regex(str)
      head_border = '(?<=^|\s|[|(-])'
      tail_border = '(?=$|\s|[|)])'
      Regexp.new(head_border + Regexp.escape(str) + tail_border, Regexp::IGNORECASE)
    end

    # Returns a truthy value when +str+ ends inside an unclosed quotation.
    #
    # Complete "..." / '...' spans are stripped first; whatever quote mark
    # is left over is considered an opening quote. A single quote directly
    # after a word character (an apostrophe, as in "it's") does not count.
    def opens_quote?(str)
      quoted_span = /".*"|'.*'/
      remaining = str
      # Each substitution removes at least two characters, so this always
      # terminates (the previous version discarded the result of `sub`
      # and could spin forever).
      remaining = remaining.sub(quoted_span, '') while remaining[quoted_span]
      remaining[/"/] || remaining[/\B'/]
    end

    # Replaces every occurrence of +alias_key+ in +query+ with +alias_value+,
    # leaving occurrences that sit inside an open quotation untouched.
    #
    # alias_key   - a Regexp, or anything whose #to_s is matched literally
    #               (see build_regex).
    # alias_value - a Proc called with the matched text, or any object whose
    #               #to_s is used as the replacement.
    #
    # Returns the rewritten String.
    def alias_item(query, alias_key, alias_value)
      pattern = alias_key.is_a?(Regexp) ? alias_key : build_regex(alias_key.to_s)
      query.gsub(pattern) do |current_match|
        # Look at everything before the match in the whole query, so quote
        # context is not lost after the first replacement. Captured before
        # any further regex work can overwrite the last match.
        preceding = Regexp.last_match.pre_match
        next current_match if opens_quote?(preceding)
        if alias_value.is_a?(Proc)
          alias_value.call(current_match).to_s
        else
          alias_value.to_s
        end
      end
    end

    # Applies every key/value pair of +aliases+ to +query+ in order.
    def alias(query, aliases)
      aliases.reduce(query) { |q, (k, v)| alias_item(q, k, v) }
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module CommandSearch
  # Resolves command-name aliases inside a parsed AST and turns colon/compare
  # nodes that reference no known command back into plain string nodes.
  module CommandDealiaser
    module_function

    # Follows Symbol-valued entries of +aliases+ starting at +key+ until a
    # non-Symbol entry (or a missing one) is reached.
    #
    # Alias cycles (e.g. { a: :b, b: :a }) stop at the first repeated key
    # instead of looping forever.
    #
    # Returns the resolved key as a String.
    def dealias_key(key, aliases)
      visited = []
      current = key.to_sym
      while aliases[current].is_a?(Symbol) && !visited.include?(current)
        visited << current
        current = aliases[current]
      end
      current.to_s
    end

    # Rewrites the value of the first node of a two-node pair in place with
    # its dealiased name and returns the pair.
    def dealias_values((key_node, search_node), aliases)
      key_node[:value] = dealias_key(key_node[:value], aliases)
      [key_node, search_node]
    end

    # Returns +node+ unchanged when it references a known command; otherwise
    # returns a :str node whose value is the children joined by the operator.
    def unnest_unaliased(node, aliases)
      type = node[:nest_type]
      values = node[:value].map { |child| child[:value].to_sym }
      return node if type == :colon && aliases[values.first]
      return node if type == :compare && (values & aliases.keys).any?
      { type: :str, value: values.join(node[:nest_op]) }
    end

    # Walks +ast+ recursively and dealiases the first child of every
    # :colon and :compare node. Nodes are modified in place.
    def dealias(ast, aliases)
      ast.flat_map do |node|
        next node unless node[:nest_type]
        node[:value] = dealias(node[:value], aliases)
        next node unless [:colon, :compare].include?(node[:nest_type])
        node[:value] = dealias_values(node[:value], aliases)
        node
      end
    end

    # Walks +ast+ recursively and replaces :colon / :compare nodes that do
    # not reference a known command with plain :str nodes.
    def decompose_unaliasable(ast, aliases)
      ast.flat_map do |node|
        next node unless node[:nest_type]
        node[:value] = decompose_unaliasable(node[:value], aliases)
        next node unless [:colon, :compare].include?(node[:nest_type])
        unnest_unaliased(node, aliases)
      end
    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module CommandSearch
  # Turns a query string into a flat list of typed tokens
  # ({ type: ..., value: ... }). It is coupled to the parser through the
  # names of the token types and the shape of the output.
  #
  # Numbers with commas in them are currently not supported.
  module Lexer
    module_function

    # Classifies a single character.
    def char_type(char)
      case char
      when /["']/ then :quote
      when /[()]/ then :paren
      when /[<>]/ then :compare
      when /\s/ then :space
      when /\d/ then :number
      when '.' then :period
      when '-' then :minus
      when ':' then :colon
      when '=' then :equal
      when '|' then :pipe
      else :str
      end
    end

    # Wraps a character in a token hash.
    def char_token(char)
      { type: char_type(char), value: char }
    end

    # Returns the indices of the tokens in +list+ whose value equals +match+.
    def value_indices(match, list)
      list.each_index.select { |i| list[i][:value] == match }
    end

    # Collapses each pair of matching quote tokens, and everything between
    # them, into a single :quoted_str token. +input+ is modified in place.
    #
    # When both a '...' and a "..." pair are available, the one that opens
    # first wins. A lone quote mark without a partner is left alone, even
    # if it precedes a complete pair of the other kind.
    def group_quoted_strings(input)
      out = input
      loop do
        pairs = ["'", '"'].map do |mark|
          # Tokens that were already grouped may contain a quote character
          # as their text; those must not act as delimiters again.
          out.each_index.select { |i| out[i][:type] != :quoted_str && out[i][:value] == mark }.first(2)
        end
        open_i, close_i = pairs.select { |pair| pair.length == 2 }.min_by(&:first)
        break unless open_i
        inner = out[(open_i + 1)...close_i].map { |token| token[:value] }.join
        out[open_i..close_i] = { type: :quoted_str, value: inner }
      end
      out
    end

    # Repeatedly replaces the first run of tokens whose types equal
    # +pattern+ with one token of +group_type+ holding the joined values.
    # +input+ is modified in place.
    def group_pattern(input, group_type, pattern)
      out = input
      len = pattern.count
      while (i = out.map { |x| x[:type] }.each_cons(len).find_index(pattern))
        span = i...(i + len)
        joined = out[span].map { |x| x[:value] }.join
        out[span] = { type: group_type, value: joined }
      end
      out
    end

    # Merges character tokens into full tokens and drops whitespace.
    # The order of the grouping steps matters.
    def full_tokens(char_token_list)
      out = group_quoted_strings(char_token_list.clone)

      # Multi-character operators.
      out = group_pattern(out, :pipe, [:pipe, :pipe])
      out = group_pattern(out, :compare, [:compare, :equal])

      # Numbers: decimals, multi-digit, negative.
      out = group_pattern(out, :number, [:number, :period, :number])
      out = group_pattern(out, :number, [:number, :number])
      out = group_pattern(out, :number, [:minus, :number])

      # Everything left over that touches text becomes text.
      out = group_pattern(out, :str, [:equal])
      out = group_pattern(out, :str, [:period])
      out = group_pattern(out, :str, [:number, :str])
      out = group_pattern(out, :str, [:number, :minus])
      out = group_pattern(out, :str, [:str, :number])
      out = group_pattern(out, :str, [:str, :minus])
      out = group_pattern(out, :str, [:str, :str])

      out.reject { |x| x[:type] == :space }
    end

    # Tokenizes +input+ (a String) and returns the list of tokens.
    def lex(input)
      full_tokens(input.chars.map { |char| char_token(char) })
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require('chronic')
|
2
|
+
|
3
|
+
module CommandSearch
  # Evaluates an optimized AST against in-memory items (hash-like objects).
  module Memory
    module_function

    # Evaluates a "command:value" node against +item+.
    #
    # item          - the record being tested; the command is looked up as a
    #                 Symbol key.
    # val           - the two child nodes: [command node, search node].
    # command_types - Hash of command name (Symbol) to type, or to an Array
    #                 of a type plus :allow_existence_boolean.
    #
    # Returns a truthy value on match, a falsy value otherwise (nil when the
    # command has no declared type).
    def command_check(item, val, command_types)
      cmd = val[0][:value].to_sym
      cmd_search = val[1][:value]
      raw_cmd_type = [command_types[cmd]].flatten
      allow_existence_boolean = raw_cmd_type.include?(:allow_existence_boolean)
      cmd_type = (raw_cmd_type - [:allow_existence_boolean]).first
      return unless cmd_type

      wants_true = cmd_search[/true/i]
      if cmd_type == Boolean
        wants_true ? item[cmd] : item[cmd] == false
      elsif allow_existence_boolean && (wants_true || cmd_search[/false/i])
        wants_true ? item[cmd] : item[cmd].nil?
      elsif !item.key?(cmd)
        false
      elsif val[1][:type] == :quoted_str
        # to_s keeps non-String field values (arrays, numbers, nil) from
        # raising when matched against a regex.
        item[cmd].to_s[/\b#{Regexp.escape(cmd_search)}\b/]
      else
        item[cmd].to_s[/#{Regexp.escape(cmd_search)}/mi]
      end
    end

    # Evaluates a comparison node ("a < b") against +item+.
    #
    # Each child is replaced by the item's value under that name (String or
    # Symbol key) when there is one; a child that is not the command falls
    # back to its literal text. For Time commands, String values are parsed
    # with Chronic and the start or end of the parsed span is used depending
    # on the operator. All values are compared as floats.
    #
    # Returns the comparison result, or nil when a value is missing, no
    # child is a known command, or a date cannot be parsed.
    def compare_check(item, node, command_types)
      children = node[:value]
      cmd = children.find { |c| command_types[c[:value].to_sym] }
      return unless cmd
      raw_cmd_type = [command_types[cmd[:value].to_sym]].flatten
      cmd_type = (raw_cmd_type - [:allow_existence_boolean]).first

      args = children.map do |child|
        child_val = child[:value]
        item_val = item[child_val.to_s] || item[child_val.to_sym]
        item_val ||= child_val unless child == cmd
        return unless item_val
        # Only text needs parsing; other values are compared as they are.
        next item_val unless cmd_type == Time && item_val.is_a?(String)
        time_str = item_val.gsub(/[\._-]/, ' ')
        span = Chronic.parse(time_str, { guess: nil })
        return unless span
        # "<" and ">=" compare against the start of the span,
        # ">" and "<=" against its end.
        %w[< >=].include?(node[:nest_op]) ? span.first : span.last
      end
      return unless args.all?
      fn = node[:nest_op].to_sym.to_proc
      fn.call(*args.map(&:to_f))
    end

    # Returns true when +item+ satisfies every node of +ast+.
    #
    # fields - names of the item fields searched by plain terms; each name
    #          is tried as given, as a String and as a Symbol.
    def check(item, ast, fields, command_types)
      field_vals = fields.map { |x| item[x] || item[x.to_s] || item[x.to_sym] }.compact
      nodes = ast.is_a?(Array) ? ast : [ast]
      nodes.all? do |node|
        val = node[:value]
        case node[:nest_type]
        when nil
          pattern =
            if node[:type] == :quoted_str
              /\b#{Regexp.escape(val)}\b/
            else
              /#{Regexp.escape(val)}/mi
            end
          field_vals.any? { |x| x.to_s[pattern] }
        when :colon
          command_check(item, val, command_types)
        when :compare
          compare_check(item, node, command_types)
        when :pipe
          val.any? { |v| check(item, v, fields, command_types) }
        when :minus
          val.none? { |v| check(item, v, fields, command_types) }
        when :paren
          val.all? { |v| check(item, v, fields, command_types) }
        end
      end
    end

    # Returns a proc that tests a single item against +ast+.
    def build_query(ast, fields, command_types = {})
      proc { |x| check(x, ast, fields, command_types) }
    end
  end
end
|
@@ -0,0 +1,264 @@
|
|
1
|
+
require('chronic')
|
2
|
+
|
3
|
+
module CommandSearch
  # Translates an optimized AST into a MongoDB query document.
  module Mongoer
    module_function

    # Builds the clause for a plain search term over +fields+.
    #
    # Quoted terms match case-sensitively on word boundaries; other terms
    # match case-insensitively anywhere. With several fields the per-field
    # clauses are combined with '$or' (or '$and' when the term is negated).
    # With fewer than two fields the Array of per-field clauses is returned.
    def build_search(ast_node, fields)
      str = ast_node[:value]
      fields = [fields] unless fields.is_a?(Array)
      regex =
        if ast_node[:type] == :quoted_str
          /\b#{Regexp.escape(str)}\b/
        else
          /#{Regexp.escape(str)}/mi
        end
      negated = ast_node[:negate]
      forms = fields.map { |f| { f => (negated ? { '$not' => regex } : regex) } }
      return forms if forms.count < 2
      { (negated ? '$and' : '$or') => forms }
    end

    # True when +str+ is "true" or "false" (any case).
    def is_bool_str?(str)
      str[/^true$|^false$/i] ? true : false
    end

    # True when +str+ is "true" (any case), false otherwise.
    def make_boolean(str)
      str[/^true$/i] ? true : false
    end

    # Builds the clause for a "command:value" node.
    #
    # Aliasing is done before the AST gets here. The command's declared
    # type (optionally paired with :allow_existence_boolean) decides how
    # the search value is interpreted. Negation is applied exactly once,
    # inside the branch that handles the type.
    def build_command(ast_node, command_types)
      (field_node, search_node) = ast_node[:value]
      key = field_node[:value]
      raw_type = command_types[key.to_sym]
      raw_val = search_node[:value]
      search_type = search_node[:type]
      negate = field_node[:negate]

      if raw_type.is_a?(Array)
        is_bool = raw_type.include?(:allow_existence_boolean) && is_bool_str?(raw_val) && search_type != :quoted_str
        type = (raw_type - [:allow_existence_boolean]).first
      else
        is_bool = false
        type = raw_type
      end

      if defined?(Boolean) && type == Boolean
        bool = make_boolean(raw_val)
        bool = !bool if negate
        { '$and' => [{ key => { '$exists' => true } }, { key => { '$ne' => !bool } }] }
      elsif is_bool
        # This returns true for empty arrays, when it probably should not.
        # Alternatively, something like tags>5 could return things that
        # have more than 5 tags in the array.
        # https://stackoverflow.com/questions/22367335/mongodb-check-if-value-exists-for-a-field-in-a-document
        bool = make_boolean(raw_val)
        bool = !bool if negate
        if bool
          { '$and' => [{ key => { '$exists' => true } }, { key => { '$ne' => false } }] }
        else
          { key => { '$exists' => false } }
        end
      elsif type == String
        # Case insensitive is probably the best default; proper regex and
        # alias support can give developers case sensitivity if wanted.
        regex =
          if search_type == :quoted_str
            /\b#{Regexp.escape(raw_val)}\b/
          else
            /#{Regexp.escape(raw_val)}/mi
          end
        negate ? { key => { '$not' => regex } } : { key => regex }
      elsif [Numeric, Integer].include?(type)
        val =
          if raw_val == raw_val.to_i.to_s
            raw_val.to_i
          elsif raw_val.to_f != 0 || raw_val[/^[\.0]*0$/]
            raw_val.to_f
          else
            raw_val
          end
        # '$not' only accepts an operator expression or a regex, so a
        # negated plain value has to be expressed with '$ne'.
        negate ? { key => { '$ne' => val } } : { key => val }
      elsif type == Time
        time_str = raw_val.tr('_.-', ' ')
        date = Chronic.parse(time_str, guess: nil)
        if negate
          { '$or' => [{ key => { '$gt' => date.end } }, { key => { '$lt' => date.begin } }] }
        else
          { '$and' => [{ key => { '$gte' => date.begin } }, { key => { '$lte' => date.end } }] }
        end
      else
        { key => nil }
      end
    end

    # Builds the clause for a comparison node ("a < b").
    #
    # The command may be on either side; when it is on the right the
    # operator is flipped. A negated comparison uses the complementary
    # operator. Two commands are compared to each other through '$expr'.
    def build_compare(ast_node, command_types)
      flip_ops = { '<' => '>', '>' => '<', '<=' => '>=', '>=' => '<=' }
      reverse_ops = { '<' => '>=', '<=' => '>', '>' => '<=', '>=' => '<' }
      mongo_op_map = { '<' => '$lt', '>' => '$gt', '<=' => '$lte', '>=' => '$gte' }

      (first_node, last_node) = ast_node[:value]
      key = first_node[:value]
      val = last_node[:value]
      op = ast_node[:nest_op]
      op = reverse_ops[op] if first_node[:negate]

      if command_types.key?(val.to_sym)
        key, val = val, key
        op = flip_ops[op]
      end

      mongo_op = mongo_op_map[op]
      raw_type = command_types[key.to_sym]
      type = raw_type.is_a?(Array) ? (raw_type - [:allow_existence_boolean]).first : raw_type

      if command_types[val.to_sym]
        val = ['$' + key, '$' + val]
        key = '$expr'
      elsif [Numeric, Integer].include?(type)
        val = val == val.to_i.to_s ? val.to_i : val.to_f
      elsif type == Time
        # foo <  day | day.start
        # foo <= day | day.end
        # foo >  day | day.end
        # foo >= day | day.start
        date_start_map = { '<' => :start, '>' => :end, '<=' => :end, '>=' => :start }
        date_pick = date_start_map[op]
        time_str = val.tr('_.-', ' ')
        date = Chronic.parse(time_str, guess: nil)
        if date_pick == :start
          val = date.first
        elsif date_pick == :end
          val = date.last
        end
      end
      { key => { mongo_op => val } }
    end

    # Replaces every leaf, command and comparison in +ast+ with its Mongo
    # clause; grouping nodes are kept and processed recursively.
    def build_searches(ast, fields, command_types)
      ast.flat_map do |x|
        case x[:nest_type]
        when :colon
          build_command(x, command_types)
        when :compare
          build_compare(x, command_types)
        when :paren, :pipe, :minus
          x[:value] = build_searches(x[:value], fields, command_types)
          x
        else
          build_search(x, fields)
        end
      end
    end

    # Converts the remaining grouping nodes into '$and' / '$or' / '$not'.
    def build_tree(ast)
      mongo_types = { paren: '$and', pipe: '$or', minus: '$not' }
      ast.flat_map do |x|
        next x unless x[:nest_type]
        { mongo_types[x[:nest_type]] => build_tree(x[:value]) }
      end
    end

    # Merges directly nested '$or' clauses into their parent '$or'.
    def collapse_ors(ast)
      ast.flat_map do |x|
        ['$and', '$or', '$not'].each do |key|
          x[key] = collapse_ors(x[key]) if x[key]
        end
        next x unless x['$or']
        { '$or' => x['$or'].flat_map { |kid| kid['$or'] || kid } }
      end
    end

    # Removes :minus nodes, marking every leaf beneath them with
    # :negate => true when it sits under an odd number of negations.
    def decompose_nots(ast, not_depth = 0)
      ast.flat_map do |x|
        if x[:nest_type] == :minus
          decompose_nots(x[:value], not_depth + 1)
        elsif x[:nest_type]
          x[:value] = decompose_nots(x[:value], not_depth)
          x
        else
          x[:negate] = not_depth.odd?
          x
        end
      end
    end

    # Builds the complete Mongo query for +ast+.
    #
    # Numbers are searched as strings unless part of a compare/command.
    # Returns {} for an empty AST, the single clause when there is exactly
    # one, and an '$and' of all clauses otherwise.
    def build_query(ast, fields, command_types = {})
      out = decompose_nots(ast)
      out = build_searches(out, fields, command_types)
      out = build_tree(out)
      out = collapse_ors(out)
      return {} if out.empty?
      return out.first if out.count == 1
      { '$and' => out }
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module CommandSearch
  # Simplifies a parsed AST without changing what it matches.
  module Optimizer
    module_function

    # Removes duplicate sibling nodes, merges nested ORs into their parent
    # and unwraps ORs that have a single alternative.
    #
    # A pipe child of a :paren node is kept as is: a paren means "all of",
    # so lifting the alternatives into it would turn "x (a|b)" into
    # "x a b".
    def ands_and_ors(ast)
      ast.uniq.map do |node|
        next node unless node[:nest_type]
        next node if node[:nest_type] == :compare
        node[:value] = ands_and_ors(node[:value])
        unless node[:nest_type] == :paren
          node[:value] = node[:value].flat_map do |kid|
            kid[:nest_type] == :pipe ? kid[:value] : kid
          end
        end
        if node[:nest_type] == :pipe && node[:value].length == 1
          next node[:value].first
        end
        node
      end
    end

    # Drops nests that have become empty and cancels a negation whose only
    # child is another negation.
    def negate_negate(ast)
      ast.flat_map do |node|
        next node unless node[:nest_type]
        node[:value] = negate_negate(node[:value])
        next [] if node[:value] == []
        next node if node[:value].count > 1
        inner = node[:value].first
        next node unless node[:nest_type] == :minus && inner[:nest_type] == :minus
        inner[:value]
      end
    end

    # Replaces a :paren node by its children when the parent is not a pipe,
    # or when the paren holds fewer than two children.
    def denest_parens(ast, parent_type = :root)
      ast.flat_map do |node|
        next node unless node[:nest_type]

        node[:value] = denest_parens(node[:value], node[:nest_type])
        next node unless node[:nest_type] == :paren

        removable = parent_type != :pipe || node[:value].count < 2
        removable ? node[:value] : node
      end
    end

    # Removes empty quoted strings at every level.
    def remove_empty_strings(ast)
      out = ast.flat_map do |node|
        next if node[:type] == :quoted_str && node[:value] == ''
        next node unless node[:nest_type]
        node[:value] = remove_empty_strings(node[:value])
        node
      end
      out.compact
    end

    # Runs each simplification once.
    #
    # '(a b)|(c d)' is the only current situation where parens are needed.
    # 'a|(b|(c|d))' can be flattened by repeated application of
    # "ands_and_ors" and "denest_parens".
    def optimization_pass(ast)
      out = denest_parens(ast)
      out = negate_negate(out)
      out = ands_and_ors(out)
      remove_empty_strings(out)
    end

    # Applies optimization passes until two consecutive results are equal.
    def optimize(ast)
      previous = optimization_pass(ast)
      current = optimization_pass(previous)
      until previous == current
        previous = current
        current = optimization_pass(current)
      end
      current
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module CommandSearch
  # Turns the lexer's flat token list into a nested AST.
  module Parser
    module_function

    # Finds the innermost pair of parentheses.
    #
    # Only tokens of type :paren count, so a quoted "(" or ")" is never
    # mistaken for grouping syntax.
    #
    # Returns [open_index, close_index], or nil when there is no last "("
    # followed by a ")".
    def parens_rindex(input)
      open_i = input.rindex { |x| x[:type] == :paren && x[:value] == '(' }
      return unless open_i
      close_offset = input.drop(open_i).index { |x| x[:type] == :paren && x[:value] == ')' }
      return unless close_offset
      [open_i, close_offset + open_i]
    end

    # Replaces every balanced pair of parentheses, innermost first, with a
    # :paren nest holding the tokens in between. +input+ is modified in
    # place.
    def group_parens(input)
      out = input
      while (bounds = parens_rindex(out))
        (a, b) = bounds
        val = out[(a + 1)..(b - 1)]
        out[a..b] = { type: :nest, nest_type: :paren, value: val }
      end
      out
    end

    # Groups each operator token of +type+ with its operand(s) into a nest.
    #
    # cluster_type - :binary takes the token before and after the operator
    #                (only the one after when the operator comes first);
    #                anything else takes only the token after it.
    #
    # Existing nests are processed recursively. nil entries are removed
    # from +input+ in place before grouping.
    def cluster(type, input, cluster_type = :binary)
      binary = (cluster_type == :binary)
      out = input
      out = out[:value] while out.is_a?(Hash)
      out.compact!
      # rindex (vs index) is important for nested prefixes.
      while (i = out.rindex { |x| x[:type] == type })
        takes_left = binary && i > 0
        val = [out[i + 1]]
        val.unshift(out[i - 1]) if takes_left
        first = takes_left ? i - 1 : i
        out[first..(i + 1)] = {
          type: :nest,
          nest_type: type,
          nest_op: out[i][:value],
          value: val
        }
      end
      out.map do |x|
        next x unless x[:type] == :nest
        x[:value] = cluster(type, x[:value], cluster_type)
        x
      end
    end

    # Duplicates the operand shared by two chained operators of +type+, so
    # that "a < b < c" becomes "a < b b < c". +input+ is modified in place
    # and returned.
    def unchain(type, input)
      i = 0
      while i < input.length
        front = input.dig(i, :type)
        mid = input.dig(i + 1, :type)
        back = input.dig(i + 2, :type)
        if front == type && mid != type && back == type
          input.insert(i + 1, input[i + 1])
        end
        i += 1
      end
      input
    end

    # Drops unmatched parenthesis tokens and unwraps :colon / :compare
    # nests that ended up with fewer than two operands (for example a
    # trailing "title:"), leaving just the operand, if any.
    def clean_ununused_syntax(input)
      out = input.map do |x|
        next if x[:type] == :paren && x[:value].is_a?(String)
        if [:colon, :compare].include?(x[:nest_type]) && x[:value].length < 2
          x = clean_ununused_syntax(x[:value]).first
        end
        next x unless x && x[:type] == :nest
        x[:value] = clean_ununused_syntax(x[:value])
        x
      end
      out.compact
    end

    # Parses a token list into an AST. The order of the steps determines
    # how the operators nest.
    def parse(input)
      out = group_parens(input)
      out = cluster(:colon, out)
      out = unchain(:compare, out)
      out = cluster(:compare, out)
      out = cluster(:minus, out, :prefix)
      out = cluster(:pipe, out)
      clean_ununused_syntax(out)
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
load(__dir__ + '/command_search/aliaser.rb')
|
2
|
+
load(__dir__ + '/command_search/lexer.rb')
|
3
|
+
load(__dir__ + '/command_search/parser.rb')
|
4
|
+
load(__dir__ + '/command_search/command_dealiaser.rb')
|
5
|
+
load(__dir__ + '/command_search/optimizer.rb')
|
6
|
+
load(__dir__ + '/command_search/mongoer.rb')
|
7
|
+
load(__dir__ + '/command_search/memory.rb')
|
8
|
+
|
9
|
+
# Empty marker class. The Memory and Mongoer modules compare a command's
# declared type against this constant to detect true/false fields.
class Boolean; end
|
10
|
+
|
11
|
+
module CommandSearch
  module_function

  # Runs +query+ against +source+ and returns the matching records.
  #
  # source  - an object that responds to #mongo_client and has a truthy
  #           #queryable (queried through #where), or any other collection
  #           (filtered through #select).
  # query   - the search String.
  # options - Hash with optional :aliases, :fields and :command_fields.
  def search(source, query, options = {})
    aliases = options[:aliases] || {}
    fields = options[:fields] || []
    command_fields = options[:command_fields] || {}

    # Text stage: alias substitution, then tokenizing.
    tokens = Lexer.lex(Aliaser.alias(query, aliases))

    # Tree stage: parse, resolve command names, simplify.
    ast = Parser.parse(tokens)
    ast = CommandDealiaser.dealias(ast, command_fields)
    ast = CommandDealiaser.decompose_unaliasable(ast, command_fields)
    ast = Optimizer.optimize(ast)

    unless source.respond_to?(:mongo_client) && source.queryable
      matcher = Memory.build_query(ast, fields, command_fields)
      return source.select(&matcher)
    end

    # With no searchable fields, a placeholder field name is used instead.
    fields = [:__CommandSearch_mongo_fields_dummy_key__] if fields.empty?
    source.where(Mongoer.build_query(ast, fields, command_fields))
  end
end
|
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: command_search
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- zumbalogy
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-09-03 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Query collections with ease and without an engine like Elasticsearch.
|
14
|
+
email:
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/command_search.rb
|
20
|
+
- lib/command_search/aliaser.rb
|
21
|
+
- lib/command_search/command_dealiaser.rb
|
22
|
+
- lib/command_search/lexer.rb
|
23
|
+
- lib/command_search/memory.rb
|
24
|
+
- lib/command_search/mongoer.rb
|
25
|
+
- lib/command_search/optimizer.rb
|
26
|
+
- lib/command_search/parser.rb
|
27
|
+
homepage: https://github.com/zumbalogy/command_search
|
28
|
+
licenses:
|
29
|
+
- Unlicense
|
30
|
+
metadata: {}
|
31
|
+
post_install_message:
|
32
|
+
rdoc_options: []
|
33
|
+
require_paths:
|
34
|
+
- lib
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
requirements: []
|
46
|
+
rubyforge_project:
|
47
|
+
rubygems_version: 2.5.2
|
48
|
+
signing_key:
|
49
|
+
specification_version: 4
|
50
|
+
summary: A friendly search gem for users and developers.
|
51
|
+
test_files: []
|