boogex 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +52 -0
- data/LICENSE +20 -0
- data/README.md +13 -0
- data/Rakefile +11 -0
- data/boogex.gemspec +22 -0
- data/lib/boogex.rb +8 -0
- data/lib/boogex/convertor.rb +176 -0
- data/lib/boogex/error.rb +4 -0
- data/lib/boogex/helper.rb +5 -0
- data/lib/boogex/version.rb +4 -0
- data/test/convertor_test.rb +36 -0
- data/test/reports/TEST-Boogex.xml +11 -0
- data/test/test_helper.rb +8 -0
- metadata +71 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f9474689b9e61cffbef211549d9e942254c1d0e8
|
4
|
+
data.tar.gz: f56c73ade74210ca6aa2a82e69936e7bedeb678a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 19d9416eb7d615dee24a748f0405f81eb188264e168576ff66cba4e0086d790149605a3f3b09b1696e33cba81ec39c8ecf7c71587e4e838db68a3831d19b6c6f
|
7
|
+
data.tar.gz: 41d95f425232c4ee624570d6121606cb65ef134919302f18aa458d278fec408963afdd80744b13fce032aabf72e0ebca5e8d62fa137e5c888b2573208aab8e9a
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
boogex (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
activesupport (4.2.3)
|
10
|
+
i18n (~> 0.7)
|
11
|
+
json (~> 1.7, >= 1.7.7)
|
12
|
+
minitest (~> 5.1)
|
13
|
+
thread_safe (~> 0.3, >= 0.3.4)
|
14
|
+
tzinfo (~> 1.1)
|
15
|
+
addressable (2.3.8)
|
16
|
+
ansi (1.5.0)
|
17
|
+
builder (3.2.2)
|
18
|
+
crack (0.4.2)
|
19
|
+
safe_yaml (~> 1.0.0)
|
20
|
+
factory_girl (4.5.0)
|
21
|
+
activesupport (>= 3.0.0)
|
22
|
+
faker (1.4.3)
|
23
|
+
i18n (~> 0.5)
|
24
|
+
i18n (0.7.0)
|
25
|
+
json (1.8.3)
|
26
|
+
minitest (5.7.0)
|
27
|
+
minitest-reporters (1.0.7)
|
28
|
+
ansi
|
29
|
+
builder
|
30
|
+
minitest (>= 5.0)
|
31
|
+
ruby-progressbar
|
32
|
+
rake (10.4.2)
|
33
|
+
ruby-progressbar (1.7.5)
|
34
|
+
safe_yaml (1.0.4)
|
35
|
+
thread_safe (0.3.5)
|
36
|
+
tzinfo (1.2.2)
|
37
|
+
thread_safe (~> 0.1)
|
38
|
+
webmock (1.21.0)
|
39
|
+
addressable (>= 2.3.6)
|
40
|
+
crack (>= 0.3.2)
|
41
|
+
|
42
|
+
PLATFORMS
|
43
|
+
ruby
|
44
|
+
|
45
|
+
DEPENDENCIES
|
46
|
+
boogex!
|
47
|
+
factory_girl
|
48
|
+
faker
|
49
|
+
minitest
|
50
|
+
minitest-reporters
|
51
|
+
rake
|
52
|
+
webmock
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2015 Lexer
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
data/Rakefile
ADDED
data/boogex.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Gem specification for boogex: package metadata plus the list of files
# that are shipped in the built gem.
Gem::Specification.new do |s|
  # Every path tracked by git, one entry per line. Splitting on an explicit
  # newline avoids relying on `$RS`, which is only defined after
  # `require 'English'` (otherwise it is nil and the list is split on any
  # whitespace, breaking paths that contain spaces).
  tracked_files = `git ls-files`.split("\n")

  s.name = 'boogex'
  s.version = '0.0.1'
  s.date = '2016-02-14'
  s.homepage = 'https://github.com/lexerdev/boogex.gem'
  s.license = 'MIT'
  s.summary = "Boolean Lucene to Regex convertor."
  s.description = "Converts Lucene Boolean query language into ruby regex."
  s.authors = ["Sam Crouch"]
  s.email = 'samuel.crouch@lexer.com.au'

  # Ship everything that is tracked except development-only files.
  s.files = tracked_files.reject do |file|
    file =~ %r{\A(?:
      spec/.*
      |Gemfile
      |Rakefile
      |\.gitignore
      |\.rubocop\.yml
    )\z}x
  end

  # Only the files under test/ are test files. Listing every tracked file
  # here would add the files excluded above back into the package.
  s.test_files = tracked_files.grep(%r{\Atest/})
  s.require_paths = ['lib']
end
|
data/lib/boogex.rb
ADDED
@@ -0,0 +1,176 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Converts a Lucene-style boolean query string ("a OR (b AND c)") into a
# regex string. `OR` becomes the regex alternation `|`; `AND` has no regex
# equivalent and is rendered as the ad-hoc marker `#{andify["x", "y"]}`,
# which is emitted literally (it is not interpolated here).
module Boogex
  # Pattern fragment: any run of characters that are not brackets.
  INSIDE_BRACKETS = '[^()]*'.freeze

  # Pattern fragments for a bracket group containing zero, one or two further
  # levels of bracket nesting.
  ONE_LEVEL = "\\(#{INSIDE_BRACKETS}\\)".freeze
  TWO_LEVELS = "\\(#{INSIDE_BRACKETS}#{ONE_LEVEL}#{INSIDE_BRACKETS}\\)".freeze
  THREE_LEVELS = "\\(#{INSIDE_BRACKETS}#{TWO_LEVELS}#{INSIDE_BRACKETS}\\)".freeze

  # Matches a bracket group up to three levels deep. Deeper nesting is not
  # recognised as a single group; extend the alternation to support more.
  BRACKET_GROUP = Regexp.new("#{ONE_LEVEL}|#{TWO_LEVELS}|#{THREE_LEVELS}")

  # Converts +text+ (a String holding a boolean query) into a regex String.
  # Prints a progress line to standard output on every call.
  def self.convert(text)
    puts "Converting \"#{text}\" into regex"
    tree = array_struct(text)
    tree = ors_to_pipes(tree)
    tree = regex_formatting(tree)
    regex_array_to_string(tree)
  end

  # The methods below are implementation details of `convert`. They remain
  # callable as `Boogex.<name>`: a bare `private` does not apply to methods
  # defined with `def self.`, and they are left public so that any existing
  # caller keeps working.

  # Converts a string into a nested array in which every bracket group becomes
  # a one-element array holding the (recursively converted) group content:
  #   "a OR (b) OR c"              => ["a OR ", ["b"], " OR c"]
  #   "a OR (b AND (c OR d)) OR e" => ["a OR ", [["b AND ", ["c OR d"]]], " OR e"]
  # Returns +text+ itself when it contains no bracket group.
  def self.array_struct(text)
    return text unless text =~ BRACKET_GROUP

    segments = []
    rest = text
    # Walk the text left to right, peeling off one bracket group at a time.
    # Cutting by position (instead of splitting on the group's text) keeps
    # repeated groups and repeated separators, and copes with a group that
    # spans the whole text.
    until rest.empty?
      before, group, rest = rest.partition(BRACKET_GROUP)
      segments << before unless before.empty?
      # group[1..-2] strips the enclosing brackets before recursing.
      segments << [array_struct(group[1..-2])] unless group.empty?
    end
    segments
  end

  # Replaces the Lucene ` OR ` with the regex `|` and removes single and double
  # quotation marks, recursing through every level of a nested array.
  def self.ors_to_pipes(obj)
    return obj.gsub(' OR ', '|').delete("\"'") if obj.is_a?(String)

    obj.map { |elem| ors_to_pipes(elem) }
  end

  # Transforms the nested structure towards regex formatting:
  #
  # - (a) A string that neither starts nor ends with `|` is wrapped in `(?:)`
  #       so that it is a self-contained regex fragment.
  #
  # - (b) A Lucene ` AND ` turns the enclosing level into an "AND array":
  #       first element "AND", remaining elements the operands.
  #       ie. ["pete AND james"] => ["AND", "(?:pete)", "(?:james)"]
  def self.regex_formatting(obj)
    # (a) plain string
    if obj.is_a?(String)
      return ['AND'] + split_on_and(obj) if contain_AND?(obj)
      return not_in_or?(obj) ? wrap_in_brackets(obj) : obj
    end

    # (a) array of strings without AND: join, then wrap if any element needs it.
    # Strings containing AND are excluded so they reach branch (b) below.
    if all_strings?(obj) && obj.none? { |elem| contain_AND?(elem) }
      joined = obj.join
      return obj.any? { |elem| not_in_or?(elem) } ? wrap_in_brackets(joined) : joined
    end

    # (b) a string with AND at this level makes the whole level an AND array
    formatted = obj.any? { |elem| contain_AND?(elem) } ? ['AND'] : []
    obj.each_with_object(formatted) do |elem, acc|
      if contain_AND?(elem)
        acc.concat(split_on_and(elem))
      else
        acc << regex_formatting(elem)
      end
    end
  end

  # Splits +text+ on ` AND `, drops empty operands and formats each operand.
  def self.split_on_and(text)
    text.split(' AND ').reject(&:empty?).map { |part| regex_formatting(part) }
  end

  # Flattens the formatted structure into the final regex string. An AND array
  # is rendered through construct_AND_array; any other array is concatenated.
  # The argument is not modified.
  def self.regex_array_to_string(obj)
    return obj if obj.is_a?(String)

    and_array = is_AND_array?(obj)
    # Skip the leading "AND" marker without mutating the caller's array.
    members = and_array ? obj.drop(1) : obj
    parts = members.map { |member| regex_array_to_string(member) }
    and_array ? construct_AND_array(parts) : parts.join
  end

  # True when the first element of +array+ is the "AND" marker.
  def self.is_AND_array?(array)
    array[0] == 'AND'
  end

  # Renders operands as the literal text #{andify["op1", "op2"]}.
  def self.construct_AND_array(array)
    '#{andify["' + array.join('", "') + '"]}'
  end

  # True when +obj+ is a String containing the Lucene ` AND ` operator.
  def self.contain_AND?(obj)
    obj.is_a?(String) && obj.include?(' AND ')
  end

  # True when every element of +array+ is a String.
  def self.all_strings?(array)
    array.all? { |elem| elem.is_a?(String) }
  end

  # True when +text+ neither starts nor ends with the regex `|`.
  #   ie. |pieceofcontent| = false
  #   ie. |pieceofcontent  = false
  #   ie. pieceofcontent|  = false
  #   ie. pieceofcontent   = true
  def self.not_in_or?(text)
    !(text.start_with?('|') || text.end_with?('|'))
  end

  # Wraps +text+ in a non-capturing regex group.
  def self.wrap_in_brackets(text)
    '(?:' + text + ')'
  end
end
|
data/lib/boogex/error.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require "#{File.dirname(__FILE__)}/test_helper"
|
2
|
+
|
3
|
+
describe Boogex do
|
4
|
+
it 'turns OR into |' do
|
5
|
+
string = 'This OR That'
|
6
|
+
expecting = '(?:This|That)'
|
7
|
+
result = Boogex.convert(string)
|
8
|
+
assert_equal expecting, result
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'turns AND into andify array string' do
|
12
|
+
string = 'This AND That'
|
13
|
+
expecting = '#{andify["(?:This)", "(?:That)"]}'
|
14
|
+
result = Boogex.convert(string)
|
15
|
+
assert_equal expecting, result
|
16
|
+
end
|
17
|
+
|
18
|
+
# Checks that a bracketed OR group combined with AND is rendered as an andify
# marker whose first operand is the grouped alternation.
it 'understands bracketing' do
|
19
|
+
string = '(This OR That) AND My self'
|
20
|
+
expecting = '#{andify["(?:This|That)", "(?:My self)"]}'
|
21
|
+
result = Boogex.convert(string)
|
22
|
+
assert_equal expecting, result
|
23
|
+
|
24
|
+
# NOTE(review): the query below has unbalanced parentheses (two opening, one
# closing) and its expected value is identical to the first case above --
# confirm the intended query and expectation.
string = 'This AND (That OR (My self)'
|
25
|
+
expecting = '#{andify["(?:This|That)", "(?:My self)"]}'
|
26
|
+
result = Boogex.convert(string)
|
27
|
+
assert_equal expecting, result
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'correctly convert this Lucene boolean query string' do
|
31
|
+
string = '(((asd OR dd) AND that) AND this) OR What?'
|
32
|
+
expecting = '#{andify["#{andify["(?:asd|dd)", "(?:that)"]}", "(?:this)"]}|What?'
|
33
|
+
result = Boogex.convert(string)
|
34
|
+
assert_equal expecting, result
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<testsuite name="Boogex" skipped="0" failures="0" errors="0" tests="4" assertions="4" time="0.0007573158945888281">
|
3
|
+
<testcase name="test_0001_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.0001257510157302022">
|
4
|
+
</testcase>
|
5
|
+
<testcase name="test_0002_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.00011062994599342346">
|
6
|
+
</testcase>
|
7
|
+
<testcase name="test_0003_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.00020186102483421564">
|
8
|
+
</testcase>
|
9
|
+
<testcase name="test_0004_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.0003190739080309868">
|
10
|
+
</testcase>
|
11
|
+
</testsuite>
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Test bootstrap: loads minitest (with the spec DSL and the reporters
# add-on), registers the reporters and then loads the library under test.
require 'minitest/autorun'
require 'minitest/reporters'
require 'minitest/spec'

# Register two reporters. The `Minitest` constant is used throughout instead
# of mixing it with the `MiniTest` compatibility alias.
Minitest::Reporters.use! [Minitest::Reporters::SpecReporter.new, Minitest::Reporters::JUnitReporter.new]
require File.join(File.dirname(__FILE__), '..', 'lib', 'boogex')
|
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: boogex
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sam Crouch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-02-14 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Converts Lucene Boolean query language into ruby regex.
|
14
|
+
email: samuel.crouch@lexer.com.au
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- Gemfile
|
20
|
+
- Gemfile.lock
|
21
|
+
- LICENSE
|
22
|
+
- README.md
|
23
|
+
- Rakefile
|
24
|
+
- boogex.gemspec
|
25
|
+
- lib/boogex.rb
|
26
|
+
- lib/boogex/convertor.rb
|
27
|
+
- lib/boogex/error.rb
|
28
|
+
- lib/boogex/helper.rb
|
29
|
+
- lib/boogex/version.rb
|
30
|
+
- test/convertor_test.rb
|
31
|
+
- test/reports/TEST-Boogex.xml
|
32
|
+
- test/test_helper.rb
|
33
|
+
homepage: https://github.com/lexerdev/boogex.gem
|
34
|
+
licenses:
|
35
|
+
- MIT
|
36
|
+
metadata: {}
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
requirements: []
|
52
|
+
rubyforge_project:
|
53
|
+
rubygems_version: 2.2.2
|
54
|
+
signing_key:
|
55
|
+
specification_version: 4
|
56
|
+
summary: Boolean Lucene to Regex convertor.
|
57
|
+
test_files:
|
58
|
+
- Gemfile
|
59
|
+
- Gemfile.lock
|
60
|
+
- LICENSE
|
61
|
+
- README.md
|
62
|
+
- Rakefile
|
63
|
+
- boogex.gemspec
|
64
|
+
- lib/boogex.rb
|
65
|
+
- lib/boogex/convertor.rb
|
66
|
+
- lib/boogex/error.rb
|
67
|
+
- lib/boogex/helper.rb
|
68
|
+
- lib/boogex/version.rb
|
69
|
+
- test/convertor_test.rb
|
70
|
+
- test/reports/TEST-Boogex.xml
|
71
|
+
- test/test_helper.rb
|