boogex 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +52 -0
- data/LICENSE +20 -0
- data/README.md +13 -0
- data/Rakefile +11 -0
- data/boogex.gemspec +22 -0
- data/lib/boogex.rb +8 -0
- data/lib/boogex/convertor.rb +176 -0
- data/lib/boogex/error.rb +4 -0
- data/lib/boogex/helper.rb +5 -0
- data/lib/boogex/version.rb +4 -0
- data/test/convertor_test.rb +36 -0
- data/test/reports/TEST-Boogex.xml +11 -0
- data/test/test_helper.rb +8 -0
- metadata +71 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f9474689b9e61cffbef211549d9e942254c1d0e8
|
4
|
+
data.tar.gz: f56c73ade74210ca6aa2a82e69936e7bedeb678a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 19d9416eb7d615dee24a748f0405f81eb188264e168576ff66cba4e0086d790149605a3f3b09b1696e33cba81ec39c8ecf7c71587e4e838db68a3831d19b6c6f
|
7
|
+
data.tar.gz: 41d95f425232c4ee624570d6121606cb65ef134919302f18aa458d278fec408963afdd80744b13fce032aabf72e0ebca5e8d62fa137e5c888b2573208aab8e9a
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
boogex (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
activesupport (4.2.3)
|
10
|
+
i18n (~> 0.7)
|
11
|
+
json (~> 1.7, >= 1.7.7)
|
12
|
+
minitest (~> 5.1)
|
13
|
+
thread_safe (~> 0.3, >= 0.3.4)
|
14
|
+
tzinfo (~> 1.1)
|
15
|
+
addressable (2.3.8)
|
16
|
+
ansi (1.5.0)
|
17
|
+
builder (3.2.2)
|
18
|
+
crack (0.4.2)
|
19
|
+
safe_yaml (~> 1.0.0)
|
20
|
+
factory_girl (4.5.0)
|
21
|
+
activesupport (>= 3.0.0)
|
22
|
+
faker (1.4.3)
|
23
|
+
i18n (~> 0.5)
|
24
|
+
i18n (0.7.0)
|
25
|
+
json (1.8.3)
|
26
|
+
minitest (5.7.0)
|
27
|
+
minitest-reporters (1.0.7)
|
28
|
+
ansi
|
29
|
+
builder
|
30
|
+
minitest (>= 5.0)
|
31
|
+
ruby-progressbar
|
32
|
+
rake (10.4.2)
|
33
|
+
ruby-progressbar (1.7.5)
|
34
|
+
safe_yaml (1.0.4)
|
35
|
+
thread_safe (0.3.5)
|
36
|
+
tzinfo (1.2.2)
|
37
|
+
thread_safe (~> 0.1)
|
38
|
+
webmock (1.21.0)
|
39
|
+
addressable (>= 2.3.6)
|
40
|
+
crack (>= 0.3.2)
|
41
|
+
|
42
|
+
PLATFORMS
|
43
|
+
ruby
|
44
|
+
|
45
|
+
DEPENDENCIES
|
46
|
+
boogex!
|
47
|
+
factory_girl
|
48
|
+
faker
|
49
|
+
minitest
|
50
|
+
minitest-reporters
|
51
|
+
rake
|
52
|
+
webmock
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2015 Lexer
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
data/Rakefile
ADDED
data/boogex.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Gem specification for boogex: package metadata plus the list of files to ship.
Gem::Specification.new do |s|
  s.name        = 'boogex'
  s.version     = '0.0.1'
  s.date        = '2016-02-14'
  s.homepage    = 'https://github.com/lexerdev/boogex.gem'
  s.license     = 'MIT'
  s.summary     = "Boolean Lucene to Regex convertor."
  s.description = "Converts Lucene Boolean query language into ruby regex."
  s.authors     = ["Sam Crouch"]
  s.email       = 'samuel.crouch@lexer.com.au'

  # Split on an explicit newline. `$RS` is only defined after
  # `require 'English'`; without it the global is nil and `split(nil)` splits
  # on any whitespace, which breaks file names containing spaces.
  tracked_files = `git ls-files`.split("\n")

  # Development-only files that should not be packaged.
  excluded = %r{^(?:
    spec/.*
    |Gemfile
    |Rakefile
    |\.gitignore
    |\.rubocop.yml
  )$}x

  s.files = tracked_files.reject { |file| file =~ excluded }

  # Only the test suite belongs in test_files. RubyGems merges test_files into
  # the packaged file list, so listing every tracked file here would re-add
  # everything the filter above removed.
  s.test_files = tracked_files.grep(%r{\Atest/})

  s.require_paths = ['lib']
end
|
data/lib/boogex.rb
ADDED
@@ -0,0 +1,176 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Converts a Lucene-style boolean query string (`OR`, `AND`, round brackets)
# into a regex string. `OR` becomes the regex `|`; `AND` has no regex
# equivalent, so it is emitted as an ad-hoc `#{andify["...", "..."]}`
# placeholder that is expected to be expanded by the consumer of the string.
module Boogex
  # Matches one balanced round-bracket group, including the brackets, with
  # groups nested inside it to any depth (the named group calls itself).
  BRACKET_GROUP = /(?<group>\((?:[^()]|\g<group>)*\))/

  # Converts +text+ into its regex string.
  #
  #   Boogex.convert('This OR That')  # => '(?:This|That)'
  #   Boogex.convert('This AND That') # => '#{andify["(?:This)", "(?:That)"]}'
  #
  # @param text [String] the boolean query
  # @return [String] the regex string
  def self.convert(text)
    # NOTE(review): this writes to stdout on every call; kept for
    # compatibility, but a library should probably not print here.
    puts "Converting \"#{text}\" into regex"
    array = array_struct(text)
    array = ors_to_pipes(array)
    array = regex_formatting(array)
    regex_array_to_string(array)
  end

  # The methods below are internal helpers for .convert. A bare `private` does
  # not apply to `def self.` methods, so they have always been publicly
  # callable; that is left unchanged here to avoid breaking existing callers.

  # Converts a string into a nested array in which every bracket group
  # becomes a one-element array holding the converted group contents:
  #   "a OR (b) OR c"              => ["a OR ", ["b"], " OR c"]
  #   "a OR (b AND (c OR d)) OR e" => ["a OR ", [["b AND ", ["c OR d"]]], " OR e"]
  #
  # The text is consumed left to right, one top-level bracket group at a time,
  # so repeated groups and repeated separators keep their position, and a
  # query that is one single bracket group is handled as well.
  #
  # @param text [String]
  # @return [String, Array] +text+ itself when it contains no bracket group
  def self.array_struct(text)
    segments = []
    remainder = text

    while (match = BRACKET_GROUP.match(remainder))
      segments << match.pre_match
      # Strip the surrounding brackets and recurse into the group contents.
      segments << [array_struct(match[0][1..-2])]
      remainder = match.post_match
    end

    # If nothing was found then return the original text.
    return text if segments.empty?

    segments << remainder
    # Drop the empty strings left where a group touches the start or end of
    # the text (or another group).
    segments.reject(&:empty?)
  end

  # Converts the Lucene Boolean `OR` into the regex `|` and removes any
  # single or double quotation marks, at every level of the structure.
  #
  # @param obj [String, Array]
  # @return [String, Array] a converted copy with the same shape
  def self.ors_to_pipes(obj)
    return obj.gsub(' OR ', '|').delete("\"'") if obj.is_a?(String)

    obj.map { |element| ors_to_pipes(element) }
  end

  # Transforms the elements of the array structure into regex formatting:
  #
  # - (a) Any element that is not bookended by `|` is wrapped in (?:) so that
  #       each piece of the structure is a self-contained regex group.
  #
  # - (b) Any Lucene Boolean `AND` becomes an AND array whose first element is
  #       "AND" and whose remaining elements are the operand regexes:
  #         "pete AND james"                 => ["AND", "(?:pete)", "(?:james)"]
  #         ["jenny AND ", ["billy|jimmy"]]  => ["AND", "(?:jenny)", "(?:billy|jimmy)"]
  #
  # @param obj [String, Array]
  # @return [String, Array]
  def self.regex_formatting(obj)
    # (a) A plain string: either an AND expression or a fragment to wrap.
    if obj.is_a?(String)
      return ['AND'] + and_operands(obj) if contain_AND?(obj)

      return not_in_or?(obj) ? wrap_in_brackets(obj) : obj
    end

    # An array of only strings is joined into one fragment, wrapped when any
    # of its parts needs wrapping.
    if all_strings?(obj)
      needs_brackets = obj.any? { |text| not_in_or?(text) }
      joined = obj.join
      return needs_brackets ? wrap_in_brackets(joined) : joined
    end

    # (b) If this bracket level contains a string with `AND` in it, the whole
    # level becomes an AND array.
    formatted = obj.any? { |elem| contain_AND?(elem) } ? ['AND'] : []

    obj.each_with_object(formatted) do |elem, acc|
      if contain_AND?(elem)
        acc.concat(and_operands(elem))
      else
        acc << regex_formatting(elem)
      end
    end
  end

  # Splits +text+ on ` AND `, drops empty operands and formats each operand.
  #
  # @param text [String]
  # @return [Array]
  def self.and_operands(text)
    text.split(' AND ').reject(&:empty?).map { |operand| regex_formatting(operand) }
  end

  # Converts the whole regex-formatted structure into a single regex string.
  # The AND array is an ad-hoc format because regex has no equivalent of the
  # Lucene Boolean `AND`; see .construct_AND_array for the emitted form.
  # The argument is not modified.
  #
  # @param obj [String, Array]
  # @return [String]
  def self.regex_array_to_string(obj)
    return obj if obj.is_a?(String)

    and_array = is_AND_array?(obj)
    # Skip the leading "AND" marker without mutating the caller's array.
    elements = and_array ? obj.drop(1) : obj
    parts = elements.map { |element| regex_array_to_string(element) }

    and_array ? construct_AND_array(parts) : parts.join
  end

  # @return [Boolean] true when the first element of +array+ is the "AND" marker
  def self.is_AND_array?(array)
    array[0] == 'AND'
  end

  # Builds the literal `#{andify["a", "b"]}` placeholder for the operands.
  # The single quotes are deliberate: nothing is interpolated here.
  #
  # @param array [Array<String>] operand regex strings
  # @return [String]
  def self.construct_AND_array(array)
    '#{andify["' + array.join('", "') + '"]}'
  end

  # @return [Boolean] true when +obj+ is a String containing ` AND `
  def self.contain_AND?(obj)
    obj.is_a?(String) && obj.include?(' AND ')
  end

  # @return [Boolean] true when every element of +array+ is a String
  def self.all_strings?(array)
    array.all? { |elem| elem.is_a?(String) }
  end

  # True when the text neither starts nor ends with the regex `|`, i.e. it is
  # a free-standing fragment that should be wrapped in a group:
  #   "|pieceofcontent|" => false
  #   "|pieceofcontent"  => false
  #   "pieceofcontent|"  => false
  #   "pieceofcontent"   => true
  #
  # @param text [String]
  # @return [Boolean]
  def self.not_in_or?(text)
    text[0] != '|' && text[-1] != '|'
  end

  # Wraps +text+ in a non-capturing regex group.
  #
  # @param text [String]
  # @return [String]
  def self.wrap_in_brackets(text)
    '(?:' + text + ')'
  end
end
|
data/lib/boogex/error.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require "#{File.dirname(__FILE__)}/test_helper"
|
2
|
+
|
3
|
+
# Behavioural spec for Boogex.convert: each example feeds a boolean query
# string through the converter and compares the resulting regex string.
describe Boogex do
  it 'turns OR into |' do
    string = 'This OR That'
    expecting = '(?:This|That)'
    result = Boogex.convert(string)
    assert_equal expecting, result
  end

  it 'turns AND into andify array string' do
    string = 'This AND That'
    expecting = '#{andify["(?:This)", "(?:That)"]}'
    result = Boogex.convert(string)
    assert_equal expecting, result
  end

  it 'understands bracketing' do
    string = '(This OR That) AND My self'
    expecting = '#{andify["(?:This|That)", "(?:My self)"]}'
    result = Boogex.convert(string)
    assert_equal expecting, result

    # Nested brackets on the right-hand side of an AND. The query previously
    # had an unbalanced bracket and reused the expectation from the case
    # above, which the converter cannot produce for it.
    # NOTE(review): the expectation below is the converter's current output
    # for the balanced query - confirm this is the intended query.
    string = 'This AND (That OR (My self))'
    expecting = '#{andify["(?:This)", "That|(?:My self)"]}'
    result = Boogex.convert(string)
    assert_equal expecting, result
  end

  it 'correctly convert this Lucene boolean query string' do
    string = '(((asd OR dd) AND that) AND this) OR What?'
    expecting = '#{andify["#{andify["(?:asd|dd)", "(?:that)"]}", "(?:this)"]}|What?'
    result = Boogex.convert(string)
    assert_equal expecting, result
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<testsuite name="Boogex" skipped="0" failures="0" errors="0" tests="4" assertions="4" time="0.0007573158945888281">
|
3
|
+
<testcase name="test_0001_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.0001257510157302022">
|
4
|
+
</testcase>
|
5
|
+
<testcase name="test_0002_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.00011062994599342346">
|
6
|
+
</testcase>
|
7
|
+
<testcase name="test_0003_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.00020186102483421564">
|
8
|
+
</testcase>
|
9
|
+
<testcase name="test_0004_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.0003190739080309868">
|
10
|
+
</testcase>
|
11
|
+
</testsuite>
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'minitest/reporters'
|
5
|
+
require 'minitest/spec'
|
6
|
+
|
7
|
+
# Report to the console in spec format and also write a JUnit XML report.
# Uses the canonical `Minitest` constant throughout; `MiniTest` is only a
# legacy alias of it.
Minitest::Reporters.use! [Minitest::Reporters::SpecReporter.new, Minitest::Reporters::JUnitReporter.new]
|
8
|
+
require File.join(File.dirname(__FILE__), '..', 'lib', 'boogex')
|
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: boogex
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sam Crouch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-02-14 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Converts Lucene Boolean query language into ruby regex.
|
14
|
+
email: samuel.crouch@lexer.com.au
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- Gemfile
|
20
|
+
- Gemfile.lock
|
21
|
+
- LICENSE
|
22
|
+
- README.md
|
23
|
+
- Rakefile
|
24
|
+
- boogex.gemspec
|
25
|
+
- lib/boogex.rb
|
26
|
+
- lib/boogex/convertor.rb
|
27
|
+
- lib/boogex/error.rb
|
28
|
+
- lib/boogex/helper.rb
|
29
|
+
- lib/boogex/version.rb
|
30
|
+
- test/convertor_test.rb
|
31
|
+
- test/reports/TEST-Boogex.xml
|
32
|
+
- test/test_helper.rb
|
33
|
+
homepage: https://github.com/lexerdev/boogex.gem
|
34
|
+
licenses:
|
35
|
+
- MIT
|
36
|
+
metadata: {}
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
|
+
requirements:
|
48
|
+
- - ">="
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
51
|
+
requirements: []
|
52
|
+
rubyforge_project:
|
53
|
+
rubygems_version: 2.2.2
|
54
|
+
signing_key:
|
55
|
+
specification_version: 4
|
56
|
+
summary: Boolean Lucene to Regex convertor.
|
57
|
+
test_files:
|
58
|
+
- Gemfile
|
59
|
+
- Gemfile.lock
|
60
|
+
- LICENSE
|
61
|
+
- README.md
|
62
|
+
- Rakefile
|
63
|
+
- boogex.gemspec
|
64
|
+
- lib/boogex.rb
|
65
|
+
- lib/boogex/convertor.rb
|
66
|
+
- lib/boogex/error.rb
|
67
|
+
- lib/boogex/helper.rb
|
68
|
+
- lib/boogex/version.rb
|
69
|
+
- test/convertor_test.rb
|
70
|
+
- test/reports/TEST-Boogex.xml
|
71
|
+
- test/test_helper.rb
|