boogex 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f9474689b9e61cffbef211549d9e942254c1d0e8
4
+ data.tar.gz: f56c73ade74210ca6aa2a82e69936e7bedeb678a
5
+ SHA512:
6
+ metadata.gz: 19d9416eb7d615dee24a748f0405f81eb188264e168576ff66cba4e0086d790149605a3f3b09b1696e33cba81ec39c8ecf7c71587e4e838db68a3831d19b6c6f
7
+ data.tar.gz: 41d95f425232c4ee624570d6121606cb65ef134919302f18aa458d278fec408963afdd80744b13fce032aabf72e0ebca5e8d62fa137e5c888b2573208aab8e9a
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ group :development, :test do
6
+ gem 'rake'
7
+ gem 'minitest'
8
+ gem 'minitest-reporters'
9
+ gem 'webmock'
10
+ gem 'faker'
11
+ gem 'factory_girl'
12
+ end
13
+
14
+ gemspec
@@ -0,0 +1,52 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ boogex (0.0.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ activesupport (4.2.3)
10
+ i18n (~> 0.7)
11
+ json (~> 1.7, >= 1.7.7)
12
+ minitest (~> 5.1)
13
+ thread_safe (~> 0.3, >= 0.3.4)
14
+ tzinfo (~> 1.1)
15
+ addressable (2.3.8)
16
+ ansi (1.5.0)
17
+ builder (3.2.2)
18
+ crack (0.4.2)
19
+ safe_yaml (~> 1.0.0)
20
+ factory_girl (4.5.0)
21
+ activesupport (>= 3.0.0)
22
+ faker (1.4.3)
23
+ i18n (~> 0.5)
24
+ i18n (0.7.0)
25
+ json (1.8.3)
26
+ minitest (5.7.0)
27
+ minitest-reporters (1.0.7)
28
+ ansi
29
+ builder
30
+ minitest (>= 5.0)
31
+ ruby-progressbar
32
+ rake (10.4.2)
33
+ ruby-progressbar (1.7.5)
34
+ safe_yaml (1.0.4)
35
+ thread_safe (0.3.5)
36
+ tzinfo (1.2.2)
37
+ thread_safe (~> 0.1)
38
+ webmock (1.21.0)
39
+ addressable (>= 2.3.6)
40
+ crack (>= 0.3.2)
41
+
42
+ PLATFORMS
43
+ ruby
44
+
45
+ DEPENDENCIES
46
+ boogex!
47
+ factory_girl
48
+ faker
49
+ minitest
50
+ minitest-reporters
51
+ rake
52
+ webmock
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2015 Lexer
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,13 @@
1
+ # boogex.gem
2
+
3
+ This is a gem that converts Lucene Boolean query strings (`AND`, `OR`, brackets) into Ruby regex strings.
4
+
5
+ # Tests
6
+
7
+ Run tests:
8
+ bundle exec rake test
9
+
10
+ # IRB
11
+
12
+ If you want to play around with the gem in an interactive session:
13
+ irb -rubygems -I lib -r $PWD/lib/boogex.rb
@@ -0,0 +1,11 @@
1
# encoding: utf-8
require 'bundler'
require 'rake'
require 'rake/testtask'

# `rake test` runs every file matching test/**/*_test.rb.
Rake::TestTask.new do |t|
  t.test_files = FileList['test/**/*_test.rb']
  # The tests and their helper live under test/ (this gem has no spec/
  # directory), so that is the directory added to the load path.
  t.libs.push 'test'
end

# Running plain `rake` runs the test task.
task default: [:test]
@@ -0,0 +1,22 @@
1
# Gem specification for boogex.
Gem::Specification.new do |s|
  s.name        = 'boogex'
  s.version     = '0.0.1'
  s.date        = '2016-02-14'
  s.homepage    = 'https://github.com/lexerdev/boogex.gem'
  s.license     = 'MIT'
  s.summary     = "Boolean Lucene to Regex convertor."
  s.description = "Converts Lucene Boolean query language into ruby regex."
  s.authors     = ["Sam Crouch"]
  s.email       = 'samuel.crouch@lexer.com.au'

  # NUL-separated output keeps file names that contain whitespace intact.
  # (The previous `split($RS)` relied on `$RS`, which is nil unless the
  # `English` library is loaded, so it silently split on any whitespace.)
  tracked_files = `git ls-files -z`.split("\x0")

  # Package everything that is tracked except development-only files.
  s.files = tracked_files.reject do |file|
    file =~ %r{\A(?:
      spec/.*
      |Gemfile
      |Rakefile
      |\.gitignore
      |\.rubocop\.yml
    )\z}x
  end

  # Only the files under test/ are test files. RubyGems merges `test_files`
  # into the packaged file list, so listing every tracked file here would
  # undo the exclusions above.
  s.test_files    = tracked_files.grep(%r{\Atest/})
  s.require_paths = ['lib']
end
@@ -0,0 +1,8 @@
1
+ # encoding: utf-8
2
+ require 'boogex/error'
3
+ require 'boogex/version'
4
+ require 'boogex/helper'
5
+ require 'boogex/convertor'
6
+
7
# Top-level namespace of the gem. It is intentionally empty here: the
# behaviour is defined by the files required above.
module Boogex; end
@@ -0,0 +1,176 @@
1
+ # encoding: utf-8
2
+
3
# Converts a Lucene-style Boolean query string into a regex string.
#
# `OR` becomes the regex alternation `|`. `AND` has no regex equivalent, so it
# is rendered as the ad hoc placeholder `#{andify["left", "right"]}` which,
# according to the original author's notes, is expanded later by the Lexer
# stack (not part of this gem).
#
# NOTE: the helper methods below are technically public. A bare `private`
# does not affect `def self.` methods, so they have always been callable as
# `Boogex.<helper>`; they are left public to stay backward-compatible.
module Boogex
  # Matches one balanced bracket group, nested to any depth, by calling the
  # named group recursively. Example: "(a OR (b AND (c)))".
  BRACKET_GROUP = /(?<group>\((?:[^()]|\g<group>)*\))/

  # Entry point.
  #
  # @param text [String] Lucene Boolean query, e.g. '(This OR That) AND Other'
  # @return [String] regex source, e.g. '#{andify["(?:This|That)", "(?:Other)"]}'
  def self.convert(text)
    # Existing behaviour: the input is traced on stdout.
    puts "Converting \"#{text}\" into regex"
    array = array_struct(text)
    array = ors_to_pipes(array)
    array = regex_formatting(array)
    regex_array_to_string(array)
  end

  # Converts a string into a nested array in which every outermost bracket
  # group becomes a one-element array holding the (recursively converted)
  # content of the brackets.
  #   "a OR (b) OR c"              => ["a OR ", ["b"], " OR c"]
  #   "a OR (b AND (c OR d)) OR e" => ["a OR ", [["b AND ", ["c OR d"]]], " OR e"]
  #
  # The text is walked match by match using match offsets, so a query that
  # consists only of a bracket group, or that repeats the same group or the
  # same surrounding text, is handled correctly.
  #
  # @param text [String]
  # @return [String, Array] the unchanged string when it contains no
  #   balanced bracket group, otherwise the nested array described above
  def self.array_struct(text)
    pieces = []
    cursor = 0

    text.scan(BRACKET_GROUP) do
      match = Regexp.last_match
      pieces << text[cursor...match.begin(0)] # text before the group
      pieces << [match[0][1..-2]]             # group content without its brackets
      cursor = match.end(0)
    end

    # If nothing was found then return the original text
    return text if pieces.empty?

    pieces << text[cursor..-1] # text after the last group

    # Strings are kept as they are; bracket groups are converted recursively.
    pieces.reject(&:empty?).map do |piece|
      piece.is_a?(String) ? piece : piece.map { |inner| array_struct(inner) }
    end
  end

  # Converts the Lucene Boolean `OR` into the regex `|` and removes any single
  # or double quotation marks, at every level of the array structure.
  #
  # @param obj [String, Array]
  # @return [String, Array] same shape as the input
  def self.ors_to_pipes(obj)
    return obj.gsub(' OR ', '|').delete('"\'') if obj.is_a?(String)

    obj.map { |element| ors_to_pipes(element) }
  end

  # Transforms the array structure into regex formatting:
  # - (a) Any text that is not bookended by `|` is wrapped in `(?:)` so that
  #       each element is a self-contained piece of regex.
  # - (b) Any Lucene Boolean `AND` is converted into an AND array whose first
  #       element is "AND" and whose remaining elements are the operands.
  #       ie. ["pete AND james"]                => ["AND", "(?:pete)", "(?:james)"]
  #       ie. ["jenny AND ", ["billy|jimmy"]]   => ["AND", "(?:jenny)", "(?:billy|jimmy)"]
  #
  # @param obj [String, Array]
  # @return [String, Array]
  def self.regex_formatting(obj)
    if obj.is_a?(String)
      if contain_AND?(obj)
        operands = obj.split(' AND ').reject(&:empty?)
        return ['AND'] + operands.map { |operand| regex_formatting(operand) }
      end

      # (a)
      return not_in_or?(obj) ? wrap_in_brackets(obj) : obj
    end

    # (a) An all-string array without `AND` is a plain leaf group: join it and
    # wrap it if any of its elements needs wrapping. A leaf that does contain
    # `AND` falls through to (b) so that "(a AND b)" is not emitted as the
    # literal text " AND ".
    if all_strings?(obj) && obj.none? { |text| contain_AND?(text) }
      joined = obj.join
      return obj.any? { |text| not_in_or?(text) } ? wrap_in_brackets(joined) : joined
    end

    # (b) If this level contains a string with `AND` in it, the whole level
    # becomes an AND array.
    result = obj.any? { |element| contain_AND?(element) } ? ['AND'] : []

    obj.each do |element|
      if contain_AND?(element)
        element.split(' AND ').reject(&:empty?).each do |operand|
          result << regex_formatting(operand)
        end
      else
        result << regex_formatting(element)
      end
    end

    result
  end

  # Converts the regex-formatted array structure into a single string.
  # AND arrays are rendered with construct_AND_array; everything else is
  # concatenated. The argument is not modified.
  #
  # @param obj [String, Array]
  # @return [String]
  def self.regex_array_to_string(obj)
    return obj if obj.is_a?(String)

    and_array = is_AND_array?(obj)

    # Skip the leading "AND" marker without mutating the caller's array.
    elements = and_array ? obj.drop(1) : obj
    parts = elements.map { |element| regex_array_to_string(element) }

    and_array ? construct_AND_array(parts) : parts.join
  end

  # @return [Boolean] true when the first element is the "AND" marker
  def self.is_AND_array?(array)
    array[0] == 'AND'
  end

  # Renders the operands as the literal placeholder `#{andify["a", "b"]}`.
  # Single quotes are deliberate: nothing is interpolated here.
  def self.construct_AND_array(array)
    '#{andify["' + array.join('", "') + '"]}'
  end

  # @return [Boolean] true when obj is a String containing ' AND '
  def self.contain_AND?(obj)
    obj.is_a?(String) && obj.include?(' AND ')
  end

  # @return [Boolean] true when every element of the array is a String
  def self.all_strings?(array)
    array.all? { |element| element.is_a?(String) }
  end

  # True when the text neither starts nor ends with the regex `|`.
  #   ie. |pieceofcontent| = false
  #   ie. |pieceofcontent  = false
  #   ie. pieceofcontent|  = false
  #   ie. pieceofcontent   = true
  def self.not_in_or?(text)
    text[0] != '|' && text[-1] != '|'
  end

  # Wraps the text in a non-capturing regex group.
  def self.wrap_in_brackets(text)
    '(?:' + text + ')'
  end
end
@@ -0,0 +1,4 @@
1
+ # encoding: utf-8
2
module Boogex
  # Error class of the gem; a direct subclass of StandardError.
  class Error < StandardError; end
end
@@ -0,0 +1,5 @@
1
+ # encoding: utf-8
2
# Put the directory that contains this file at the front of the load path.
$LOAD_PATH.unshift(File.dirname(__FILE__))

# Empty helper namespace. `Boogex` must already be defined when this file is
# loaded, otherwise the compact `Boogex::Helper` form raises NameError.
module Boogex::Helper; end
@@ -0,0 +1,4 @@
1
+ # encoding: utf-8
2
module Boogex
  # Version of the gem. Frozen so the constant cannot be modified in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,36 @@
1
require "#{File.dirname(__FILE__)}/test_helper"

# Behavioural tests for Boogex.convert.
describe Boogex do
  it 'turns OR into |' do
    string = 'This OR That'
    expecting = '(?:This|That)'
    result = Boogex.convert(string)
    assert_equal expecting, result
  end

  it 'turns AND into andify array string' do
    string = 'This AND That'
    expecting = '#{andify["(?:This)", "(?:That)"]}'
    result = Boogex.convert(string)
    assert_equal expecting, result
  end

  it 'understands bracketing' do
    string = '(This OR That) AND My self'
    expecting = '#{andify["(?:This|That)", "(?:My self)"]}'
    result = Boogex.convert(string)
    assert_equal expecting, result
  end

  # NOTE(review): this case used to be a second assertion in the test above.
  # It used the unbalanced input 'This AND (That OR (My self)' together with
  # the expectation copied from the first assertion, which the convertor
  # cannot produce. It is now a balanced nested query and pins the
  # convertor's actual output - confirm this is the intended format.
  it 'understands nested bracketing' do
    string = 'This AND (That OR (My self))'
    expecting = '#{andify["(?:This)", "That|(?:My self)"]}'
    result = Boogex.convert(string)
    assert_equal expecting, result
  end

  it 'correctly convert this Lucene boolean query string' do
    string = '(((asd OR dd) AND that) AND this) OR What?'
    expecting = '#{andify["#{andify["(?:asd|dd)", "(?:that)"]}", "(?:this)"]}|What?'
    result = Boogex.convert(string)
    assert_equal expecting, result
  end
end
@@ -0,0 +1,11 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <testsuite name="Boogex" skipped="0" failures="0" errors="0" tests="4" assertions="4" time="0.0007573158945888281">
3
+ <testcase name="test_0001_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.0001257510157302022">
4
+ </testcase>
5
+ <testcase name="test_0002_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.00011062994599342346">
6
+ </testcase>
7
+ <testcase name="test_0003_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.00020186102483421564">
8
+ </testcase>
9
+ <testcase name="test_0004_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.0003190739080309868">
10
+ </testcase>
11
+ </testsuite>
@@ -0,0 +1,8 @@
1
+ # encoding: UTF-8
2
+
3
require 'minitest/autorun'
require 'minitest/reporters'
require 'minitest/spec'

# Report results both as spec-style console output and as JUnit XML.
# The `Minitest` constant is used throughout: the legacy `MiniTest` alias is
# not defined by default in current minitest releases.
Minitest::Reporters.use! [Minitest::Reporters::SpecReporter.new, Minitest::Reporters::JUnitReporter.new]

# Load the gem under test from this checkout.
require File.join(File.dirname(__FILE__), '..', 'lib', 'boogex')
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: boogex
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Sam Crouch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-02-14 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Converts Lucene Boolean query language into ruby regex.
14
+ email: samuel.crouch@lexer.com.au
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - Gemfile
20
+ - Gemfile.lock
21
+ - LICENSE
22
+ - README.md
23
+ - Rakefile
24
+ - boogex.gemspec
25
+ - lib/boogex.rb
26
+ - lib/boogex/convertor.rb
27
+ - lib/boogex/error.rb
28
+ - lib/boogex/helper.rb
29
+ - lib/boogex/version.rb
30
+ - test/convertor_test.rb
31
+ - test/reports/TEST-Boogex.xml
32
+ - test/test_helper.rb
33
+ homepage: https://github.com/lexerdev/boogex.gem
34
+ licenses:
35
+ - MIT
36
+ metadata: {}
37
+ post_install_message:
38
+ rdoc_options: []
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project:
53
+ rubygems_version: 2.2.2
54
+ signing_key:
55
+ specification_version: 4
56
+ summary: Boolean Lucene to Regex convertor.
57
+ test_files:
58
+ - Gemfile
59
+ - Gemfile.lock
60
+ - LICENSE
61
+ - README.md
62
+ - Rakefile
63
+ - boogex.gemspec
64
+ - lib/boogex.rb
65
+ - lib/boogex/convertor.rb
66
+ - lib/boogex/error.rb
67
+ - lib/boogex/helper.rb
68
+ - lib/boogex/version.rb
69
+ - test/convertor_test.rb
70
+ - test/reports/TEST-Boogex.xml
71
+ - test/test_helper.rb