boogex 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f9474689b9e61cffbef211549d9e942254c1d0e8
4
+ data.tar.gz: f56c73ade74210ca6aa2a82e69936e7bedeb678a
5
+ SHA512:
6
+ metadata.gz: 19d9416eb7d615dee24a748f0405f81eb188264e168576ff66cba4e0086d790149605a3f3b09b1696e33cba81ec39c8ecf7c71587e4e838db68a3831d19b6c6f
7
+ data.tar.gz: 41d95f425232c4ee624570d6121606cb65ef134919302f18aa458d278fec408963afdd80744b13fce032aabf72e0ebca5e8d62fa137e5c888b2573208aab8e9a
data/Gemfile ADDED
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ group :development, :test do
6
+ gem 'rake'
7
+ gem 'minitest'
8
+ gem 'minitest-reporters'
9
+ gem 'webmock'
10
+ gem 'faker'
11
+ gem 'factory_girl'
12
+ end
13
+
14
+ gemspec
@@ -0,0 +1,52 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ boogex (0.0.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ activesupport (4.2.3)
10
+ i18n (~> 0.7)
11
+ json (~> 1.7, >= 1.7.7)
12
+ minitest (~> 5.1)
13
+ thread_safe (~> 0.3, >= 0.3.4)
14
+ tzinfo (~> 1.1)
15
+ addressable (2.3.8)
16
+ ansi (1.5.0)
17
+ builder (3.2.2)
18
+ crack (0.4.2)
19
+ safe_yaml (~> 1.0.0)
20
+ factory_girl (4.5.0)
21
+ activesupport (>= 3.0.0)
22
+ faker (1.4.3)
23
+ i18n (~> 0.5)
24
+ i18n (0.7.0)
25
+ json (1.8.3)
26
+ minitest (5.7.0)
27
+ minitest-reporters (1.0.7)
28
+ ansi
29
+ builder
30
+ minitest (>= 5.0)
31
+ ruby-progressbar
32
+ rake (10.4.2)
33
+ ruby-progressbar (1.7.5)
34
+ safe_yaml (1.0.4)
35
+ thread_safe (0.3.5)
36
+ tzinfo (1.2.2)
37
+ thread_safe (~> 0.1)
38
+ webmock (1.21.0)
39
+ addressable (>= 2.3.6)
40
+ crack (>= 0.3.2)
41
+
42
+ PLATFORMS
43
+ ruby
44
+
45
+ DEPENDENCIES
46
+ boogex!
47
+ factory_girl
48
+ faker
49
+ minitest
50
+ minitest-reporters
51
+ rake
52
+ webmock
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2015 Lexer
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,13 @@
1
+ # boogex.gem
2
+
3
+ This is a gem that converts Lucene boolean query strings into Ruby regular expressions.
4
+
5
+ # Tests
6
+
7
+ Run tests:
8
+ bundle exec rake test
9
+
10
+ # IRB
11
+
12
+ If you want to have a laugh and play around with the gem:
13
+ irb -rubygems -I lib -r $PWD/lib/boogex.rb
@@ -0,0 +1,11 @@
1
+ # encoding: utf-8
2
+ require 'bundler'
3
+ require 'rake'
4
+ require 'rake/testtask'
5
+
6
+ Rake::TestTask.new do |t|
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ t.libs.push 'spec'
9
+ end
10
+
11
+ task default: [:test]
@@ -0,0 +1,22 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = 'boogex'
3
+ s.version = '0.0.1'
4
+ s.date = '2016-02-14'
5
+ s.homepage = 'https://github.com/lexerdev/boogex.gem'
6
+ s.license = 'MIT'
7
+ s.summary = "Boolean Lucene to Regex convertor."
8
+ s.description = "Converts Lucene Boolean query language into ruby regex."
9
+ s.authors = ["Sam Crouch"]
10
+ s.email = 'samuel.crouch@lexer.com.au'
11
+ s.files = `git ls-files`.split($RS).reject do |file|
12
+ file =~ %r{^(?:
13
+ spec/.*
14
+ |Gemfile
15
+ |Rakefile
16
+ |\.gitignore
17
+ |\.rubocop.yml
18
+ )$}x
19
+ end
20
+ s.test_files = `git ls-files`.split($RS)
21
+ s.require_paths = ['lib']
22
+ end
@@ -0,0 +1,8 @@
1
+ # encoding: utf-8
2
+ require 'boogex/error'
3
+ require 'boogex/version'
4
+ require 'boogex/helper'
5
+ require 'boogex/convertor'
6
+
7
+ module Boogex
8
+ end
@@ -0,0 +1,176 @@
1
+ # encoding: utf-8
2
+
3
+ module Boogex
4
+ def self.convert(text)
5
+ puts "Converting \"#{text}\" into regex"
6
+ array = array_struct(text)
7
+ array = ors_to_pipes(array)
8
+ array = regex_formatting(array)
9
+ regex_array_to_string(array)
10
+ end
11
+
12
+ private
13
+
14
+ # This function converts a string into an array where brackets in the string are converted to an array structure
15
+ # to allow further manipulation
16
+ # "a OR (b) OR c" => ["a OR ", ["b"], " OR c"]
17
+ # "a OR (b AND (c OR d)) OR e" => ["a OR ", ["b AND ", ["c OR d"]], " OR e"]
18
+ def self.array_struct(text)
19
+ inside_brackets = "[^\(\)]*"
20
+
21
+ #This regex looks for anything in brackets OR anything with brackets in brackets OR anything with brackets in brackets in brackets
22
+ regex = /(\(#{inside_brackets}\))|(\(#{inside_brackets}\(#{inside_brackets}\)#{inside_brackets}\))|(\(#{inside_brackets}\(#{inside_brackets}\(#{inside_brackets}\)#{inside_brackets}\)#{inside_brackets}\))/
23
+
24
+ cuts = text.scan(regex).to_a.flatten.reject(&:nil?)
25
+
26
+ # If nothing found then return original text
27
+ return text if cuts.empty?
28
+
29
+ # The text is now cut into an array where the bracketing of the string determines the elements
30
+ # ie. "a OR (b) OR c" => ["a OR ", ["b"], " OR c"]
31
+ text_array = cuts.inject([text]) do |a, cut|
32
+ a.each_with_object([]) do |str, result|
33
+ if !str.include?(cut)
34
+ result << str
35
+ else
36
+ splits = str.split(cut)
37
+
38
+ result << splits.first
39
+ cut_without_brackets = cut[1..-2]
40
+ result << [cut_without_brackets]
41
+ result << splits.last
42
+ end
43
+ end
44
+ end.uniq
45
+
46
+ # This recursively converts any brackets in the text back into the array_struct function
47
+ # where the upper limit of recursion is 3 levels of bracketing. This is limited by the regex
48
+ # defined at the top of this method but can easily be extended.
49
+ # If the element of the array is a string then no recursion to apply.
50
+ # If the element of the array is an array then iterate THAT through the array_struct function
51
+ text_array.reject(&:empty?).each_with_object([]) do |str, result|
52
+ if str.is_a?(String)
53
+ result << str
54
+ next
55
+ end
56
+
57
+ result << str.collect do |str|
58
+ array_struct(str)
59
+ end
60
+ end
61
+ end
62
+
63
+ # This function converts the Lucene Boolean `OR` into regex `|` and removes any quotation marks
64
+ def self.ors_to_pipes(obj)
65
+ return obj.gsub(' OR ', '|').gsub('"', '').gsub("'", '') if obj.is_a?(String)
66
+
67
+ # This recursively applies this function to ensure all levels of the array are converted
68
+ obj.collect do |text|
69
+ ors_to_pipes(text)
70
+ end
71
+ end
72
+
73
+ # This function begins to transform the elements of the array structure to regex formatting
74
+ # including:
75
+ # - (a) Any elements that are not bookended by | are then wrapped in (?:) as this modularises
76
+ # the regex of the elements of the structures
77
+ #
78
+ # - (b) Converting any Lucene Boolean `AND` into an AND array structure where the first element is "AND" and the
79
+ # remaining elements of that array are the regexes that make up the `AND`
80
+ # ie. ["pete AND james"] => ["AND", "pete", "james"]
81
+ # ie. ["jenny AND", ["billy OR jimmy"]] => ["AND", "jenny", ["billy OR jimmy"]]
82
+ def self.regex_formatting(obj)
83
+ # (a)
84
+ # if string then wrap it in brackets if needed and then return
85
+ if obj.is_a?(String)
86
+ if contain_AND?(obj)
87
+ result = ['AND']
88
+ result = result + obj.split(' AND ').reject(&:empty?).collect do |str|
89
+ regex_formatting(str)
90
+ end
91
+ return result
92
+ end
93
+ needs_brackets = not_in_or?(obj)
94
+ obj = wrap_in_brackets(obj) if needs_brackets
95
+ return obj
96
+ end
97
+
98
+ # if an all string array, then check if any of the elements of the array need bracket wrapped and return
99
+ if all_strings?(obj)
100
+ needs_brackets = obj.any? do |text|
101
+ not_in_or?(text)
102
+ end
103
+ obj = obj.join('')
104
+ obj = wrap_in_brackets(obj) if needs_brackets
105
+ return obj
106
+ end
107
+
108
+ # (b)
109
+ result = []
110
+
111
+ # If this level of bracket contains a string with `AND` in it, then consider this element an `AND` array
112
+ result << 'AND' if obj.any? do |elem|
113
+ contain_AND?(elem)
114
+ end
115
+
116
+ obj.each_with_object(result) do |text, result|
117
+ if contain_AND?(text)
118
+ text.split(' AND ').reject(&:empty?).each do |str|
119
+ result << regex_formatting(str)
120
+ end
121
+ else
122
+ result << regex_formatting(text)
123
+ end
124
+ end
125
+ end
126
+
127
+ # This function converts the entire array with regex formatting into a regex string
128
+ # The AND array is an adhoc format generated by Lexer as regex doesn't have a Lucene
129
+ # Boolean `AND` equivalent. This adhoc regex AND is generated in the Lexer stack in the task generator
130
+ def self.regex_array_to_string(obj)
131
+ return obj if obj.is_a?(String)
132
+
133
+ is_AND_array = is_AND_array?(obj)
134
+
135
+ # This removes the "AND" from the AND array
136
+ obj.shift if is_AND_array
137
+
138
+ result = obj.collect do |text|
139
+ regex_array_to_string(text)
140
+ end
141
+ return construct_AND_array(result) if is_AND_array
142
+
143
+ result.join('')
144
+ end
145
+
146
+ def self.is_AND_array?(array)
147
+ array[0] == 'AND'
148
+ end
149
+
150
+ def self.construct_AND_array(array)
151
+ '#{andify["' + array.join('", "') + '"]}'
152
+ end
153
+
154
+ def self.contain_AND?(obj)
155
+ obj.is_a?(String) && obj.include?(' AND ')
156
+ end
157
+
158
+ def self.all_strings?(array)
159
+ array.all? do |elem|
160
+ elem.is_a?(String)
161
+ end
162
+ end
163
+
164
+ # Is this text not in awe? lols. Rather, is it not wrapped in regex `or` .
165
+ # ie. |pieceofcontent| = false
166
+ # ie. |pieceofcontent = true
167
+ # ie. pieceofcontent| = true
168
+ # ie. pieceofcontent = true
169
+ def self.not_in_or?(text)
170
+ text[0] != '|' && text[-1] != '|'
171
+ end
172
+
173
+ def self.wrap_in_brackets(text)
174
+ '(?:' + text + ')'
175
+ end
176
+ end
@@ -0,0 +1,4 @@
1
+ # encoding: utf-8
2
+ module Boogex
3
+ Error = Class.new(StandardError)
4
+ end
@@ -0,0 +1,5 @@
1
+ # encoding: utf-8
2
+ $LOAD_PATH.unshift("#{File.dirname(__FILE__)}")
3
+
4
+ module Boogex::Helper
5
+ end
@@ -0,0 +1,4 @@
1
+ # encoding: utf-8
2
+ module Boogex
3
+ VERSION = '0.0.1'
4
+ end
@@ -0,0 +1,36 @@
1
+ require "#{File.dirname(__FILE__)}/test_helper"
2
+
3
+ describe Boogex do
4
+ it 'turns OR into |' do
5
+ string = 'This OR That'
6
+ expecting = '(?:This|That)'
7
+ result = Boogex.convert(string)
8
+ assert_equal expecting, result
9
+ end
10
+
11
+ it 'turns AND into andify array string' do
12
+ string = 'This AND That'
13
+ expecting = '#{andify["(?:This)", "(?:That)"]}'
14
+ result = Boogex.convert(string)
15
+ assert_equal expecting, result
16
+ end
17
+
18
+ it 'understands bracketing' do
19
+ string = '(This OR That) AND My self'
20
+ expecting = '#{andify["(?:This|That)", "(?:My self)"]}'
21
+ result = Boogex.convert(string)
22
+ assert_equal expecting, result
23
+
24
+ string = 'This AND (That OR (My self)'
25
+ expecting = '#{andify["(?:This|That)", "(?:My self)"]}'
26
+ result = Boogex.convert(string)
27
+ assert_equal expecting, result
28
+ end
29
+
30
+ it 'correctly convert this Lucene boolean query string' do
31
+ string = '(((asd OR dd) AND that) AND this) OR What?'
32
+ expecting = '#{andify["#{andify["(?:asd|dd)", "(?:that)"]}", "(?:this)"]}|What?'
33
+ result = Boogex.convert(string)
34
+ assert_equal expecting, result
35
+ end
36
+ end
@@ -0,0 +1,11 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <testsuite name="Boogex" skipped="0" failures="0" errors="0" tests="4" assertions="4" time="0.0007573158945888281">
3
+ <testcase name="test_0001_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.0001257510157302022">
4
+ </testcase>
5
+ <testcase name="test_0002_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.00011062994599342346">
6
+ </testcase>
7
+ <testcase name="test_0003_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.00020186102483421564">
8
+ </testcase>
9
+ <testcase name="test_0004_correctly convert this Lucene boolean query string" classname="Boogex" assertions="1" time="0.0003190739080309868">
10
+ </testcase>
11
+ </testsuite>
@@ -0,0 +1,8 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'minitest/autorun'
4
+ require 'minitest/reporters'
5
+ require 'minitest/spec'
6
+
7
+ MiniTest::Reporters.use! [Minitest::Reporters::SpecReporter.new, MiniTest::Reporters::JUnitReporter.new]
8
+ require File.join(File.dirname(__FILE__), '..', 'lib', 'boogex')
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: boogex
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Sam Crouch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-02-14 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Converts Lucene Boolean query language into ruby regex.
14
+ email: samuel.crouch@lexer.com.au
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - Gemfile
20
+ - Gemfile.lock
21
+ - LICENSE
22
+ - README.md
23
+ - Rakefile
24
+ - boogex.gemspec
25
+ - lib/boogex.rb
26
+ - lib/boogex/convertor.rb
27
+ - lib/boogex/error.rb
28
+ - lib/boogex/helper.rb
29
+ - lib/boogex/version.rb
30
+ - test/convertor_test.rb
31
+ - test/reports/TEST-Boogex.xml
32
+ - test/test_helper.rb
33
+ homepage: https://github.com/lexerdev/boogex.gem
34
+ licenses:
35
+ - MIT
36
+ metadata: {}
37
+ post_install_message:
38
+ rdoc_options: []
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project:
53
+ rubygems_version: 2.2.2
54
+ signing_key:
55
+ specification_version: 4
56
+ summary: Boolean Lucene to Regex convertor.
57
+ test_files:
58
+ - Gemfile
59
+ - Gemfile.lock
60
+ - LICENSE
61
+ - README.md
62
+ - Rakefile
63
+ - boogex.gemspec
64
+ - lib/boogex.rb
65
+ - lib/boogex/convertor.rb
66
+ - lib/boogex/error.rb
67
+ - lib/boogex/helper.rb
68
+ - lib/boogex/version.rb
69
+ - test/convertor_test.rb
70
+ - test/reports/TEST-Boogex.xml
71
+ - test/test_helper.rb