minidusen 0.8.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/minidusen/parser.rb +50 -21
- data/lib/minidusen/query.rb +4 -0
- data/lib/minidusen/syntax.rb +1 -1
- data/lib/minidusen/token.rb +1 -1
- data/lib/minidusen/version.rb +1 -1
- data/spec/minidusen/parser_spec.rb +47 -9
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53ae4e4778bb465b76e102597bcde71ea9a0db5a
|
4
|
+
data.tar.gz: f2edf0821b4675e6be42805345e1cff779a42e43
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd3747985637690f5ee1f6504518c7e4a79a32c31ed9d5e99c54e0354c38b6e672b517ac518a0042308dbee49b08740c0b2517fecc32142ca61b8f6bf457c734
|
7
|
+
data.tar.gz: 72a9c8759ba4379793959ebd83dcd694076a146e81a4d0d95ff7635e3f0a6cff789381040f68b3b20267ea0dd7457ca709b0f334ebcf5c4c6173de6594dba42a
|
data/lib/minidusen/parser.rb
CHANGED
@@ -1,34 +1,63 @@
|
|
1
1
|
module Minidusen
|
2
2
|
class Parser
|
3
3
|
|
4
|
+
class CannotParse < StandardError; end
|
5
|
+
|
4
6
|
TEXT_QUERY = /(?:(\-)?"([^"]+)"|(\-)?([\S]+))/
|
5
7
|
FIELD_QUERY = /(\-)?(\w+)\:#{TEXT_QUERY}/
|
6
8
|
|
7
|
-
|
8
|
-
query_string = query_string.dup # we are going to delete substrings in-place
|
9
|
-
query = Query.new
|
10
|
-
extract_field_query_tokens(query_string, query)
|
11
|
-
extract_text_query_tokens(query_string, query)
|
12
|
-
query
|
13
|
-
end
|
9
|
+
class << self
|
14
10
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
11
|
+
def parse(object)
|
12
|
+
case object
|
13
|
+
when Query
|
14
|
+
object
|
15
|
+
when String
|
16
|
+
parse_string(object)
|
17
|
+
when Array
|
18
|
+
parse_array(object)
|
19
|
+
else
|
20
|
+
raise CannotParse, "Cannot parse #{object.inspect}"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def parse_string(string)
|
27
|
+
string = string.dup # we are going to delete substrings in-place
|
28
|
+
query = Query.new
|
29
|
+
extract_field_query_tokens(string, query)
|
30
|
+
extract_text_query_tokens(string, query)
|
31
|
+
query
|
21
32
|
end
|
22
|
-
end
|
23
33
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
query << Token.new(options)
|
34
|
+
def parse_array(array)
|
35
|
+
tokens = array.map { |string|
|
36
|
+
string.is_a?(String) or raise CannotParse, "Cannot parse an array of #{string.class}"
|
37
|
+
Token.new(:field => 'text', :value => string)
|
38
|
+
}
|
39
|
+
Query.new(tokens)
|
31
40
|
end
|
41
|
+
|
42
|
+
def extract_text_query_tokens(query_string, query)
|
43
|
+
while query_string.sub!(TEXT_QUERY, '')
|
44
|
+
value = "#{$2}#{$4}"
|
45
|
+
exclude = "#{$1}#{$3}" == "-"
|
46
|
+
options = { :field => 'text', :value => value, :exclude => exclude }
|
47
|
+
query << Token.new(options)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def extract_field_query_tokens(query_string, query)
|
52
|
+
while query_string.sub!(FIELD_QUERY, '')
|
53
|
+
field = $2
|
54
|
+
value = "#{$4}#{$6}"
|
55
|
+
exclude = "#{$1}" == "-"
|
56
|
+
options = { :field => field, :value => value, :exclude => exclude }
|
57
|
+
query << Token.new(options)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
32
61
|
end
|
33
62
|
|
34
63
|
end
|
data/lib/minidusen/query.rb
CHANGED
data/lib/minidusen/syntax.rb
CHANGED
data/lib/minidusen/token.rb
CHANGED
data/lib/minidusen/version.rb
CHANGED
data/spec/minidusen/parser_spec.rb
CHANGED
@@ -2,19 +2,57 @@ describe Minidusen::Parser do
|
|
2
2
|
|
3
3
|
describe '.parse' do
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
describe 'when called with a String' do
|
6
|
+
|
7
|
+
it 'parses the given string into tokens' do
|
8
|
+
query = Minidusen::Parser.parse('fieldname:fieldvalue word "a phrase"')
|
9
|
+
query.size.should == 3
|
10
|
+
query[0].field.should == 'fieldname'
|
11
|
+
query[0].value.should == 'fieldvalue'
|
12
|
+
query[1].field.should == 'text'
|
13
|
+
query[1].value.should == 'word'
|
14
|
+
query[2].field.should == 'text'
|
15
|
+
query[2].value.should == 'a phrase'
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'should parse field tokens first, because they usually give maximum filtering at little cost' do
|
19
|
+
query = Minidusen::Parser.parse('word1 field1:field1-value word2 field2:field2-value')
|
20
|
+
query.collect(&:value).should == ['field1-value', 'field2-value', 'word1', 'word2']
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'should not consider the dash to be a word boundary' do
|
24
|
+
query = Minidusen::Parser.parse('Baden-Baden')
|
25
|
+
query.collect(&:value).should == ['Baden-Baden']
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'should parse umlauts and accents' do
|
29
|
+
query = Minidusen::Parser.parse('field:åöÙÔøüéíÁ "ÄüÊçñÆ ððÿáÒÉ" pulvérisateur pędzić')
|
30
|
+
query.collect(&:value).should == ['åöÙÔøüéíÁ', 'ÄüÊçñÆ ððÿáÒÉ', 'pulvérisateur', 'pędzić']
|
31
|
+
end
|
32
|
+
|
8
33
|
end
|
9
34
|
|
10
|
-
|
11
|
-
|
12
|
-
|
35
|
+
describe 'when called with a Query' do
|
36
|
+
|
37
|
+
it 'returns the query' do
|
38
|
+
passed_query = Minidusen::Query.new
|
39
|
+
parsed_query = Minidusen::Parser.parse(passed_query)
|
40
|
+
parsed_query.should == passed_query
|
41
|
+
end
|
42
|
+
|
13
43
|
end
|
14
44
|
|
15
|
-
|
16
|
-
|
17
|
-
|
45
|
+
describe 'when called with an array of strings' do
|
46
|
+
|
47
|
+
it 'returns a query of text tokens' do
|
48
|
+
query = Minidusen::Parser.parse(['word', 'a phrase'])
|
49
|
+
query.size.should == 2
|
50
|
+
query[0].field.should == 'text'
|
51
|
+
query[0].value.should == 'word'
|
52
|
+
query[1].field.should == 'text'
|
53
|
+
query[1].value.should == 'a phrase'
|
54
|
+
end
|
55
|
+
|
18
56
|
end
|
19
57
|
|
20
58
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: minidusen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 0.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Henning Koch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-10-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
117
117
|
version: '0'
|
118
118
|
requirements: []
|
119
119
|
rubyforge_project:
|
120
|
-
rubygems_version: 2.
|
120
|
+
rubygems_version: 2.6.13
|
121
121
|
signing_key:
|
122
122
|
specification_version: 4
|
123
123
|
summary: Low-tech search for ActiveRecord with MySQL or PostgreSQL
|