human_query_parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
# Defines the `test` task: appends 'test' and 'lib' to the task's libs and
# selects every file matching test/**/*_test.rb.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('test', 'lib')
  test_task.test_files = FileList['test/**/*_test.rb']
end

# A bare `rake` invocation runs the test task.
task :default => :test
data/bin/_guard-core ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# This file was generated by Bundler.
#
# It is a thin wrapper around the '_guard-core' application, which is
# installed as part of a gem; the wrapper only exists to make it runnable.
#

require 'pathname'

# Respect a BUNDLE_GEMFILE that is already set; otherwise resolve
# ../../Gemfile against this file's real path.
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', Pathname.new(__FILE__).realpath)

require 'rubygems'
require 'bundler/setup'

# Hand control to the executable shipped in the 'guard' gem.
load(Gem.bin_path('guard', '_guard-core'))
data/bin/guard ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# This file was generated by Bundler.
#
# It is a thin wrapper around the 'guard' application, which is installed
# as part of a gem; the wrapper only exists to make it runnable.
#

require 'pathname'

# Respect a BUNDLE_GEMFILE that is already set; otherwise resolve
# ../../Gemfile against this file's real path.
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', Pathname.new(__FILE__).realpath)

require 'rubygems'
require 'bundler/setup'

# Hand control to the executable shipped in the 'guard' gem.
load(Gem.bin_path('guard', 'guard'))
data/bin/rake ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# This file was generated by Bundler.
#
# It is a thin wrapper around the 'rake' application, which is installed
# as part of a gem; the wrapper only exists to make it runnable.
#

require 'pathname'

# Respect a BUNDLE_GEMFILE that is already set; otherwise resolve
# ../../Gemfile against this file's real path.
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', Pathname.new(__FILE__).realpath)

require 'rubygems'
require 'bundler/setup'

# Hand control to the executable shipped in the 'rake' gem.
load(Gem.bin_path('rake', 'rake'))
@@ -0,0 +1,29 @@
1
# Put lib/ on the load path so the version file can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

require 'human_query_parser/version'

Gem::Specification.new do |s|
  # Identity
  s.name     = 'human_query_parser'
  s.version  = HumanQueryParser::VERSION
  s.authors  = ['PatientsLikeMe']
  s.email    = ['engineers@patientslikeme.com']
  s.homepage = 'https://www.patientslikeme.com'

  # The summary and the description carry the same text.
  s.summary     = 'A tool for taking search queries of the form most users will expect, and producing ElasticSearch queries that do what most users would expect.'
  s.description = 'A tool for taking search queries of the form most users will expect, and producing ElasticSearch queries that do what most users would expect.'

  # Packaged files are whatever git tracks; executables are looked up under exe/.
  tracked_files   = %x(git ls-files -z).split("\x0")
  s.files         = tracked_files
  s.bindir        = 'exe'
  s.executables   = s.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  s.test_files    = s.files.grep(%r{^test/})
  s.require_paths = ['lib']

  s.add_runtime_dependency 'parslet', '~> 1.8'

  s.add_development_dependency 'bundler', '~> 1.10'
  s.add_development_dependency 'rake', '~> 10.0'
  %w[minitest minitest-reporters pry].each do |gem_name|
    s.add_development_dependency gem_name
  end
end
@@ -0,0 +1,14 @@
1
+ require 'human_query_parser/parser'
2
+ require 'human_query_parser/bareword'
3
+ require 'human_query_parser/phrase'
4
+ require 'human_query_parser/query'
5
+ require 'human_query_parser/term'
6
+ require 'human_query_parser/transform'
7
+
8
module HumanQueryParser
  # Compiles a human-style search string into a query structure.
  #
  # The text is parsed with Parser, the resulting tree is converted with
  # Transform, and the converted object is asked for its es_query.
  #
  # @param query_text the raw query text handed to Parser#parse
  # @param search_fields the fields passed through to es_query
  # @return whatever es_query returns for the transformed query
  def self.compile(query_text, search_fields)
    tree = Parser.new.parse(query_text)
    Transform.new.apply(tree).es_query(search_fields)
  end
end
@@ -0,0 +1,63 @@
1
module HumanQueryParser
  # An unquoted piece of query text and the multi_match clauses built from it.
  class Bareword
    attr_reader :content

    # @param content the word text; stored as a String via to_s
    def initialize(content)
      @content = content.to_s
    end

    # Builds the multi_match clauses for this word.
    #
    # @param search_fields value placed under :fields in every clause
    # @param fuzzy when truthy, four weighted clauses are returned; otherwise
    #   a single plain multi_match clause
    # @return [Array<Hash>] the clauses, each wrapped as { multi_match: ... }
    def query_fragments(search_fields, fuzzy)
      return [multi_match_clause(search_fields)] unless fuzzy

      [
        # Fuzzy match, first character must match exactly.
        multi_match_clause(search_fields, max_expansions: 50, fuzziness: "AUTO", prefix_length: 1),
        # Same, with operator 'and', boosted.
        multi_match_clause(
          search_fields,
          max_expansions: 50,
          fuzziness: "AUTO",
          operator: 'and',
          boost: 6.0,
          prefix_length: 1
        ),
        # Phrase match, boosted highest.
        multi_match_clause(search_fields, max_expansions: 50, type: "phrase", boost: 8.0),
        # Fuzzy match requiring a three-character exact prefix.
        multi_match_clause(search_fields, max_expansions: 50, fuzziness: "AUTO", prefix_length: 3),
      ]
    end

    private

    # Wraps fields + query text, followed by any extra options, in a
    # multi_match clause.
    def multi_match_clause(search_fields, options = {})
      { multi_match: { fields: search_fields, query: content }.merge(options) }
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'parslet'
2
+
3
module HumanQueryParser
  # Parslet grammar for human-style queries: a run of terms, each optionally
  # prefixed with + or - and written as a bare word or a "quoted phrase".
  class Parser < Parslet::Parser
    # Literal characters
    rule(:plus)  { str('+') }
    rule(:minus) { str('-') }
    rule(:quote) { str('"') }

    # Whitespace: one or more whitespace characters, and its optional form
    rule(:space)  { match('\s').repeat(1) }
    rule(:space?) { space.maybe }

    rule(:operator) { plus | minus }

    # Everything between a pair of double quotes, captured under :phrase
    rule(:phrase) do
      phrase_body = (quote.absent? >> any).repeat
      quote >> phrase_body.as(:phrase) >> quote
    end

    # A bare word starts with any character other than a quote and runs
    # until the next whitespace; captured under :bareword
    rule(:bareword_start) { quote.absent? >> any }
    rule(:bareword) do
      remainder = (space.absent? >> any).repeat
      (bareword_start >> remainder).as(:bareword)
    end

    # Optional operator followed by a phrase or bare word, with optional
    # whitespace on either side
    rule(:term) do
      body = (phrase | bareword).as(:term)
      space? >> operator.maybe.as(:operator) >> body >> space?
    end

    # The whole input: zero or more terms, captured under :query
    rule(:query) { term.repeat.as(:query) }
    root :query
  end
end
@@ -0,0 +1,36 @@
1
module HumanQueryParser
  # A quoted phrase and the phrase-type multi_match clause built from it.
  class Phrase
    attr_reader :content

    # @param content the phrase text; stored as a String via to_s
    def initialize(content)
      @content = content.to_s
    end

    # Builds the query clause for this phrase.
    #
    # @param search_fields value placed under :fields
    # @param fuzzy when truthy, the phrase match gains max_expansions and is
    #   wrapped in a function_score with boost 8.0; otherwise the plain
    #   multi_match is returned
    # @return [Array<Hash>] a one-element array holding the clause
    def query_fragments(search_fields, fuzzy)
      phrase_match = { fields: search_fields, query: content, type: "phrase" }
      return [{ multi_match: phrase_match }] unless fuzzy

      scored = {
        query: { multi_match: phrase_match.merge(max_expansions: 50) },
        boost: 8.0,
      }
      [{ function_score: scored }]
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require 'human_query_parser/bareword'
2
+ require 'human_query_parser/term'
3
+
4
module HumanQueryParser
  # A parsed query: its terms grouped by operator, convertible to a bool query.
  class Query
    # Hash of operator (nil, or the operator's String value) => Array of terms.
    attr_reader :terms_by_operator

    # Groups the given terms by operator and merges each group's barewords.
    #
    # Grouping uses the operator's String value rather than the operator
    # object itself. NOTE(review): operators coming from the Parslet parser
    # look like Parslet::Slice objects, which compare equal to Strings but
    # do not appear to hash by value; grouping on them directly would put
    # every "+term" in its own group. Confirm against the Parslet version in use.
    #
    # @param terms [Array] objects responding to operator, bareword? and content
    def initialize(terms)
      @terms_by_operator = {}
      terms.group_by { |term| operator_key(term.operator) }.each do |operator, term_group|
        @terms_by_operator[operator] = combine_barewords(term_group, operator)
      end
    end

    # Replaces all bareword terms in the group with one Term whose Bareword
    # is their texts joined by single spaces; other terms follow unchanged.
    #
    # @param terms [Array] terms sharing one operator
    # @param operator the operator given to the combined Term
    # @return [Array] the combined bareword term (if any) followed by the rest
    def combine_barewords(terms, operator)
      bareword_terms, others = terms.partition(&:bareword?)
      return others if bareword_terms.empty?

      joined = bareword_terms.map { |term| term.content.content }.join(" ")
      [Term.new(operator, Bareword.new(joined))] + others
    end

    # @param operator nil, '+' or '-' (any object whose to_s gives those)
    # @return [Array] the terms stored for that operator, or [] if none
    def terms_for_operator(operator)
      terms_by_operator[operator_key(operator)] || []
    end

    # Builds the bool query: terms without an operator go under :should,
    # '+' terms under :must and '-' terms under :must_not.
    #
    # @param search_fields passed to every term's query_fragments
    # @return [Hash] { bool: { clause => [fragments, ...] } }
    def es_query(search_fields)
      bool_clauses = {}

      terms_by_operator.each do |operator, terms|
        es_operator = case operator
                      when nil then :should
                      when '+' then :must
                      when '-' then :must_not
                      end

        fragments = terms.flat_map { |term| term.query_fragments(search_fields) }
        # Append rather than assign, so two groups that map to the same
        # clause can never overwrite each other.
        (bool_clauses[es_operator] ||= []).concat(fragments)
      end

      { bool: bool_clauses }
    end

    private

    # Normalizes an operator to nil or a plain String so it is a reliable
    # Hash key.
    def operator_key(operator)
      operator.nil? ? nil : operator.to_s
    end
  end
end
@@ -0,0 +1,22 @@
1
module HumanQueryParser
  # One query term: an optional operator together with its content object.
  class Term
    attr_reader :operator, :content

    # @param operator the operator attached to the term, or nil for none
    # @param content the object that will build the query fragments
    def initialize(operator, content)
      @operator, @content = operator, content
    end

    # @return [Boolean] whether the content is a Bareword
    def bareword?
      content.kind_of?(Bareword)
    end

    # @return [Boolean] true only for terms without an operator
    def fuzzy?
      operator.nil?
    end

    # Delegates to the content, passing along whether this term is fuzzy.
    #
    # @param search_fields forwarded to the content's query_fragments
    def query_fragments(search_fields)
      fuzzy = fuzzy?
      content.query_fragments(search_fields, fuzzy)
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require 'parslet'
2
+ require 'human_query_parser/bareword'
3
+ require 'human_query_parser/phrase'
4
+ require 'human_query_parser/query'
5
+ require 'human_query_parser/term'
6
+
7
module HumanQueryParser
  # Converts the parser's tree of hashes into Phrase, Bareword, Term and
  # Query objects.
  class Transform < Parslet::Transform
    # :phrase holding a single value => Phrase
    rule(:phrase => simple(:phrase)) { Phrase.new(phrase) }

    # An empty quoted phrase ("") captures nothing. NOTE(review): Parslet
    # appears to report an empty named repetition as [] rather than as a
    # string, which simple(...) does not bind; without this rule the tree
    # would stay a plain Hash and the later call to es_query would fail.
    # Joining the (empty) sequence yields a Phrase with empty content.
    rule(:phrase => sequence(:phrase)) { Phrase.new(phrase.join) }

    # :bareword holding a single value => Bareword
    rule(:bareword => simple(:bareword)) { Bareword.new(bareword) }

    # An already-transformed :term plus its :operator => Term
    rule(:term => simple(:term), :operator => simple(:operator)) { Term.new(operator, term) }

    # The list of transformed terms => Query
    rule(:query => sequence(:terms)) { Query.new(terms) }
  end
end
@@ -0,0 +1,3 @@
1
module HumanQueryParser
  # Version string of the gem; frozen so it cannot be modified in place.
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,52 @@
1
+ require 'test_helper'
2
+ require 'json'
3
+
4
# Specs for HumanQueryParser::Bareword#query_fragments.
class HumanQueryParser::BarewordTest < Minitest::Spec
  it 'generates a non-fuzzy query fragment correctly' do
    HumanQueryParser::Bareword.new('blue').query_fragments(['field1', 'field2'], false).must_equal([
      {
        multi_match: {
          fields: ['field1', 'field2'],
          query: 'blue',
        },
      },
    ],)
  end

  it 'generates a fuzzy query fragment correctly' do
    actual_fragments = HumanQueryParser::Bareword.new('blue').query_fragments(['field1', 'field2'], true)

    # Keys shared by every fuzzy fragment.
    basic_multi_match = {
      fields: ['field1', 'field2'],
      query: 'blue',
      max_expansions: 50,
    }

    expected_fragments = [
      { multi_match: basic_multi_match.merge(fuzziness: "AUTO", prefix_length: 1) },
      {
        multi_match: basic_multi_match.merge({
          operator: "and",
          fuzziness: "AUTO",
          prefix_length: 1,
          boost: 6.0,
        },),
      },
      {
        multi_match: basic_multi_match.merge(type: 'phrase', boost: 8.0),
      },
      # The last fragment Bareword emits uses prefix_length: 3. Expecting 1
      # here would only repeat the first expectation, and because fragments
      # are checked with include?, the real fourth fragment would go unverified.
      { multi_match: basic_multi_match.merge(fuzziness: "AUTO", prefix_length: 3) },
    ]

    actual_fragments.size.must_equal expected_fragments.size
    expected_fragments.each do |fragment|
      assert actual_fragments.include?(fragment), <<-MESSAGE
        Generated query fragments:
        #{JSON.pretty_generate(actual_fragments)}

        Were expected to contain the following fragment, but didn't:
        #{JSON.pretty_generate(fragment)}
      MESSAGE
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require_relative 'test_helper'
2
+
3
# End-to-end spec for HumanQueryParser.compile.
class HumanQueryParserTest < Minitest::Spec
  it 'compiles things, basically' do
    # A "+" term is expected to land under :must as a plain multi_match.
    expected_clause = {
      multi_match: {
        fields: ['field1', 'field2'],
        query: 'test',
      },
    }
    expected_query = { bool: { must: [expected_clause] } }

    compiled = HumanQueryParser.compile('+test', ['field1', 'field2'])
    compiled.must_equal(expected_query)
  end
end
@@ -0,0 +1,136 @@
1
+ require 'test_helper'
2
+
3
# Specs for the raw parse tree produced by HumanQueryParser::Parser.
class HumanQueryParser::ParserTest < Minitest::Spec
  subject { HumanQueryParser::Parser.new }

  # Expected tree entry for a bare word with an optional operator.
  def bareword_node(text, operator = nil)
    { operator: operator, term: { bareword: text } }
  end

  # Expected tree entry for a quoted phrase with an optional operator.
  def phrase_node(text, operator = nil)
    { operator: operator, term: { phrase: text } }
  end

  # Parses the input and compares the result with a :query list made of
  # the given nodes.
  def expect_parse(input, *nodes)
    subject.parse(input).must_equal({ query: nodes })
  end

  it 'parses a single term correctly' do
    expect_parse('word', bareword_node('word'))
  end

  it 'parses two terms correctly' do
    expect_parse('word up', bareword_node('word'), bareword_node('up'))
  end

  it 'ignores extra spacing' do
    expect_parse(' word up ', bareword_node('word'), bareword_node('up'))
  end

  it 'parses terms with operators correctly' do
    expect_parse('+word', bareword_node('word', '+'))
    expect_parse('-word', bareword_node('word', '-'))
  end

  it 'parses quoted phrases correctly' do
    expect_parse('word "a phrase"', bareword_node('word'), phrase_node('a phrase'))
  end

  it 'passes through extra spacing in phrases' do
    expect_parse('"a phrase"', phrase_node('a phrase'))
  end

  it 'parses phrases with operators correctly' do
    expect_parse('+"a phrase"', phrase_node('a phrase', '+'))
    expect_parse('-"a phrase"', phrase_node('a phrase', '-'))
  end
end