RubyGems - human_query_parser - Versions diffs - 1.0.0 - Mend

human_query_parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

checksums.yaml +7 -0
data/.gitignore +10 -0
data/.rubocop.yml +131 -0
data/CHANGELOG.md +4 -0
data/CODE_OF_CONDUCT.md +46 -0
data/Gemfile +5 -0
data/Guardfile +22 -0
data/Jenkinsfile +92 -0
data/LICENSE +21 -0
data/README.md +261 -0
data/Rakefile +10 -0
data/bin/_guard-core +16 -0
data/bin/guard +16 -0
data/bin/rake +16 -0
data/human_query_parser.gemspec +29 -0
data/lib/human_query_parser.rb +14 -0
data/lib/human_query_parser/bareword.rb +63 -0
data/lib/human_query_parser/parser.rb +26 -0
data/lib/human_query_parser/phrase.rb +36 -0
data/lib/human_query_parser/query.rb +46 -0
data/lib/human_query_parser/term.rb +22 -0
data/lib/human_query_parser/transform.rb +14 -0
data/lib/human_query_parser/version.rb +3 -0
data/test/bareword_test.rb +52 -0
data/test/human_query_parser_test.rb +18 -0
data/test/parser_test.rb +136 -0
data/test/phrase_test.rb +33 -0
data/test/query_test.rb +159 -0
data/test/term_test.rb +16 -0
data/test/test_helper.rb +18 -0
data/test/transform_test.rb +185 -0
metadata +167 -0

data/Rakefile ADDED Viewed

@@ -0,0 +1,10 @@
+require 'bundler/gem_tasks'
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |t|
+  t.libs << 'test'
+  t.libs << 'lib'
+  t.test_files = FileList['test/**/*_test.rb']
+end
+task default: :test

data/bin/_guard-core ADDED Viewed

@@ -0,0 +1,16 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application '_guard-core' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+require 'pathname'
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
+  Pathname.new(__FILE__).realpath,)
+require 'rubygems'
+require 'bundler/setup'
+load Gem.bin_path('guard', '_guard-core')

data/bin/guard ADDED Viewed

@@ -0,0 +1,16 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'guard' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+require 'pathname'
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
+  Pathname.new(__FILE__).realpath,)
+require 'rubygems'
+require 'bundler/setup'
+load Gem.bin_path('guard', 'guard')

data/bin/rake ADDED Viewed

@@ -0,0 +1,16 @@
+#!/usr/bin/env ruby
+#
+# This file was generated by Bundler.
+#
+# The application 'rake' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+require 'pathname'
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
+  Pathname.new(__FILE__).realpath,)
+require 'rubygems'
+require 'bundler/setup'
+load Gem.bin_path('rake', 'rake')

data/human_query_parser.gemspec ADDED Viewed

@@ -0,0 +1,29 @@
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'human_query_parser/version'
+Gem::Specification.new do |spec|
+  spec.name          = 'human_query_parser'
+  spec.version       = HumanQueryParser::VERSION
+  spec.authors       = ['PatientsLikeMe']
+  spec.email         = ['engineers@patientslikeme.com']
+  spec.homepage      = 'https://www.patientslikeme.com'
+  spec.summary       = 'A tool for taking search queries of the form most users will expect, and producing ElasticSearch queries that do what most users would expect.'
+  spec.description   = 'A tool for taking search queries of the form most users will expect, and producing ElasticSearch queries that do what most users would expect.'
+  spec.files         = `git ls-files -z`.split("\x0")
+  spec.bindir        = 'exe'
+  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^test/})
+  spec.require_paths = ['lib']
+  spec.add_runtime_dependency 'parslet', '~> 1.8'
+  spec.add_development_dependency 'bundler', '~> 1.10'
+  spec.add_development_dependency 'rake', '~> 10.0'
+  spec.add_development_dependency 'minitest'
+  spec.add_development_dependency 'minitest-reporters'
+  spec.add_development_dependency 'pry'
+end

data/lib/human_query_parser.rb ADDED Viewed

@@ -0,0 +1,14 @@
+require 'human_query_parser/parser'
+require 'human_query_parser/bareword'
+require 'human_query_parser/phrase'
+require 'human_query_parser/query'
+require 'human_query_parser/term'
+require 'human_query_parser/transform'
+module HumanQueryParser
+  def self.compile(query_text, search_fields)
+    parse_result = HumanQueryParser::Parser.new.parse(query_text)
+    query = HumanQueryParser::Transform.new.apply(parse_result)
+    query.es_query(search_fields)
+  end
+end

data/lib/human_query_parser/bareword.rb ADDED Viewed

@@ -0,0 +1,63 @@
+module HumanQueryParser
+  class Bareword
+    attr_reader :content
+    def initialize(content)
+      @content = content.to_s
+    end
+    def query_fragments(search_fields, fuzzy)
+      if fuzzy
+        [
+          {
+            multi_match: {
+              fields: search_fields,
+              query: content,
+              max_expansions: 50,
+              fuzziness: "AUTO",
+              prefix_length: 1,
+            },
+          },
+          {
+            multi_match: {
+              fields: search_fields,
+              query: content,
+              max_expansions: 50,
+              fuzziness: "AUTO",
+              operator: 'and',
+              boost: 6.0,
+              prefix_length: 1,
+            },
+          },
+          {
+            multi_match: {
+              fields: search_fields,
+              query: content,
+              max_expansions: 50,
+              type: "phrase",
+              boost: 8.0,
+            },
+          },
+          {
+            multi_match: {
+              fields: search_fields,
+              query: content,
+              max_expansions: 50,
+              fuzziness: "AUTO",
+              prefix_length: 3,
+            },
+          },
+        ]
+      else
+        [
+          {
+            multi_match: {
+              fields: search_fields,
+              query: content,
+            },
+          },
+        ]
+      end
+    end
+  end
+end

data/lib/human_query_parser/parser.rb ADDED Viewed

@@ -0,0 +1,26 @@
+require 'parslet'
+module HumanQueryParser
+  # Parslet grammar for search text. A query is a sequence of terms; each term
+  # is an optional '+' or '-' operator followed by either a double-quoted
+  # phrase or a bareword. The parse result is a hash tree keyed by :query,
+  # :operator, :term, :phrase and :bareword.
+  class Parser < Parslet::Parser
+    # Character-level rules
+    rule(:plus)       { str('+') }
+    rule(:minus)      { str('-') }
+    rule(:quote)      { str('"') }
+    # One or more whitespace characters.
+    rule(:space)      { match('\s').repeat(1) }
+    rule(:space?)     { space.maybe }
+    # Things
+    rule(:operator)   { plus | minus }
+    # Everything between two double quotes is captured as :phrase. Any
+    # character except a double quote is accepted inside, so there is no way
+    # to embed a quote in a phrase.
+    rule(:phrase) {
+      quote >> (quote.absent? >> any).repeat.as(:phrase) >> quote
+    }
+    # A bareword may start with any character other than a double quote...
+    rule(:bareword_start) { quote.absent? >> any }
+    # ...and then runs up to the next whitespace character (later characters
+    # may be double quotes). The whole run is captured as :bareword.
+    rule(:bareword)   { (bareword_start >> (space.absent? >> any).repeat).as(:bareword) }
+    # Optional surrounding whitespace, an optional operator captured as
+    # :operator, then a phrase (tried first) or a bareword captured as :term.
+    rule(:term)       { space? >> operator.maybe.as(:operator) >> (phrase | bareword).as(:term) >> space? }
+    # Put it all together: zero or more terms, captured as :query.
+    rule(:query)      { term.repeat.as(:query) }
+    root :query
+  end
+end

data/lib/human_query_parser/phrase.rb ADDED Viewed

@@ -0,0 +1,36 @@
+module HumanQueryParser
+  class Phrase
+    attr_reader :content
+    def initialize(content)
+      @content = content.to_s
+    end
+    def query_fragments(search_fields, fuzzy)
+      multi_match = {
+        fields: search_fields,
+        query: content,
+        type: "phrase",
+      }
+      if fuzzy
+        [
+          {
+            function_score: {
+              query: {
+                multi_match: multi_match.merge(max_expansions: 50),
+              },
+              boost: 8.0,
+            },
+          },
+        ]
+      else
+        [
+          {
+            multi_match: multi_match,
+          },
+        ]
+      end
+    end
+  end
+end

data/lib/human_query_parser/query.rb ADDED Viewed

@@ -0,0 +1,46 @@
+require 'human_query_parser/bareword'
+require 'human_query_parser/term'
+module HumanQueryParser
+  class Query
+    attr_reader :terms_by_operator
+    def initialize(terms)
+      @terms_by_operator = terms.group_by(&:operator).inject({}) do |hash, (operator, term_group)|
+        hash[operator] = combine_barewords(term_group, operator)
+        hash
+      end
+    end
+    def combine_barewords(terms, operator)
+      bareword_terms, others = terms.partition(&:bareword?)
+      if bareword_terms.any?
+        strings = bareword_terms.map { |term| term.content.content }
+        new_bareword = Bareword.new(strings.join(" "))
+        [Term.new(operator, new_bareword)] + others
+      else
+        others
+      end
+    end
+    def terms_for_operator(operator)
+      terms_by_operator[operator] || []
+    end
+    def es_query(search_fields)
+      bool_clauses = terms_by_operator.inject({}) do |hash, (operator, terms)|
+        es_operator = case operator
+        when nil then :should
+        when '+' then :must
+        when '-' then :must_not
+        end
+        hash[es_operator] = terms.flat_map { |term| term.query_fragments(search_fields) }
+        hash
+      end
+      { bool: bool_clauses }
+    end
+  end
+end

data/lib/human_query_parser/term.rb ADDED Viewed

@@ -0,0 +1,22 @@
+module HumanQueryParser
+  class Term
+    attr_reader :operator, :content
+    def initialize(operator, content)
+      @operator = operator
+      @content = content
+    end
+    def bareword?
+      content.is_a?(Bareword)
+    end
+    def fuzzy?
+      operator.nil?
+    end
+    def query_fragments(search_fields)
+      content.query_fragments(search_fields, fuzzy?)
+    end
+  end
+end

data/lib/human_query_parser/transform.rb ADDED Viewed

@@ -0,0 +1,14 @@
+require 'parslet'
+require 'human_query_parser/bareword'
+require 'human_query_parser/phrase'
+require 'human_query_parser/query'
+require 'human_query_parser/term'
+module HumanQueryParser
+  class Transform < Parslet::Transform
+    rule(:phrase => simple(:phrase)) { Phrase.new(phrase) }
+    rule(:bareword => simple(:bareword)) { Bareword.new(bareword) }
+    rule(:term => simple(:term), :operator => simple(:operator)) { Term.new(operator, term) }
+    rule(:query => sequence(:terms)) { Query.new(terms) }
+  end
+end

data/lib/human_query_parser/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module HumanQueryParser
+  # Version string of the gem; the gemspec assigns it to spec.version.
+  VERSION = '1.0.0'.freeze
+end

data/test/bareword_test.rb ADDED Viewed

@@ -0,0 +1,52 @@
+require 'test_helper'
+require 'json'
+class HumanQueryParser::BarewordTest < Minitest::Spec
+  it 'generates a non-fuzzy query fragment correctly' do
+    HumanQueryParser::Bareword.new('blue').query_fragments(['field1', 'field2'], false).must_equal([
+      {
+        multi_match: {
+          fields: ['field1', 'field2'],
+          query: 'blue',
+        },
+      },
+    ],)
+  end
+  it 'generates a fuzzy query fragment correctly' do
+    actual_fragments = HumanQueryParser::Bareword.new('blue').query_fragments(['field1', 'field2'], true)
+    basic_multi_match = {
+      fields: ['field1', 'field2'],
+      query: 'blue',
+      max_expansions: 50,
+    }
+    expected_fragments = [
+      { multi_match: basic_multi_match.merge(fuzziness: "AUTO", prefix_length: 1) },
+      {
+        multi_match: basic_multi_match.merge({
+          operator: "and",
+          fuzziness: "AUTO",
+          prefix_length: 1,
+          boost: 6.0,
+        },),
+      },
+      {
+        multi_match: basic_multi_match.merge(type: 'phrase', boost: 8.0),
+      },
+      { multi_match: basic_multi_match.merge(fuzziness: "AUTO", prefix_length: 1) },
+    ]
+    actual_fragments.size.must_equal expected_fragments.size
+    expected_fragments.each do |fragment|
+      assert actual_fragments.include?(fragment), <<-MESSAGE
+Generated query fragments:
+#{JSON.pretty_generate(actual_fragments)}
+Were expected to contain the following fragment, but didn't:
+#{JSON.pretty_generate(fragment)}
+      MESSAGE
+    end
+  end
+end

data/test/human_query_parser_test.rb ADDED Viewed

@@ -0,0 +1,18 @@
+require_relative 'test_helper'
+class HumanQueryParserTest < Minitest::Spec
+  it 'compiles things, basically' do
+    HumanQueryParser.compile('+test', ['field1', 'field2']).must_equal({
+      bool: {
+        must: [
+          {
+            multi_match: {
+              fields: ['field1', 'field2'],
+              query: 'test',
+            },
+          },
+        ],
+      },
+    },)
+  end
+end

data/test/parser_test.rb ADDED Viewed

@@ -0,0 +1,136 @@
+require 'test_helper'
+class HumanQueryParser::ParserTest < Minitest::Spec
+  subject { HumanQueryParser::Parser.new }
+  it 'parses a single term correctly' do
+    subject.parse('word').must_equal({
+      query: [
+        {
+          operator: nil,
+          term: {
+            bareword: 'word',
+          },
+        },
+      ],
+    },)
+  end
+  it 'parses two terms correctly' do
+    subject.parse('word up').must_equal({
+      query: [
+        {
+          operator: nil,
+          term: {
+            bareword: 'word',
+          },
+        },
+        {
+          operator: nil,
+          term: {
+            bareword: 'up',
+          },
+        },
+      ],
+    },)
+  end
+  it 'ignores extra spacing' do
+    subject.parse('       word   up  ').must_equal({
+      query: [
+        {
+          operator: nil,
+          term: {
+            bareword: 'word',
+          },
+        },
+        {
+          operator: nil,
+          term: {
+            bareword: 'up',
+          },
+        },
+      ],
+    },)
+  end
+  it 'parses terms with operators correctly' do
+    subject.parse('+word').must_equal({
+      query: [
+        {
+          operator: '+',
+          term: {
+            bareword: 'word',
+          },
+        },
+      ],
+    },)
+    subject.parse('-word').must_equal({
+      query: [
+        {
+          operator: '-',
+          term: {
+            bareword: 'word',
+          },
+        },
+      ],
+    },)
+  end
+  it 'parses quoted phrases correctly' do
+    subject.parse('word "a phrase"').must_equal({
+      query: [
+        {
+          operator: nil,
+          term: {
+            bareword: 'word',
+          },
+        },
+        {
+          operator: nil,
+          term: {
+            phrase: 'a phrase',
+          },
+        },
+      ],
+    },)
+  end
+  it 'passes through extra spacing in phrases' do
+    subject.parse('"a    phrase"').must_equal({
+      query: [
+        {
+          operator: nil,
+          term: {
+            phrase: 'a    phrase',
+          },
+        },
+      ],
+    },)
+  end
+  it 'parses phrases with operators correctly' do
+    subject.parse('+"a    phrase"').must_equal({
+      query: [
+        {
+          operator: '+',
+          term: {
+            phrase: 'a    phrase',
+          },
+        },
+      ],
+    },)
+    subject.parse('-"a    phrase"').must_equal({
+      query: [
+        {
+          operator: '-',
+          term: {
+            phrase: 'a    phrase',
+          },
+        },
+      ],
+    },)
+  end
+end