es_query_builder 1.0.0
- checksums.yaml +7 -0
- data/lib/constants.rb +4 -0
- data/lib/elastic_search_query.rb +462 -0
- data/lib/fetch_es_data.rb +30 -0
- data/lib/housing_es_query_builder.rb +8 -0
- data/lib/indexer.rb +68 -0
- data/lib/token_query_builder.rb +76 -0
- metadata +83 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 90f99bbca380c475dd8e32b11b7d6bf8eaed0847
+  data.tar.gz: 5e0218899c63aa6ba9489fe6c254f84b1510f278
+SHA512:
+  metadata.gz: be2e9c5a52e306fd76263b23752ea054249d8e5abb793115b8e30cd0150ab8a3e7a5ba2d115b9f0775683d6ed139f1ebbf6eaab2ab202b863606aae63676656c
+  data.tar.gz: b1f12714aa7d81e8c3b7f8386dc9a3bd90a0c927dcc94e349813de523c01c7a0f8267cb41ee13b167a743a0266ff7ac869cd96f5c8daed7d518953547d142f63
data/lib/elastic_search_query.rb
ADDED
@@ -0,0 +1,462 @@
+class ElasticSearchQuery
+
+
+  #### All Get Query ================================================================
+
+  # returns the structure for ids matching
+  def self.get_ids_query_structure
+    {
+      :ids => {
+        :values => []
+      }
+    }
+  end
+
+  # returns constant score filter structure
+  def self.get_constant_score_filter_structure filter = {}, boost = 1
+    {
+      :constant_score => {
+        :filter => filter,
+        :boost => boost
+      }
+    }
+  end
+
+  # returns constant score query structure
+  def self.get_constant_score_query_structure query = {}, boost = 1
+    {
+      constant_score: {
+        filter: {
+          bool: {
+            must: query
+          }
+        },
+        boost: boost
+      }
+    }
+  end
+
+  # returns exists filter
+  def self.get_exists_filter field
+    {
+      constant_score: {
+        filter: {
+          exists: {
+            field: field
+          }
+        }
+      }
+    }
+  end
+
+  # With Elasticsearch 6.1 the nested filter has been replaced with the nested query
+  # nested filter structure
+  def self.get_nested_filter_structure path, query = {}
+    {
+      :nested => {
+        :path => path,
+        :query => query
+      }
+    }
+  end
+
+  # returns nested query structure
+  def self.get_nested_query_structure path, query = {}, score_mode=nil
+    raise ArgumentError.new("path has to be a string") unless (path.is_a? String)
+    subquery = {
+      :nested => {
+        :path => path,
+        :query => query
+      }
+    }
+    if Constants::FUNCTION_SCORE_METHODS.include? score_mode
+      subquery[:nested][:score_mode] = score_mode
+    end
+    return subquery
+  end
+
+  # returns structure for match_phrase_prefix
+  def self.get_match_phrase_prefix_query field, prefix
+    raise ArgumentError.new("field and prefix should be strings") unless (field.is_a? String) && (prefix.is_a? String)
+    return {
+      :match_phrase_prefix => {
+        field => prefix
+      }
+    }
+  end
+
+  # term filter query
+  def self.get_term_filter_query(field, value, cache_flag = false)
+    {
+      term: {
+        field => value
+      }
+    }
+  end
+
+  # term boost query
+  def self.get_term_boost_query field, value, boost
+    {
+      :term => {
+        field => {
+          :value => value,
+          :boost => boost
+        }
+      }
+    }
+  end
+
+  # returns terms_filter_query
+  def self.get_terms_filter_query(field, value)
+    raise "Cannot append terms query to #{value} which is not an array" unless value.is_a?(Array)
+    {
+      terms: {
+        field => value
+      }
+    }
+  end
+
+  # returns filtered_structure
+  def self.get_filtered_structure
+    {
+      bool: {
+        must: [],
+        should: [],
+        must_not: [],
+        filter: {
+          bool: {
+            must: [],
+            should: [],
+            must_not: []
+          }
+        }
+      }
+    }
+  end
+
+  # returns range query
+  def self.get_range_query field, from, to
+    q = {
+      :range => {
+        field => {}
+      }
+    }
+    q[:range][field][:from] = from if from.present?
+    q[:range][field][:to] = to if to.present?
+    return q
+  end
+
+  # returns nested terms query
+  def self.get_nested_terms_query(path, field, value)
+    raise "Cannot append terms query to #{value} which is not an array" unless value.is_a?(Array)
+    {
+      nested: {
+        path: path,
+        query: {
+          terms: {
+            field => value
+          }
+        }
+      }
+    }
+  end
+
+  # returns basic bool structure with should, must and must_not clauses
+  def self.get_bool_filter_structure
+    {
+      bool: {
+        must: [],
+        should: [],
+        must_not: []
+      }
+    }
+  end
+
+  # returns query bool structure
+  def self.get_query_bool_structure
+    {
+      query: {
+        bool: {
+          filter: {
+          }
+        }
+      }
+    }
+  end
+
+
+  # used to append a sort query so that the result is sorted based on the sort_fields provided
+  def self.get_sort_subquery sort_fields=[]
+    raise "Cannot append sort query which is not an array" unless sort_fields.is_a?(Array)
+    sort = []
+    sort_fields.each do |field|
+      if field.is_a? Hash
+        field.each do |key, order|
+          next unless (order=="asc" || order=="desc")
+          sort << {key => {"order" => order}}
+        end
+      elsif (field.is_a? String) || (field.is_a? Symbol)
+        sort << field.to_s
+      end
+    end
+    return sort
+  end
+
+  # constructs a top_hits structure
+  # this aggregator is intended to be used as a sub aggregator,
+  # so that the top matching documents can be aggregated per bucket
+  # @param name [String] name of the aggregation
+  # @param size [Integer] specifying number of top results to be returned
+  # @param sort [Array] specifying the sorting order
+  # @param source [Array] specifying the data fields to be present in the result
+  def self.get_top_hits_aggregations name, size, sort, source = []
+    query = {
+      name => {
+        "top_hits": {
+          "size": size
+        }
+      }
+    }.with_indifferent_access
+    query[name]["top_hits"]["sort"] = sort if sort.present?
+    if source.present?
+      query[name]["top_hits"]["_source"] = {
+        "include": source
+      }
+    end
+    query
+  end
+
+  # returns a reverse nested structure
+  def self.get_reverse_nested_aggs name, aggregations
+    {
+      name => {
+        "reverse_nested": {},
+        "aggs": aggregations
+      }
+    }
+  end
+
+  def self.get_terms_aggregation_structure name, field_name, include_array = [], script = "", size = nil
+    query = {
+      name => {
+        terms: {
+          field: field_name
+        }
+      }
+    }
+    query[name][:terms][:include] = include_array if include_array.present?
+    if script.present?
+      query[name][:terms].delete(:field)
+      query[name][:terms][:script] = script
+    end
+    if size.present?
+      size = 1 if size == 0 # size 0 is not supported since ES 5
+      query[name][:terms][:size] = size
+    end
+    query
+  end
+
+  # returns a generic metrics aggregation by providing the comparator
+  # @param name [String] name of the aggregation
+  # @param comparator [String] the metric to be used (eg. cardinality, avg)
+  # @param field_name [String] field on which aggregation is used
+  def self.get_metrics_aggregations_query name, comparator, field_name
+    query = {
+      name => {
+        comparator => {
+          field: field_name
+        }
+      }
+    }
+  end
+
+  # this helps to construct a structure where we can use aggregations on the nested objects also.
+  # @param name [String] name of the aggregation
+  # @param path [String] path to the nested object
+  # @param field_name [String] field on which aggregation is used
+  # @param aggregation [Hash] specifying the aggregations (eg. Average)
+  def self.get_nested_aggregation_query name, path, aggregation
+    query = {
+      name => {
+        nested: {
+          path: path
+        },
+        aggregations: aggregation
+      }
+    }
+  end
+
+  # builds ids query with provided values
+  def self.get_ids_filter_query ids
+    ids_query_structure = get_ids_query_structure
+    ids_query_structure[:ids][:values] = ids
+    ids_query_structure
+  end
+
+
+  # constructs a structure that defines a single bucket which matches a specified filter
+  # @param name [String] name of the aggregation
+  # @param aggregation [Hash] specifying the aggregations (eg. Average)
+  # @param filter [Hash] the matching condition (eg. { "term": { "type": "t-shirt" } })
+  def self.filtered_aggregation name, aggregation, filter
+    {
+      :aggs => {
+        name.intern => {
+          filter: filter,
+          aggs: aggregation
+        }
+      }
+    }
+  end
+
+  # calculates percentiles based on the field and provided percentile points
+  # @param aggregation_name [String] name of the aggregation
+  # @param field [String] on which aggregation is to be performed
+  # @param percentile_points [Array] percentile points in which we are interested
+  def self.percentile_aggregation aggregation_name, field, percentile_points
+    {
+      aggregation_name.intern => {
+        :percentiles => {
+          :field => field,
+          :percents => percentile_points
+        }
+      }
+    }
+  end
+
+  # used for bucketing the response based on the field and range provided
+  # sample range [{ "to" : 100.0 },{ "from" : 100.0, "to" : 200.0 },{ "from" : 200.0 }]
+  # @param aggregation_name [String] name of the aggregation
+  # @param field [String] on which aggregation is to be performed
+  # @param ranges [Array] specifying ranges
+  def self.range_aggregation aggregation_name, field, ranges
+    {
+      aggregation_name.intern => {
+        :range => {
+          :field => field,
+          :ranges => ranges
+        }
+      }
+    }
+  end
+
+  # constructs an aggregation structure based on the field_name provided
+  # this dynamically builds buckets on the basis of field_name and provides the aggregations accordingly
+  # if field_name = genre and genre has values (rock, jazz, thrash metal), then aggregations will be based on these three genres
+  # @param name [String] name of the aggregation
+  # @param field_name [String] on which aggregation is to be performed
+  # @param aggregation [Array] specifying aggregations
+  # @param include_array [Array] specifying conditions on the field_name
+  # @param script [Hash], to be executed for aggregation
+  def self.get_terms_structure_with_aggregation name, field_name, aggregation, include_array = [], script = ""
+    query = get_terms_aggregation_structure name, field_name, include_array, script
+    query[name][:aggs] = aggregation
+    return query
+  end
+
+  def self.function_score query, seed
+    {
+      function_score: {
+        query: query,
+        functions: [
+          {
+            random_score: {
+              seed: seed
+            }
+          }
+        ]
+      }
+    }
+  end
+
+  # returns a structure of dis_max query.
+  # @param queries [Array], array of queries used for union
+  def self.dis_max_query queries=[]
+    raise ArgumentError.new("queries is not an Array") unless queries.instance_of? Array
+    return {
+      dis_max: {
+        queries: queries
+      }
+    }
+  end
+
+  # to modify the score of documents that are retrieved by a query
+  # @param query [Hash]
+  # @param functions [Array] specifying the conditions and scores
+  # @param boost_mode [String] specifying boost_mode (replace, multiply)
+  def self.script_scoring_query(query, functions, boost_mode="replace")
+    return query.except(:query).merge({
+      query: {
+        function_score: {
+          query: query[:query],
+          functions: functions,
+          boost_mode: boost_mode
+        }
+      }
+    })
+  end
+
+  def self.get_script_score_function_structure
+    return {
+      script_score: {
+        script: {
+          params: {},
+          inline: ''
+        }
+      }
+    }
+  end
+
+  # get nested query to search on nested objects
+  def self.get_nested_exists_query field_name
+    {
+      nested: {
+        path: field_name,
+        query: {
+          match_all: {}
+        }
+      }
+    }
+  end
+
+  # this appends a bool structure containing must, should and must_not in an existing query under the filter context
+  # if filter is not present in the existing query, the entire bool structure is assigned,
+  # if filter is present, then the bool structure is merged with the existing structure under the filter context
+  # finally the filter provided in the method params is appended in the must clause
+  # @param query [Hash] main query
+  # @param filter [Hash] assigned in the must clause of the main query
+  def self.append_query_filter(query, filter)
+    bool_query = get_bool_filter_structure
+    if query[:query][:bool][:filter].nil?
+      query[:query][:bool][:filter] = bool_query
+    else
+      bool_query[:bool].each { |key, val|
+        query[:query][:bool][:filter][:bool][key] = [] unless query[:query][:bool][:filter][:bool].key?(key)
+      }
+    end
+    query[:query][:bool][:filter][:bool][:must].push(filter)
+    query
+  end
+
+  # sets the max number of results to be returned by the query
+  def self.append_size_filter(query, size)
+    query[:size] = size
+    query
+  end
+
+  # merges bool queries into the main_query
+  # @param main_query [Hash] query to be modified
+  # @param query [Hash] query whose bool params are to be merged in main_query
+  def self.merge_bool_query(main_query, query)
+    query[:bool].each { |key, val|
+      if main_query[:bool].key?(key)
+        main_query[:bool][key] = Array.new([main_query[:bool][key]]) << val
+        main_query[:bool][key].flatten!
+      else
+        main_query[:bool][key] = val
+      end
+    }
+    main_query
+  end
+
+end
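For orientation, a minimal usage sketch of how these helpers compose. The field names ("city", "price") and values are placeholders, the gem is assumed to be installed so lib/ is on the load path, and ActiveSupport (pulled in via the rails runtime dependency) is assumed to be loaded since the helpers call present? and with_indifferent_access.

  require 'elastic_search_query'

  # Start from a bare bool query; append_query_filter installs the filter
  # context on first use and pushes each filter into its must clause.
  query = { query: { bool: {} } }

  city_filter  = ElasticSearchQuery.get_terms_filter_query("city", ["pune", "mumbai"])
  price_filter = ElasticSearchQuery.get_range_query("price", 10_000, 50_000)

  query = ElasticSearchQuery.append_query_filter(query, city_filter)
  query = ElasticSearchQuery.append_query_filter(query, price_filter)
  query = ElasticSearchQuery.append_size_filter(query, 20)
  query[:sort] = ElasticSearchQuery.get_sort_subquery([{ "price" => "asc" }])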
data/lib/fetch_es_data.rb
ADDED
@@ -0,0 +1,30 @@
+# provides methods to fetch data from Elasticsearch
+class FetchEsData
+
+  # initializes the Elasticsearch host and port
+  # @param search_host [String] Elastic Host
+  # @param search_port [String] Elastic port
+  # @return [FetchEsData] to perform search
+  def initialize(search_host, search_port)
+    @search_host = search_host
+    @search_port = search_port
+  end
+
+
+  # fetches data from Elasticsearch
+  # @param query [String] the input
+  # @param index_name [String] name of the index
+  # @param type_name [String] index type
+  # @param extension [String] extension to the Elasticsearch path (eg. '_search', '_msearch')
+  # @return [String, Hash]
+  def fetch_shortlisted_data_from_es(query:, index_name:, type_name:, extension: '_search')
+    uri = URI("http://#{@search_host}:#{@search_port}/#{index_name}/#{type_name}/#{extension}")
+    req = Net::HTTP::Post.new(uri, initheader = {'Content-Type' => 'application/json'})
+    req.body = "#{query.to_json}\n"
+    res = Net::HTTP.start(uri.hostname, uri.port) do |http|
+      http.request(req)
+    end
+    body = JSON.parse(res.body) rescue {}
+    return res.code, body
+  end
+end
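A hedged sketch of calling this fetcher. Host, port, and the "listings" index are placeholders; the class itself does not require its dependencies, so net/http and json are loaded here explicitly (in a Rails app they are typically already available).

  require 'net/http'
  require 'json'
  require 'fetch_es_data'

  fetcher = FetchEsData.new("localhost", "9200")   # placeholder host and port

  code, body = fetcher.fetch_shortlisted_data_from_es(
    query:      { query: { match_all: {} }, size: 5 },
    index_name: "listings",                        # hypothetical index
    type_name:  "_doc",
    extension:  "_search"
  )

  puts code                                        # HTTP status as a String, e.g. "200"
  puts body.dig("hits", "total") if code == "200"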
data/lib/indexer.rb
ADDED
@@ -0,0 +1,68 @@
+# used to index data in near real time to Elasticsearch
+
+class Indexer
+
+  # builds an Elasticsearch client object
+  # @param host [String] Elastic Host
+  # @param port [String] Elastic Port
+  # @return [Indexer] to use indexing methods
+  def initialize(host, port)
+    @client = Elasticsearch::Client.new(host: host, port: port)
+  end
+
+  # expects data in the form of
+  # [{update:{_index: index, _type: _doc, _id: 23, data: {doc: data }}},{index: {_index: index2, _type: _doc, _id: 28, data: data}}]
+  # @param data [Array] Data to be indexed
+  def bulk_index(data)
+    check_bulk_index_params(data)
+    response = @client.bulk body: data
+    response = response.with_indifferent_access
+    if response[:errors] == true
+      raise "Not able to index with errors as #{(response["items"].map{|t| t["index"]["error"]}.compact)}"
+    end
+  end
+
+  # checks whether the record exists in the given index with given type and id
+  # @param index_name [String] name of the index
+  # @param type_name [String] name of the type
+  # @param id [String] doc_id
+  # @param parent_id [String] parent_id (optional)
+  # @return [True] if record found
+  # @return [False] if record not found
+  def record_exists?(index_name, type_name, id, parent_id = nil)
+    options_hash = generate_options_hash(index_name, type_name, id, parent_id)
+    @client.exists options_hash
+  end
+
+  # deletes the record if it exists in the given index with given type and id,
+  # raises DocumentNotFoundException if record is not found
+  # @param index_name [String] name of the index
+  # @param type_name [String] name of the type
+  # @param id [String] doc_id
+  # @param parent_id [String] parent_id
+  def delete_record(index_name, type_name, id, parent_id = nil)
+    if record_exists?(index_name, type_name, id, parent_id)
+      @client.delete generate_options_hash(index_name, type_name, id, parent_id)
+    end
+  end
+
+
+  private
+
+  def generate_options_hash(name, type, id, parent_id)
+    options_hash = {
+      index: name,
+      type: type,
+      id: id
+    }
+    options_hash[:parent] = parent_id if parent_id
+    options_hash
+  end
+
+  def check_bulk_index_params(data)
+    raise "Please provide an array of documents" unless data.is_a?(Array)
+    count = data.size
+    raise "Record count should be less than #{Constants::MAX_BULK_INDEX_SIZE}" if count > Constants::MAX_BULK_INDEX_SIZE
+  end
+
+end
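A usage sketch, assuming the elasticsearch client gem and ActiveSupport are loaded (bulk_index calls with_indifferent_access) and that Constants::MAX_BULK_INDEX_SIZE is defined by the bundled constants.rb. Index names, ids, and documents below are placeholders.

  require 'elasticsearch'
  require 'indexer'

  indexer = Indexer.new("localhost", "9200")       # placeholder host and port

  docs = [
    { index:  { _index: "listings", _type: "_doc", _id: 1, data: { title: "2 BHK in Powai" } } },
    { update: { _index: "listings", _type: "_doc", _id: 2, data: { doc: { price: 100 } } } }
  ]
  indexer.bulk_index(docs)                         # raises if the bulk response reports errors

  indexer.delete_record("listings", "_doc", 1)     # deletes only when the document exists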
data/lib/token_query_builder.rb
ADDED
@@ -0,0 +1,76 @@
+require 'elastic_search_query'
+class TokenQueryBuilder
+
+  # constructs a match filter based on the field_name and analyzer provided
+  # @param analyzer [String] Type of analyzer to be used
+  # @param field_name [String] field to be searched
+  # @param query [String] input data
+  # @param opts [Hash] options to be provided for search (eg. {fuzziness: 0, prefix_length: 1, max_expansions: 20, operator: "and"})
+  # @return [Hash]
+  def self.construct_match_filter(analyzer, field_name, query, opts)
+    default_options = {fuzziness: 0, prefix_length: 1, max_expansions: 20, operator: "or"}
+    opts = default_options.merge(opts)
+    field = analyzer.present? ? "#{field_name}.#{analyzer}" : "#{field_name}"
+    {
+      match: {
+        field => {
+          query: query,
+          operator: opts[:operator],
+          fuzziness: opts[:fuzziness],
+          prefix_length: opts[:prefix_length]
+        }
+      }
+    }
+  end
+
+  # wraps the query in the constant_score structure
+  # @param filters [Array] filters to be included
+  # @param boost [Float] to influence the relevance score
+  # @return [Hash]
+  def self.wrap_constant_score_query(filters: [], boost: 0)
+    {
+      constant_score: {
+        boost: boost,
+        filter: {
+          bool: {
+            should: filters
+          }
+        }
+      }
+    }
+  end
+
+  # constructs match filters based on the keys array
+  # @param analyzer [String] Type of analyzer to be used
+  # @param query [String] input data
+  # @param opts [Hash] options to be provided for search (eg. {fuzziness: 0, prefix_length: 1, max_expansions: 20, operator: "and"})
+  # @param keys [Array] fields to be queried
+  # @param boost [Float] to influence the relevance score
+  # @return [Hash]
+  def self.cs_with_multiple_filter(analyzer, query, opts, keys = [], boost: 0)
+    filters = []
+    (keys.compact).each do |key|
+      filter = construct_match_filter(analyzer, key, query, opts)
+      filters.push(filter)
+    end
+    wrap_constant_score_query(filters: filters, boost: boost)
+  end
+
+  # constructs a single term query
+  def self.cs_with_single_filter(query, key = "")
+    ElasticSearchQuery.get_term_filter_query key, query, true
+  end
+
+  # constructs a constant_score wrapped match query based on the analyzer provided
+  # @param analyzer [String] Type of analyzer to be used (Analyzer provided here must be defined in the index definition first)
+  # @param query [String] input data
+  # @param key [String] field to be searched
+  # @param alias_key [String] alias of the field to be searched
+  # @param boost [Float] to influence relevance score
+  # @param options [Hash] options to be provided for search (eg. {fuzziness: 0, prefix_length: 1, max_expansions: 20, operator: "and"})
+  # @return [Hash]
+  def self.constant_score_match_query(analyzer, query, key, alias_key = nil, boost = 1, options = {})
+    cs_with_multiple_filter(analyzer, query, options, [key, alias_key], boost: boost)
+  end
+
+end
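A sketch of building a fuzzy, constant_score-wrapped match query with these helpers. The "shingle" analyzer sub-field and the "title"/"title_alias"/"city" fields are hypothetical and would need to exist in the index mapping; ActiveSupport is assumed for present?.

  require 'token_query_builder'

  q = TokenQueryBuilder.constant_score_match_query(
    "shingle",                        # hypothetical analyzer sub-field
    "sea view apartment",             # user input
    "title",                          # primary field
    "title_alias",                    # optional alias field
    2.0,                              # boost
    { fuzziness: 1, operator: "and" }
  )

  # A bare term filter built through the ElasticSearchQuery helper:
  t = TokenQueryBuilder.cs_with_single_filter("mumbai", "city")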
metadata
ADDED
@@ -0,0 +1,83 @@
+--- !ruby/object:Gem::Specification
+name: es_query_builder
+version: !ruby/object:Gem::Version
+  version: 1.0.0
+platform: ruby
+authors:
+- Mohib Yousuf
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2018-05-03 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rails
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '4.0'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 4.0.2
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '4.0'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 4.0.2
+- !ruby/object:Gem::Dependency
+  name: elasticsearch
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description:
+email: mohib.yousuf@hotmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/constants.rb
+- lib/elastic_search_query.rb
+- lib/fetch_es_data.rb
+- lib/housing_es_query_builder.rb
+- lib/indexer.rb
+- lib/token_query_builder.rb
+homepage: https://github.com/elarahq/es.query.builder
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.6.12
+signing_key:
+specification_version: 4
+summary: For Building Elastic Search Queries
+test_files: []
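To try this release in an application, one possible Gemfile entry (pinning to the exact version shown in this diff):

  gem 'es_query_builder', '1.0.0'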