caoutsearch 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +22 -0
  3. data/README.md +43 -0
  4. data/lib/caoutsearch/config/client.rb +13 -0
  5. data/lib/caoutsearch/config/mappings.rb +40 -0
  6. data/lib/caoutsearch/config/settings.rb +29 -0
  7. data/lib/caoutsearch/filter/base.rb +101 -0
  8. data/lib/caoutsearch/filter/boolean.rb +19 -0
  9. data/lib/caoutsearch/filter/date.rb +49 -0
  10. data/lib/caoutsearch/filter/default.rb +51 -0
  11. data/lib/caoutsearch/filter/geo_point.rb +11 -0
  12. data/lib/caoutsearch/filter/match.rb +57 -0
  13. data/lib/caoutsearch/filter/none.rb +7 -0
  14. data/lib/caoutsearch/filter/range.rb +28 -0
  15. data/lib/caoutsearch/filter.rb +29 -0
  16. data/lib/caoutsearch/index/base.rb +35 -0
  17. data/lib/caoutsearch/index/document.rb +107 -0
  18. data/lib/caoutsearch/index/indice.rb +55 -0
  19. data/lib/caoutsearch/index/indice_versions.rb +123 -0
  20. data/lib/caoutsearch/index/instrumentation.rb +19 -0
  21. data/lib/caoutsearch/index/internal_dsl.rb +77 -0
  22. data/lib/caoutsearch/index/naming.rb +29 -0
  23. data/lib/caoutsearch/index/reindex.rb +77 -0
  24. data/lib/caoutsearch/index/scoping.rb +54 -0
  25. data/lib/caoutsearch/index/serialization.rb +136 -0
  26. data/lib/caoutsearch/index.rb +7 -0
  27. data/lib/caoutsearch/instrumentation/base.rb +69 -0
  28. data/lib/caoutsearch/instrumentation/index.rb +57 -0
  29. data/lib/caoutsearch/instrumentation/search.rb +41 -0
  30. data/lib/caoutsearch/mappings.rb +79 -0
  31. data/lib/caoutsearch/search/base.rb +27 -0
  32. data/lib/caoutsearch/search/dsl/item.rb +42 -0
  33. data/lib/caoutsearch/search/query/base.rb +16 -0
  34. data/lib/caoutsearch/search/query/boolean.rb +63 -0
  35. data/lib/caoutsearch/search/query/cleaning.rb +29 -0
  36. data/lib/caoutsearch/search/query/getters.rb +35 -0
  37. data/lib/caoutsearch/search/query/merge.rb +27 -0
  38. data/lib/caoutsearch/search/query/nested.rb +23 -0
  39. data/lib/caoutsearch/search/query/setters.rb +68 -0
  40. data/lib/caoutsearch/search/sanitizer.rb +28 -0
  41. data/lib/caoutsearch/search/search/delete_methods.rb +21 -0
  42. data/lib/caoutsearch/search/search/inspect.rb +36 -0
  43. data/lib/caoutsearch/search/search/instrumentation.rb +21 -0
  44. data/lib/caoutsearch/search/search/internal_dsl.rb +77 -0
  45. data/lib/caoutsearch/search/search/naming.rb +47 -0
  46. data/lib/caoutsearch/search/search/query_builder.rb +94 -0
  47. data/lib/caoutsearch/search/search/query_methods.rb +180 -0
  48. data/lib/caoutsearch/search/search/resettable.rb +35 -0
  49. data/lib/caoutsearch/search/search/response.rb +88 -0
  50. data/lib/caoutsearch/search/search/scroll_methods.rb +113 -0
  51. data/lib/caoutsearch/search/search/search_methods.rb +230 -0
  52. data/lib/caoutsearch/search/type_cast.rb +76 -0
  53. data/lib/caoutsearch/search/value.rb +111 -0
  54. data/lib/caoutsearch/search/value_overflow.rb +17 -0
  55. data/lib/caoutsearch/search.rb +6 -0
  56. data/lib/caoutsearch/settings.rb +22 -0
  57. data/lib/caoutsearch/version.rb +5 -0
  58. data/lib/caoutsearch.rb +38 -0
  59. metadata +268 -0
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Class-level helpers to create, check, delete and refresh the
    # Elasticsearch index named by `index_name`.
    module Indice
      extend ActiveSupport::Concern

      included do
        delegate :refresh_indice, to: :class
      end

      class_methods do
        # Create the index with the settings and mappings declared on the class
        #
        #   ArticleIndex.create_indice
        #
        def create_indice
          client.indices.create(
            index: index_name,
            body: {
              settings: settings.as_json,
              mappings: mappings.as_json
            }
          )
        end

        # Verify index existence
        #
        #   ArticleIndex.indice_exists?
        #   => true
        #
        def indice_exists?
          client.indices.exists?(index: index_name)
        end

        # Delete the index
        #
        #   ArticleIndex.delete_indice
        #
        def delete_indice
          client.indices.delete(index: index_name)
        end

        # Explicitly refresh an index, making all operations performed
        # since the last refresh available for search.
        #
        #   ArticleIndex.refresh_indice
        #   ArticleIndex.refresh_indice(index: "articles_v3")
        #
        # The `index:` keyword defaults to `index_name`; it lets callers that
        # wrote to a specific index refresh that same index.
        #
        def refresh_indice(index: index_name)
          client.indices.refresh(index: index)
        end
      end
    end
  end
end
@@ -0,0 +1,123 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Class-level helpers to manage versioned indices (`<index_name>_v<N>`)
    # and the `index_name` alias pointing at one of them.
    module IndiceVersions
      extend ActiveSupport::Concern

      class_methods do
        # List versions
        #
        #   ArticleIndex.indice_versions
        #   => ["articles_v0", "articles_v1"]
        #
        def indice_versions
          client.cat
            .indices(h: ["index"], format: :json)
            .filter_map { |h| h["index"] }
            .grep(indice_version_regexp)
        end

        # List aliased versions
        #
        #   ArticleIndex.aliased_indice_versions
        #   => ["articles_v1"]
        #
        # Returns an empty array when the alias does not exist.
        #
        def aliased_indice_versions
          client.indices
            .get_alias(name: index_name)
            .keys
            .grep(indice_version_regexp)
        rescue Elastic::Transport::Transport::Errors::NotFound
          []
        end

        # List last version available (aliased or not)
        #
        #   ArticleIndex.last_indice_version
        #   => "articles_v1"
        #
        # Versions are compared by their numeric suffix, so "articles_v10"
        # is newer than "articles_v9" (a plain string comparison would say
        # the opposite). Returns nil when no version exists.
        #
        def last_indice_version
          indice_versions.max_by { |version_name| indice_version_number(version_name) }
        end

        # Next version to create
        #
        #   ArticleIndex.next_indice_version
        #   => "articles_v2"
        #
        def next_indice_version
          current_version = last_indice_version
          number = current_version ? indice_version_number(current_version) + 1 : 0

          "#{index_name}_v#{number}"
        end

        # Create new version of the index
        #
        #   ArticleIndex.create_indice_version
        #   => "articles_v2"
        #
        def create_indice_version(version_name = next_indice_version)
          client.indices.create(
            index: version_name,
            body: {
              settings: settings.as_json,
              mappings: mappings.as_json
            }
          )

          version_name
        end

        # Switch the index alias to a new version
        #
        #   ArticleIndex.switch_indice_version("articles_v2")
        #   => "articles_v2"
        #
        # Defaults to the last available version. Returns false, without
        # touching aliases, when the version is already aliased.
        # Raises ArgumentError when there is no version to switch to.
        #
        def switch_indice_version(version_name = :__last__)
          version_name = last_indice_version if version_name == :__last__
          raise ArgumentError, "no indice version available for #{index_name}" if version_name.nil?

          actions = []
          actions << { add: { index: version_name, alias: index_name } }

          aliased_indice_versions.each do |alias_name|
            return false if alias_name == version_name
            actions << { remove: { index: alias_name, alias: index_name } }
          end

          client.indices.update_aliases(body: { actions: actions })
          refresh_indice
          version_name
        end

        # Prune not-aliased versions
        #
        def prune_indice_versions
          old_versions = indice_versions - aliased_indice_versions
          old_versions.each do |version_name|
            client.indices.delete(index: version_name)
          end
        end

        # Delete all versions, alias or not
        #
        def delete_all_indice_versions
          indice_versions.each do |version_name|
            client.indices.delete(index: version_name)
          end
        end

        private

        # Matches exactly "<index_name>_v<digits>". The index name is escaped
        # so that characters such as "." or "-" are matched literally.
        def indice_version_regexp
          @version_regexp ||= /\A#{Regexp.escape(index_name)}_(v\d+)\z/
        end

        # Extract the numeric suffix of a version name ("articles_v12" => 12)
        def indice_version_number(version_name)
          version_name[/_v(\d+)\z/, 1].to_i
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Publishes "<action>.caoutsearch_index" events through
    # ActiveSupport::Notifications, from both instances and the class itself.
    module Instrumentation
      extend ActiveSupport::Concern

      # Instrument an action performed by an index instance.
      # The payload always carries the instance's class name under :klass.
      def instrument(action, **options, &block)
        event_name = "#{action}.caoutsearch_index"
        payload = options.merge(klass: self.class.to_s)

        ActiveSupport::Notifications.instrument(event_name, payload, &block)
      end

      class_methods do
        # Instrument an action performed at class level.
        # The payload always carries the class name under :klass.
        def instrument(action, **options, &block)
          event_name = "#{action}.caoutsearch_index"
          payload = options.merge(klass: to_s)

          ActiveSupport::Notifications.instrument(event_name, payload, &block)
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # DSL used inside index classes to declare serialized properties and
    # named partial reindexations.
    module InternalDSL
      extend ActiveSupport::Concern

      included do
        # Be careful with these class attributes
        # Always use `+=` or `.dup.merge` to assign a new copy
        #
        class_attribute :properties, instance_writer: false, default: []
        class_attribute :partial_reindexations, instance_writer: false, default: {}
      end

      class_methods do
        # Declare a property
        #
        #   class ArticleIndex < Caoutsearch::Index::Base
        #     property :title
        #     property :description
        #     property :tags
        #
        #     def tags
        #       record.tags.map do |tag|
        #         {
        #           label: tag.label,
        #           score: tag.score
        #         }
        #       end
        #     end
        #   end
        #
        # When a callable `body` is given, it is used to define the instance
        # method named after the property.
        # Raises ArgumentError when `body` is given but not callable.
        #
        def property(key, body = nil)
          raise ArgumentError, "The property body needs to be callable." if body && !body.respond_to?(:call)

          key = key.to_s
          self.properties += [key] unless properties.include?(key)

          define_method(key, &body) if body
        end

        # Declare an alias-property for partial reindexation
        #
        #   class LocalIndex < Caoutsearch::Index::Base
        #     property :invariant
        #     property :geoaddress
        #     property :geoposition
        #
        #     allow_partial_reindex :post_processed_data, properties: %i[geoaddress geoposition]
        #   end
        #
        # Without a `body`, an instance method is generated which returns
        # `{ doc: { <property> => <value>, ... } }`, plus `doc_as_upsert: true`
        # when `upsert` is set.
        # Raises ArgumentError when `body` is given but not callable.
        #
        def allow_partial_reindex(name, body = nil, properties: nil, upsert: false)
          raise ArgumentError, "The allow_partial_reindex body needs to be callable." if body && !body.respond_to?(:call)

          name = name.to_s
          self.partial_reindexations = partial_reindexations.dup.merge(name => { properties: properties })

          if body
            define_method(name, &body)
          else
            define_method(name) do
              # Use a block-local variable: assigning to `body` here would write
              # to the variable captured from the enclosing method, which is
              # shared by every call (and every thread) of the generated method.
              payload = { doc: properties.index_with { |key| send(key) } }
              payload[:doc_as_upsert] = true if upsert
              payload
            end
          end
        end

        # Tell whether `subject` is a declared property or partial reindexation
        def allow_reindex?(subject)
          subject = subject.to_s
          properties.include?(subject) || partial_reindexations.include?(subject)
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Resolves the Elasticsearch index name attached to an index class.
    module Naming
      extend ActiveSupport::Concern

      included do
        delegate :index_name, to: :class
      end

      class_methods do
        # Explicitly assign the index name, overriding the derived default
        attr_writer :index_name

        # Index name, derived from the class name unless explicitly assigned
        def index_name
          @index_name ||= default_index_name
        end

        private

        # Strip the "Index" suffix from the class name, tableize it
        # and replace namespace separators with underscores.
        def default_index_name
          base_name = name.gsub(/Index$/, "")
          base_name.tableize.tr("/", "_")
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Bulk (re)indexation of records.
    module Reindex
      extend ActiveSupport::Concern

      class_methods do
        # Reindex multiple records with bulk API
        #
        # Examples:
        #   ArticleIndex.reindex(Article.all)
        #   ArticleIndex.reindex(Article.modified_since(2.days), :title, :content)
        #
        # Examples with options:
        #
        #   articles = Article.modified_since(2.days)
        #
        #   ArticleIndex.reindex(articles, index: "article_v3")
        #   ArticleIndex.reindex(articles, batch_size: 10)
        #   ArticleIndex.reindex(articles, method: :update)
        #   ArticleIndex.reindex(articles, refresh: true)
        #
        # Accepted options (any other key raises ArgumentError):
        #   index:      target index, defaults to `index_name`
        #   refresh:    refresh the target index once done, defaults to false
        #   batch_size: number of records per bulk request, defaults to 100
        #   method:     bulk method, defaults to :update when keys are given,
        #               :index otherwise
        #   total:      number of records; when omitted it is computed with
        #               `records.count(:all)`, so passing it avoids that query
        #   progress:   optional object receiving `total=`, `progress=`
        #               and `increment`
        #
        # When the total fits in a single batch, records are loaded at once
        # instead of going through `find_in_batches`.
        #
        def reindex(records, *keys, **options)
          options.assert_valid_keys(:index, :refresh, :batch_size, :method, :total, :progress)
          keys.flatten!

          records = apply_scopes(records, keys)
          records = records.strict_loading

          index = options.fetch(:index, index_name)
          refresh = options.fetch(:refresh, false)
          method = options.fetch(:method) { keys.present? ? :update : :index }
          batch_size = options[:batch_size] || 100
          total = options[:total] || records.count(:all)
          progress = options[:progress]
          current_progress = 0

          return if total.zero?

          progress&.total = total
          progress&.progress = current_progress

          finder = if total <= batch_size
            records.to_a.each_slice(total)
          else
            records.find_in_batches(batch_size: batch_size)
          end

          finder.each do |batch|
            current_progress += batch.size
            request_payload = {
              index: index,
              body: bulkify(batch, method, keys)
            }

            instrument(:reindex, total: total, progress: current_progress, records: batch) do |event_payload|
              event_payload[:request] = request_payload
              event_payload[:response] = client.bulk(request_payload)
            end

            progress&.increment(batch.size)
            records.connection.clear_query_cache
          end

          # Refresh the index that was actually written to, which may differ
          # from `index_name` when the `index:` option is used.
          client.indices.refresh(index: index) if refresh
        end

        alias_method :update_documents, :reindex
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Named scopes applied to the records collection before serialization,
    # typically to preload what each property needs.
    module Scoping
      extend ActiveSupport::Concern

      included do
        class_attribute :scopes, instance_accessor: false, default: {}
        class_attribute :preloads, instance_accessor: false, default: {}
      end

      class_methods do
        # Declare the scope applied on every call to `apply_scopes`
        def default_scope(body)
          scope :_default, body
        end

        # Declare a named scope; `body` receives the records and returns
        # the scoped records.
        # Raises ArgumentError when `body` is not callable.
        def scope(name, body)
          raise ArgumentError, "The scope body needs to be callable." unless body.respond_to?(:call)

          name = name.to_s
          self.scopes = scopes.dup.merge(name => body)
        end

        # Declare a scope named `name` which preloads the given associations
        # (defaults to the association named `name`)
        def preload(name, with: nil)
          name = name.to_s
          with = Array.wrap(with || name)

          scope name, ->(records) { records.preload(*with) }
        end

        private

        # Apply the scopes matching `names` (all properties when empty),
        # followed by the default scope. A name matching a partial
        # reindexation is expanded to the scopes of its properties.
        #
        # NOTE: the nested call made for a partial reindexation also applies
        # the default scope, so it may be applied more than once.
        def apply_scopes(records, names = [])
          names = names.map(&:to_s)
          names = properties if names.empty?
          names += %w[_default] # Use += instead of << to create a copy

          names.each do |name|
            if scopes.include?(name)
              records = scopes[name].call(records)
            elsif partial_reindexations.include?(name)
              partial_properties = partial_reindexations.dig(name, :properties)
              records = apply_scopes(records, partial_properties) if partial_properties&.any?
            end
          end

          records
        end
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Serialization of records to JSON documents and Elasticsearch
    # bulk payloads.
    module Serialization
      extend ActiveSupport::Concern

      included do
        delegate :to_json, to: :as_json
      end

      class_methods do
        # Transform record or array of records to JSON:
        #
        #   transform(nil)               => nil
        #   transform(record.first)      => { ... }
        #   transform(record.limit(100)) => [{ ... }, { ... }, ...]
        #
        def transform(input, *keys)
          if input.nil?
            nil
          elsif input.respond_to?(:map)
            input.map { |record| transform(record, *keys) }
          else
            new(input).as_json(*keys)
          end
        end

        # Convert an array of records to an Elasticsearch bulk payload
        #
        # Raises ArgumentError when `method` is not :index, :update or :delete.
        #
        def bulkify(input, method, keys)
          raise ArgumentError, "unknown method #{method}" unless %i[index update delete].include?(method)

          input.flat_map { |record| new(record).bulkify(method, keys) }
        end
      end

      # Serialize the object payload
      #
      # Without keys, all declared properties are serialized.
      # A partial reindexation key cannot be combined with any other key:
      # its result is merged as-is into the payload.
      #
      # Raises ArgumentError on unknown keys and SerializationError when a
      # partial reindexation key is mixed with other keys.
      #
      def as_json(*keys)
        keys = keys.map(&:to_s)
        _, partial_keys = analyze_keys(keys)

        if keys.size > 1 && partial_keys.any?
          raise SerializationError, format("cannot serialize the following keys together: %{keys}", keys: partial_keys.to_sentence)
        end

        json = {}
        keys = properties if keys.empty?
        keys.each do |key|
          result = send(key)

          if partial_reindexations.include?(key)
            json = json.merge(result)
          else
            json[key.to_sym] = result
          end
        end

        simplify(json)
      end

      # Recursive objects simplification:
      #
      #   [nil, 'A', 'A', 'B'] => ['A', 'B']
      #   [nil, 'A', 'A']      => 'A'
      #   [nil, nil]           => nil
      #   []                   => nil
      #
      #   { key: [nil, 'A', 'A', 'B'] } => { key: ['A', 'B'] }
      #   { key: [nil, 'A', 'A'] }      => { key: 'A' }
      #   { key: [nil, nil] }           => { key: nil }
      #   { key: [] }                   => { key: nil }
      #   { }                           => { }
      #
      # Only nil is discarded from arrays: `false` is a legitimate value and
      # is kept. The given object is not mutated; a simplified copy is returned.
      #
      def simplify(object)
        case object
        when Array
          items = object.map { |item| simplify(item) }.compact.uniq
          items.size <= 1 ? items.first : items
        when Hash
          object.transform_values { |value| simplify(value) }
        else
          object
        end
      end

      # Convert the object Elasticsearch `header\ndata` payload format
      #
      # Returns an array alternating action headers and, for :index and
      # :update, their documents. A :delete action has a header only.
      #
      # Raises ArgumentError when `method` is unknown or keys are unknown,
      # and SerializationError when indexing with partial reindexation keys.
      #
      def bulkify(method, keys)
        raise ArgumentError, "unknown method #{method}" unless %i[index update delete].include?(method)

        keys = keys.map(&:to_s)
        payload = []
        property_keys, partial_keys = analyze_keys(keys)

        case method
        when :index
          raise SerializationError, format("cannot serialize the following keys: %{keys}", keys: partial_keys.to_sentence) if partial_keys.any?

          payload << { index: { _id: record.id } }
          payload << as_json(*keys)

        when :update
          if property_keys.any?
            payload << { update: { _id: record.id } }
            payload << { doc: as_json(*property_keys) }
          end

          # Each partial reindexation builds its own update body,
          # so it gets its own action.
          partial_keys.each do |key|
            payload << { update: { _id: record.id } }
            payload << as_json(key)
          end

        when :delete
          payload << { delete: { _id: record.id } }
        end

        payload
      end

      private

      # Split keys into [property_keys, partial_keys]
      #
      # Raises ArgumentError when a key is neither a declared property
      # nor a declared partial reindexation.
      #
      def analyze_keys(keys)
        partial_keys = partial_reindexations.keys & keys
        property_keys = properties & keys
        unknown_keys = keys - property_keys - partial_keys

        raise ArgumentError, format("unknown keys: %{keys}", keys: unknown_keys.to_sentence) if unknown_keys.any?

        [property_keys, partial_keys]
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Raised when the requested keys cannot be serialized
    class SerializationError < StandardError; end
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Instrumentation
    # Shared logging helpers for log subscribers.
    class Base < ActiveSupport::LogSubscriber
      private

      # Log the request of an event at debug level.
      #
      # Nothing is logged unless a `format` is given (see
      # `format_request_body`). When a block is given, it receives the
      # message and the event payload and returns the message to log.
      def log_request(subject, event, format: nil)
        return unless format

        payload = event.payload
        request = payload[:request]

        debug do
          title = color("#{payload[:klass]} #{subject}", GREEN, true)
          request_body = format_request_body(request, format: format)

          message = "  #{title} #{request_body}"
          message = yield(message, payload) if block_given?
          message
        end
      end

      # Log the response of an event at debug level, with the event duration
      # and the "took" value reported in the response when present.
      #
      # Nothing is logged when the payload has no response. With
      # `warn_errors`, each response item carrying an error is also logged
      # at warn level.
      def log_response(subject, event, warn_errors: false)
        payload = event.payload
        response = payload[:response]
        return unless response

        debug do
          title = color("#{payload[:klass]} #{subject}", GREEN, true)

          duration = "#{event.duration.round(1)}ms"
          duration += " / took #{response["took"]}ms" if response.key?("took")
          duration = color("(#{duration})", GREEN, true)

          message = "  #{title} #{duration}"
          message += " got errors" if response["errors"]
          message = yield(message, payload) if block_given?

          message
        end

        return unless response["errors"] && warn_errors

        # Each item is a single-entry hash keyed by its action name
        errors = response["items"].select { |item| item.values.first["error"] }
        errors.each do |error|
          warn { color(error, RED, true) }
        end
      end

      # Render a request body according to `format`:
      # "amazing_print"/"awesome_print", "full" or "truncated".
      # Returns nil for any other format.
      def format_request_body(body, format: nil)
        case format
        when "amazing_print", "awesome_print"
          body.ai(limit: true, index: false)
        when "full"
          json = JSON.dump(body)
          color(json, BLUE, true)
        when "truncated"
          json = JSON.dump(body).truncate(200, omission: "…}")
          color(json, BLUE, true)
        end
      end

      # Human-readable byte size of the JSON dump of the given object.
      #
      # Relies on ActiveSupport directly rather than on the host
      # application's ApplicationController, which may not be defined.
      def inspect_json_size(json)
        ActiveSupport::NumberHelper.number_to_human_size(JSON.dump(json).bytesize)
      end
    end
  end
end