caoutsearch 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +22 -0
  3. data/README.md +43 -0
  4. data/lib/caoutsearch/config/client.rb +13 -0
  5. data/lib/caoutsearch/config/mappings.rb +40 -0
  6. data/lib/caoutsearch/config/settings.rb +29 -0
  7. data/lib/caoutsearch/filter/base.rb +101 -0
  8. data/lib/caoutsearch/filter/boolean.rb +19 -0
  9. data/lib/caoutsearch/filter/date.rb +49 -0
  10. data/lib/caoutsearch/filter/default.rb +51 -0
  11. data/lib/caoutsearch/filter/geo_point.rb +11 -0
  12. data/lib/caoutsearch/filter/match.rb +57 -0
  13. data/lib/caoutsearch/filter/none.rb +7 -0
  14. data/lib/caoutsearch/filter/range.rb +28 -0
  15. data/lib/caoutsearch/filter.rb +29 -0
  16. data/lib/caoutsearch/index/base.rb +35 -0
  17. data/lib/caoutsearch/index/document.rb +107 -0
  18. data/lib/caoutsearch/index/indice.rb +55 -0
  19. data/lib/caoutsearch/index/indice_versions.rb +123 -0
  20. data/lib/caoutsearch/index/instrumentation.rb +19 -0
  21. data/lib/caoutsearch/index/internal_dsl.rb +77 -0
  22. data/lib/caoutsearch/index/naming.rb +29 -0
  23. data/lib/caoutsearch/index/reindex.rb +77 -0
  24. data/lib/caoutsearch/index/scoping.rb +54 -0
  25. data/lib/caoutsearch/index/serialization.rb +136 -0
  26. data/lib/caoutsearch/index.rb +7 -0
  27. data/lib/caoutsearch/instrumentation/base.rb +69 -0
  28. data/lib/caoutsearch/instrumentation/index.rb +57 -0
  29. data/lib/caoutsearch/instrumentation/search.rb +41 -0
  30. data/lib/caoutsearch/mappings.rb +79 -0
  31. data/lib/caoutsearch/search/base.rb +27 -0
  32. data/lib/caoutsearch/search/dsl/item.rb +42 -0
  33. data/lib/caoutsearch/search/query/base.rb +16 -0
  34. data/lib/caoutsearch/search/query/boolean.rb +63 -0
  35. data/lib/caoutsearch/search/query/cleaning.rb +29 -0
  36. data/lib/caoutsearch/search/query/getters.rb +35 -0
  37. data/lib/caoutsearch/search/query/merge.rb +27 -0
  38. data/lib/caoutsearch/search/query/nested.rb +23 -0
  39. data/lib/caoutsearch/search/query/setters.rb +68 -0
  40. data/lib/caoutsearch/search/sanitizer.rb +28 -0
  41. data/lib/caoutsearch/search/search/delete_methods.rb +21 -0
  42. data/lib/caoutsearch/search/search/inspect.rb +36 -0
  43. data/lib/caoutsearch/search/search/instrumentation.rb +21 -0
  44. data/lib/caoutsearch/search/search/internal_dsl.rb +77 -0
  45. data/lib/caoutsearch/search/search/naming.rb +47 -0
  46. data/lib/caoutsearch/search/search/query_builder.rb +94 -0
  47. data/lib/caoutsearch/search/search/query_methods.rb +180 -0
  48. data/lib/caoutsearch/search/search/resettable.rb +35 -0
  49. data/lib/caoutsearch/search/search/response.rb +88 -0
  50. data/lib/caoutsearch/search/search/scroll_methods.rb +113 -0
  51. data/lib/caoutsearch/search/search/search_methods.rb +230 -0
  52. data/lib/caoutsearch/search/type_cast.rb +76 -0
  53. data/lib/caoutsearch/search/value.rb +111 -0
  54. data/lib/caoutsearch/search/value_overflow.rb +17 -0
  55. data/lib/caoutsearch/search.rb +6 -0
  56. data/lib/caoutsearch/settings.rb +22 -0
  57. data/lib/caoutsearch/version.rb +5 -0
  58. data/lib/caoutsearch.rb +38 -0
  59. metadata +268 -0
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Class-level helpers to create, check, delete and refresh the
    # Elasticsearch index named after `index_name`.
    module Indice
      extend ActiveSupport::Concern

      included do
        delegate :refresh_indice, to: :class
      end

      class_methods do
        # Create the index named `index_name`, using the class settings and mappings
        #
        #   ArticleIndex.create_indice
        #
        def create_indice
          client.indices.create(
            index: index_name,
            body: {
              settings: settings.as_json,
              mappings: mappings.as_json
            }
          )
        end

        # Verify index existence
        #
        #   ArticleIndex.indice_exists?
        #   => true
        #
        def indice_exists?
          client.indices.exists?(index: index_name)
        end

        # Delete the index
        #
        #   ArticleIndex.delete_indice
        #
        def delete_indice
          client.indices.delete(index: index_name)
        end

        # Explicitly refresh an index, making all operations performed
        # since the last refresh available for search.
        #
        #   ArticleIndex.refresh_indice
        #   ArticleIndex.refresh_indice(index: "articles_v3")
        #
        # The `index` keyword defaults to `index_name`; it exists so that callers
        # writing to another index (such as a versioned one) can refresh that one.
        #
        def refresh_indice(index: index_name)
          client.indices.refresh(index: index)
        end
      end
    end
  end
end
@@ -0,0 +1,123 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Class-level helpers managing versioned indices (`<index_name>_v<N>`)
    # and the `index_name` alias pointing at one of them.
    module IndiceVersions
      extend ActiveSupport::Concern

      class_methods do
        # List versions
        #
        #   ArticleIndex.indice_versions
        #   => ["articles_v0", "articles_v1"]
        #
        def indice_versions
          client.cat
            .indices(h: ["index"], format: :json)
            .filter_map { |row| row["index"] }
            .grep(indice_version_regexp)
        end

        # List aliased versions
        #
        #   ArticleIndex.aliased_indice_versions
        #   => ["articles_v1"]
        #
        # Returns an empty array when the alias lookup answers "not found".
        #
        def aliased_indice_versions
          client.indices
            .get_alias(name: index_name)
            .keys
            .grep(indice_version_regexp)
        rescue Elastic::Transport::Transport::Errors::NotFound
          []
        end

        # List last version available (aliased or not)
        #
        #   ArticleIndex.last_indice_version
        #   => "articles_v1"
        #
        # Versions are compared on their numeric suffix: a plain string
        # comparison would rank "articles_v9" above "articles_v10".
        # Returns nil when no version exists.
        #
        def last_indice_version
          indice_versions.max_by { |version_name| indice_version_number(version_name) }
        end

        # Next version to create
        #
        #   ArticleIndex.next_indice_version
        #   => "articles_v2"
        #
        def next_indice_version
          current_version = last_indice_version
          return "#{index_name}_v0" unless current_version

          "#{index_name}_v#{indice_version_number(current_version) + 1}"
        end

        # Create new version of the index
        #
        #   ArticleIndex.create_indice_version
        #   => "articles_v2"
        #
        def create_indice_version(version_name = next_indice_version)
          client.indices.create(
            index: version_name,
            body: {
              settings: settings.as_json,
              mappings: mappings.as_json
            }
          )

          version_name
        end

        # Switch the index alias to a new version
        #
        #   ArticleIndex.switch_indice_version("articles_v2")
        #   => "articles_v2"
        #
        # Without argument, the last available version is used.
        # Returns false, without touching aliases, when the requested
        # version is already aliased.
        #
        def switch_indice_version(version_name = :__last__)
          version_name = last_indice_version if version_name == :__last__

          aliased_versions = aliased_indice_versions
          return false if aliased_versions.include?(version_name)

          # Add the new alias and drop the previous ones in a single request
          actions = [{ add: { index: version_name, alias: index_name } }]
          aliased_versions.each do |aliased_version|
            actions << { remove: { index: aliased_version, alias: index_name } }
          end

          client.indices.update_aliases(body: { actions: actions })
          refresh_indice
          version_name
        end

        # Prune not-aliased versions
        #
        def prune_indice_versions
          old_versions = indice_versions - aliased_indice_versions
          old_versions.each do |version_name|
            client.indices.delete(index: version_name)
          end
        end

        # Delete all versions, alias or not
        #
        def delete_all_indice_versions
          indice_versions.each do |version_name|
            client.indices.delete(index: version_name)
          end
        end

        private

        # Pattern matching the versioned names of this index.
        #
        # The index name is escaped (it may hold regexp metacharacters such as ".")
        # and the pattern is anchored on the whole string. It is rebuilt on each
        # call so that it always follows the current `index_name`.
        #
        def indice_version_regexp
          /\A#{Regexp.escape(index_name)}_v\d+\z/
        end

        # Numeric suffix of a versioned index name ("articles_v12" => 12)
        #
        def indice_version_number(version_name)
          version_name[/_v(\d+)\z/, 1].to_i
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Publishes `<action>.caoutsearch_index` events through
    # ActiveSupport::Notifications, from both instances and classes.
    module Instrumentation
      extend ActiveSupport::Concern

      # Instrument a block from an index instance.
      # The payload holds the given options plus the class name under :klass.
      def instrument(action, **options, &block)
        event_name = "#{action}.caoutsearch_index"
        payload = options.merge(klass: self.class.to_s)

        ActiveSupport::Notifications.instrument(event_name, payload, &block)
      end

      class_methods do
        # Instrument a block from the index class itself.
        # The payload holds the given options plus the class name under :klass.
        def instrument(action, **options, &block)
          event_name = "#{action}.caoutsearch_index"
          payload = options.merge(klass: to_s)

          ActiveSupport::Notifications.instrument(event_name, payload, &block)
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Class-level DSL declaring the properties of an index and the
    # partial reindexations it supports.
    module InternalDSL
      extend ActiveSupport::Concern

      included do
        # Be careful with these class attributes
        # Always use `+=` or `.merge` to assign a new copy
        #
        class_attribute :properties, instance_writer: false, default: []
        class_attribute :partial_reindexations, instance_writer: false, default: {}
      end

      class_methods do
        # Declare a property
        #
        #   class ArticleIndex < Caoutsearch::Index::Base
        #     property :title
        #     property :description
        #     property :tags
        #
        #     def tags
        #       record.tags.map do |tag|
        #         {
        #           label: tag.label,
        #           score: tag.score
        #         }
        #       end
        #     end
        #   end
        #
        # An optional callable body is used to define the property method.
        # Raises ArgumentError when the body is given but not callable.
        #
        def property(key, body = nil)
          raise ArgumentError, "The property body needs to be callable." if body && !body.respond_to?(:call)

          key = key.to_s
          self.properties += [key] unless properties.include?(key)

          define_method(key, &body) if body
        end

        # Declare an alias-property for partial reindexation
        #
        #   class LocalIndex < Caoutsearch::Index::Base
        #     property :invariant
        #     property :geoaddress
        #     property :geoposition
        #
        #     allow_partial_reindex :post_processed_data, properties: %i[geoaddress geoposition]
        #   end
        #
        # With a callable body, the body defines the method returning the payload.
        # Otherwise a default method is defined, returning `{ doc: { ... } }` built
        # from the listed properties, with `doc_as_upsert: true` when `upsert` is set.
        # Raises ArgumentError when the body is given but not callable.
        #
        def allow_partial_reindex(name, body = nil, properties: nil, upsert: false)
          raise ArgumentError, "The allow_partial_reindex body needs to be callable." if body && !body.respond_to?(:call)

          name = name.to_s
          # `merge` returns a new hash, the inherited one is left untouched
          self.partial_reindexations = partial_reindexations.merge(name => { properties: properties })

          if body
            define_method(name, &body)
          else
            define_method(name) do
              # Use a block-local variable: assigning to the captured `body`
              # would share one variable between every call of this method.
              payload = { doc: properties.index_with { |key| send(key) } }
              payload[:doc_as_upsert] = true if upsert
              payload
            end
          end
        end

        # Tell whether the subject is a declared property or partial reindexation
        #
        def allow_reindex?(subject)
          subject = subject.to_s
          properties.include?(subject) || partial_reindexations.include?(subject)
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Resolves the name of the index targeted by an index class.
    module Naming
      extend ActiveSupport::Concern

      included do
        delegate :index_name, to: :class
      end

      class_methods do
        # Explicitly set the index name for this class
        attr_writer :index_name

        # Index name, derived from the class name unless explicitly assigned
        def index_name
          return @index_name if @index_name

          @index_name = default_index_name
        end

        private

        # Class name without its "Index" suffix, tableized,
        # with namespace separators turned into underscores
        def default_index_name
          base_name = name.gsub(/Index$/, "")
          base_name.tableize.tr("/", "_")
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Class-level bulk reindexation of records.
    module Reindex
      extend ActiveSupport::Concern

      class_methods do
        # Reindex multiple records with bulk API
        #
        # Examples:
        #   ArticleIndex.reindex(Article.all)
        #   ArticleIndex.reindex(Article.modified_since(2.days), :title, :content)
        #
        # Examples with options:
        #
        #   articles = Article.modified_since(2.days)
        #
        #   ArticleIndex.reindex(articles, index: "article_v3")
        #   ArticleIndex.reindex(articles, batch_size: 10)
        #   ArticleIndex.reindex(articles, method: :update)
        #   ArticleIndex.reindex(articles, refresh: true)
        #
        # Accepted options are :index, :refresh, :batch_size, :method, :total
        # and :progress, any other key raises an ArgumentError.
        #
        # The bulk method defaults to :update when keys are given, :index otherwise.
        #
        # The number of records is counted unless a `total` option is given:
        #   ArticleIndex.reindex(Article.limit(100), total: 100)
        #
        # When the total fits in one batch, records are loaded at once instead
        # of being fetched with `find_in_batches`.
        #
        # The `progress` object, when given, receives `total=`, `progress=`
        # and `increment` calls.
        #
        def reindex(records, *keys, **options)
          options.assert_valid_keys(:index, :refresh, :batch_size, :method, :total, :progress)
          keys.flatten!

          records = apply_scopes(records, keys)
          records = records.strict_loading

          index = options.fetch(:index, index_name)
          refresh = options.fetch(:refresh, false)
          method = options.fetch(:method) { keys.present? ? :update : :index }
          batch_size = options[:batch_size] || 100
          total = options[:total] || records.count(:all)
          progress = options[:progress]
          current_progress = 0

          return if total.zero?

          progress&.total = total
          progress&.progress = current_progress

          finder = if total <= batch_size
            records.to_a.each_slice(total)
          else
            records.find_in_batches(batch_size: batch_size)
          end

          finder.each do |batch|
            current_progress += batch.size
            request_payload = {
              index: index,
              body: bulkify(batch, method, keys)
            }

            instrument(:reindex, total: total, progress: current_progress, records: batch) do |event_payload|
              event_payload[:request] = request_payload
              event_payload[:response] = client.bulk(request_payload)
            end

            progress&.increment(batch.size)
            records.connection.clear_query_cache
          end

          # Refresh the index actually written to, which may differ from `index_name`.
          # This goes through the client because it must target that index explicitly.
          client.indices.refresh(index: index) if refresh
        end

        alias_method :update_documents, :reindex
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Class-level registry of named scopes, applied to the records
    # before they are reindexed.
    module Scoping
      extend ActiveSupport::Concern

      included do
        class_attribute :scopes, instance_accessor: false, default: {}
        class_attribute :preloads, instance_accessor: false, default: {}
      end

      class_methods do
        # Register the scope applied on every call, stored under "_default"
        #
        def default_scope(body)
          scope :_default, body
        end

        # Register a named scope. The body is called with the records and
        # its result replaces them.
        # Raises ArgumentError when the body is not callable.
        #
        def scope(name, body)
          raise ArgumentError, "The scope body needs to be callable." unless body.respond_to?(:call)

          name = name.to_s
          # `merge` returns a new hash, the inherited one is left untouched
          self.scopes = scopes.merge(name => body)
        end

        # Register a scope calling `preload` on the records, with the
        # given associations or, by default, with the name itself
        #
        def preload(name, with: nil)
          name = name.to_s
          with = Array.wrap(with || name)

          scope name, ->(records) { records.preload(*with) }
        end

        private

        # Apply to the records the scopes registered for the given names
        # (all properties when no name is given), then the "_default" one.
        #
        # A name matching a partial reindexation, and no scope, is expanded
        # to the scopes of the properties listed for that reindexation.
        #
        def apply_scopes(records, names = [])
          names = names.map(&:to_s)
          names = properties if names.empty?
          names += %w[_default] # Use += instead of << to create a copy

          names.each do |name|
            if scopes.key?(name)
              records = scopes[name].call(records)
            elsif partial_reindexations.key?(name)
              # Named differently from the `properties` class attribute read above
              partial_properties = partial_reindexations.dig(name, :properties)
              records = apply_scopes(records, partial_properties) if partial_properties&.any?
            end
          end

          records
        end
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Serialization of records to JSON documents and to
    # Elasticsearch bulk payloads.
    module Serialization
      extend ActiveSupport::Concern

      included do
        delegate :to_json, to: :as_json
      end

      class_methods do
        # Transform record or array of records to JSON:
        #
        #   transform(nil)                 => nil
        #   transform(record.first)        => { ... }
        #   transform(record.limit(100))   => [{ ... }, { ... }, ...]
        #
        def transform(input, *keys)
          if input.nil?
            nil
          elsif input.respond_to?(:map)
            input.map { |record| transform(record, *keys) }
          else
            new(input).as_json(*keys)
          end
        end

        # Convert an array of records to an Elasticsearch bulk payload
        #
        # Raises ArgumentError unless method is :index, :update or :delete.
        #
        def bulkify(input, method, keys)
          raise ArgumentError, "unknown method #{method}" unless %i[index update delete].include?(method)

          # Each record yields an array of bulk lines, concatenated in order
          input.flat_map { |record| new(record).bulkify(method, keys) }
        end
      end

      # Serialize the object payload
      #
      # Without keys, all properties are serialized. A partial reindexation
      # key cannot be combined with any other key: its result is merged at
      # the root of the payload instead of being stored under its own key.
      #
      # Raises ArgumentError on unknown keys, and SerializationError when
      # a partial reindexation key is mixed with other keys.
      #
      def as_json(*keys)
        keys = keys.map(&:to_s)
        _, partial_keys = analyze_keys(keys)

        if keys.size > 1 && partial_keys.any?
          raise SerializationError, format("cannot serialize the following keys together: %{keys}", keys: partial_keys.to_sentence)
        end

        json = {}
        keys = properties if keys.empty?
        keys.each do |key|
          result = send(key)

          if partial_reindexations.include?(key)
            json = json.merge(result)
          else
            json[key.to_sym] = result
          end
        end

        simplify(json)
      end

      # Recursive objects simplication:
      #
      #   [nil, 'A', 'A', 'B'] => ['A', 'B']
      #   [nil, 'A', 'A']      => 'A'
      #   [nil, nil]           => nil
      #   []                   => nil
      #
      #   { key: [nil, 'A', 'A', 'B'] } => { key: ['A', 'B'] }
      #   { key: [nil, 'A', 'A'] }      => { key: 'A' }
      #   { key: [nil, nil] }           => { key: nil }
      #   { key: [] }                   => { key: nil }
      #   { }                           => { }
      #
      # Hashes are simplified in place.
      #
      def simplify(object)
        case object
        when Array
          object = object.filter_map { |array_item| simplify(array_item) }.uniq
          object = object[0] if object.size <= 1
        when Hash
          object.each { |key, value| object[key] = simplify(value) }
        end

        object
      end

      # Convert the object Elasticsearch `header\ndata` payload format
      #
      # Returns an array of bulk lines:
      #   :index   a header and the serialized document
      #   :update  a header and a `doc` for the property keys, then a header
      #            and a payload for each partial reindexation key
      #   :delete  a header only
      #
      # Raises ArgumentError on unknown method or keys, and SerializationError
      # when a partial reindexation key is used with the :index method.
      #
      def bulkify(method, keys)
        raise ArgumentError, "unknown method #{method}" unless %i[index update delete].include?(method)

        keys = keys.map(&:to_s)
        payload = []
        property_keys, partial_keys = analyze_keys(keys)

        case method
        when :index
          raise SerializationError, format("cannot serialize the following keys: %{keys}", keys: partial_keys.to_sentence) if partial_keys.any?

          payload << { index: { _id: record.id } }
          payload << as_json(*keys)

        when :update
          if property_keys.any?
            payload << { update: { _id: record.id } }
            payload << { doc: as_json(*property_keys) }
          end

          partial_keys.each do |key|
            payload << { update: { _id: record.id } }
            payload << as_json(key)
          end

        when :delete
          # A delete action is a single header line, without any document
          payload << { delete: { _id: record.id } }
        end

        payload
      end

      private

      # Split the keys between declared properties and partial reindexations
      #
      # Returns [property_keys, partial_keys].
      # Raises ArgumentError when some keys match neither.
      #
      def analyze_keys(keys)
        partial_keys = partial_reindexations.keys & keys
        property_keys = properties & keys
        unknown_keys = keys - property_keys - partial_keys

        raise ArgumentError, format("unknown keys: %{keys}", keys: unknown_keys.to_sentence) if unknown_keys.any?

        [property_keys, partial_keys]
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Index
    # Error raised when keys cannot be serialized as requested.
    class SerializationError < StandardError
    end
  end
end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module Caoutsearch
  module Instrumentation
    # Base log subscriber providing helpers to log the requests and
    # responses carried by instrumented events.
    class Base < ActiveSupport::LogSubscriber
      private

      # Log, at debug level, the request found in the event payload.
      #
      # Nothing is logged without a format; see `format_request_body` for the
      # supported ones. When a block is given, it receives the message and
      # the payload, and returns the message to log.
      #
      def log_request(subject, event, format: nil)
        return unless format

        payload = event.payload
        request = payload[:request]

        debug do
          title = color("#{payload[:klass]} #{subject}", GREEN, true)
          request_body = format_request_body(request, format: format)

          message = " #{title} #{request_body}"
          message = yield(message, payload) if block_given?
          message
        end
      end

      # Log, at debug level, the duration of the response found in the event payload.
      #
      # Nothing is logged when the payload holds no response. When a block is
      # given, it receives the message and the payload, and returns the message
      # to log. With `warn_errors`, each item of an errored response holding an
      # error is also logged at warn level.
      #
      def log_response(subject, event, warn_errors: false)
        payload = event.payload
        response = payload[:response]
        return unless response

        debug do
          title = color("#{payload[:klass]} #{subject}", GREEN, true)

          duration = "#{event.duration.round(1)}ms"
          duration += " / took #{response["took"]}ms" if response.key?("took")
          duration = color("(#{duration})", GREEN, true)

          message = " #{title} #{duration}"
          message += " got errors" if response["errors"]
          message = yield(message, payload) if block_given?

          message
        end

        return unless response["errors"] && warn_errors

        # Each item is a hash whose first value describes the performed action
        errors = response["items"].select { |item| item.values.first["error"] }
        errors.each do |error|
          warn { color(error, RED, true) }
        end
      end

      # Format a request body for logging.
      #
      #   "amazing_print", "awesome_print"  inspected with `ai`
      #   "full"                            whole JSON
      #   "truncated"                       JSON truncated to 200 characters
      #
      # Returns nil for any other format.
      #
      def format_request_body(body, format: nil)
        case format
        when "amazing_print", "awesome_print"
          body.ai(limit: true, index: false)
        when "full"
          json = JSON.dump(body)
          color(json, BLUE, true)
        when "truncated"
          json = JSON.dump(body).truncate(200, omission: "…}")
          color(json, BLUE, true)
        end
      end

      # Human readable size of the JSON dump of the given object.
      #
      # Relies on ActiveSupport rather than on the helpers of the host
      # application controller, which are not always available.
      #
      def inspect_json_size(json)
        ActiveSupport::NumberHelper.number_to_human_size(JSON.dump(json).bytesize)
      end
    end
  end
end