search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  module Hydration
    # Pure builder for the selection and hydration context consumed by Result.
    # Computes the effective base and nested selections (includes minus
    # excludes) and the strict-missing policy.
    module SelectionContext
      # Build the selection context from a relation instance.
      #
      # The relation's +@state+ instance variable is read directly; a relation
      # without state is treated as having an empty state.
      #
      # The returned Hash only contains the keys that apply:
      # - +:base+ / +:requested_root+ — effective root fields (same Array)
      # - +:nested+ — effective fields per association
      # - +:strict_missing+ — +true+ when strict-missing is enabled
      #
      # @param relation [SearchEngine::Relation]
      # @return [Hash, nil] frozen context; nil only when nothing is selected,
      #   nothing is excluded and strict-missing is off. A relation that only
      #   excludes fields yields an empty frozen Hash.
      def self.build(relation)
        state = snapshot_state(relation)

        include_base, exclude_base, include_nested, exclude_nested, nested_order = extract_selection_maps(state)

        nothing_selected = include_base.empty? && maps_all_empty?(include_nested)
        nothing_excluded = exclude_base.empty? && maps_all_empty?(exclude_nested)

        effective_base = compute_effective_base(include_base, exclude_base)
        nested_effective = compute_nested_effective(include_nested, exclude_nested, nested_order)

        selection = {}
        selection[:base] = effective_base unless effective_base.empty?
        selection[:nested] = nested_effective unless nested_effective.empty?
        selection[:strict_missing] = true if strict_missing_flag(state)
        selection[:requested_root] = effective_base unless effective_base.empty?

        return nil if selection.empty? && nothing_selected && nothing_excluded

        selection.freeze
      end

      # --- helpers ----------------------------------------------------------

      # Read the relation's internal state Hash, defaulting to an empty Hash.
      def self.snapshot_state(relation)
        relation.instance_variable_get(:@state) || {}
      end
      private_class_method :snapshot_state

      # Pull the include/exclude lists out of the state and normalize every
      # field name to a String. Association keys are left untouched.
      #
      # @return [Array] [include_base, exclude_base, include_nested, exclude_nested, nested_order]
      def self.extract_selection_maps(state)
        stringify = ->(fields) { Array(fields).map(&:to_s) }

        include_base = stringify.call(state[:select])
        include_nested = (state[:select_nested] || {}).transform_values(&stringify)
        exclude_base = stringify.call(state[:exclude])
        exclude_nested = (state[:exclude_nested] || {}).transform_values(&stringify)
        nested_order = Array(state[:select_nested_order])
        [include_base, exclude_base, include_nested, exclude_nested, nested_order]
      end
      private_class_method :extract_selection_maps

      # Root fields that remain after removing exclusions and blank names.
      # Without explicit includes there is no effective base selection.
      def self.compute_effective_base(include_base, exclude_base)
        return [] if include_base.empty?

        (include_base - exclude_base).reject(&:empty?)
      end
      private_class_method :compute_effective_base

      # Effective nested fields per association (includes minus excludes).
      #
      # Associations are emitted in +nested_order+ first. Associations that
      # have includes but are missing from +nested_order+ are appended in the
      # insertion order of +include_nested+ instead of being silently dropped.
      def self.compute_nested_effective(include_nested, exclude_nested, nested_order)
        # Array#| keeps first-seen order and removes duplicates.
        (nested_order | include_nested.keys).each_with_object({}) do |assoc, out|
          included = Array(include_nested[assoc])
          next if included.empty?

          effective = included - Array(exclude_nested[assoc])
          out[assoc] = effective unless effective.empty?
        end
      end
      private_class_method :compute_nested_effective

      # Resolve the strict-missing policy.
      #
      # A per-relation option (+options[:selection][:strict_missing]+, Symbol
      # or String keys) wins over the global config; only +true+ / "true"
      # enable it. Any error while resolving is treated as "not strict".
      def self.strict_missing_flag(state)
        opts = state[:options] || {}
        sel = opts[:selection] || opts['selection'] || {}
        if sel.key?(:strict_missing) || sel.key?('strict_missing')
          val = sel[:strict_missing]
          val = sel['strict_missing'] if val.nil?
          return val == true || val.to_s == 'true'
        end
        SearchEngine.config.selection.strict_missing ? true : false
      rescue StandardError
        # Best effort: a missing or broken configuration must not break hydration.
        false
      end
      private_class_method :strict_missing_flag

      # True when every value of the map is nil or an empty list.
      def self.maps_all_empty?(map)
        map.values.all? { |fields| Array(fields).empty? }
      end
      private_class_method :maps_all_empty?
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ module SearchEngine
6
+ class Indexer
7
+ # Plans and produces batches from a stream of documents.
8
+ #
9
+ # Takes either pre-batched arrays from an enumerable or a flat enumerable of
10
+ # docs and emits JSONL-encoded buffers alongside counts and stats.
11
+ # The Indexer currently receives already-batched arrays; this planner keeps
12
+ # that contract and focuses on JSONL encoding with minimal allocations.
13
+ #
14
+ # @since M8
15
+ class BatchPlanner
16
+ # Produce a JSONL buffer and counts for a provided docs array.
17
+ #
18
+ # @param docs [Array<Hash>]
19
+ # @param buffer [String] a reusable String buffer to encode into
20
+ # @return [Array(Integer, Integer)] [docs_count, bytes_sent]
21
+ # @raise [SearchEngine::Errors::InvalidParams] when a document is not a Hash or missing :id
22
+ # @see https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer
23
+ def self.encode_jsonl!(docs, buffer)
24
+ count = 0
25
+ buffer.clear
26
+ size = docs.size
27
+ docs.each_with_index do |raw, idx|
28
+ doc = ensure_hash_document(raw)
29
+ ensure_id!(doc)
30
+ # Force system timestamp field prior to serialization to Typesense
31
+ now_i = if defined?(Time) && defined?(Time.zone) && Time.zone
32
+ Time.zone.now.to_i
33
+ else
34
+ Time.now.to_i
35
+ end
36
+ doc[:doc_updated_at] = now_i if doc.is_a?(Hash)
37
+ buffer << JSON.generate(doc)
38
+ buffer << "\n" if idx < (size - 1)
39
+ count += 1
40
+ end
41
+ # Ensure trailing newline for non-empty payloads for consistency
42
+ buffer << "\n" if size.positive? && !buffer.end_with?("\n")
43
+ [count, buffer.bytesize]
44
+ end
45
+
46
+ # Utility: normalize a batch-like object to an Array.
47
+ # @param batch [Object]
48
+ # @return [Array]
49
+ def self.to_array(batch)
50
+ return batch if batch.is_a?(Array)
51
+
52
+ batch.respond_to?(:to_a) ? batch.to_a : Array(batch)
53
+ end
54
+
55
+ class << self
56
+ private
57
+
58
+ def ensure_hash_document(obj)
59
+ if obj.is_a?(Hash)
60
+ obj
61
+ else
62
+ raise SearchEngine::Errors::InvalidParams,
63
+ 'Indexer requires batches of Hash-like documents with at least an :id key. ' \
64
+ 'Mapping DSL is not available yet. See ' \
65
+ 'https://nikita-shkoda.mintlify.app/projects/search-engine-for-typesense/indexer.'
66
+ end
67
+ end
68
+
69
+ def ensure_id!(doc)
70
+ has_id = doc.key?(:id) || doc.key?('id')
71
+ raise SearchEngine::Errors::InvalidParams, 'document is missing required id' unless has_id
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end