search-engine-for-typesense 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +148 -0
  4. data/app/search_engine/search_engine/app_info.rb +11 -0
  5. data/app/search_engine/search_engine/index_partition_job.rb +170 -0
  6. data/lib/generators/search_engine/install/install_generator.rb +20 -0
  7. data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
  8. data/lib/generators/search_engine/model/model_generator.rb +86 -0
  9. data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
  10. data/lib/search-engine-for-typesense.rb +12 -0
  11. data/lib/search_engine/active_record_syncable.rb +247 -0
  12. data/lib/search_engine/admin/stopwords.rb +125 -0
  13. data/lib/search_engine/admin/synonyms.rb +125 -0
  14. data/lib/search_engine/admin.rb +12 -0
  15. data/lib/search_engine/ast/and.rb +52 -0
  16. data/lib/search_engine/ast/binary_op.rb +75 -0
  17. data/lib/search_engine/ast/eq.rb +19 -0
  18. data/lib/search_engine/ast/group.rb +18 -0
  19. data/lib/search_engine/ast/gt.rb +12 -0
  20. data/lib/search_engine/ast/gte.rb +12 -0
  21. data/lib/search_engine/ast/in.rb +28 -0
  22. data/lib/search_engine/ast/lt.rb +12 -0
  23. data/lib/search_engine/ast/lte.rb +12 -0
  24. data/lib/search_engine/ast/matches.rb +55 -0
  25. data/lib/search_engine/ast/node.rb +176 -0
  26. data/lib/search_engine/ast/not_eq.rb +13 -0
  27. data/lib/search_engine/ast/not_in.rb +24 -0
  28. data/lib/search_engine/ast/or.rb +52 -0
  29. data/lib/search_engine/ast/prefix.rb +51 -0
  30. data/lib/search_engine/ast/raw.rb +41 -0
  31. data/lib/search_engine/ast/unary_op.rb +43 -0
  32. data/lib/search_engine/ast.rb +101 -0
  33. data/lib/search_engine/base/creation.rb +727 -0
  34. data/lib/search_engine/base/deletion.rb +80 -0
  35. data/lib/search_engine/base/display_coercions.rb +36 -0
  36. data/lib/search_engine/base/hydration.rb +312 -0
  37. data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
  38. data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
  39. data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
  40. data/lib/search_engine/base/index_maintenance.rb +459 -0
  41. data/lib/search_engine/base/indexing_dsl.rb +255 -0
  42. data/lib/search_engine/base/joins.rb +479 -0
  43. data/lib/search_engine/base/model_dsl.rb +472 -0
  44. data/lib/search_engine/base/presets.rb +43 -0
  45. data/lib/search_engine/base/pretty_printer.rb +315 -0
  46. data/lib/search_engine/base/relation_delegation.rb +42 -0
  47. data/lib/search_engine/base/scopes.rb +113 -0
  48. data/lib/search_engine/base/updating.rb +92 -0
  49. data/lib/search_engine/base.rb +38 -0
  50. data/lib/search_engine/bulk.rb +284 -0
  51. data/lib/search_engine/cache.rb +33 -0
  52. data/lib/search_engine/cascade.rb +531 -0
  53. data/lib/search_engine/cli/doctor.rb +631 -0
  54. data/lib/search_engine/cli/support.rb +217 -0
  55. data/lib/search_engine/cli.rb +222 -0
  56. data/lib/search_engine/client/http_adapter.rb +63 -0
  57. data/lib/search_engine/client/request_builder.rb +92 -0
  58. data/lib/search_engine/client/services/base.rb +74 -0
  59. data/lib/search_engine/client/services/collections.rb +161 -0
  60. data/lib/search_engine/client/services/documents.rb +214 -0
  61. data/lib/search_engine/client/services/operations.rb +152 -0
  62. data/lib/search_engine/client/services/search.rb +190 -0
  63. data/lib/search_engine/client/services.rb +29 -0
  64. data/lib/search_engine/client.rb +765 -0
  65. data/lib/search_engine/client_options.rb +20 -0
  66. data/lib/search_engine/collection_resolver.rb +191 -0
  67. data/lib/search_engine/collections_graph.rb +330 -0
  68. data/lib/search_engine/compiled_params.rb +143 -0
  69. data/lib/search_engine/compiler.rb +383 -0
  70. data/lib/search_engine/config/observability.rb +27 -0
  71. data/lib/search_engine/config/presets.rb +92 -0
  72. data/lib/search_engine/config/selection.rb +16 -0
  73. data/lib/search_engine/config/typesense.rb +48 -0
  74. data/lib/search_engine/config/validators.rb +97 -0
  75. data/lib/search_engine/config.rb +917 -0
  76. data/lib/search_engine/console_helpers.rb +130 -0
  77. data/lib/search_engine/deletion.rb +103 -0
  78. data/lib/search_engine/dispatcher.rb +125 -0
  79. data/lib/search_engine/dsl/parser.rb +582 -0
  80. data/lib/search_engine/engine.rb +167 -0
  81. data/lib/search_engine/errors.rb +290 -0
  82. data/lib/search_engine/filters/sanitizer.rb +189 -0
  83. data/lib/search_engine/hydration/materializers.rb +808 -0
  84. data/lib/search_engine/hydration/selection_context.rb +96 -0
  85. data/lib/search_engine/indexer/batch_planner.rb +76 -0
  86. data/lib/search_engine/indexer/bulk_import.rb +626 -0
  87. data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
  88. data/lib/search_engine/indexer/retry_policy.rb +103 -0
  89. data/lib/search_engine/indexer.rb +747 -0
  90. data/lib/search_engine/instrumentation.rb +308 -0
  91. data/lib/search_engine/joins/guard.rb +202 -0
  92. data/lib/search_engine/joins/resolver.rb +95 -0
  93. data/lib/search_engine/logging/color.rb +78 -0
  94. data/lib/search_engine/logging/format_helpers.rb +92 -0
  95. data/lib/search_engine/logging/partition_progress.rb +53 -0
  96. data/lib/search_engine/logging_subscriber.rb +388 -0
  97. data/lib/search_engine/mapper.rb +785 -0
  98. data/lib/search_engine/multi.rb +286 -0
  99. data/lib/search_engine/multi_result.rb +186 -0
  100. data/lib/search_engine/notifications/compact_logger.rb +675 -0
  101. data/lib/search_engine/observability.rb +162 -0
  102. data/lib/search_engine/operations.rb +58 -0
  103. data/lib/search_engine/otel.rb +227 -0
  104. data/lib/search_engine/partitioner.rb +128 -0
  105. data/lib/search_engine/ranking_plan.rb +118 -0
  106. data/lib/search_engine/registry.rb +158 -0
  107. data/lib/search_engine/relation/compiler.rb +711 -0
  108. data/lib/search_engine/relation/deletion.rb +37 -0
  109. data/lib/search_engine/relation/dsl/filters.rb +624 -0
  110. data/lib/search_engine/relation/dsl/selection.rb +240 -0
  111. data/lib/search_engine/relation/dsl.rb +903 -0
  112. data/lib/search_engine/relation/dx/dry_run.rb +59 -0
  113. data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
  114. data/lib/search_engine/relation/dx.rb +231 -0
  115. data/lib/search_engine/relation/materializers.rb +118 -0
  116. data/lib/search_engine/relation/options.rb +138 -0
  117. data/lib/search_engine/relation/state.rb +274 -0
  118. data/lib/search_engine/relation/updating.rb +44 -0
  119. data/lib/search_engine/relation.rb +623 -0
  120. data/lib/search_engine/result.rb +664 -0
  121. data/lib/search_engine/schema.rb +1083 -0
  122. data/lib/search_engine/sources/active_record_source.rb +185 -0
  123. data/lib/search_engine/sources/base.rb +62 -0
  124. data/lib/search_engine/sources/lambda_source.rb +55 -0
  125. data/lib/search_engine/sources/sql_source.rb +196 -0
  126. data/lib/search_engine/sources.rb +71 -0
  127. data/lib/search_engine/stale_rules.rb +160 -0
  128. data/lib/search_engine/test/minitest_assertions.rb +57 -0
  129. data/lib/search_engine/test/offline_client.rb +134 -0
  130. data/lib/search_engine/test/rspec_matchers.rb +77 -0
  131. data/lib/search_engine/test/stub_client.rb +201 -0
  132. data/lib/search_engine/test.rb +66 -0
  133. data/lib/search_engine/test_autoload.rb +8 -0
  134. data/lib/search_engine/update.rb +35 -0
  135. data/lib/search_engine/version.rb +7 -0
  136. data/lib/search_engine.rb +332 -0
  137. data/lib/tasks/search_engine.rake +501 -0
  138. data/lib/tasks/search_engine_doctor.rake +16 -0
  139. metadata +225 -0
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  # URL-level client option helpers.
  #
  # The values produced here are meant for the request URL rather than the
  # request body, and are read from a configuration object (by default
  # {SearchEngine.config}).
  module ClientOptions
    class << self
      # Derive the URL-level cache options from a configuration object.
      #
      # @param config [#use_cache, #cache_ttl_s] configuration to read from
      # @return [Hash] +:use_cache+ coerced to a strict Boolean and +:cache_ttl+
      #   coerced to an Integer (0 when the TTL is unset)
      # @raise [ArgumentError, TypeError] when the TTL cannot be converted by +Integer()+
      def url_options_from_config(config = SearchEngine.config)
        caching_enabled = config.use_cache ? true : false
        ttl_seconds = config.cache_ttl_s || 0

        { use_cache: caching_enabled, cache_ttl: Integer(ttl_seconds) }
      end
    end
  end
end
@@ -0,0 +1,191 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'search_engine/registry'
4
+ require 'search_engine/cascade'
5
+
6
module SearchEngine
  # Helper utilities to resolve collection models from Typesense collection names.
  #
  # Public API:
  # - {.model_for_physical(physical, client: nil)} => Class or nil
  # - {.model_for_logical(logical)} => Class or nil
  # - {.physicals_for_logical(client, logical)} => Array<[String, Integer]>
  module CollectionResolver
    class << self
      # Build a map of logical names => model classes by merging the registry
      # with a scan of the SearchEngine namespace for classes descending from Base.
      #
      # Registry entries take precedence over scanned classes. The scan is
      # best-effort and isolated per constant: a constant that fails to resolve
      # (or whose +collection+ reader raises) is skipped without discarding the
      # constants that follow it.
      # @return [Hash{String=>Class}]
      def models_map
        map = {}
        reg = SearchEngine::Registry.mapping
        reg.each { |k, v| map[k.to_s] = v } if reg && !reg.empty?

        SearchEngine.constants.each do |const_name|
          const = SearchEngine.const_get(const_name)
          next unless const.is_a?(Class) && const <= SearchEngine::Base

          map[logical_name_for(const)] ||= const
        rescue StandardError
          # best-effort; the namespace may not be fully loaded. Skip only this constant.
        end

        map
      end

      # Resolve a model class for a physical Typesense collection name.
      # Tries, in order: the normalized logical name, then a reverse alias lookup
      # over the registered logical names; returns nil when nothing matches.
      # @param physical [#to_s]
      # @param client [SearchEngine::Client, nil] defaults to +SearchEngine.client+
      #   (only consulted for the reverse alias lookup)
      # @return [Class, nil]
      def model_for_physical(physical, client: nil)
        phys = physical.to_s

        # model_for_logical consults models_map first, so classes found only by
        # the namespace scan are still preferred here.
        klass = model_for_logical(logical_from_physical(phys))
        return klass if klass

        # Reverse alias mapping: find a registered logical whose alias targets this physical
        reg = SearchEngine::Registry.mapping
        return nil if reg.nil? || reg.empty?

        cli = client || SearchEngine.client
        reg.each_key do |logical|
          target = cli.resolve_alias(logical)
          return reg[logical] if target && target.to_s == phys
        rescue StandardError
          # ignore and continue with the next registered logical name
        end

        nil
      end

      # Resolve a model class for a logical collection name.
      # Tries, in order: {.models_map}, +SearchEngine.collection_for+, then two
      # constant-name heuristics (SearchEngine::<Classified name> and a nested
      # form such as foo_bar_baz -> SearchEngine::FooBar::Baz).
      #
      # A nil (or raising) +collection_for+ no longer short-circuits the lookup;
      # the heuristics are still attempted in that case.
      # @param logical [#to_s]
      # @return [Class, nil]
      def model_for_logical(logical)
        name = logical.to_s

        mm = models_map
        return mm[name] if mm.key?(name)

        registered = begin
          SearchEngine.collection_for(name)
        rescue StandardError
          nil
        end
        return registered if registered

        classify_model(name) || nested_model(name)
      end

      # Convert a physical name into its logical base by stripping the timestamp
      # suffix when present. Delegates to Cascade first and falls back to a local
      # regex when Cascade raises or leaves the name unchanged.
      # @param name [#to_s]
      # @return [String]
      def logical_from_physical(name)
        s = name.to_s
        begin
          out = SearchEngine::Cascade.normalize_physical_to_logical(s)
          return out if out && !out.to_s.empty? && out.to_s != s
        rescue StandardError
          # fall through to regex fallback
        end

        # Regex fallback independent of Cascade implementation
        m = s.match(/\A(.+)_\d{8}_\d{6}_\d{3}\z/)
        return m[1].to_s if m && m[1]

        s
      end

      # List physical collections associated with a logical name.
      # Scans the server collections and keeps those whose normalized name equals
      # +logical+. Only when that scan finds nothing is the alias target looked
      # up, and reported with the document count from its live schema.
      # @param client [SearchEngine::Client]
      # @param logical [#to_s]
      # @return [Array<Array(String, Integer)>] pairs of [physical_name, num_documents]
      def physicals_for_logical(client, logical)
        wanted = logical.to_s
        pairs = Array(client.list_collections).map do |entry|
          [(entry[:name] || entry['name']).to_s, (entry[:num_documents] || entry['num_documents']).to_i]
        end

        filtered = pairs.select { |(physical, _num)| logical_from_physical(physical).to_s == wanted }
        # Skip the alias round-trip entirely when the scan already produced matches.
        return filtered unless filtered.empty?

        alias_target_pair(client, logical) || filtered
      end

      private

      # Logical collection name for a scanned model class: its +collection+ when
      # the class exposes one, otherwise a pluralized form of the class name.
      def logical_name_for(const)
        return const.collection.to_s if const.respond_to?(:collection)

        demod = const.name.split('::').last
        demod.respond_to?(:underscore) ? demod.underscore.pluralize : "#{demod.downcase}s"
      end

      # Single-element pair list for the alias target of +logical+, or nil when
      # there is no usable alias or the client calls fail.
      def alias_target_pair(client, logical)
        aliased = client.resolve_alias(logical)
        return nil if !aliased || aliased.to_s.strip.empty?

        # Retrieve the live schema to confirm presence and read its document count.
        schema = client.retrieve_collection_schema(aliased)
        count = schema && (schema[:num_documents] || schema['num_documents'])
        [[aliased.to_s, count.to_i]]
      rescue StandardError
        # ignore alias/schema issues
        nil
      end

      # Heuristic: nested modules, e.g. foo_bar_baz -> SearchEngine::FooBar::Baz.
      def nested_model(name)
        *namespace_parts, leaf = name.split('_')
        return nil if namespace_parts.empty?

        namespace = namespace_parts.map { |part| camelize(part) }.join
        base_descendant_or_nil(Object.const_get("SearchEngine::#{namespace}::#{camelize(leaf)}"))
      rescue StandardError
        nil
      end

      # Heuristic: SearchEngine::<Classify(name)>. Uses ActiveSupport's classify
      # when available, otherwise a plain camel-casing of the underscored name.
      def classify_model(name)
        klass_name = if defined?(ActiveSupport::Inflector)
                       ActiveSupport::Inflector.classify(name.to_s)
                     else
                       camelize(name)
                     end
        base_descendant_or_nil(Object.const_get("SearchEngine::#{klass_name}"))
      rescue StandardError
        nil
      end

      # Returns +const+ when it is a class descending from SearchEngine::Base, else nil.
      def base_descendant_or_nil(const)
        const if const.is_a?(Class) && const <= SearchEngine::Base
      end

      def camelize(token)
        if defined?(ActiveSupport::Inflector)
          ActiveSupport::Inflector.camelize(token.to_s)
        else
          token.to_s.split('_').map { |s| s[0].upcase + s[1..] }.join
        end
      end
    end
  end
end
@@ -0,0 +1,330 @@
1
+ # frozen_string_literal: true
2
+
3
module SearchEngine
  # Build and render an in-memory graph of Typesense collections and references.
  #
  # Public API:
  # - {.build(client:, style: :unicode, width: nil)} => Hash with nodes/edges plus ASCII and Mermaid renderings
  #
  # The renderer produces a Unicode box-drawing diagram by default and falls
  # back to a compact ASCII list when the layout exceeds the available width.
  module CollectionsGraph
    class << self
      # Build a collections graph and produce its text renderings.
      #
      # @param client [SearchEngine::Client]
      # @param style [Symbol] :unicode or :ascii (anything other than "ascii" is treated as :unicode)
      # @param width [Integer, nil] max diagram width; defaults to detected terminal width or 100
      # @return [Hash] { nodes:, edges:, cycles:, isolated:, ascii:, ascii_compact:, mermaid:, stats: { ... } }
      def build(client:, style: :unicode, width: nil)
        safe_style = style.to_s == 'ascii' ? :ascii : :unicode
        max_width = detect_width(width)

        # Build reverse graph via Cascade, then flatten it into forward edges
        reverse_graph = SearchEngine::Cascade.build_reverse_graph(client: client)
        edges = forward_edges_from_reverse(reverse_graph)

        nodes, source = fetch_nodes(client, edges)
        isolated = compute_isolated(nodes, edges)
        cycles = detect_immediate_cycles(edges)

        ascii, layout_mode = render_ascii(nodes, edges, width: max_width, style: safe_style)
        ascii_compact = render_ascii_compact(nodes, edges, style: safe_style)
        mermaid = render_mermaid(nodes, edges)

        stats = {
          nodes: nodes.size,
          edges: edges.size,
          cycles: cycles.size,
          isolated: isolated.size,
          layout: layout_mode,
          source: source
        }

        if defined?(SearchEngine::Instrumentation)
          SearchEngine::Instrumentation.instrument('search_engine.collections.graph', stats) {}
        end

        {
          nodes: nodes,
          edges: edges,
          cycles: cycles,
          isolated: isolated,
          ascii: ascii,
          ascii_compact: ascii_compact,
          mermaid: mermaid,
          stats: stats
        }
      end

      private

      # Resolve the diagram width. Order: explicit value, IO.console, ENV['COLUMNS'],
      # `stty size`; 100 when none yields a positive integer. Malformed values
      # (e.g. COLUMNS="90x") are skipped instead of raising.
      def detect_width(explicit)
        positive_integer(explicit) ||
          console_width ||
          positive_integer(ENV['COLUMNS']) ||
          stty_width ||
          100
      end

      # Strictly parse +value+ as a positive Integer; nil when it is not one.
      # Strings are read as base 10, so "010" means 10 rather than octal 8.
      def positive_integer(value)
        number = if value.is_a?(String)
                   Integer(value.strip, 10, exception: false)
                 else
                   Integer(value, exception: false)
                 end
        number if number&.positive?
      end

      # Column count reported by IO.console, or nil when unavailable.
      def console_width
        require 'io/console' # loaded lazily; only needed for width detection
        positive_integer(IO.console&.winsize&.[](1))
      rescue StandardError
        nil
      end

      # Column count reported by `stty size` ("rows cols"), or nil when unavailable.
      def stty_width
        parts = `stty size 2>/dev/null`.to_s.split
        positive_integer(parts.last) if parts.size >= 2
      rescue StandardError
        nil
      end

      # Transform reverse graph (target => [{referrer, local_key, foreign_key}, ...])
      # into a flat, de-duplicated, sorted forward edge list. Entries may use
      # symbol or string keys; entries without a referrer or target are dropped.
      def forward_edges_from_reverse(reverse_graph)
        return [] if reverse_graph.nil?

        edges = []
        reverse_graph.each do |target, referrers|
          to = target.to_s
          Array(referrers).each do |entry|
            from = (entry[:referrer] || entry['referrer']).to_s
            next if from.empty? || to.empty?

            edges << {
              from: from,
              to: to,
              local_key: (entry[:local_key] || entry['local_key']).to_s,
              foreign_key: (entry[:foreign_key] || entry['foreign_key']).to_s
            }
          end
        end
        # Deduplicate identical edges deterministically
        edges.uniq.sort_by { |e| [e[:from], e[:to], e[:local_key], e[:foreign_key]] }
      end

      # Determine node set from Typesense (preferred) or fallback sources.
      # Returns [names, source] where source is :typesense, :registry or :inferred.
      def fetch_nodes(client, edges)
        begin
          list = Array(client.list_collections)
          return [list.map { |c| (c[:name] || c['name']).to_s }.reject(&:empty?).uniq.sort, :typesense]
        rescue StandardError
          # ignore; fallback to registry/edges
        end

        if defined?(SearchEngine::Registry)
          begin
            reg = SearchEngine::Registry.mapping || {}
            keys = reg.keys.map(&:to_s)
            return [keys.uniq.sort, :registry] unless keys.empty?
          rescue StandardError
            # ignore
          end
        end

        # Last resort: every name that appears on either end of an edge.
        [edges.flat_map { |e| [e[:from], e[:to]] }.uniq.sort, :inferred]
      end

      # Nodes that do not appear on either end of any edge.
      def compute_isolated(nodes, edges)
        touched = edges.flat_map { |e| [e[:from], e[:to]] }.uniq
        (nodes - touched).sort
      end

      # Immediate cycles only: A→B and B→A pairs (a self-reference yields [A, A]).
      def detect_immediate_cycles(edges)
        targets_by_source = edges.each_with_object({}) do |e, h|
          (h[e[:from]] ||= []) << e[:to]
        end
        pairs = []
        targets_by_source.each do |a, outs|
          outs.each do |b|
            pairs << [a, b].sort if targets_by_source[b]&.include?(a)
          end
        end
        pairs.uniq.sort
      end

      # Render boxes-per-edge when it fits; otherwise return compact list.
      # Returns [string, layout_mode] with layout_mode :layered or :compact.
      def render_ascii(nodes, edges, width:, style: :unicode)
        charset = charset_for(style)
        header = "Collections Graph (nodes: #{nodes.size}, edges: #{edges.size})"
        lines = [header, '']

        # Try to render each edge as a 3-line pair of boxes with a labeled connector.
        edges.each do |edge|
          block = build_edge_block(edge, charset, width)
          # A single edge that does not fit switches the whole output to the compact list.
          return [render_ascii_compact(nodes, edges, style: style, header: header), :compact] if block.nil?

          lines.concat(block)
        end

        # Add isolated and cycles summary
        iso = compute_isolated(nodes, edges)
        lines.push('', "Isolated: #{iso.join(', ')}") unless iso.empty?
        lines << cycles_line(edges)

        [lines.join("\n"), :layered]
      end

      # Compact grouped list renderer suitable for narrow terminals.
      def render_ascii_compact(nodes, edges, style: :unicode, header: nil)
        charset = charset_for(style)
        header ||= "Collections Graph (nodes: #{nodes.size}, edges: #{edges.size})"
        lines = [header]

        by_from = edges.group_by { |e| e[:from] }
        by_from.keys.sort.each do |from|
          lines << "- #{from}"
          by_from[from].sort_by { |ee| [ee[:to], ee[:local_key], ee[:foreign_key]] }.each do |ee|
            # Key labels in the compact list always use the ASCII arrow.
            via = label_for(ee, charset, ascii_arrow: true)
            line = " #{charset[:branch]} #{charset[:arrow]} #{ee[:to]}"
            line += " [via #{via}]" unless via.empty?
            lines << line
          end
        end

        iso = compute_isolated(nodes, edges)
        lines << "Isolated: #{iso.join(', ')}" unless iso.empty?
        lines << cycles_line(edges)

        lines.join("\n")
      end

      # Summary line listing immediate cycles; shared by both ASCII renderers.
      def cycles_line(edges)
        cycles = detect_immediate_cycles(edges)
        return 'Cycles: none' if cycles.empty?

        "Cycles: #{cycles.map { |a, b| "#{a}↔#{b}" }.join(', ')}"
      end

      # Human-readable key label for an edge: "local -> foreign", whichever single
      # key is present, or '' when both are blank.
      def label_for(edge, charset, ascii_arrow: false)
        lk = edge[:local_key].to_s
        fk = edge[:foreign_key].to_s
        return fk if lk.empty? # also covers the both-empty case (returns '')
        return lk if fk.empty?

        "#{lk} #{ascii_arrow ? '->' : charset[:thin_arrow]} #{fk}"
      end

      def charset_for(style)
        if style == :ascii
          { tl: '+', tr: '+', bl: '+', br: '+', v: '|', h: '-', arrow: '>', thin_arrow: '->', branch: '+-' }
        else
          { tl: '┌', tr: '┐', bl: '└', br: '┘', v: '│', h: '─', arrow: '▶', thin_arrow: '→', branch: '└─' }
        end
      end

      # Build a single-line "boxed" label: ┌ name ┐ (newlines in the name become spaces).
      # NOTE(review): no caller in this module; kept for backward compatibility.
      def build_single_line_box(name, charset)
        s = name.to_s
        inner = " #{s} "
        charset[:tl] + inner.tr("\n", ' ') + charset[:tr]
      end

      # Build 3-line box (top, middle, bottom) for a name.
      def build_box_lines(name, charset)
        content = " #{name} "
        rule = charset[:h] * content.length
        [
          charset[:tl] + rule + charset[:tr],
          charset[:v] + content + charset[:v],
          charset[:bl] + rule + charset[:br]
        ]
      end

      # Build the block for an edge (three box lines plus a blank separator),
      # or nil when the middle line would exceed +width+.
      def build_edge_block(edge, charset, width)
        lt, lm, lb = build_box_lines(edge[:from], charset)
        rt, rm, rb = build_box_lines(edge[:to], charset)
        label = label_for(edge, charset)
        connector_mid = if label.empty?
                          " #{charset[:h] * 3}#{charset[:arrow]} "
                        else
                          " #{charset[:h] * 2} via #{label} #{charset[:h]}#{charset[:arrow]} "
                        end

        return nil if lm.length + connector_mid.length + rm.length > width

        gap = ' ' * connector_mid.length
        [(lt + gap + rt), (lm + connector_mid + rm), (lb + gap + rb), '']
      end

      # Mermaid flowchart (LR) generator with labeled edges and node declarations.
      # When edges exist only the names on those edges are declared; +nodes+ is
      # used only when there are no edges at all.
      # @return [String]
      def render_mermaid(nodes, edges)
        # Prefer logical names from edges to avoid physical (timestamped) names.
        edge_names = edges.flat_map { |e| [e[:from].to_s, e[:to].to_s] }.reject(&:empty?).uniq
        names = edge_names.empty? ? Array(nodes).map(&:to_s) : edge_names

        # Stable ids for all names we intend to render.
        ids = {}
        names.each_with_index { |name, idx| ids[name] = sanitize_mermaid_id("C_#{idx}_#{name}") }

        lines = ['flowchart LR']

        # Declare every rendered name up front so each id carries a quoted label.
        names.each { |name| lines << " #{ids[name]}[\"#{mermaid_escape(name)}\"]" }

        # Edges with labels (via ...), using logical names.
        edges.each do |e|
          from = ids[e[:from].to_s]
          to = ids[e[:to].to_s]
          next if from.nil? || to.nil?

          via = mermaid_edge_label(e)
          lines << if via.empty?
                     " #{from} --> #{to}"
                   else
                     " #{from} -- \"#{mermaid_escape(via)}\" --> #{to}"
                   end
        end

        lines.join("\n")
      end

      # Escape text for a double-quoted Mermaid label. Mermaid does not honor
      # backslash escapes; a literal double quote is written as the entity #quot;.
      def mermaid_escape(text)
        text.to_s.gsub('"', '#quot;')
      end

      # Reduce a name to characters valid in a Mermaid node id.
      def sanitize_mermaid_id(name)
        s = name.to_s.gsub(/[^a-zA-Z0-9_]/, '_')
        s = "C_#{s}" if s.empty? || s[0].match?(/[^a-zA-Z_]/)
        s
      end

      # Edge label for Mermaid output; same rules as label_for with an ASCII arrow.
      def mermaid_edge_label(edge)
        label_for(edge, {}, ascii_arrow: true)
      end
    end
  end
end
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
module SearchEngine
  # Immutable, deterministic wrapper for compiled Typesense params.
  #
  # Guarantees:
  # - Canonicalized symbol keys and lexicographic key ordering at every hash level
  # - Array order preserved as provided
  # - Deep frozen internal representation
  # - Stable to_h and to_json across Ruby versions/runs
  #
  # Hashes, Arrays and unfrozen Strings from the input are copied before being
  # frozen, so constructing an instance does not freeze those caller-owned
  # objects. Other leaf values are stored as given and frozen in place.
  #
  # Public surface mirrors a minimal read-only Hash API used in the codebase.
  # New instances should be constructed from plain Hashes only.
  class CompiledParams
    EMPTY_HASH = {}.freeze
    EMPTY_ARRAY = [].freeze

    # @param value [Hash, #to_h] anything else is treated as an empty Hash
    def initialize(value)
      input = if value.is_a?(Hash)
                value
              elsif value.respond_to?(:to_h)
                value.to_h
              else
                EMPTY_HASH
              end
      @canonical = canonicalize_hash(input)
      deep_freeze!(@canonical)
      freeze
    end

    # Wrap +value+ unless it already is a CompiledParams, in which case the same
    # instance is returned.
    # @param value [Object]
    # @return [SearchEngine::CompiledParams]
    def self.from(value)
      value.is_a?(self) ? value : new(value)
    end

    # Return the canonical, deeply frozen Hash (symbol keys, sorted order).
    # @return [Hash]
    def to_h
      @canonical
    end

    # Implicit Hash conversion for APIs like Hash#merge expecting #to_hash.
    # @return [Hash]
    alias_method :to_hash, :to_h

    # Deterministic JSON serialization using the canonical ordered Hash.
    # Generator arguments are accepted for interface compatibility and ignored.
    # @return [String]
    def to_json(*_args)
      JSON.generate(@canonical)
    end

    # Read-style Hash helpers used in callers ---------------------------------

    # @param key [Object] converted with #to_sym when possible
    # @return [Object]
    def [](key)
      @canonical[key_to_sym(key)]
    end

    # @param key [Object] converted with #to_sym when possible
    # @return [Boolean]
    def key?(key)
      @canonical.key?(key_to_sym(key))
    end

    # @return [Array<Symbol>]
    def keys
      @canonical.keys
    end

    # @yieldparam key [Symbol]
    # @yieldparam value [Object]
    # @return [Enumerator, Hash] an Enumerator without a block; the canonical Hash otherwise
    def each(&block)
      return enum_for(:each) unless block_given?

      @canonical.each(&block)
    end

    # Equality based on canonical Hash content.
    #
    # +nil+ is never equal (even though +nil.to_h+ is an empty Hash), and objects
    # whose +to_h+ conversion fails (e.g. an Array of non-pairs) compare as
    # unequal instead of raising.
    # @param other [Object]
    # @return [Boolean]
    def ==(other)
      case other
      when CompiledParams
        other.to_h == @canonical
      when nil
        false
      else
        other.respond_to?(:to_h) && other.to_h == @canonical
      end
    rescue TypeError, ArgumentError
      false
    end

    # Strict equality used for Hash keys / Set membership: only another
    # CompiledParams with the same canonical content is eql.
    # @param other [Object]
    # @return [Boolean]
    def eql?(other)
      other.is_a?(CompiledParams) && @canonical.eql?(other.to_h)
    end

    # Content-based hash code, consistent with {#eql?}.
    # @return [Integer]
    def hash
      @canonical.hash
    end

    private

    def key_to_sym(k)
      k.respond_to?(:to_sym) ? k.to_sym : k
    end

    def canonicalize_hash(hash)
      # Normalize keys to symbols; sort by key.to_s; recurse into values
      sorted_keys = hash.keys.sort_by(&:to_s)
      sorted_keys.each_with_object({}) do |k, acc|
        acc[key_to_sym(k)] = canonicalize_value(hash[k])
      end
    end

    def canonicalize_array(array)
      return EMPTY_ARRAY if array.empty?

      array.map { |v| canonicalize_value(v) }
    end

    def canonicalize_value(value)
      case value
      when Hash
        canonicalize_hash(value)
      when Array
        canonicalize_array(value)
      when String
        # Copy mutable strings so deep_freeze! never freezes the caller's object.
        value.frozen? ? value : value.dup
      else
        value
      end
    end

    def deep_freeze!(obj)
      case obj
      when Hash
        obj.each_value { |v| deep_freeze!(v) }
        obj.freeze
      when Array
        obj.each { |v| deep_freeze!(v) }
        obj.freeze
      else
        obj.freeze if obj.respond_to?(:freeze)
      end
    end
  end
end