search-engine-for-typesense 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +148 -0
- data/app/search_engine/search_engine/app_info.rb +11 -0
- data/app/search_engine/search_engine/index_partition_job.rb +170 -0
- data/lib/generators/search_engine/install/install_generator.rb +20 -0
- data/lib/generators/search_engine/install/templates/initializer.rb.tt +230 -0
- data/lib/generators/search_engine/model/model_generator.rb +86 -0
- data/lib/generators/search_engine/model/templates/model.rb.tt +12 -0
- data/lib/search-engine-for-typesense.rb +12 -0
- data/lib/search_engine/active_record_syncable.rb +247 -0
- data/lib/search_engine/admin/stopwords.rb +125 -0
- data/lib/search_engine/admin/synonyms.rb +125 -0
- data/lib/search_engine/admin.rb +12 -0
- data/lib/search_engine/ast/and.rb +52 -0
- data/lib/search_engine/ast/binary_op.rb +75 -0
- data/lib/search_engine/ast/eq.rb +19 -0
- data/lib/search_engine/ast/group.rb +18 -0
- data/lib/search_engine/ast/gt.rb +12 -0
- data/lib/search_engine/ast/gte.rb +12 -0
- data/lib/search_engine/ast/in.rb +28 -0
- data/lib/search_engine/ast/lt.rb +12 -0
- data/lib/search_engine/ast/lte.rb +12 -0
- data/lib/search_engine/ast/matches.rb +55 -0
- data/lib/search_engine/ast/node.rb +176 -0
- data/lib/search_engine/ast/not_eq.rb +13 -0
- data/lib/search_engine/ast/not_in.rb +24 -0
- data/lib/search_engine/ast/or.rb +52 -0
- data/lib/search_engine/ast/prefix.rb +51 -0
- data/lib/search_engine/ast/raw.rb +41 -0
- data/lib/search_engine/ast/unary_op.rb +43 -0
- data/lib/search_engine/ast.rb +101 -0
- data/lib/search_engine/base/creation.rb +727 -0
- data/lib/search_engine/base/deletion.rb +80 -0
- data/lib/search_engine/base/display_coercions.rb +36 -0
- data/lib/search_engine/base/hydration.rb +312 -0
- data/lib/search_engine/base/index_maintenance/cleanup.rb +202 -0
- data/lib/search_engine/base/index_maintenance/lifecycle.rb +251 -0
- data/lib/search_engine/base/index_maintenance/schema.rb +117 -0
- data/lib/search_engine/base/index_maintenance.rb +459 -0
- data/lib/search_engine/base/indexing_dsl.rb +255 -0
- data/lib/search_engine/base/joins.rb +479 -0
- data/lib/search_engine/base/model_dsl.rb +472 -0
- data/lib/search_engine/base/presets.rb +43 -0
- data/lib/search_engine/base/pretty_printer.rb +315 -0
- data/lib/search_engine/base/relation_delegation.rb +42 -0
- data/lib/search_engine/base/scopes.rb +113 -0
- data/lib/search_engine/base/updating.rb +92 -0
- data/lib/search_engine/base.rb +38 -0
- data/lib/search_engine/bulk.rb +284 -0
- data/lib/search_engine/cache.rb +33 -0
- data/lib/search_engine/cascade.rb +531 -0
- data/lib/search_engine/cli/doctor.rb +631 -0
- data/lib/search_engine/cli/support.rb +217 -0
- data/lib/search_engine/cli.rb +222 -0
- data/lib/search_engine/client/http_adapter.rb +63 -0
- data/lib/search_engine/client/request_builder.rb +92 -0
- data/lib/search_engine/client/services/base.rb +74 -0
- data/lib/search_engine/client/services/collections.rb +161 -0
- data/lib/search_engine/client/services/documents.rb +214 -0
- data/lib/search_engine/client/services/operations.rb +152 -0
- data/lib/search_engine/client/services/search.rb +190 -0
- data/lib/search_engine/client/services.rb +29 -0
- data/lib/search_engine/client.rb +765 -0
- data/lib/search_engine/client_options.rb +20 -0
- data/lib/search_engine/collection_resolver.rb +191 -0
- data/lib/search_engine/collections_graph.rb +330 -0
- data/lib/search_engine/compiled_params.rb +143 -0
- data/lib/search_engine/compiler.rb +383 -0
- data/lib/search_engine/config/observability.rb +27 -0
- data/lib/search_engine/config/presets.rb +92 -0
- data/lib/search_engine/config/selection.rb +16 -0
- data/lib/search_engine/config/typesense.rb +48 -0
- data/lib/search_engine/config/validators.rb +97 -0
- data/lib/search_engine/config.rb +917 -0
- data/lib/search_engine/console_helpers.rb +130 -0
- data/lib/search_engine/deletion.rb +103 -0
- data/lib/search_engine/dispatcher.rb +125 -0
- data/lib/search_engine/dsl/parser.rb +582 -0
- data/lib/search_engine/engine.rb +167 -0
- data/lib/search_engine/errors.rb +290 -0
- data/lib/search_engine/filters/sanitizer.rb +189 -0
- data/lib/search_engine/hydration/materializers.rb +808 -0
- data/lib/search_engine/hydration/selection_context.rb +96 -0
- data/lib/search_engine/indexer/batch_planner.rb +76 -0
- data/lib/search_engine/indexer/bulk_import.rb +626 -0
- data/lib/search_engine/indexer/import_dispatcher.rb +198 -0
- data/lib/search_engine/indexer/retry_policy.rb +103 -0
- data/lib/search_engine/indexer.rb +747 -0
- data/lib/search_engine/instrumentation.rb +308 -0
- data/lib/search_engine/joins/guard.rb +202 -0
- data/lib/search_engine/joins/resolver.rb +95 -0
- data/lib/search_engine/logging/color.rb +78 -0
- data/lib/search_engine/logging/format_helpers.rb +92 -0
- data/lib/search_engine/logging/partition_progress.rb +53 -0
- data/lib/search_engine/logging_subscriber.rb +388 -0
- data/lib/search_engine/mapper.rb +785 -0
- data/lib/search_engine/multi.rb +286 -0
- data/lib/search_engine/multi_result.rb +186 -0
- data/lib/search_engine/notifications/compact_logger.rb +675 -0
- data/lib/search_engine/observability.rb +162 -0
- data/lib/search_engine/operations.rb +58 -0
- data/lib/search_engine/otel.rb +227 -0
- data/lib/search_engine/partitioner.rb +128 -0
- data/lib/search_engine/ranking_plan.rb +118 -0
- data/lib/search_engine/registry.rb +158 -0
- data/lib/search_engine/relation/compiler.rb +711 -0
- data/lib/search_engine/relation/deletion.rb +37 -0
- data/lib/search_engine/relation/dsl/filters.rb +624 -0
- data/lib/search_engine/relation/dsl/selection.rb +240 -0
- data/lib/search_engine/relation/dsl.rb +903 -0
- data/lib/search_engine/relation/dx/dry_run.rb +59 -0
- data/lib/search_engine/relation/dx/friendly_where.rb +24 -0
- data/lib/search_engine/relation/dx.rb +231 -0
- data/lib/search_engine/relation/materializers.rb +118 -0
- data/lib/search_engine/relation/options.rb +138 -0
- data/lib/search_engine/relation/state.rb +274 -0
- data/lib/search_engine/relation/updating.rb +44 -0
- data/lib/search_engine/relation.rb +623 -0
- data/lib/search_engine/result.rb +664 -0
- data/lib/search_engine/schema.rb +1083 -0
- data/lib/search_engine/sources/active_record_source.rb +185 -0
- data/lib/search_engine/sources/base.rb +62 -0
- data/lib/search_engine/sources/lambda_source.rb +55 -0
- data/lib/search_engine/sources/sql_source.rb +196 -0
- data/lib/search_engine/sources.rb +71 -0
- data/lib/search_engine/stale_rules.rb +160 -0
- data/lib/search_engine/test/minitest_assertions.rb +57 -0
- data/lib/search_engine/test/offline_client.rb +134 -0
- data/lib/search_engine/test/rspec_matchers.rb +77 -0
- data/lib/search_engine/test/stub_client.rb +201 -0
- data/lib/search_engine/test.rb +66 -0
- data/lib/search_engine/test_autoload.rb +8 -0
- data/lib/search_engine/update.rb +35 -0
- data/lib/search_engine/version.rb +7 -0
- data/lib/search_engine.rb +332 -0
- data/lib/tasks/search_engine.rake +501 -0
- data/lib/tasks/search_engine_doctor.rake +16 -0
- metadata +225 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
|
|
4
|
+
# Helpers for constructing URL-level client options.
|
|
5
|
+
#
|
|
6
|
+
# These options should not appear in request bodies. They are derived from
|
|
7
|
+
# {SearchEngine.config}. This module is intentionally minimal for M0 and will
|
|
8
|
+
# be used by the client implementation in future milestones.
|
|
9
|
+
module ClientOptions
|
|
10
|
+
# Derive the URL-level cache options from a configuration object.
#
# Reads +use_cache+ and +cache_ttl_s+ from +config+; the flag is normalized to
# a strict boolean and a missing TTL is treated as 0 before Integer coercion.
# @param config [SearchEngine::Config]
# @return [Hash] keys: :use_cache, :cache_ttl
def self.url_options_from_config(config = SearchEngine.config)
  caching_enabled = !!config.use_cache
  ttl_seconds = config.cache_ttl_s || 0

  { use_cache: caching_enabled, cache_ttl: Integer(ttl_seconds) }
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'search_engine/registry'
|
|
4
|
+
require 'search_engine/cascade'
|
|
5
|
+
|
|
6
|
+
module SearchEngine
|
|
7
|
+
# Helper utilities to resolve collection models from Typesense collection names.
|
|
8
|
+
#
|
|
9
|
+
# Public API:
|
|
10
|
+
# - {.model_for_physical(physical, client: nil)} => Class or nil
|
|
11
|
+
# - {.model_for_logical(logical)} => Class or nil
|
|
12
|
+
# - {.physicals_for_logical(client, logical)} => Array<[String, Integer]>
|
|
13
|
+
module CollectionResolver
|
|
14
|
+
class << self
|
|
15
|
+
# Build a map of logical collection names => model classes.
#
# Registry entries are copied first (keys stringified) and always win. The
# SearchEngine namespace is then scanned for classes descending from Base,
# which only fill in names the registry did not provide.
#
# The scan is best-effort per constant: a constant that fails to resolve or
# to report its collection is skipped on its own, so one bad constant no
# longer hides every model that happens to be scanned after it.
# @return [Hash{String=>Class}]
def models_map
  map = {}
  reg = SearchEngine::Registry.mapping
  reg.each { |k, v| map[k.to_s] = v } if reg && !reg.empty?

  constant_names = begin
    SearchEngine.constants
  rescue StandardError
    [] # namespace not inspectable; keep registry entries only
  end

  constant_names.each do |c|
    const = SearchEngine.const_get(c)
    next unless const.is_a?(Class)
    next unless const.ancestors.include?(SearchEngine::Base)

    logical = if const.respond_to?(:collection)
                const.collection.to_s
              else
                demod = const.name.split('::').last
                # Inflector-style helpers when available, naive plural otherwise
                demod.respond_to?(:underscore) ? demod.underscore.pluralize : "#{demod.downcase}s"
              end
    map[logical] ||= const
  rescue StandardError
    # best-effort; skip this constant and keep scanning the rest
  end

  map
end
|
|
44
|
+
|
|
45
|
+
# Find the model class behind a physical Typesense collection name.
#
# Lookup order: the logical base derived from the physical name (via
# models_map, then model_for_logical); otherwise a reverse alias scan over the
# registry, returning the entry whose alias currently targets +physical+.
# @param physical [#to_s]
# @param client [SearchEngine::Client, nil] defaults to SearchEngine.client for the alias scan
# @return [Class, nil] nil when nothing matches
def model_for_physical(physical, client: nil)
  phys = physical.to_s
  base = logical_from_physical(phys)

  # models_map first so classes that have not invoked the collection macro still resolve
  direct = models_map[base] || model_for_logical(base)
  return direct if direct

  registry = SearchEngine::Registry.mapping
  return nil if registry.nil? || registry.empty?

  resolver = client || SearchEngine.client
  registry.each_key do |logical|
    begin
      target = resolver.resolve_alias(logical)
      return registry[logical] if target && target.to_s == phys
    rescue StandardError
      # alias lookup failed for this logical name; try the next one
      next
    end
  end

  nil
end
|
|
74
|
+
|
|
75
|
+
# Resolve a model class for a logical collection name.
#
# Tries, in order:
# 1. models_map (registry plus namespace scan)
# 2. SearchEngine.collection_for(name); errors are swallowed, and a nil
#    result now falls through instead of ending the lookup early
# 3. classify_model(name), i.e. SearchEngine::<Classify(name)>
# 4. nested modules, e.g. foo_bar_baz -> SearchEngine::FooBar::Baz
# @param logical [#to_s]
# @return [Class, nil]
def model_for_logical(logical)
  name = logical.to_s

  known = models_map
  return known[name] if known.key?(name)

  begin
    resolved = SearchEngine.collection_for(name)
    # Only a real hit ends the lookup; nil continues to the heuristics below.
    return resolved if resolved
  rescue StandardError
    # fall through to the naming heuristics
  end

  classified = classify_model(name)
  return classified if classified

  parts = name.split('_')
  return nil if parts.size < 2

  leaf = parts.pop
  namespace = parts.map { |part| camelize(part) }.join
  begin
    const = Object.const_get("SearchEngine::#{namespace}::#{camelize(leaf)}")
    return const if const.is_a?(Class) && const.ancestors.include?(SearchEngine::Base)
  rescue StandardError
    # candidate constant does not exist or failed to load
  end

  nil
end
|
|
111
|
+
|
|
112
|
+
# Map a physical collection name to its logical base name.
#
# Cascade.normalize_physical_to_logical is consulted first and its answer is
# used only when it is non-empty and differs from the input. Otherwise (or
# when Cascade raises) a trailing _YYYYMMDD_HHMMSS_NNN style suffix is
# stripped by regex. Names matching neither come back unchanged.
# @param name [#to_s]
# @return [String]
def logical_from_physical(name)
  physical = name.to_s

  begin
    normalized = SearchEngine::Cascade.normalize_physical_to_logical(physical)
    return normalized unless !normalized || normalized.to_s.empty? || normalized.to_s == physical
  rescue StandardError
    # fall through to regex fallback
  end

  # Regex fallback independent of Cascade implementation
  physical[/\A(.+)_\d{8}_\d{6}_\d{3}\z/, 1] || physical
end
|
|
130
|
+
|
|
131
|
+
# List physical collections associated with a logical name.
#
# The server's collection list is scanned and every physical whose normalized
# base equals +logical+ is returned with its document count. Only when the
# scan finds nothing is the alias consulted: if it resolves to a non-blank
# target, that target's schema is fetched and a single pair is returned using
# the schema's num_documents (0 when absent). Alias and schema errors are
# ignored, in which case the (empty) scan result is returned.
#
# The alias is not resolved when the scan already produced matches, which
# avoids a request whose result would be discarded.
# @param client [SearchEngine::Client]
# @param logical [#to_s]
# @return [Array<Array(String, Integer)>]
def physicals_for_logical(client, logical)
  wanted = logical.to_s

  pairs = Array(client.list_collections).map do |h|
    [(h[:name] || h['name']).to_s, (h[:num_documents] || h['num_documents']).to_i]
  end
  matching = pairs.select { |(physical, _count)| logical_from_physical(physical).to_s == wanted }
  return matching unless matching.empty?

  begin
    aliased = client.resolve_alias(logical)
    if aliased && !aliased.to_s.strip.empty?
      schema = client.retrieve_collection_schema(aliased)
      return [[aliased.to_s, (schema && (schema[:num_documents] || schema['num_documents'])).to_i]]
    end
  rescue StandardError
    # ignore alias/schema issues
  end

  matching
end
|
|
164
|
+
|
|
165
|
+
private
|
|
166
|
+
|
|
167
|
+
# Resolve SearchEngine::<ClassName> for a logical name.
#
# The class name comes from ActiveSupport::Inflector.classify when that is
# loaded, otherwise from capitalizing each underscore-separated segment.
# Returns the constant only when it is a Class descending from
# SearchEngine::Base; any StandardError during resolution yields nil.
# @param name [#to_s]
# @return [Class, nil]
def classify_model(name)
  raw = name.to_s
  class_name =
    if defined?(ActiveSupport::Inflector)
      ActiveSupport::Inflector.classify(raw)
    else
      raw.split('_').map { |part| part[0].upcase + part[1..] }.join
    end

  resolved = Object.const_get("SearchEngine::#{class_name}")
  return nil unless resolved.is_a?(Class)

  resolved.ancestors.include?(SearchEngine::Base) ? resolved : nil
rescue StandardError
  nil
end
|
|
181
|
+
|
|
182
|
+
# CamelCase a token such as "foo_bar" => "FooBar".
#
# Delegates to ActiveSupport::Inflector.camelize when it is loaded. The
# plain-Ruby fallback upcases the first character of each underscore-separated
# segment and leaves the rest untouched. Empty segments (leading or doubled
# underscores) are passed through instead of raising NoMethodError on nil.
# @param token [#to_s]
# @return [String]
def camelize(token)
  text = token.to_s
  return ActiveSupport::Inflector.camelize(text) if defined?(ActiveSupport::Inflector)

  text.split('_').map { |part| part.empty? ? part : part[0].upcase + part[1..] }.join
end
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
end
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SearchEngine
|
|
4
|
+
# Build and render an in-memory graph of Typesense collections and references.
|
|
5
|
+
#
|
|
6
|
+
# Public API:
|
|
7
|
+
# - {.build(client:, style: :unicode, width: nil)} => Hash with nodes/edges and rendered ASCII
|
|
8
|
+
#
|
|
9
|
+
# The renderer produces a Unicode box-drawing diagram by default and falls
|
|
10
|
+
# back to a compact ASCII list when the layout exceeds the available width.
|
|
11
|
+
module CollectionsGraph
|
|
12
|
+
class << self
|
|
13
|
+
# Assemble the collections graph and every rendering derived from it.
#
# Edges come from Cascade.build_reverse_graph; nodes come from fetch_nodes.
# The summary stats are also emitted through SearchEngine::Instrumentation
# (event 'search_engine.collections.graph') when that constant is defined.
#
# @param client [SearchEngine::Client]
# @param style [Symbol] :unicode or :ascii (anything other than "ascii" is treated as :unicode)
# @param width [Integer, nil] max diagram width; resolved by detect_width
# @return [Hash] { nodes:, edges:, cycles:, isolated:, ascii:, ascii_compact:, mermaid:, stats: { ... } }
def build(client:, style: :unicode, width: nil)
  glyph_style = :unicode
  glyph_style = :ascii if style.to_s == 'ascii'
  width_limit = detect_width(width)

  # Reverse graph from Typesense (with registry fallback), flattened to forward edges
  edges = forward_edges_from_reverse(SearchEngine::Cascade.build_reverse_graph(client: client))

  nodes, source = fetch_nodes(client, edges)
  isolated = compute_isolated(nodes, edges)
  cycles = detect_immediate_cycles(edges)

  ascii, layout_mode = render_ascii(nodes, edges, width: width_limit, style: glyph_style)

  result = {
    nodes: nodes,
    edges: edges,
    cycles: cycles,
    isolated: isolated,
    ascii: ascii,
    ascii_compact: render_ascii_compact(nodes, edges, style: glyph_style),
    mermaid: render_mermaid(nodes, edges)
  }
  result[:stats] = {
    nodes: nodes.size,
    edges: edges.size,
    cycles: cycles.size,
    isolated: isolated.size,
    layout: layout_mode,
    source: source
  }

  if defined?(SearchEngine::Instrumentation)
    SearchEngine::Instrumentation.instrument('search_engine.collections.graph', result[:stats]) {}
  end

  result
end
|
|
59
|
+
|
|
60
|
+
private
|
|
61
|
+
|
|
62
|
+
# Resolve the maximum diagram width.
#
# Sources are tried in order and the first positive integer wins: the explicit
# argument, IO.console's window size, ENV['COLUMNS'], then `stty size`.
# Defaults to 100 when none of them yields a usable value.
#
# Malformed values (e.g. COLUMNS="80x") are skipped instead of raising: each
# candidate is parsed once with a non-raising Integer() rather than being
# checked with the lenient #to_i and then converted with the strict Integer().
# @param explicit [Integer, String, nil]
# @return [Integer]
def detect_width(explicit)
  from_arg = coerce_positive_width(explicit)
  return from_arg if from_arg

  begin
    require 'io/console'
    from_console = coerce_positive_width(IO.console&.winsize&.[](1))
    return from_console if from_console
  rescue StandardError, LoadError
    # ignore; fallback paths below
  end

  from_env = coerce_positive_width(ENV['COLUMNS'])
  return from_env if from_env

  begin
    parts = `stty size 2>/dev/null`.to_s.split
    # `stty size` prints "<rows> <cols>"
    from_stty = parts.size >= 2 ? coerce_positive_width(parts.last) : nil
    return from_stty if from_stty
  rescue StandardError
    # ignore
  end

  100
end

# Parse +value+ as a positive Integer, or return nil when it is nil,
# malformed, or not positive. Strings are read as base 10 so that "080"
# is 80 rather than an invalid octal literal.
def coerce_positive_width(value)
  parsed = if value.is_a?(String)
             Integer(value, 10, exception: false)
           else
             Integer(value, exception: false)
           end
  parsed if parsed&.positive?
end
|
|
87
|
+
|
|
88
|
+
# Flatten a reverse graph into a forward edge list.
#
# Input shape: target => [{ referrer, local_key, foreign_key }, ...] with
# symbol or string keys. Each entry becomes { from:, to:, local_key:,
# foreign_key: } with every value stringified; entries whose referrer or
# target is blank are dropped. The result is de-duplicated and sorted by
# [from, to, local_key, foreign_key] so the output is deterministic.
def forward_edges_from_reverse(reverse_graph)
  collected = reverse_graph.each_with_object([]) do |(target, referrers), acc|
    Array(referrers).each do |ref|
      edge = {
        from: (ref[:referrer] || ref['referrer']).to_s,
        to: target.to_s,
        local_key: (ref[:local_key] || ref['local_key']).to_s,
        foreign_key: (ref[:foreign_key] || ref['foreign_key']).to_s
      }
      acc << edge unless edge[:from].empty? || edge[:to].empty?
    end
  end

  collected.uniq.sort_by { |edge| edge.values_at(:from, :to, :local_key, :foreign_key) }
end
|
|
106
|
+
|
|
107
|
+
# Work out the node names and report where they came from.
#
# Sources, in order:
# 1. client.list_collections => [names, :typesense] (even when the list is empty)
# 2. SearchEngine::Registry.mapping keys => [names, :registry] (only when non-empty)
# 3. endpoints of the given edges => [names, :inferred]
# A source that raises StandardError is skipped. Names are unique and sorted.
# @return [Array(Array<String>, Symbol)]
def fetch_nodes(client, edges)
  begin
    live = Array(client.list_collections).map { |entry| (entry[:name] || entry['name']).to_s }
    return [live.reject(&:empty?).uniq.sort, :typesense]
  rescue StandardError
    # ignore; fallback to registry/edges
  end

  if defined?(SearchEngine::Registry)
    begin
      registered = (SearchEngine::Registry.mapping || {}).keys.map(&:to_s)
      return [registered.uniq.sort, :registry] unless registered.empty?
    rescue StandardError
      # ignore
    end
  end

  endpoints = edges.map { |edge| edge[:from] } + edges.map { |edge| edge[:to] }
  [endpoints.uniq.sort, :inferred]
end
|
|
131
|
+
|
|
132
|
+
# Nodes that appear in no edge, neither as :from nor as :to, sorted.
def compute_isolated(nodes, edges)
  connected = edges.each_with_object([]) { |edge, seen| seen << edge[:from] << edge[:to] }
  (nodes - connected).sort
end
|
|
136
|
+
|
|
137
|
+
# Find two-node cycles only: pairs where both A→B and B→A exist.
# A self-referencing edge (A→A) is reported as [A, A].
# @return [Array<Array>] unique, sorted pairs; each pair is itself sorted
def detect_immediate_cycles(edges)
  targets_by_source = edges
                      .group_by { |edge| edge[:from] }
                      .transform_values { |group| group.map { |edge| edge[:to] } }

  mutual = targets_by_source.flat_map do |source, targets|
    targets
      .select { |target| targets_by_source[target]&.include?(source) }
      .map { |target| [source, target].sort }
  end

  mutual.uniq.sort
end
|
|
152
|
+
|
|
153
|
+
# Draw one box-pair block per edge, followed by isolated/cycle summaries.
#
# As soon as any edge block does not fit +width+ (build_edge_block returns
# nil) the whole diagram is abandoned in favour of the compact list renderer,
# reusing the same header.
# @return [Array(String, Symbol)] [text, :layered] or [text, :compact]
def render_ascii(nodes, edges, width:, style: :unicode)
  glyphs = charset_for(style)
  title = "Collections Graph (nodes: #{nodes.size}, edges: #{edges.size})"
  output = [title, '']

  edges.each do |edge|
    drawn = build_edge_block(edge, glyphs, width)
    if drawn.nil?
      # too wide for the terminal: switch layouts for the entire graph
      return [render_ascii_compact(nodes, edges, style: style, header: title), :compact]
    end

    output.concat(drawn)
  end

  lonely = compute_isolated(nodes, edges)
  output.push('', "Isolated: #{lonely.join(', ')}") unless lonely.empty?

  loops = detect_immediate_cycles(edges)
  output << if loops.empty?
              'Cycles: none'
            else
              "Cycles: #{loops.map { |pair| "#{pair[0]}↔#{pair[1]}" }.join(', ')}"
            end

  [output.join("\n"), :layered]
end
|
|
182
|
+
|
|
183
|
+
# Render the graph as a compact list grouped by source collection.
#
# One "- <from>" line per source (sorted), followed by one branch line per
# outgoing edge (sorted by target, then keys), with an optional "[via ...]"
# suffix. Isolated nodes and immediate cycles are summarised at the end.
# @param header [String, nil] overrides the default "Collections Graph (...)" title
# @return [String]
def render_ascii_compact(nodes, edges, style: :unicode, header: nil)
  glyphs = charset_for(style)
  output = [header || "Collections Graph (nodes: #{nodes.size}, edges: #{edges.size})"]

  outgoing = edges.group_by { |edge| edge[:from] }
  outgoing.keys.sort.each do |source|
    output << "- #{source}"

    ordered = outgoing[source].sort_by { |edge| [edge[:to], edge[:local_key], edge[:foreign_key]] }
    ordered.each do |edge|
      via = label_for(edge, glyphs, ascii_arrow: true)
      row = " #{glyphs[:branch]} #{glyphs[:arrow]} #{edge[:to]}"
      row += " [via #{via}]" unless via.empty?
      output << row
    end
  end

  lonely = compute_isolated(nodes, edges)
  output << "Isolated: #{lonely.join(', ')}" unless lonely.empty?

  loops = detect_immediate_cycles(edges)
  output << if loops.empty?
              'Cycles: none'
            else
              "Cycles: #{loops.map { |pair| "#{pair[0]}↔#{pair[1]}" }.join(', ')}"
            end

  output.join("\n")
end
|
|
215
|
+
|
|
216
|
+
# Build the "via" label for an edge from its local and foreign keys.
#
# Both present => "<local> <arrow> <foreign>"; only one present => that key;
# neither => "". The arrow is '->' when +ascii_arrow+ is set, otherwise the
# charset's :thin_arrow glyph.
# @return [String]
def label_for(edge, charset, ascii_arrow: false)
  local = edge[:local_key].to_s
  foreign = edge[:foreign_key].to_s
  arrow = if ascii_arrow
            '->'
          else
            charset[:thin_arrow]
          end

  [local, foreign].reject(&:empty?).join(" #{arrow} ")
end
|
|
228
|
+
|
|
229
|
+
# Glyph table for the requested style.
#
# :ascii selects plain ASCII characters; any other value selects the Unicode
# box-drawing set. Keys: :tl, :tr, :bl, :br (corners), :v, :h (lines),
# :arrow, :thin_arrow, :branch.
# @return [Hash{Symbol=>String}]
def charset_for(style)
  glyphs = if style == :ascii
             ['+', '+', '+', '+', '|', '-', '>', '->', '+-']
           else
             ['┌', '┐', '└', '┘', '│', '─', '▶', '→', '└─']
           end

  %i[tl tr bl br v h arrow thin_arrow branch].zip(glyphs).to_h
end
|
|
236
|
+
|
|
237
|
+
# One-line "box": top-left corner, the space-padded name, top-right corner.
# Newlines inside the name are replaced with spaces so the result stays on one line.
def build_single_line_box(name, charset)
  padded = " #{name} ".tr("\n", ' ')
  charset[:tl] + padded + charset[:tr]
end
|
|
243
|
+
|
|
244
|
+
# Three-line box around a name: top border, "│ name │" row, bottom border.
# Border length follows the character count of the padded name.
# @return [Array(String, String, String)] [top, middle, bottom]
def build_box_lines(name, charset)
  content = " #{name} "
  rule = charset[:h] * content.length

  [
    charset[:tl] + rule + charset[:tr],
    charset[:v] + content + charset[:v],
    charset[:bl] + rule + charset[:br]
  ]
end
|
|
253
|
+
|
|
254
|
+
# Lay out one edge as two boxes joined by a connector on the middle row.
#
# The connector carries "via <label>" when the edge has a label. Returns nil
# when the middle row would be longer than +width+; otherwise the three rows
# followed by an empty spacer line.
# @return [Array<String>, nil]
def build_edge_block(edge, charset, width)
  left = build_box_lines(edge[:from], charset)
  right = build_box_lines(edge[:to], charset)
  via = label_for(edge, charset)

  rule = charset[:h]
  head = charset[:arrow]
  connector = if via.empty?
                " #{rule * 3}#{head} "
              else
                " #{rule * 2} via #{via} #{rule}#{head} "
              end

  return nil if left[1].length + connector.length + right[1].length > width

  # Top and bottom rows get blank padding as wide as the connector
  spacer = ' ' * connector.length
  rows = left.first(3).zip([spacer, connector, spacer], right.first(3))
  rows.map { |lhs, joiner, rhs| lhs + joiner + rhs } << ''
end
|
|
271
|
+
|
|
272
|
+
# Mermaid flowchart (LR) generator with labeled edges and node declarations.
#
# Node names are taken from the edges when there are any (so logical names
# are used rather than physical, timestamped ones); +nodes+ is only used when
# the edge list yields no names. Every rendered name gets a stable id built
# from its position and sanitized name.
#
# Double quotes inside labels are emitted as Mermaid's "#quot;" entity code.
# Mermaid does not understand backslash-escaped quotes, so the previous \"
# form produced a diagram that fails to parse.
# @return [String]
def render_mermaid(nodes, edges)
  edge_names = edges.flat_map { |e| [e[:from].to_s, e[:to].to_s] }.reject(&:empty?).uniq
  names = edge_names.empty? ? Array(nodes).map(&:to_s) : edge_names

  # Stable ids for all names we intend to render.
  ids = {}
  names.each_with_index { |name, idx| ids[name] = sanitize_mermaid_id("C_#{idx}_#{name}") }

  lines = ['flowchart LR']

  # Explicit node declarations so each id carries its readable label.
  names.each do |name|
    label = name.to_s.gsub('"', '#quot;')
    lines << " #{ids[name]}[\"#{label}\"]"
  end

  # Edges with labels (via ...); edges touching an undeclared name are skipped.
  edges.each do |e|
    from = ids[e[:from].to_s]
    to = ids[e[:to].to_s]
    next if from.nil? || to.nil?

    via = mermaid_edge_label(e)
    lines << if via.empty?
               " #{from} --> #{to}"
             else
               " #{from} -- \"#{via.gsub('"', '#quot;')}\" --> #{to}"
             end
  end

  lines.join("\n")
end
|
|
309
|
+
|
|
310
|
+
# Turn an arbitrary name into an identifier made of letters, digits and
# underscores. Every other character becomes "_", and a "C_" prefix is added
# when the result is empty or does not start with a letter or underscore.
# @return [String]
def sanitize_mermaid_id(name)
  cleaned = name.to_s.gsub(/[^a-zA-Z0-9_]/, '_')
  return cleaned if cleaned.match?(/\A[a-zA-Z_]/)

  "C_#{cleaned}"
end
|
|
316
|
+
|
|
317
|
+
# Label text for a Mermaid edge, built from the edge's keys.
# Both present => "<local> -> <foreign>"; one present => that key; none => "".
# @return [String]
def mermaid_edge_label(edge)
  keys = [edge[:local_key].to_s, edge[:foreign_key].to_s]
  keys.reject(&:empty?).join(' -> ')
end
|
|
328
|
+
end
|
|
329
|
+
end
|
|
330
|
+
end
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
module SearchEngine
|
|
6
|
+
# Immutable, deterministic wrapper for compiled Typesense params.
#
# Guarantees:
# - Keys converted to symbols where possible, ordered by key.to_s at every hash level
# - Array order preserved as provided
# - Deep frozen internal representation
# - to_h and to_json always reflect that canonical ordering
#
# Hashes and arrays are rebuilt, and unfrozen String values are copied, so
# constructing an instance does not freeze those caller-owned objects. Any
# other value is stored as-is and frozen in place.
#
# Public surface mirrors a minimal read-only Hash API used in the codebase.
# New instances should be constructed from plain Hashes only.
class CompiledParams
  EMPTY_HASH = {}.freeze
  EMPTY_ARRAY = [].freeze

  # @param value [Hash, #to_h] anything else is treated as an empty Hash
  def initialize(value)
    input = if value.is_a?(Hash)
              value
            elsif value.respond_to?(:to_h)
              value.to_h
            else
              EMPTY_HASH
            end
    @canonical = canonicalize_hash(input)
    deep_freeze!(@canonical)
    freeze
  end

  # Wrap +value+ unless it already is a CompiledParams, which is returned as-is.
  # @param value [Object]
  # @return [SearchEngine::CompiledParams]
  def self.from(value)
    value.is_a?(self) ? value : new(value)
  end

  # Return the canonical, deeply frozen Hash (symbol keys, sorted order).
  # @return [Hash]
  def to_h
    @canonical
  end

  # Implicit Hash conversion for APIs like Hash#merge expecting #to_hash.
  # @return [Hash]
  alias_method :to_hash, :to_h

  # Deterministic JSON serialization using the canonical ordered Hash.
  # Generator arguments are accepted for compatibility and ignored.
  # @return [String]
  def to_json(*_args)
    JSON.generate(@canonical)
  end

  # Read-style Hash helpers used in callers ---------------------------------

  # Look up a value; String and Symbol keys are interchangeable.
  # @param key [Object]
  # @return [Object]
  def [](key)
    @canonical[key_to_sym(key)]
  end

  # @param key [Object]
  # @return [Boolean]
  def key?(key)
    @canonical.key?(key_to_sym(key))
  end

  # @return [Array<Symbol>]
  def keys
    @canonical.keys
  end

  # @yieldparam key [Symbol]
  # @yieldparam value [Object]
  # @return [Enumerator] when no block is given
  def each(&block)
    return enum_for(:each) unless block_given?

    @canonical.each(&block)
  end

  # Equality based on canonical Hash content.
  #
  # Hashes, other CompiledParams and hash-like objects (#to_h) are compared by
  # content. nil and Arrays also answer to #to_h but are not hash-like:
  # nil.to_h is {} (which used to make empty params equal to nil) and a
  # non-pair Array raises TypeError from #to_h. Both now compare as unequal.
  # @param other [Object]
  # @return [Boolean]
  def ==(other)
    return other.to_h == @canonical if other.is_a?(CompiledParams) || other.is_a?(Hash)
    return false if other.nil? || other.is_a?(Array) || !other.respond_to?(:to_h)

    other.to_h == @canonical
  rescue TypeError, ArgumentError
    # #to_h could not produce a Hash, so the objects cannot be equal
    false
  end

  private

  # Symbolize a key when it supports #to_sym; other keys (e.g. Integers) are kept.
  def key_to_sym(k)
    k.respond_to?(:to_sym) ? k.to_sym : k
  end

  # Normalize keys to symbols; sort by key.to_s; recurse into values
  def canonicalize_hash(hash)
    sorted_keys = hash.keys.sort_by(&:to_s)
    sorted_keys.each_with_object({}) do |k, acc|
      acc[key_to_sym(k)] = canonicalize_value(hash[k])
    end
  end

  # Rebuild an array with canonicalized elements, keeping element order.
  def canonicalize_array(array)
    return EMPTY_ARRAY if array.empty?

    array.map { |v| canonicalize_value(v) }
  end

  def canonicalize_value(value)
    case value
    when Hash
      canonicalize_hash(value)
    when Array
      canonicalize_array(value)
    when String
      # Copy mutable strings so deep_freeze! below never freezes the caller's object.
      value.frozen? ? value : value.dup
    else
      value
    end
  end

  # Recursively freeze hashes, arrays and leaf values.
  def deep_freeze!(obj)
    case obj
    when Hash
      obj.each_value { |v| deep_freeze!(v) }
      obj.freeze
    when Array
      obj.each { |v| deep_freeze!(v) }
      obj.freeze
    else
      obj.freeze if obj.respond_to?(:freeze)
    end
  end
end
|
|
143
|
+
end
|