flexvec 0.2.0__tar.gz → 0.3.0__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- {flexvec-0.2.0 → flexvec-0.3.0}/PKG-INFO +1 -1
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/score.py +1 -62
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/vec_ops.py +2 -2
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/PKG-INFO +1 -1
- {flexvec-0.2.0 → flexvec-0.3.0}/pyproject.toml +1 -1
- {flexvec-0.2.0 → flexvec-0.3.0}/LICENSE +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/README.md +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/__init__.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/__main__.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/embed.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/execute.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/keyword.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/__init__.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/embed.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/fetch.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/nomic_embed.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/special_tokens_map.json +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/tokenizer.json +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/tokenizer_config.json +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/vocab.txt +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/SOURCES.txt +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/dependency_links.txt +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/requires.txt +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/top_level.txt +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/setup.cfg +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/tests/test_algebraic.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/tests/test_keyword.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/tests/test_tokens_beir.py +0 -0
- {flexvec-0.2.0 → flexvec-0.3.0}/tests/test_vec_ops.py +0 -0
|
@@ -32,13 +32,10 @@ def parse_modifiers(modifier_str: str) -> dict:
|
|
|
32
32
|
from:TEXT to:TEXT trajectory — direction through embedding space
|
|
33
33
|
pool:N candidate count (default 500)
|
|
34
34
|
communities per-query Louvain, adds _community
|
|
35
|
-
pagerank PageRank on candidate subgraph, adds _local_centrality
|
|
36
|
-
peaks HDBSCAN cluster extremes, adds _cluster_id, _is_attractor
|
|
37
35
|
|
|
38
36
|
Deprecated aliases (accepted, will be removed):
|
|
39
37
|
like: → centroid:, unlike: → suppress:, limit: → pool:, recent: → decay:
|
|
40
38
|
local_communities → communities, detect_communities → communities
|
|
41
|
-
local_pagerank → pagerank
|
|
42
39
|
|
|
43
40
|
Dead tokens (silently ignored): kind:TYPE, community:N
|
|
44
41
|
Unknown tokens silently ignored (forward-compatible).
|
|
@@ -54,10 +51,6 @@ def parse_modifiers(modifier_str: str) -> dict:
|
|
|
54
51
|
'trajectory_from': None,
|
|
55
52
|
'trajectory_to': None,
|
|
56
53
|
'local_communities': False,
|
|
57
|
-
'local_pagerank': False,
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
'peaks': False,
|
|
61
54
|
}
|
|
62
55
|
|
|
63
56
|
if not modifier_str:
|
|
@@ -73,13 +66,10 @@ def parse_modifiers(modifier_str: str) -> dict:
|
|
|
73
66
|
modifier_str = re.sub(r'\brecent\b', 'decay', modifier_str)
|
|
74
67
|
modifier_str = modifier_str.replace('local_communities', 'communities')
|
|
75
68
|
modifier_str = modifier_str.replace('detect_communities', 'communities')
|
|
76
|
-
modifier_str = modifier_str.replace('local_pagerank', 'pagerank')
|
|
77
|
-
|
|
78
69
|
# Known token prefixes for boundary detection (canonical names only)
|
|
79
70
|
_TOKEN_BOUNDARY = (
|
|
80
71
|
r'diverse|decay:|suppress:|centroid:|pool:|'
|
|
81
|
-
r'communities|
|
|
82
|
-
r'peaks|from:|similar:'
|
|
72
|
+
r'communities|from:|similar:'
|
|
83
73
|
)
|
|
84
74
|
|
|
85
75
|
# Extract similar:TEXT (multi-word, up to next token boundary)
|
|
@@ -138,10 +128,6 @@ def parse_modifiers(modifier_str: str) -> dict:
|
|
|
138
128
|
result['like'] = token.split(':', 1)[1].split(',')
|
|
139
129
|
elif token == 'communities':
|
|
140
130
|
result['local_communities'] = True
|
|
141
|
-
elif token == 'pagerank':
|
|
142
|
-
result['local_pagerank'] = True
|
|
143
|
-
elif token == 'peaks':
|
|
144
|
-
result['peaks'] = True
|
|
145
131
|
# kind: and community: silently ignored (dead tokens)
|
|
146
132
|
|
|
147
133
|
return result
|
|
@@ -375,53 +361,6 @@ def score_candidates(
|
|
|
375
361
|
enrichment[int(node)] = {'_community': ci}
|
|
376
362
|
_merge_enrichment(enrichment)
|
|
377
363
|
|
|
378
|
-
# Local PageRank on candidate subgraph
|
|
379
|
-
if modifiers and modifiers.get('local_pagerank') and len(cand_indices) >= 3:
|
|
380
|
-
import networkx as nx
|
|
381
|
-
sims = cand_vecs @ cand_vecs.T
|
|
382
|
-
rows, cols = np.where(np.triu(sims > 0.3, k=1))
|
|
383
|
-
G = nx.Graph()
|
|
384
|
-
G.add_nodes_from(range(len(cand_indices)))
|
|
385
|
-
G.add_weighted_edges_from(
|
|
386
|
-
(int(r), int(c), float(sims[r, c])) for r, c in zip(rows, cols)
|
|
387
|
-
)
|
|
388
|
-
if G.number_of_edges() > 0:
|
|
389
|
-
pr = nx.pagerank(G, weight='weight')
|
|
390
|
-
enrichment = {int(node): {'_local_centrality': float(val)}
|
|
391
|
-
for node, val in pr.items()}
|
|
392
|
-
_merge_enrichment(enrichment)
|
|
393
|
-
|
|
394
|
-
# MST — minimum spanning tree spine, leaves, hubs
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
if modifiers and modifiers.get('peaks') and len(cand_indices) >= 20:
|
|
398
|
-
try:
|
|
399
|
-
import hdbscan
|
|
400
|
-
clusterer = hdbscan.HDBSCAN(
|
|
401
|
-
min_cluster_size=5, min_samples=3, metric='euclidean')
|
|
402
|
-
cluster_labels = clusterer.fit_predict(cand_vecs)
|
|
403
|
-
|
|
404
|
-
enrichment = {}
|
|
405
|
-
# Per cluster, find the point farthest from centroid (strange attractor)
|
|
406
|
-
unique_labels = set(cluster_labels)
|
|
407
|
-
unique_labels.discard(-1) # noise
|
|
408
|
-
attractors = set()
|
|
409
|
-
for cl in unique_labels:
|
|
410
|
-
members = np.where(cluster_labels == cl)[0]
|
|
411
|
-
centroid = cand_vecs[members].mean(axis=0)
|
|
412
|
-
dists = np.linalg.norm(cand_vecs[members] - centroid, axis=1)
|
|
413
|
-
attractor_pos = members[np.argmax(dists)]
|
|
414
|
-
attractors.add(int(attractor_pos))
|
|
415
|
-
|
|
416
|
-
for pos in range(len(cand_ids)):
|
|
417
|
-
enrichment[pos] = {
|
|
418
|
-
'_cluster_id': int(cluster_labels[pos]) if cluster_labels[pos] >= 0 else None,
|
|
419
|
-
'_is_attractor': 1 if pos in attractors else 0,
|
|
420
|
-
}
|
|
421
|
-
_merge_enrichment(enrichment)
|
|
422
|
-
except ImportError:
|
|
423
|
-
pass # hdbscan not installed, silently skip
|
|
424
|
-
|
|
425
364
|
# === Apply structural enrichments to results ===
|
|
426
365
|
def _attach_enrichments(results_list):
|
|
427
366
|
"""Attach _-prefixed structural columns to result dicts."""
|
|
@@ -288,8 +288,8 @@ def materialize_vec_ops(db, sql: str) -> str:
|
|
|
288
288
|
|
|
289
289
|
# Populate temp table (unique name per call for HTTP concurrency)
|
|
290
290
|
# Dynamic column construction: discover all _-prefixed columns from
|
|
291
|
-
# structural tokens (local_communities
|
|
292
|
-
#
|
|
291
|
+
# structural tokens (e.g. local_communities) and build the schema
|
|
292
|
+
# automatically. Any token can emit any column.
|
|
293
293
|
tmp_name = f"_vec_results_{uuid.uuid4().hex[:8]}"
|
|
294
294
|
|
|
295
295
|
base_cols = [('id', 'TEXT PRIMARY KEY'), ('score', 'REAL')]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|