flexvec 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {flexvec-0.2.0 → flexvec-0.3.0}/PKG-INFO +1 -1
  2. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/score.py +1 -62
  3. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/vec_ops.py +2 -2
  4. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/PKG-INFO +1 -1
  5. {flexvec-0.2.0 → flexvec-0.3.0}/pyproject.toml +1 -1
  6. {flexvec-0.2.0 → flexvec-0.3.0}/LICENSE +0 -0
  7. {flexvec-0.2.0 → flexvec-0.3.0}/README.md +0 -0
  8. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/__init__.py +0 -0
  9. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/__main__.py +0 -0
  10. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/embed.py +0 -0
  11. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/execute.py +0 -0
  12. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/keyword.py +0 -0
  13. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/__init__.py +0 -0
  14. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/embed.py +0 -0
  15. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/fetch.py +0 -0
  16. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/nomic_embed.py +0 -0
  17. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/special_tokens_map.json +0 -0
  18. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/tokenizer.json +0 -0
  19. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/tokenizer_config.json +0 -0
  20. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec/onnx/vocab.txt +0 -0
  21. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/SOURCES.txt +0 -0
  22. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/dependency_links.txt +0 -0
  23. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/requires.txt +0 -0
  24. {flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/top_level.txt +0 -0
  25. {flexvec-0.2.0 → flexvec-0.3.0}/setup.cfg +0 -0
  26. {flexvec-0.2.0 → flexvec-0.3.0}/tests/test_algebraic.py +0 -0
  27. {flexvec-0.2.0 → flexvec-0.3.0}/tests/test_keyword.py +0 -0
  28. {flexvec-0.2.0 → flexvec-0.3.0}/tests/test_tokens_beir.py +0 -0
  29. {flexvec-0.2.0 → flexvec-0.3.0}/tests/test_vec_ops.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flexvec
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: numpy-backed semantic search for any SQLite database
5
5
  Author-email: Damian Delmas <damian@getflex.dev>
6
6
  License: MIT
@@ -32,13 +32,10 @@ def parse_modifiers(modifier_str: str) -> dict:
32
32
  from:TEXT to:TEXT trajectory — direction through embedding space
33
33
  pool:N candidate count (default 500)
34
34
  communities per-query Louvain, adds _community
35
- pagerank PageRank on candidate subgraph, adds _local_centrality
36
- peaks HDBSCAN cluster extremes, adds _cluster_id, _is_attractor
37
35
 
38
36
  Deprecated aliases (accepted, will be removed):
39
37
  like: → centroid:, unlike: → suppress:, limit: → pool:, recent: → decay:
40
38
  local_communities → communities, detect_communities → communities
41
- local_pagerank → pagerank
42
39
 
43
40
  Dead tokens (silently ignored): kind:TYPE, community:N
44
41
  Unknown tokens silently ignored (forward-compatible).
@@ -54,10 +51,6 @@ def parse_modifiers(modifier_str: str) -> dict:
54
51
  'trajectory_from': None,
55
52
  'trajectory_to': None,
56
53
  'local_communities': False,
57
- 'local_pagerank': False,
58
-
59
-
60
- 'peaks': False,
61
54
  }
62
55
 
63
56
  if not modifier_str:
@@ -73,13 +66,10 @@ def parse_modifiers(modifier_str: str) -> dict:
73
66
  modifier_str = re.sub(r'\brecent\b', 'decay', modifier_str)
74
67
  modifier_str = modifier_str.replace('local_communities', 'communities')
75
68
  modifier_str = modifier_str.replace('detect_communities', 'communities')
76
- modifier_str = modifier_str.replace('local_pagerank', 'pagerank')
77
-
78
69
  # Known token prefixes for boundary detection (canonical names only)
79
70
  _TOKEN_BOUNDARY = (
80
71
  r'diverse|decay:|suppress:|centroid:|pool:|'
81
- r'communities|pagerank|'
82
- r'peaks|from:|similar:'
72
+ r'communities|from:|similar:'
83
73
  )
84
74
 
85
75
  # Extract similar:TEXT (multi-word, up to next token boundary)
@@ -138,10 +128,6 @@ def parse_modifiers(modifier_str: str) -> dict:
138
128
  result['like'] = token.split(':', 1)[1].split(',')
139
129
  elif token == 'communities':
140
130
  result['local_communities'] = True
141
- elif token == 'pagerank':
142
- result['local_pagerank'] = True
143
- elif token == 'peaks':
144
- result['peaks'] = True
145
131
  # kind: and community: silently ignored (dead tokens)
146
132
 
147
133
  return result
@@ -375,53 +361,6 @@ def score_candidates(
375
361
  enrichment[int(node)] = {'_community': ci}
376
362
  _merge_enrichment(enrichment)
377
363
 
378
- # Local PageRank on candidate subgraph
379
- if modifiers and modifiers.get('local_pagerank') and len(cand_indices) >= 3:
380
- import networkx as nx
381
- sims = cand_vecs @ cand_vecs.T
382
- rows, cols = np.where(np.triu(sims > 0.3, k=1))
383
- G = nx.Graph()
384
- G.add_nodes_from(range(len(cand_indices)))
385
- G.add_weighted_edges_from(
386
- (int(r), int(c), float(sims[r, c])) for r, c in zip(rows, cols)
387
- )
388
- if G.number_of_edges() > 0:
389
- pr = nx.pagerank(G, weight='weight')
390
- enrichment = {int(node): {'_local_centrality': float(val)}
391
- for node, val in pr.items()}
392
- _merge_enrichment(enrichment)
393
-
394
- # MST — minimum spanning tree spine, leaves, hubs
395
-
396
-
397
- if modifiers and modifiers.get('peaks') and len(cand_indices) >= 20:
398
- try:
399
- import hdbscan
400
- clusterer = hdbscan.HDBSCAN(
401
- min_cluster_size=5, min_samples=3, metric='euclidean')
402
- cluster_labels = clusterer.fit_predict(cand_vecs)
403
-
404
- enrichment = {}
405
- # Per cluster, find the point farthest from centroid (strange attractor)
406
- unique_labels = set(cluster_labels)
407
- unique_labels.discard(-1) # noise
408
- attractors = set()
409
- for cl in unique_labels:
410
- members = np.where(cluster_labels == cl)[0]
411
- centroid = cand_vecs[members].mean(axis=0)
412
- dists = np.linalg.norm(cand_vecs[members] - centroid, axis=1)
413
- attractor_pos = members[np.argmax(dists)]
414
- attractors.add(int(attractor_pos))
415
-
416
- for pos in range(len(cand_ids)):
417
- enrichment[pos] = {
418
- '_cluster_id': int(cluster_labels[pos]) if cluster_labels[pos] >= 0 else None,
419
- '_is_attractor': 1 if pos in attractors else 0,
420
- }
421
- _merge_enrichment(enrichment)
422
- except ImportError:
423
- pass # hdbscan not installed, silently skip
424
-
425
364
  # === Apply structural enrichments to results ===
426
365
  def _attach_enrichments(results_list):
427
366
  """Attach _-prefixed structural columns to result dicts."""
@@ -288,8 +288,8 @@ def materialize_vec_ops(db, sql: str) -> str:
288
288
 
289
289
  # Populate temp table (unique name per call for HTTP concurrency)
290
290
  # Dynamic column construction: discover all _-prefixed columns from
291
- # structural tokens (local_communities, mst, homology, fingerprint, etc.)
292
- # and build the schema automatically. Any token can emit any column.
291
+ # structural tokens (e.g. local_communities) and build the schema
292
+ # automatically. Any token can emit any column.
293
293
  tmp_name = f"_vec_results_{uuid.uuid4().hex[:8]}"
294
294
 
295
295
  base_cols = [('id', 'TEXT PRIMARY KEY'), ('score', 'REAL')]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flexvec
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: numpy-backed semantic search for any SQLite database
5
5
  Author-email: Damian Delmas <damian@getflex.dev>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "flexvec"
3
- version = "0.2.0"
3
+ version = "0.3.0"
4
4
  description = "numpy-backed semantic search for any SQLite database"
5
5
  license = { text = "MIT" }
6
6
  authors = [{name = "Damian Delmas", email = "damian@getflex.dev"}]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes