PyPI - flexvec - Versions diffs - 0.2.0__tar.gz → 0.3.0__tar.gz - Mend

flexvec 0.2.0 (tar.gz) → 0.3.0 (tar.gz)

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{flexvec-0.2.0 → flexvec-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flexvec
-Version: 0.2.0
+Version: 0.3.0
 Summary: numpy-backed semantic search for any SQLite database
 Author-email: Damian Delmas <damian@getflex.dev>
 License: MIT

{flexvec-0.2.0 → flexvec-0.3.0}/flexvec/score.py RENAMED Viewed

@@ -32,13 +32,10 @@ def parse_modifiers(modifier_str: str) -> dict:
         from:TEXT to:TEXT    trajectory — direction through embedding space
         pool:N               candidate count (default 500)
         communities          per-query Louvain, adds _community
-        pagerank             PageRank on candidate subgraph, adds _local_centrality
-        peaks                HDBSCAN cluster extremes, adds _cluster_id, _is_attractor
     Deprecated aliases (accepted, will be removed):
         like: → centroid:, unlike: → suppress:, limit: → pool:, recent: → decay:
         local_communities → communities, detect_communities → communities
-        local_pagerank → pagerank
     Dead tokens (silently ignored): kind:TYPE, community:N
     Unknown tokens silently ignored (forward-compatible).
@@ -54,10 +51,6 @@ def parse_modifiers(modifier_str: str) -> dict:
         'trajectory_from': None,
         'trajectory_to': None,
         'local_communities': False,
-        'local_pagerank': False,
-        'peaks': False,
     }
     if not modifier_str:
@@ -73,13 +66,10 @@ def parse_modifiers(modifier_str: str) -> dict:
     modifier_str = re.sub(r'\brecent\b', 'decay', modifier_str)
     modifier_str = modifier_str.replace('local_communities', 'communities')
     modifier_str = modifier_str.replace('detect_communities', 'communities')
-    modifier_str = modifier_str.replace('local_pagerank', 'pagerank')
     # Known token prefixes for boundary detection (canonical names only)
     _TOKEN_BOUNDARY = (
         r'diverse|decay:|suppress:|centroid:|pool:|'
-        r'communities|pagerank|'
-        r'peaks|from:|similar:'
+        r'communities|from:|similar:'
     )
     # Extract similar:TEXT (multi-word, up to next token boundary)
@@ -138,10 +128,6 @@ def parse_modifiers(modifier_str: str) -> dict:
             result['like'] = token.split(':', 1)[1].split(',')
         elif token == 'communities':
             result['local_communities'] = True
-        elif token == 'pagerank':
-            result['local_pagerank'] = True
-        elif token == 'peaks':
-            result['peaks'] = True
         # kind: and community: silently ignored (dead tokens)
     return result
@@ -375,53 +361,6 @@ def score_candidates(
                     enrichment[int(node)] = {'_community': ci}
             _merge_enrichment(enrichment)
-    # Local PageRank on candidate subgraph
-    if modifiers and modifiers.get('local_pagerank') and len(cand_indices) >= 3:
-        import networkx as nx
-        sims = cand_vecs @ cand_vecs.T
-        rows, cols = np.where(np.triu(sims > 0.3, k=1))
-        G = nx.Graph()
-        G.add_nodes_from(range(len(cand_indices)))
-        G.add_weighted_edges_from(
-            (int(r), int(c), float(sims[r, c])) for r, c in zip(rows, cols)
-        )
-        if G.number_of_edges() > 0:
-            pr = nx.pagerank(G, weight='weight')
-            enrichment = {int(node): {'_local_centrality': float(val)}
-                          for node, val in pr.items()}
-            _merge_enrichment(enrichment)
-    # MST — minimum spanning tree spine, leaves, hubs
-    if modifiers and modifiers.get('peaks') and len(cand_indices) >= 20:
-        try:
-            import hdbscan
-            clusterer = hdbscan.HDBSCAN(
-                min_cluster_size=5, min_samples=3, metric='euclidean')
-            cluster_labels = clusterer.fit_predict(cand_vecs)
-            enrichment = {}
-            # Per cluster, find the point farthest from centroid (strange attractor)
-            unique_labels = set(cluster_labels)
-            unique_labels.discard(-1)  # noise
-            attractors = set()
-            for cl in unique_labels:
-                members = np.where(cluster_labels == cl)[0]
-                centroid = cand_vecs[members].mean(axis=0)
-                dists = np.linalg.norm(cand_vecs[members] - centroid, axis=1)
-                attractor_pos = members[np.argmax(dists)]
-                attractors.add(int(attractor_pos))
-            for pos in range(len(cand_ids)):
-                enrichment[pos] = {
-                    '_cluster_id': int(cluster_labels[pos]) if cluster_labels[pos] >= 0 else None,
-                    '_is_attractor': 1 if pos in attractors else 0,
-                }
-            _merge_enrichment(enrichment)
-        except ImportError:
-            pass  # hdbscan not installed, silently skip
     # === Apply structural enrichments to results ===
     def _attach_enrichments(results_list):
         """Attach _-prefixed structural columns to result dicts."""

{flexvec-0.2.0 → flexvec-0.3.0}/flexvec/vec_ops.py RENAMED Viewed

@@ -288,8 +288,8 @@ def materialize_vec_ops(db, sql: str) -> str:
     # Populate temp table (unique name per call for HTTP concurrency)
     # Dynamic column construction: discover all _-prefixed columns from
-    # structural tokens (local_communities, mst, homology, fingerprint, etc.)
-    # and build the schema automatically. Any token can emit any column.
+    # structural tokens (e.g. local_communities) and build the schema
+    # automatically. Any token can emit any column.
     tmp_name = f"_vec_results_{uuid.uuid4().hex[:8]}"
     base_cols = [('id', 'TEXT PRIMARY KEY'), ('score', 'REAL')]

{flexvec-0.2.0 → flexvec-0.3.0}/flexvec.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: flexvec
-Version: 0.2.0
+Version: 0.3.0
 Summary: numpy-backed semantic search for any SQLite database
 Author-email: Damian Delmas <damian@getflex.dev>
 License: MIT

{flexvec-0.2.0 → flexvec-0.3.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "flexvec"
-version = "0.2.0"
+version = "0.3.0"
 description = "numpy-backed semantic search for any SQLite database"
 license = { text = "MIT" }
 authors = [{name = "Damian Delmas", email = "damian@getflex.dev"}]