PyPI - chunksilo - Versions diffs - 2.1.1__tar.gz → 2.1.3__tar.gz - Mend

chunksilo 2.1.1 (tar.gz) → 2.1.3 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of chunksilo might be problematic. Click here for more details.

Files changed (31)

{chunksilo-2.1.1/src/chunksilo.egg-info → chunksilo-2.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chunksilo
-Version: 2.1.1
+Version: 2.1.3
 Summary: Local RAG-based semantic document search with MCP server interface
 Author: Fredrik Reveny
 License-Expression: Apache-2.0

{chunksilo-2.1.1 → chunksilo-2.1.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "chunksilo"
-version = "2.1.1"
+version = "2.1.3"
 description = "Local RAG-based semantic document search with MCP server interface"
 license = "Apache-2.0"
 requires-python = ">=3.11"

{chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
 """ChunkSilo - Local RAG-based semantic document search."""
-__version__ = "2.1.1"
+__version__ = "2.1.3"

{chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo/search.py RENAMED Viewed

@@ -389,8 +389,8 @@ def _prepare_jira_jql_query(query: str, config: dict[str, Any]) -> str:
     """Construct a JQL query from user search terms and configuration.
     Uses Jira's 'text' field which searches across Summary, Description,
-    Environment, Comments, and all text custom fields. This provides broad
-    coverage similar to natural language search.
+    Environment, Comments, and all text custom fields. Additionally detects
+    Jira issue keys (e.g., "ABEI-1660") and includes exact key searches.
     Note: Fuzzy search operators (~) are deprecated in Jira Cloud but work
     in Data Center/Server. ChunkSilo's semantic search (embeddings + reranker)
@@ -414,20 +414,35 @@ def _prepare_jira_jql_query(query: str, config: dict[str, Any]) -> str:
     References:
         - Jira text field: https://support.atlassian.com/jira-software-cloud/docs/search-for-work-items-using-the-text-field/
         - JQL operators: https://support.atlassian.com/jira-software-cloud/docs/jql-operators/
+        - JQL key field: https://support.atlassian.com/jira-software-cloud/docs/search-by-issue-key/
     """
+    # Detect Jira issue keys in the query (e.g., "ABEI-1660", "PROJ-123")
+    # Pattern matches: 1+ uppercase letters/digits, hyphen, 1+ digits
+    # Case-insensitive matching, but preserve original case for extraction
+    issue_key_pattern = r'\b([A-Z][A-Z0-9]+-\d+)\b'
+    detected_keys = re.findall(issue_key_pattern, query, re.IGNORECASE)
+    # Build key search clauses for exact issue key matches
+    key_clauses = []
+    if detected_keys:
+        # Normalize to uppercase (Jira keys are case-insensitive)
+        unique_keys = list(dict.fromkeys(k.upper() for k in detected_keys))
+        key_clauses = [f'key = "{key}"' for key in unique_keys]
+        logger.debug(f"Detected Jira issue keys in query: {unique_keys}")
     # Reuse Confluence query term preparation for stopword filtering
     # This gives us a clean list of meaningful search terms
     query_terms = _prepare_confluence_query_terms(query)
     # Build the text search clause
     # Using JQL 'text' field which searches across all text fields for broad recall
+    text_clause = ""
     if not query_terms:
         # No meaningful terms after filtering, use original query
         escaped = query.strip().replace('"', '\\"')
-        if not escaped:
-            logger.warning("Jira search skipped: empty query after processing")
-            return ""
-        text_clause = f'text ~ "{escaped}"'
+        if escaped and not detected_keys:
+            # Only add text clause if we don't have issue keys
+            text_clause = f'text ~ "{escaped}"'
     elif len(query_terms) == 1:
         # Single term - simple text search
         text_clause = f'text ~ "{query_terms[0]}"'
@@ -437,6 +452,21 @@ def _prepare_jira_jql_query(query: str, config: dict[str, Any]) -> str:
         text_conditions = ' OR '.join([f'text ~ "{term}"' for term in query_terms])
         text_clause = f'({text_conditions})'
+    # Combine key and text searches
+    if key_clauses and text_clause:
+        # Search both by key and text content
+        combined_clause = f'({" OR ".join(key_clauses)} OR {text_clause})'
+    elif key_clauses:
+        # Only key searches
+        combined_clause = " OR ".join(key_clauses)
+    elif text_clause:
+        # Only text search
+        combined_clause = text_clause
+    else:
+        # No valid search terms
+        logger.warning("Jira search skipped: empty query after processing")
+        return ""
     # Add project filter if configured
     # Empty projects list means search all accessible projects
     projects = config["jira"].get("projects", [])
@@ -444,9 +474,9 @@ def _prepare_jira_jql_query(query: str, config: dict[str, Any]) -> str:
         # Restrict search to specific project keys
         project_list = ", ".join([f'"{p}"' for p in projects])
         project_clause = f'project IN ({project_list})'
-        jql = f'{text_clause} AND {project_clause}'
+        jql = f'{combined_clause} AND {project_clause}'
     else:
-        jql = text_clause
+        jql = combined_clause
     # Order by updated DESC for recency
     # This enables ChunkSilo's recency boost feature and returns most relevant recent issues first
@@ -738,8 +768,8 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
     Configuration Requirements:
         config["jira"]["url"]: Jira base URL (empty = disabled)
-        config["jira"]["username"]: Jira username or email
-        config["jira"]["api_token"]: Jira API token (not password)
+        config["jira"]["username"]: Jira username/email (required for Cloud, optional for Server PAT)
+        config["jira"]["api_token"]: API token (Cloud) or Personal Access Token (Server/Data Center)
         config["jira"]["max_results"]: Maximum issues to return
         config["jira"]["projects"]: List of project keys (empty = all)
         config["jira"]["include_comments"]: Include issue comments
@@ -763,8 +793,12 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
         - Automatically configured through jira_options["verify"]
     Authentication:
-        - Uses basic auth (username + API token)
-        - Works for both Jira Cloud and Data Center/Server
+        - Jira Cloud: Set both username (email) and api_token (API token)
+          Uses basic auth internally
+        - Jira Server/Data Center with PAT: Set only api_token (Personal Access Token)
+          Leave username empty; uses bearer token auth internally
+        - Jira Server/Data Center with password: Set username and api_token (password)
+          Uses basic auth internally (if basic auth enabled on server)
     References:
         - Jira REST API: https://developer.atlassian.com/cloud/jira/platform/rest/v3/
@@ -790,13 +824,10 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
     ca_bundle_path = config["ssl"]["ca_bundle_path"] or None
     # Validate required credentials are present
-    if not (base_url and username and api_token):
-        missing = []
-        if not username:
-            missing.append("jira.username")
-        if not api_token:
-            missing.append("jira.api_token")
-        logger.warning(f"Jira search skipped: missing {', '.join(missing)} in config")
+    # For Jira Cloud: both username and api_token required (basic auth)
+    # For Jira Server/Data Center with PAT: only api_token required (token auth)
+    if not api_token:
+        logger.warning("Jira search skipped: missing jira.api_token in config")
         return []
     try:
@@ -806,12 +837,23 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
         if ca_bundle_path:
             jira_options["verify"] = ca_bundle_path
-        # Use basic auth (username + API token) for authentication
-        # Works for both Jira Cloud and Data Center/Server
-        jira_client = JIRA(
-            options=jira_options,
-            basic_auth=(username, api_token)
-        )
+        # Choose authentication method based on credentials provided:
+        # - Username + API token: Use basic auth (Jira Cloud, or Server with password)
+        # - API token only: Use token auth (Jira Server/Data Center with PAT)
+        if username:
+            # Basic auth for Jira Cloud (username + API token)
+            # Also works for Jira Server with username + password
+            jira_client = JIRA(
+                options=jira_options,
+                basic_auth=(username, api_token)
+            )
+        else:
+            # Token auth for Jira Server/Data Center Personal Access Tokens (PAT)
+            # PATs are used alone without a username
+            jira_client = JIRA(
+                options=jira_options,
+                token_auth=api_token
+            )
         # Construct JQL with text search and project filtering
         jql = _prepare_jira_jql_query(query, config)
@@ -1116,6 +1158,7 @@ def run_search(
                 rerank_request = RerankRequest(query=enhanced_query, passages=passages)
                 reranked_results = reranker.rerank(rerank_request)
+                # Build text-to-node mapping for fallback text matching
                 text_to_indices: dict[str, list[tuple[int, NodeWithScore]]] = {}
                 for idx, node in enumerate(nodes):
                     node_text = node.node.get_content() or ""
@@ -1126,20 +1169,34 @@ def run_search(
                 reranked_nodes = []
                 seen_indices: set[int] = set()
                 for result in reranked_results:
-                    doc_text = result.get("text", "")
                     score = result.get("score", 0.0)
-                    if doc_text in text_to_indices:
-                        for idx, node in text_to_indices[doc_text]:
-                            if idx not in seen_indices:
-                                reranked_nodes.append(node)
-                                rerank_scores[id(node)] = float(score)
-                                seen_indices.add(idx)
-                                break
+                    result_idx = result.get("id")
+                    # Primary: match by index (flashrank returns original passage index)
+                    if result_idx is not None and 0 <= result_idx < len(nodes):
+                        if result_idx not in seen_indices:
+                            node = nodes[result_idx]
+                            reranked_nodes.append(node)
+                            rerank_scores[id(node)] = float(score)
+                            seen_indices.add(result_idx)
+                    else:
+                        # Fallback: match by text content
+                        doc_text = result.get("text", "")
+                        if doc_text in text_to_indices:
+                            for idx, node in text_to_indices[doc_text]:
+                                if idx not in seen_indices:
+                                    reranked_nodes.append(node)
+                                    rerank_scores[id(node)] = float(score)
+                                    seen_indices.add(idx)
+                                    break
+                # Add remaining unmatched nodes with minimum matched score
+                # This ensures Jira/Confluence results aren't dropped due to text mismatch
+                min_score = min(rerank_scores.values()) if rerank_scores else 0.0
                 for idx, node in enumerate(nodes):
                     if idx not in seen_indices:
                         reranked_nodes.append(node)
+                        rerank_scores[id(node)] = min_score
                 nodes = reranked_nodes[:rerank_limit]
             except Exception as e:

{chunksilo-2.1.1 → chunksilo-2.1.3/src/chunksilo.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chunksilo
-Version: 2.1.1
+Version: 2.1.3
 Summary: Local RAG-based semantic document search with MCP server interface
 Author: Fredrik Reveny
 License-Expression: Apache-2.0

{chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_jira_integration.py RENAMED Viewed

@@ -186,6 +186,50 @@ class TestJiraJqlQuery:
         # Query with only stopwords should produce simple or empty query
         assert jql == "" or "ORDER BY updated DESC" in jql
+    def test_issue_key_detection_single(self, base_config):
+        """Single issue key should be detected and searched by key field."""
+        jql = _prepare_jira_jql_query("ABEI-1660", base_config)
+        assert 'key = "ABEI-1660"' in jql
+        assert "ORDER BY updated DESC" in jql
+    def test_issue_key_detection_lowercase(self, base_config):
+        """Lowercase issue key should be normalized to uppercase."""
+        jql = _prepare_jira_jql_query("abei-1660", base_config)
+        assert 'key = "ABEI-1660"' in jql
+        assert "ORDER BY updated DESC" in jql
+    def test_issue_key_detection_multiple(self, base_config):
+        """Multiple issue keys should be detected."""
+        jql = _prepare_jira_jql_query("ABEI-1660 PROJ-123", base_config)
+        assert 'key = "ABEI-1660"' in jql
+        assert 'key = "PROJ-123"' in jql
+        assert " OR " in jql
+        assert "ORDER BY updated DESC" in jql
+    def test_issue_key_mixed_with_text(self, base_config):
+        """Issue key mixed with text should search both key and text."""
+        jql = _prepare_jira_jql_query("ABEI-1660 authentication", base_config)
+        assert 'key = "ABEI-1660"' in jql
+        assert 'text ~ "authentication"' in jql
+        assert " OR " in jql
+        assert "ORDER BY updated DESC" in jql
+    def test_no_issue_key_detection(self, base_config):
+        """Non-issue-key queries should work as before."""
+        jql = _prepare_jira_jql_query("authentication bug", base_config)
+        assert "key =" not in jql  # No key search
+        assert "text ~" in jql  # Text search only
+        assert "ORDER BY updated DESC" in jql
+    def test_issue_key_with_project_filter(self, base_config):
+        """Issue key search should respect project filter."""
+        base_config["jira"]["projects"] = ["ABEI"]
+        jql = _prepare_jira_jql_query("ABEI-1660", base_config)
+        assert 'key = "ABEI-1660"' in jql
+        assert "project IN" in jql
+        assert "ABEI" in jql
+        assert "ORDER BY updated DESC" in jql
 # ============================================================================
 # ISSUE TO TEXT CONVERSION TESTS