chunksilo 2.1.1__tar.gz → 2.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chunksilo might be problematic.

Files changed (31)
  1. {chunksilo-2.1.1/src/chunksilo.egg-info → chunksilo-2.1.3}/PKG-INFO +1 -1
  2. {chunksilo-2.1.1 → chunksilo-2.1.3}/pyproject.toml +1 -1
  3. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo/__init__.py +1 -1
  4. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo/search.py +92 -35
  5. {chunksilo-2.1.1 → chunksilo-2.1.3/src/chunksilo.egg-info}/PKG-INFO +1 -1
  6. {chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_jira_integration.py +44 -0
  7. {chunksilo-2.1.1 → chunksilo-2.1.3}/LICENSE +0 -0
  8. {chunksilo-2.1.1 → chunksilo-2.1.3}/NOTICE +0 -0
  9. {chunksilo-2.1.1 → chunksilo-2.1.3}/README.md +0 -0
  10. {chunksilo-2.1.1 → chunksilo-2.1.3}/requirements.txt +0 -0
  11. {chunksilo-2.1.1 → chunksilo-2.1.3}/setup.cfg +0 -0
  12. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo/__main__.py +0 -0
  13. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo/cfgload.py +0 -0
  14. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo/cli.py +0 -0
  15. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo/confluence_html_formatter.py +0 -0
  16. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo/index.py +0 -0
  17. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo/server.py +0 -0
  18. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo.egg-info/SOURCES.txt +0 -0
  19. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo.egg-info/dependency_links.txt +0 -0
  20. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo.egg-info/entry_points.txt +0 -0
  21. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo.egg-info/requires.txt +0 -0
  22. {chunksilo-2.1.1 → chunksilo-2.1.3}/src/chunksilo.egg-info/top_level.txt +0 -0
  23. {chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_chunk_location.py +0 -0
  24. {chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_confluence_html_formatter.py +0 -0
  25. {chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_error_handling.py +0 -0
  26. {chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_heading_path_integration.py +0 -0
  27. {chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_incremental_ingest.py +0 -0
  28. {chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_rag_metrics.py +0 -0
  29. {chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_retrieval_only.py +0 -0
  30. {chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_system.py +0 -0
  31. {chunksilo-2.1.1 → chunksilo-2.1.3}/test/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chunksilo
3
- Version: 2.1.1
3
+ Version: 2.1.3
4
4
  Summary: Local RAG-based semantic document search with MCP server interface
5
5
  Author: Fredrik Reveny
6
6
  License-Expression: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunksilo"
7
- version = "2.1.1"
7
+ version = "2.1.3"
8
8
  description = "Local RAG-based semantic document search with MCP server interface"
9
9
  license = "Apache-2.0"
10
10
  requires-python = ">=3.11"
@@ -1,4 +1,4 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  """ChunkSilo - Local RAG-based semantic document search."""
3
3
 
4
- __version__ = "2.1.1"
4
+ __version__ = "2.1.3"
@@ -389,8 +389,8 @@ def _prepare_jira_jql_query(query: str, config: dict[str, Any]) -> str:
389
389
  """Construct a JQL query from user search terms and configuration.
390
390
 
391
391
  Uses Jira's 'text' field which searches across Summary, Description,
392
- Environment, Comments, and all text custom fields. This provides broad
393
- coverage similar to natural language search.
392
+ Environment, Comments, and all text custom fields. Additionally detects
393
+ Jira issue keys (e.g., "ABEI-1660") and includes exact key searches.
394
394
 
395
395
  Note: Fuzzy search operators (~) are deprecated in Jira Cloud but work
396
396
  in Data Center/Server. ChunkSilo's semantic search (embeddings + reranker)
@@ -414,20 +414,35 @@ def _prepare_jira_jql_query(query: str, config: dict[str, Any]) -> str:
414
414
  References:
415
415
  - Jira text field: https://support.atlassian.com/jira-software-cloud/docs/search-for-work-items-using-the-text-field/
416
416
  - JQL operators: https://support.atlassian.com/jira-software-cloud/docs/jql-operators/
417
+ - JQL key field: https://support.atlassian.com/jira-software-cloud/docs/search-by-issue-key/
417
418
  """
419
+ # Detect Jira issue keys in the query (e.g., "ABEI-1660", "PROJ-123")
420
+ # Pattern matches: 1+ uppercase letters/digits, hyphen, 1+ digits
421
+ # Case-insensitive matching, but preserve original case for extraction
422
+ issue_key_pattern = r'\b([A-Z][A-Z0-9]+-\d+)\b'
423
+ detected_keys = re.findall(issue_key_pattern, query, re.IGNORECASE)
424
+
425
+ # Build key search clauses for exact issue key matches
426
+ key_clauses = []
427
+ if detected_keys:
428
+ # Normalize to uppercase (Jira keys are case-insensitive)
429
+ unique_keys = list(dict.fromkeys(k.upper() for k in detected_keys))
430
+ key_clauses = [f'key = "{key}"' for key in unique_keys]
431
+ logger.debug(f"Detected Jira issue keys in query: {unique_keys}")
432
+
418
433
  # Reuse Confluence query term preparation for stopword filtering
419
434
  # This gives us a clean list of meaningful search terms
420
435
  query_terms = _prepare_confluence_query_terms(query)
421
436
 
422
437
  # Build the text search clause
423
438
  # Using JQL 'text' field which searches across all text fields for broad recall
439
+ text_clause = ""
424
440
  if not query_terms:
425
441
  # No meaningful terms after filtering, use original query
426
442
  escaped = query.strip().replace('"', '\\"')
427
- if not escaped:
428
- logger.warning("Jira search skipped: empty query after processing")
429
- return ""
430
- text_clause = f'text ~ "{escaped}"'
443
+ if escaped and not detected_keys:
444
+ # Only add text clause if we don't have issue keys
445
+ text_clause = f'text ~ "{escaped}"'
431
446
  elif len(query_terms) == 1:
432
447
  # Single term - simple text search
433
448
  text_clause = f'text ~ "{query_terms[0]}"'
@@ -437,6 +452,21 @@ def _prepare_jira_jql_query(query: str, config: dict[str, Any]) -> str:
437
452
  text_conditions = ' OR '.join([f'text ~ "{term}"' for term in query_terms])
438
453
  text_clause = f'({text_conditions})'
439
454
 
455
+ # Combine key and text searches
456
+ if key_clauses and text_clause:
457
+ # Search both by key and text content
458
+ combined_clause = f'({" OR ".join(key_clauses)} OR {text_clause})'
459
+ elif key_clauses:
460
+ # Only key searches
461
+ combined_clause = " OR ".join(key_clauses)
462
+ elif text_clause:
463
+ # Only text search
464
+ combined_clause = text_clause
465
+ else:
466
+ # No valid search terms
467
+ logger.warning("Jira search skipped: empty query after processing")
468
+ return ""
469
+
440
470
  # Add project filter if configured
441
471
  # Empty projects list means search all accessible projects
442
472
  projects = config["jira"].get("projects", [])
@@ -444,9 +474,9 @@ def _prepare_jira_jql_query(query: str, config: dict[str, Any]) -> str:
444
474
  # Restrict search to specific project keys
445
475
  project_list = ", ".join([f'"{p}"' for p in projects])
446
476
  project_clause = f'project IN ({project_list})'
447
- jql = f'{text_clause} AND {project_clause}'
477
+ jql = f'{combined_clause} AND {project_clause}'
448
478
  else:
449
- jql = text_clause
479
+ jql = combined_clause
450
480
 
451
481
  # Order by updated DESC for recency
452
482
  # This enables ChunkSilo's recency boost feature and returns most relevant recent issues first
@@ -738,8 +768,8 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
738
768
 
739
769
  Configuration Requirements:
740
770
  config["jira"]["url"]: Jira base URL (empty = disabled)
741
- config["jira"]["username"]: Jira username or email
742
- config["jira"]["api_token"]: Jira API token (not password)
771
+ config["jira"]["username"]: Jira username/email (required for Cloud, optional for Server PAT)
772
+ config["jira"]["api_token"]: API token (Cloud) or Personal Access Token (Server/Data Center)
743
773
  config["jira"]["max_results"]: Maximum issues to return
744
774
  config["jira"]["projects"]: List of project keys (empty = all)
745
775
  config["jira"]["include_comments"]: Include issue comments
@@ -763,8 +793,12 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
763
793
  - Automatically configured through jira_options["verify"]
764
794
 
765
795
  Authentication:
766
- - Uses basic auth (username + API token)
767
- - Works for both Jira Cloud and Data Center/Server
796
+ - Jira Cloud: Set both username (email) and api_token (API token)
797
+ Uses basic auth internally
798
+ - Jira Server/Data Center with PAT: Set only api_token (Personal Access Token)
799
+ Leave username empty; uses bearer token auth internally
800
+ - Jira Server/Data Center with password: Set username and api_token (password)
801
+ Uses basic auth internally (if basic auth enabled on server)
768
802
 
769
803
  References:
770
804
  - Jira REST API: https://developer.atlassian.com/cloud/jira/platform/rest/v3/
@@ -790,13 +824,10 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
790
824
  ca_bundle_path = config["ssl"]["ca_bundle_path"] or None
791
825
 
792
826
  # Validate required credentials are present
793
- if not (base_url and username and api_token):
794
- missing = []
795
- if not username:
796
- missing.append("jira.username")
797
- if not api_token:
798
- missing.append("jira.api_token")
799
- logger.warning(f"Jira search skipped: missing {', '.join(missing)} in config")
827
+ # For Jira Cloud: both username and api_token required (basic auth)
828
+ # For Jira Server/Data Center with PAT: only api_token required (token auth)
829
+ if not api_token:
830
+ logger.warning("Jira search skipped: missing jira.api_token in config")
800
831
  return []
801
832
 
802
833
  try:
@@ -806,12 +837,23 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
806
837
  if ca_bundle_path:
807
838
  jira_options["verify"] = ca_bundle_path
808
839
 
809
- # Use basic auth (username + API token) for authentication
810
- # Works for both Jira Cloud and Data Center/Server
811
- jira_client = JIRA(
812
- options=jira_options,
813
- basic_auth=(username, api_token)
814
- )
840
+ # Choose authentication method based on credentials provided:
841
+ # - Username + API token: Use basic auth (Jira Cloud, or Server with password)
842
+ # - API token only: Use token auth (Jira Server/Data Center with PAT)
843
+ if username:
844
+ # Basic auth for Jira Cloud (username + API token)
845
+ # Also works for Jira Server with username + password
846
+ jira_client = JIRA(
847
+ options=jira_options,
848
+ basic_auth=(username, api_token)
849
+ )
850
+ else:
851
+ # Token auth for Jira Server/Data Center Personal Access Tokens (PAT)
852
+ # PATs are used alone without a username
853
+ jira_client = JIRA(
854
+ options=jira_options,
855
+ token_auth=api_token
856
+ )
815
857
 
816
858
  # Construct JQL with text search and project filtering
817
859
  jql = _prepare_jira_jql_query(query, config)
@@ -1116,6 +1158,7 @@ def run_search(
1116
1158
  rerank_request = RerankRequest(query=enhanced_query, passages=passages)
1117
1159
  reranked_results = reranker.rerank(rerank_request)
1118
1160
 
1161
+ # Build text-to-node mapping for fallback text matching
1119
1162
  text_to_indices: dict[str, list[tuple[int, NodeWithScore]]] = {}
1120
1163
  for idx, node in enumerate(nodes):
1121
1164
  node_text = node.node.get_content() or ""
@@ -1126,20 +1169,34 @@ def run_search(
1126
1169
  reranked_nodes = []
1127
1170
  seen_indices: set[int] = set()
1128
1171
  for result in reranked_results:
1129
- doc_text = result.get("text", "")
1130
1172
  score = result.get("score", 0.0)
1131
-
1132
- if doc_text in text_to_indices:
1133
- for idx, node in text_to_indices[doc_text]:
1134
- if idx not in seen_indices:
1135
- reranked_nodes.append(node)
1136
- rerank_scores[id(node)] = float(score)
1137
- seen_indices.add(idx)
1138
- break
1139
-
1173
+ result_idx = result.get("id")
1174
+
1175
+ # Primary: match by index (flashrank returns original passage index)
1176
+ if result_idx is not None and 0 <= result_idx < len(nodes):
1177
+ if result_idx not in seen_indices:
1178
+ node = nodes[result_idx]
1179
+ reranked_nodes.append(node)
1180
+ rerank_scores[id(node)] = float(score)
1181
+ seen_indices.add(result_idx)
1182
+ else:
1183
+ # Fallback: match by text content
1184
+ doc_text = result.get("text", "")
1185
+ if doc_text in text_to_indices:
1186
+ for idx, node in text_to_indices[doc_text]:
1187
+ if idx not in seen_indices:
1188
+ reranked_nodes.append(node)
1189
+ rerank_scores[id(node)] = float(score)
1190
+ seen_indices.add(idx)
1191
+ break
1192
+
1193
+ # Add remaining unmatched nodes with minimum matched score
1194
+ # This ensures Jira/Confluence results aren't dropped due to text mismatch
1195
+ min_score = min(rerank_scores.values()) if rerank_scores else 0.0
1140
1196
  for idx, node in enumerate(nodes):
1141
1197
  if idx not in seen_indices:
1142
1198
  reranked_nodes.append(node)
1199
+ rerank_scores[id(node)] = min_score
1143
1200
 
1144
1201
  nodes = reranked_nodes[:rerank_limit]
1145
1202
  except Exception as e:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chunksilo
3
- Version: 2.1.1
3
+ Version: 2.1.3
4
4
  Summary: Local RAG-based semantic document search with MCP server interface
5
5
  Author: Fredrik Reveny
6
6
  License-Expression: Apache-2.0
@@ -186,6 +186,50 @@ class TestJiraJqlQuery:
186
186
  # Query with only stopwords should produce simple or empty query
187
187
  assert jql == "" or "ORDER BY updated DESC" in jql
188
188
 
189
+ def test_issue_key_detection_single(self, base_config):
190
+ """Single issue key should be detected and searched by key field."""
191
+ jql = _prepare_jira_jql_query("ABEI-1660", base_config)
192
+ assert 'key = "ABEI-1660"' in jql
193
+ assert "ORDER BY updated DESC" in jql
194
+
195
+ def test_issue_key_detection_lowercase(self, base_config):
196
+ """Lowercase issue key should be normalized to uppercase."""
197
+ jql = _prepare_jira_jql_query("abei-1660", base_config)
198
+ assert 'key = "ABEI-1660"' in jql
199
+ assert "ORDER BY updated DESC" in jql
200
+
201
+ def test_issue_key_detection_multiple(self, base_config):
202
+ """Multiple issue keys should be detected."""
203
+ jql = _prepare_jira_jql_query("ABEI-1660 PROJ-123", base_config)
204
+ assert 'key = "ABEI-1660"' in jql
205
+ assert 'key = "PROJ-123"' in jql
206
+ assert " OR " in jql
207
+ assert "ORDER BY updated DESC" in jql
208
+
209
+ def test_issue_key_mixed_with_text(self, base_config):
210
+ """Issue key mixed with text should search both key and text."""
211
+ jql = _prepare_jira_jql_query("ABEI-1660 authentication", base_config)
212
+ assert 'key = "ABEI-1660"' in jql
213
+ assert 'text ~ "authentication"' in jql
214
+ assert " OR " in jql
215
+ assert "ORDER BY updated DESC" in jql
216
+
217
+ def test_no_issue_key_detection(self, base_config):
218
+ """Non-issue-key queries should work as before."""
219
+ jql = _prepare_jira_jql_query("authentication bug", base_config)
220
+ assert "key =" not in jql # No key search
221
+ assert "text ~" in jql # Text search only
222
+ assert "ORDER BY updated DESC" in jql
223
+
224
+ def test_issue_key_with_project_filter(self, base_config):
225
+ """Issue key search should respect project filter."""
226
+ base_config["jira"]["projects"] = ["ABEI"]
227
+ jql = _prepare_jira_jql_query("ABEI-1660", base_config)
228
+ assert 'key = "ABEI-1660"' in jql
229
+ assert "project IN" in jql
230
+ assert "ABEI" in jql
231
+ assert "ORDER BY updated DESC" in jql
232
+
189
233
 
190
234
  # ============================================================================
191
235
  # ISSUE TO TEXT CONVERSION TESTS
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes