chunksilo 2.1.0__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chunksilo might be problematic. Click here for more details.

Files changed (31)
  1. {chunksilo-2.1.0/src/chunksilo.egg-info → chunksilo-2.1.2}/PKG-INFO +2 -3
  2. {chunksilo-2.1.0 → chunksilo-2.1.2}/README.md +1 -1
  3. {chunksilo-2.1.0 → chunksilo-2.1.2}/pyproject.toml +1 -1
  4. {chunksilo-2.1.0 → chunksilo-2.1.2}/requirements.txt +0 -1
  5. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/__init__.py +1 -1
  6. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/search.py +54 -27
  7. {chunksilo-2.1.0 → chunksilo-2.1.2/src/chunksilo.egg-info}/PKG-INFO +2 -3
  8. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo.egg-info/requires.txt +0 -1
  9. {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_rag_metrics.py +0 -3
  10. {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_retrieval_only.py +0 -3
  11. {chunksilo-2.1.0 → chunksilo-2.1.2}/LICENSE +0 -0
  12. {chunksilo-2.1.0 → chunksilo-2.1.2}/NOTICE +0 -0
  13. {chunksilo-2.1.0 → chunksilo-2.1.2}/setup.cfg +0 -0
  14. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/__main__.py +0 -0
  15. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/cfgload.py +0 -0
  16. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/cli.py +0 -0
  17. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/confluence_html_formatter.py +0 -0
  18. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/index.py +0 -0
  19. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo/server.py +0 -0
  20. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo.egg-info/SOURCES.txt +0 -0
  21. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo.egg-info/dependency_links.txt +0 -0
  22. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo.egg-info/entry_points.txt +0 -0
  23. {chunksilo-2.1.0 → chunksilo-2.1.2}/src/chunksilo.egg-info/top_level.txt +0 -0
  24. {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_chunk_location.py +0 -0
  25. {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_confluence_html_formatter.py +0 -0
  26. {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_error_handling.py +0 -0
  27. {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_heading_path_integration.py +0 -0
  28. {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_incremental_ingest.py +0 -0
  29. {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_jira_integration.py +0 -0
  30. {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_system.py +0 -0
  31. {chunksilo-2.1.0 → chunksilo-2.1.2}/test/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chunksilo
3
- Version: 2.1.0
3
+ Version: 2.1.2
4
4
  Summary: Local RAG-based semantic document search with MCP server interface
5
5
  Author: Fredrik Reveny
6
6
  License-Expression: Apache-2.0
@@ -26,7 +26,6 @@ Requires-Dist: pillow<11,>=10.3.0
26
26
  Requires-Dist: pypdf<7,>=5.1.0
27
27
  Requires-Dist: python-docx<2,>=1.1.0
28
28
  Requires-Dist: mcp<2,>=1.0.0
29
- Requires-Dist: python-dotenv<2,>=1.0.0
30
29
  Requires-Dist: huggingface-hub<2,>=0.22.0
31
30
  Requires-Dist: flashrank<1,>=0.1.0
32
31
  Requires-Dist: fastembed<1,>=0.5.0
@@ -41,7 +40,7 @@ Requires-Dist: requests<3,>=2.31.0; extra == "test"
41
40
  Dynamic: license-file
42
41
 
43
42
  <p align="center">
44
- <img src="chunksilo.png" alt="ChunkSilo Logo" width="500">
43
+ <img src="https://raw.githubusercontent.com/Chetic/chunksilo/main/chunksilo.png" alt="ChunkSilo Logo" width="500">
45
44
  </p>
46
45
 
47
46
  # ChunkSilo MCP Server
@@ -1,5 +1,5 @@
1
1
  <p align="center">
2
- <img src="chunksilo.png" alt="ChunkSilo Logo" width="500">
2
+ <img src="https://raw.githubusercontent.com/Chetic/chunksilo/main/chunksilo.png" alt="ChunkSilo Logo" width="500">
3
3
  </p>
4
4
 
5
5
  # ChunkSilo MCP Server
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "chunksilo"
7
- version = "2.1.0"
7
+ version = "2.1.2"
8
8
  description = "Local RAG-based semantic document search with MCP server interface"
9
9
  license = "Apache-2.0"
10
10
  requires-python = ">=3.11"
@@ -6,7 +6,6 @@ pillow>=10.3.0,<11
6
6
  pypdf>=5.1.0,<7
7
7
  python-docx>=1.1.0,<2
8
8
  mcp>=1.0.0,<2
9
- python-dotenv>=1.0.0,<2
10
9
  huggingface-hub>=0.22.0,<2
11
10
  flashrank>=0.1.0,<1
12
11
  fastembed>=0.5.0,<1
@@ -1,4 +1,4 @@
1
1
  # SPDX-License-Identifier: Apache-2.0
2
2
  """ChunkSilo - Local RAG-based semantic document search."""
3
3
 
4
- __version__ = "2.1.0"
4
+ __version__ = "2.1.2"
@@ -738,8 +738,8 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
738
738
 
739
739
  Configuration Requirements:
740
740
  config["jira"]["url"]: Jira base URL (empty = disabled)
741
- config["jira"]["username"]: Jira username or email
742
- config["jira"]["api_token"]: Jira API token (not password)
741
+ config["jira"]["username"]: Jira username/email (required for Cloud, optional for Server PAT)
742
+ config["jira"]["api_token"]: API token (Cloud) or Personal Access Token (Server/Data Center)
743
743
  config["jira"]["max_results"]: Maximum issues to return
744
744
  config["jira"]["projects"]: List of project keys (empty = all)
745
745
  config["jira"]["include_comments"]: Include issue comments
@@ -763,8 +763,12 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
763
763
  - Automatically configured through jira_options["verify"]
764
764
 
765
765
  Authentication:
766
- - Uses basic auth (username + API token)
767
- - Works for both Jira Cloud and Data Center/Server
766
+ - Jira Cloud: Set both username (email) and api_token (API token)
767
+ Uses basic auth internally
768
+ - Jira Server/Data Center with PAT: Set only api_token (Personal Access Token)
769
+ Leave username empty; uses bearer token auth internally
770
+ - Jira Server/Data Center with password: Set username and api_token (password)
771
+ Uses basic auth internally (if basic auth enabled on server)
768
772
 
769
773
  References:
770
774
  - Jira REST API: https://developer.atlassian.com/cloud/jira/platform/rest/v3/
@@ -790,13 +794,10 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
790
794
  ca_bundle_path = config["ssl"]["ca_bundle_path"] or None
791
795
 
792
796
  # Validate required credentials are present
793
- if not (base_url and username and api_token):
794
- missing = []
795
- if not username:
796
- missing.append("jira.username")
797
- if not api_token:
798
- missing.append("jira.api_token")
799
- logger.warning(f"Jira search skipped: missing {', '.join(missing)} in config")
797
+ # For Jira Cloud: both username and api_token required (basic auth)
798
+ # For Jira Server/Data Center with PAT: only api_token required (token auth)
799
+ if not api_token:
800
+ logger.warning("Jira search skipped: missing jira.api_token in config")
800
801
  return []
801
802
 
802
803
  try:
@@ -806,12 +807,23 @@ def _search_jira(query: str, config: dict[str, Any]) -> list[NodeWithScore]:
806
807
  if ca_bundle_path:
807
808
  jira_options["verify"] = ca_bundle_path
808
809
 
809
- # Use basic auth (username + API token) for authentication
810
- # Works for both Jira Cloud and Data Center/Server
811
- jira_client = JIRA(
812
- options=jira_options,
813
- basic_auth=(username, api_token)
814
- )
810
+ # Choose authentication method based on credentials provided:
811
+ # - Username + API token: Use basic auth (Jira Cloud, or Server with password)
812
+ # - API token only: Use token auth (Jira Server/Data Center with PAT)
813
+ if username:
814
+ # Basic auth for Jira Cloud (username + API token)
815
+ # Also works for Jira Server with username + password
816
+ jira_client = JIRA(
817
+ options=jira_options,
818
+ basic_auth=(username, api_token)
819
+ )
820
+ else:
821
+ # Token auth for Jira Server/Data Center Personal Access Tokens (PAT)
822
+ # PATs are used alone without a username
823
+ jira_client = JIRA(
824
+ options=jira_options,
825
+ token_auth=api_token
826
+ )
815
827
 
816
828
  # Construct JQL with text search and project filtering
817
829
  jql = _prepare_jira_jql_query(query, config)
@@ -1116,6 +1128,7 @@ def run_search(
1116
1128
  rerank_request = RerankRequest(query=enhanced_query, passages=passages)
1117
1129
  reranked_results = reranker.rerank(rerank_request)
1118
1130
 
1131
+ # Build text-to-node mapping for fallback text matching
1119
1132
  text_to_indices: dict[str, list[tuple[int, NodeWithScore]]] = {}
1120
1133
  for idx, node in enumerate(nodes):
1121
1134
  node_text = node.node.get_content() or ""
@@ -1126,20 +1139,34 @@ def run_search(
1126
1139
  reranked_nodes = []
1127
1140
  seen_indices: set[int] = set()
1128
1141
  for result in reranked_results:
1129
- doc_text = result.get("text", "")
1130
1142
  score = result.get("score", 0.0)
1131
-
1132
- if doc_text in text_to_indices:
1133
- for idx, node in text_to_indices[doc_text]:
1134
- if idx not in seen_indices:
1135
- reranked_nodes.append(node)
1136
- rerank_scores[id(node)] = float(score)
1137
- seen_indices.add(idx)
1138
- break
1139
-
1143
+ result_idx = result.get("id")
1144
+
1145
+ # Primary: match by index (flashrank returns original passage index)
1146
+ if result_idx is not None and 0 <= result_idx < len(nodes):
1147
+ if result_idx not in seen_indices:
1148
+ node = nodes[result_idx]
1149
+ reranked_nodes.append(node)
1150
+ rerank_scores[id(node)] = float(score)
1151
+ seen_indices.add(result_idx)
1152
+ else:
1153
+ # Fallback: match by text content
1154
+ doc_text = result.get("text", "")
1155
+ if doc_text in text_to_indices:
1156
+ for idx, node in text_to_indices[doc_text]:
1157
+ if idx not in seen_indices:
1158
+ reranked_nodes.append(node)
1159
+ rerank_scores[id(node)] = float(score)
1160
+ seen_indices.add(idx)
1161
+ break
1162
+
1163
+ # Add remaining unmatched nodes with minimum matched score
1164
+ # This ensures Jira/Confluence results aren't dropped due to text mismatch
1165
+ min_score = min(rerank_scores.values()) if rerank_scores else 0.0
1140
1166
  for idx, node in enumerate(nodes):
1141
1167
  if idx not in seen_indices:
1142
1168
  reranked_nodes.append(node)
1169
+ rerank_scores[id(node)] = min_score
1143
1170
 
1144
1171
  nodes = reranked_nodes[:rerank_limit]
1145
1172
  except Exception as e:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chunksilo
3
- Version: 2.1.0
3
+ Version: 2.1.2
4
4
  Summary: Local RAG-based semantic document search with MCP server interface
5
5
  Author: Fredrik Reveny
6
6
  License-Expression: Apache-2.0
@@ -26,7 +26,6 @@ Requires-Dist: pillow<11,>=10.3.0
26
26
  Requires-Dist: pypdf<7,>=5.1.0
27
27
  Requires-Dist: python-docx<2,>=1.1.0
28
28
  Requires-Dist: mcp<2,>=1.0.0
29
- Requires-Dist: python-dotenv<2,>=1.0.0
30
29
  Requires-Dist: huggingface-hub<2,>=0.22.0
31
30
  Requires-Dist: flashrank<1,>=0.1.0
32
31
  Requires-Dist: fastembed<1,>=0.5.0
@@ -41,7 +40,7 @@ Requires-Dist: requests<3,>=2.31.0; extra == "test"
41
40
  Dynamic: license-file
42
41
 
43
42
  <p align="center">
44
- <img src="chunksilo.png" alt="ChunkSilo Logo" width="500">
43
+ <img src="https://raw.githubusercontent.com/Chetic/chunksilo/main/chunksilo.png" alt="ChunkSilo Logo" width="500">
45
44
  </p>
46
45
 
47
46
  # ChunkSilo MCP Server
@@ -6,7 +6,6 @@ pillow<11,>=10.3.0
6
6
  pypdf<7,>=5.1.0
7
7
  python-docx<2,>=1.1.0
8
8
  mcp<2,>=1.0.0
9
- python-dotenv<2,>=1.0.0
10
9
  huggingface-hub<2,>=0.22.0
11
10
  flashrank<1,>=0.1.0
12
11
  fastembed<1,>=0.5.0
@@ -21,9 +21,6 @@ from typing import Any, Dict, List, Optional, Set, Tuple
21
21
  from urllib.parse import urlparse
22
22
 
23
23
  import requests
24
- from dotenv import load_dotenv
25
-
26
- load_dotenv()
27
24
 
28
25
  # Set up logging
29
26
  logging.basicConfig(
@@ -2,7 +2,6 @@
2
2
  """Test the RAG system in retrieval-only mode (no LLM in the MCP server)."""
3
3
  import traceback
4
4
  from pathlib import Path
5
- from dotenv import load_dotenv
6
5
  import pytest
7
6
 
8
7
  from chunksilo.index import load_index_config, build_index
@@ -11,8 +10,6 @@ from chunksilo.cfgload import load_config
11
10
 
12
11
  STORAGE_DIR = Path(load_config()["storage"]["storage_dir"])
13
12
 
14
- load_dotenv()
15
-
16
13
 
17
14
  def test_ingestion():
18
15
  """Test the ingestion pipeline."""
File without changes
File without changes
File without changes
File without changes
File without changes