pyproc 0.3__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {pyproc-0.3 → pyproc-0.3.2}/PKG-INFO +4 -4
  2. {pyproc-0.3 → pyproc-0.3.2}/README.md +3 -3
  3. pyproc-0.3.2/plan/SEARCH_INDEX_LIMIT_REMOVAL_AND_PROGRESS.md +87 -0
  4. {pyproc-0.3 → pyproc-0.3.2}/pyproc/__init__.py +1 -1
  5. {pyproc-0.3 → pyproc-0.3.2}/pyproc/lpse.py +6 -3
  6. {pyproc-0.3 → pyproc-0.3.2}/pyproc/mcp/schemas.py +7 -8
  7. {pyproc-0.3 → pyproc-0.3.2}/pyproc/mcp/search_index.py +78 -9
  8. {pyproc-0.3 → pyproc-0.3.2}/pyproc/mcp/tools.py +96 -13
  9. {pyproc-0.3 → pyproc-0.3.2}/pyproject.toml +1 -1
  10. {pyproc-0.3 → pyproc-0.3.2}/tests/test_lpse_unit.py +14 -0
  11. {pyproc-0.3 → pyproc-0.3.2}/tests/test_mcp_schemas.py +28 -1
  12. pyproc-0.3.2/tests/test_mcp_search_index.py +169 -0
  13. pyproc-0.3/tests/test_mcp_search_index.py +0 -69
  14. {pyproc-0.3 → pyproc-0.3.2}/.github/workflows/pyproc-pypi.yml +0 -0
  15. {pyproc-0.3 → pyproc-0.3.2}/.github/workflows/test.yml +0 -0
  16. {pyproc-0.3 → pyproc-0.3.2}/.gitignore +0 -0
  17. {pyproc-0.3 → pyproc-0.3.2}/CHANGELOG.md +0 -0
  18. {pyproc-0.3 → pyproc-0.3.2}/LICENSE +0 -0
  19. {pyproc-0.3 → pyproc-0.3.2}/Makefile +0 -0
  20. {pyproc-0.3 → pyproc-0.3.2}/docs/assets/README_ASSET_PROMPTS.md +0 -0
  21. {pyproc-0.3 → pyproc-0.3.2}/docs/assets/logo.png +0 -0
  22. {pyproc-0.3 → pyproc-0.3.2}/docs/assets/pyproc-mcp-banner.png +0 -0
  23. {pyproc-0.3 → pyproc-0.3.2}/plan/API_AND_CLI_WRAPPER_OPTIMIZATION_PLAN.md +0 -0
  24. {pyproc-0.3 → pyproc-0.3.2}/plan/PYPROC_MCP_REBRAND_AND_IMPLEMENTATION_PLAN.md +0 -0
  25. {pyproc-0.3 → pyproc-0.3.2}/pyproc/cache.py +0 -0
  26. {pyproc-0.3 → pyproc-0.3.2}/pyproc/cli.py +0 -0
  27. {pyproc-0.3 → pyproc-0.3.2}/pyproc/exceptions.py +0 -0
  28. {pyproc-0.3 → pyproc-0.3.2}/pyproc/mcp/__init__.py +0 -0
  29. {pyproc-0.3 → pyproc-0.3.2}/pyproc/mcp/errors.py +0 -0
  30. {pyproc-0.3 → pyproc-0.3.2}/pyproc/mcp/hosts.py +0 -0
  31. {pyproc-0.3 → pyproc-0.3.2}/pyproc/mcp/prompts.py +0 -0
  32. {pyproc-0.3 → pyproc-0.3.2}/pyproc/mcp/resources.py +0 -0
  33. {pyproc-0.3 → pyproc-0.3.2}/pyproc/mcp/server.py +0 -0
  34. {pyproc-0.3 → pyproc-0.3.2}/pyproc/text.py +0 -0
  35. {pyproc-0.3 → pyproc-0.3.2}/pyproc/utils.py +0 -0
  36. {pyproc-0.3 → pyproc-0.3.2}/tests/.gitignore +0 -0
  37. {pyproc-0.3 → pyproc-0.3.2}/tests/__init__.py +0 -0
  38. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/README.md +0 -0
  39. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/darurat_pemenang.html +0 -0
  40. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/darurat_pengumuman.html +0 -0
  41. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/dt_darurat_list.json +0 -0
  42. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/dt_lelang.json +0 -0
  43. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/dt_lelang_data_only.json +0 -0
  44. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/dt_nonspk.json +0 -0
  45. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/dt_swakelola.json +0 -0
  46. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/error_page.html +0 -0
  47. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/hasil_evaluasi.html +0 -0
  48. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/jadwal.html +0 -0
  49. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/lelang_page.html +0 -0
  50. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/nonspk_pemenang_berkontrak.html +0 -0
  51. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/nonspk_pemenang_bug_10941898000.html +0 -0
  52. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/nonspk_pengumuman.html +0 -0
  53. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/not_found_page.html +0 -0
  54. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/pemenang.html +0 -0
  55. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/pengumuman_lelang.html +0 -0
  56. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/peserta.html +0 -0
  57. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/swakelola_pelaksana.html +0 -0
  58. {pyproc-0.3 → pyproc-0.3.2}/tests/fixtures/swakelola_pengumuman.html +0 -0
  59. {pyproc-0.3 → pyproc-0.3.2}/tests/supporting_files/list-host-with-filename.txt +0 -0
  60. {pyproc-0.3 → pyproc-0.3.2}/tests/supporting_files/list-host.txt +0 -0
  61. {pyproc-0.3 → pyproc-0.3.2}/tests/test_cache.py +0 -0
  62. {pyproc-0.3 → pyproc-0.3.2}/tests/test_cli_unit.py +0 -0
  63. {pyproc-0.3 → pyproc-0.3.2}/tests/test_downloader.py +0 -0
  64. {pyproc-0.3 → pyproc-0.3.2}/tests/test_lpse.py +0 -0
  65. {pyproc-0.3 → pyproc-0.3.2}/tests/test_mcp_hosts.py +0 -0
  66. {pyproc-0.3 → pyproc-0.3.2}/tests/test_mcp_tools.py +0 -0
  67. {pyproc-0.3 → pyproc-0.3.2}/tests/test_utils_unit.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyproc
3
- Version: 0.3
3
+ Version: 0.3.2
4
4
  Summary: MCP tools for real-time Indonesian public procurement data from SPSE/Inaproc
5
5
  Project-URL: Homepage, https://github.com/wakataw/pyproc
6
6
  Project-URL: Issues, https://github.com/wakataw/pyproc/issues
@@ -36,7 +36,7 @@ Description-Content-Type: text/markdown
36
36
  # PyProc MCP
37
37
 
38
38
  <p align="center">
39
- <img src="docs/assets/logo.png"
39
+ <img src="https://raw.githubusercontent.com/wakataw/pyproc/master/docs/assets/logo.png"
40
40
  alt="PyProc MCP — Real-time Indonesian procurement data for LLM agents"
41
41
  width="200">
42
42
  </p>
@@ -46,7 +46,7 @@ Description-Content-Type: text/markdown
46
46
  </p>
47
47
 
48
48
  <p align="center">
49
- <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/version-v0.3-blue" alt="PyPI version"></a>
49
+ <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/version-v0.3.2-blue" alt="PyPI version"></a>
50
50
  <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/python-≥3.9-yellow" alt="Python ≥3.9"></a>
51
51
  <a href="https://github.com/wakataw/pyproc/actions/workflows/test.yml?query=branch%3Amaster"><img src="https://github.com/wakataw/pyproc/actions/workflows/test.yml/badge.svg?branch=master" alt="Test Status"></a>
52
52
  <a href="https://github.com/wakataw/pyproc/blob/master/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="License: MIT"></a>
@@ -60,7 +60,7 @@ PyProc MCP turns public **SPSE/Inaproc** procurement data into **MCP tools** tha
60
60
  ## Why PyProc MCP?
61
61
 
62
62
  <p align="center">
63
- <img src="docs/assets/pyproc-mcp-banner.png"
63
+ <img src="https://raw.githubusercontent.com/wakataw/pyproc/master/docs/assets/pyproc-mcp-banner.png"
64
64
  alt="PyProc MCP — Real-time Indonesian procurement data for LLM agents"
65
65
  width="800">
66
66
  </p>
@@ -1,7 +1,7 @@
1
1
  # PyProc MCP
2
2
 
3
3
  <p align="center">
4
- <img src="docs/assets/logo.png"
4
+ <img src="https://raw.githubusercontent.com/wakataw/pyproc/master/docs/assets/logo.png"
5
5
  alt="PyProc MCP — Real-time Indonesian procurement data for LLM agents"
6
6
  width="200">
7
7
  </p>
@@ -11,7 +11,7 @@
11
11
  </p>
12
12
 
13
13
  <p align="center">
14
- <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/version-v0.3-blue" alt="PyPI version"></a>
14
+ <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/version-v0.3.2-blue" alt="PyPI version"></a>
15
15
  <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/python-≥3.9-yellow" alt="Python ≥3.9"></a>
16
16
  <a href="https://github.com/wakataw/pyproc/actions/workflows/test.yml?query=branch%3Amaster"><img src="https://github.com/wakataw/pyproc/actions/workflows/test.yml/badge.svg?branch=master" alt="Test Status"></a>
17
17
  <a href="https://github.com/wakataw/pyproc/blob/master/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="License: MIT"></a>
@@ -25,7 +25,7 @@ PyProc MCP turns public **SPSE/Inaproc** procurement data into **MCP tools** tha
25
25
  ## Why PyProc MCP?
26
26
 
27
27
  <p align="center">
28
- <img src="docs/assets/pyproc-mcp-banner.png"
28
+ <img src="https://raw.githubusercontent.com/wakataw/pyproc/master/docs/assets/pyproc-mcp-banner.png"
29
29
  alt="PyProc MCP — Real-time Indonesian procurement data for LLM agents"
30
30
  width="800">
31
31
  </p>
@@ -0,0 +1,87 @@
1
+ # Plan: Remove max_packages cap, default to all data, paginate at 100/req, add progress
2
+
3
+ ## Context
4
+
5
+ The `create_procurement_search_index` MCP tool has `MAX_INDEX_PACKAGES = 500` capping the **total** number of packages that can be downloaded. Remove this cap so users can download all available data for a host/year. Default behavior becomes "download all available data." Per-request batch size stays fixed at 100 (internal, not exposed). Progress is reported as a running count via stderr logging — no percentage since we don't rely on `recordsTotal`/`recordsFiltered`.
6
+
7
+ ## Parameter design
8
+
9
+ | Parameter | Type | Default | Description |
10
+ |---|---|---|---|
11
+ | `max_packages` | int | `0` | `0` = download all (paginate until exhausted). Positive value caps the total. No upper bound. |
12
+ | Batch size | int (internal) | `100` | Fixed `CHUNK_SIZE`, not a user-facing parameter. Matches CLI `--chunk-size` default. |
13
+
14
+ ## Changes
15
+
16
+ ### 1. `pyproc/mcp/schemas.py`
17
+
18
+ - Delete `MAX_INDEX_PACKAGES = 500` (line 33)
19
+ - Change `DEFAULT_INDEX_MAX_PACKAGES = 100` → `DEFAULT_INDEX_MAX_PACKAGES = 0` (0 = all)
20
+ - Line 410: `max(1, min(max_packages, MAX_INDEX_PACKAGES))` → `max(0, max_packages)` (0 means unlimited)
21
+ - `CREATE_SEARCH_INDEX_SCHEMA` (lines 952-960):
22
+ - Remove `"maximum": MAX_INDEX_PACKAGES`
23
+ - Update description to `"Maximum packages to download. 0 = all available. Default 0."`
24
+
25
+ ### 2. `pyproc/mcp/search_index.py`
26
+
27
+ - Add `import logging` → `logger = logging.getLogger(__name__)`
28
+ - Add `CHUNK_SIZE = 100`
29
+
30
+ Replace the single-request block (lines 127-139) with a pagination loop:
31
+
32
+ ```python
33
+ CHUNK_SIZE = 100
34
+ all_rows = []
35
+ start = 0
36
+ limit = max_packages if max_packages > 0 else float('inf')
37
+
38
+ while len(all_rows) < limit:
39
+ remaining = limit - len(all_rows)
40
+ req_length = int(min(CHUNK_SIZE, remaining))  # min first: int(float('inf')) raises OverflowError
41
+ chunk_kwargs = {**kwargs, "start": start, "length": req_length}
42
+ chunk = search_method(**chunk_kwargs)
43
+ if not chunk:
44
+ break
45
+ all_rows.extend(chunk)
46
+ logger.info("Scrolled: %d rows from %s (TA %s)", len(all_rows), lpse_host, tahun_anggaran)
47
+ start += len(chunk)
48
+ if len(chunk) < req_length:
49
+ break # partial page → end of data
50
+
51
+ to_process = len(all_rows)
52
+ logger.info("Will index %d packages from %s (%s, %s)", to_process, lpse_host, package_type, tahun_anggaran)
53
+ ```
54
+
55
+ Progress logging in the detail loop:
56
+
57
+ ```python
58
+ for idx, row in enumerate(all_rows, start=1):
59
+ title = _package_title(row)
60
+ logger.info("[%d/%d] Indexing: %s", idx, to_process, title)
61
+ # ... existing fetch + index + rate_limit ...
62
+ ```
63
+
64
+ ### 3. `pyproc/mcp/tools.py`
65
+
66
+ - Update tool description string to mention "0 = all" default
67
+ - No code changes needed — progress is handled in `search_index.py`'s logger
68
+
69
+ ### 4. Tests
70
+
71
+ **`tests/test_mcp_schemas.py`** (`TestValidateSearchIndexParams`):
72
+ - Default `max_packages` → 0 (all)
73
+ - `max_packages=5000` → 5000 (not clamped)
74
+ - `max_packages=-1` → 0 (floor at 0)
75
+
76
+ **`tests/test_mcp_search_index.py`** (`TestSearchIndex`):
77
+ - Pagination: mock returns [100, 100, 50] → all 250 collected
78
+ - Stops on empty chunk
79
+ - Stops on partial chunk (< requested length)
80
+ - Respects max_packages limit when positive
81
+
82
+ ## Verification
83
+
84
+ ```bash
85
+ python -m pytest tests/test_mcp_schemas.py::TestValidateSearchIndexParams tests/test_mcp_search_index.py -v
86
+ python -m pytest tests/ --ignore=tests/test_lpse.py --ignore=tests/test_downloader.py -v
87
+ ```
@@ -1,6 +1,6 @@
1
1
  from .lpse import Lpse, JenisPengadaan, TipeSwakelola, By, KontrakStatus
2
2
 
3
- __version__ = '0.3'
3
+ __version__ = '0.3.2'
4
4
  __author__ = 'Agung Pratama'
5
5
  __all__ = [
6
6
  'Lpse',
@@ -254,7 +254,8 @@ class Lpse(object):
254
254
  )
255
255
 
256
256
  def get_paket_non_tender(self, start=0, length=0, data_only=False, kategori=None, search_keyword=None,
257
- order=By.KODE, tahun=None, ascending=False, instansi_id=None):
257
+ order=By.KODE, tahun=None, ascending=False, instansi_id=None,
258
+ rekanan=None, nama_penyedia=None):
258
259
  """
259
260
  Wrapper pencarian paket non tender
260
261
  :param start: index data awal
@@ -262,14 +263,16 @@ class Lpse(object):
262
263
  :param data_only: hanya menampilkan data tanpa menampilkan informasi lain
263
264
  :param kategori: kategori pengadaan (lihat di pypro.kategori)
264
265
  :param search_keyword: keyword pencarian paket pengadaan
265
- :param nama_penyedia: filter berdasarkan nama penyedia
266
266
  :param order: Mengurutkan data berdasarkan kolom
267
267
  :param tahun: Tahun pengadaan
268
268
  :param ascending: Ascending, descending jika diset False
269
269
  :param instansi_id: Filter pencarian berdasarkan instansi atau satker tertentu
270
+ :param rekanan: filter berdasarkan nama penyedia/rekanan
271
+ :param nama_penyedia: alias lama untuk rekanan
270
272
  :return: dictionary dari hasil pencarian paket (atau list jika data_only=True)
271
273
  """
272
- return self.get_paket('pl', start, length, data_only, kategori, search_keyword, None, order, tahun,
274
+ rekanan = rekanan or nama_penyedia
275
+ return self.get_paket('pl', start, length, data_only, kategori, search_keyword, rekanan, order, tahun,
273
276
  ascending, instansi_id)
274
277
 
275
278
  def get_paket_pencatatan_non_tender(self, start=0, length=0, data_only=False, kategori=None,
@@ -29,8 +29,7 @@ VALID_ORDER_BY = {
29
29
  }
30
30
  VALID_ORDER_DIR = {"asc", "desc"}
31
31
  VALID_KONTRAK_STATUS = {0, 1, 2}
32
- DEFAULT_INDEX_MAX_PACKAGES = 100
33
- MAX_INDEX_PACKAGES = 500
32
+ DEFAULT_INDEX_MAX_PACKAGES = 0
34
33
  DEFAULT_INDEX_SEARCH_LIMIT = 20
35
34
  MAX_INDEX_SEARCH_LIMIT = 100
36
35
  MAX_BULK_DETAIL_PACKAGE_IDS = 20
@@ -407,7 +406,7 @@ def validate_search_index_create_params(params: dict) -> dict:
407
406
  "tahun_anggaran": validate_tahun_anggaran(params.get("tahun_anggaran")),
408
407
  "kategori": validate_kategori(params.get("kategori")),
409
408
  "keyword_seed": str(params.get("keyword_seed") or "").strip() or None,
410
- "max_packages": max(1, min(max_packages, MAX_INDEX_PACKAGES)),
409
+ "max_packages": max(0, max_packages),
411
410
  "confirm_download": True,
412
411
  }
413
412
 
@@ -952,17 +951,17 @@ CREATE_SEARCH_INDEX_SCHEMA = {
952
951
  "max_packages": {
953
952
  "type": "integer",
954
953
  "description": (
955
- "Maximum packages to download and index. Default "
956
- f"{DEFAULT_INDEX_MAX_PACKAGES}, maximum {MAX_INDEX_PACKAGES}."
954
+ "Maximum packages to download and index. 0 = all available. "
955
+ f"Default {DEFAULT_INDEX_MAX_PACKAGES}."
957
956
  ),
958
957
  "default": DEFAULT_INDEX_MAX_PACKAGES,
959
- "maximum": MAX_INDEX_PACKAGES,
960
958
  },
961
959
  "confirm_download": {
962
960
  "type": "boolean",
963
961
  "description": (
964
- "Must be true. This confirms the user agreed to download "
965
- "package details into a local disposable full-text index."
962
+ "Must be true. Confirms user consent to download package details "
963
+ "into a local disposable full-text index. May involve many "
964
+ "SPSE requests when max_packages is 0 or large."
966
965
  ),
967
966
  },
968
967
  },
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import json
6
+ import logging
6
7
  import os
7
8
  import shutil
8
9
  import sqlite3
@@ -12,6 +13,10 @@ from uuid import uuid4
12
13
 
13
14
  from pyproc import Lpse, JenisPengadaan
14
15
 
16
+ logger = logging.getLogger(__name__)
17
+
18
+ CHUNK_SIZE = 100
19
+
15
20
  PACKAGE_METHODS = {
16
21
  "tender": ("get_paket_tender", "detil_paket_tender"),
17
22
  "non_tender": ("get_paket_non_tender", "detil_paket_non_tender"),
@@ -97,11 +102,18 @@ def create_procurement_search_index(
97
102
  tahun_anggaran: int | None = None,
98
103
  kategori: str | None = None,
99
104
  keyword_seed: str | None = None,
100
- max_packages: int = 100,
105
+ max_packages: int = 0,
101
106
  timeout: int = 30,
102
107
  rate_limit_callback=None,
108
+ progress_callback=None,
103
109
  ) -> dict:
104
- """Download a bounded package set and index details into local SQLite FTS."""
110
+ """Download up to max_packages package details (all available when max_packages <= 0) and index them into local SQLite FTS.
111
+
112
+ Args:
113
+ progress_callback: Optional callable(step, current, total, message)
114
+ called at progress milestones. step is one of 'scroll',
115
+ 'index_start', 'index_package', 'complete'.
116
+ """
105
117
  index_id = f"{lpse_host}-{package_type}-{int(time.time())}-{uuid4().hex[:8]}"
106
118
  path = _index_path(index_id)
107
119
  metadata = {
@@ -125,22 +137,66 @@ def create_procurement_search_index(
125
137
  if rate_limit_callback:
126
138
  rate_limit_callback()
127
139
  search_method = getattr(lpse, PACKAGE_METHODS[package_type][0])
128
- kwargs = {
129
- "start": 0,
130
- "length": max_packages,
140
+ base_kwargs = {
131
141
  "data_only": True,
132
142
  "search_keyword": keyword_seed,
133
143
  "tahun": tahun_anggaran,
134
144
  }
135
145
  if package_type != "swakelola":
136
- kwargs["kategori"] = kategori_enum
137
- rows = search_method(**kwargs)
138
-
139
- for row in rows[:max_packages]:
146
+ base_kwargs["kategori"] = kategori_enum
147
+
148
+ # Paginate: scroll until max_packages reached or SPSE exhausted
149
+ all_rows = []
150
+ start = 0
151
+ unlimited = max_packages <= 0
152
+ limit = max_packages if not unlimited else float('inf')
153
+
154
+ while len(all_rows) < limit:
155
+ if unlimited:
156
+ req_length = CHUNK_SIZE
157
+ else:
158
+ req_length = min(CHUNK_SIZE, max(1, max_packages - len(all_rows)))
159
+ chunk_kwargs = {**base_kwargs, "start": start, "length": req_length}
160
+ chunk = search_method(**chunk_kwargs)
161
+ if not chunk:
162
+ break
163
+ all_rows.extend(chunk)
164
+ logger.info(
165
+ "Scrolled: %d rows from %s (TA %s)",
166
+ len(all_rows), lpse_host, tahun_anggaran,
167
+ )
168
+ if progress_callback:
169
+ progress_callback(
170
+ "scroll", len(all_rows), None,
171
+ f"Scrolled {len(all_rows)} packages from {lpse_host}",
172
+ )
173
+ start += len(chunk)
174
+ if len(chunk) < req_length:
175
+ break # partial page -> end of data
176
+
177
+ # Respect max_packages cap (only matters if SPSE returned more than requested)
178
+ to_process = len(all_rows) if unlimited else min(len(all_rows), max_packages)
179
+ logger.info(
180
+ "Will index %d packages from %s (%s, %s)",
181
+ to_process, lpse_host, package_type, tahun_anggaran,
182
+ )
183
+ if progress_callback:
184
+ progress_callback(
185
+ "index_start", 0, to_process,
186
+ f"Indexing {to_process} packages from {lpse_host}",
187
+ )
188
+
189
+ for idx, row in enumerate(all_rows[:to_process], start=1):
140
190
  id_paket = _package_id(row)
141
191
  if not id_paket:
142
192
  continue
143
193
  title = _package_title(row)
194
+ logger.info("[%d/%d] Indexing: %s", idx, to_process, title)
195
+ if progress_callback:
196
+ progress_callback(
197
+ "index_package", idx, to_process,
198
+ f"Indexing package {idx}/{to_process}: {title}",
199
+ )
144
200
  try:
145
201
  if rate_limit_callback:
146
202
  rate_limit_callback()
@@ -160,11 +216,24 @@ def create_procurement_search_index(
160
216
  indexed += 1
161
217
  except Exception:
162
218
  failed += 1
219
+ logger.warning(
220
+ "Failed to index package %s (%s)", id_paket, title,
221
+ )
163
222
  continue
164
223
  finally:
165
224
  db.commit()
166
225
  db.close()
167
226
 
227
+ logger.info(
228
+ "Index complete: %d indexed, %d failed from %s (%s, %s)",
229
+ indexed, failed, lpse_host, package_type, tahun_anggaran,
230
+ )
231
+ if progress_callback:
232
+ progress_callback(
233
+ "complete", indexed + failed, indexed + failed,
234
+ f"Index complete: {indexed} indexed, {failed} failed",
235
+ )
236
+
168
237
  return {
169
238
  **metadata,
170
239
  "path": str(path),
@@ -6,8 +6,10 @@ normalizes output, and returns MCP TextContent responses.
6
6
  This module auto-registers all tools with the server on import.
7
7
  """
8
8
 
9
+ import anyio
9
10
  import json
10
11
  import logging
12
+ import queue
11
13
  import time
12
14
 
13
15
  from mcp import types as mcp_types
@@ -42,7 +44,7 @@ from pyproc.mcp.schemas import (
42
44
  LIST_SEARCH_INDEXES_SCHEMA,
43
45
  DELETE_SEARCH_INDEX_SCHEMA,
44
46
  )
45
- from pyproc.mcp.server import register_tool, TIMEOUT, RATE_LIMIT_DELAY
47
+ from pyproc.mcp.server import register_tool, server, TIMEOUT, RATE_LIMIT_DELAY
46
48
  from pyproc.mcp.hosts import (
47
49
  HostMetadataError,
48
50
  search_lpse_hosts,
@@ -651,7 +653,10 @@ async def handle_validate_lpse_host(
651
653
  async def handle_create_procurement_search_index(
652
654
  name: str, arguments: dict
653
655
  ) -> list[mcp_types.TextContent]:
654
- """Create a bounded local SQLite FTS index from public procurement data."""
656
+ """Create a local SQLite FTS index from public procurement data.
657
+
658
+ Sends MCP progress notifications when the client provides a progressToken.
659
+ """
655
660
  try:
656
661
  params = validate_search_index_create_params(arguments)
657
662
  except ValueError as exc:
@@ -665,12 +670,87 @@ async def handle_create_procurement_search_index(
665
670
  )
666
671
  params = dict(params)
667
672
  params.pop("confirm_download", None)
668
- output = create_procurement_search_index(
669
- timeout=TIMEOUT,
670
- rate_limit_callback=_rate_limit,
671
- **params,
672
- )
673
- return _make_json_response(output)
673
+
674
+ # ── check for MCP progress token ──────────────────────────────────────
675
+ progress_token = None
676
+ session = None
677
+ try:
678
+ ctx = server.request_context
679
+ if ctx.meta is not None:
680
+ progress_token = ctx.meta.progressToken
681
+ session = ctx.session
682
+ except LookupError:
683
+ pass # Not inside an MCP request (e.g., direct test call)
684
+
685
+ if progress_token is None or session is None:
686
+ # No progress support — call directly (preserves existing behaviour)
687
+ output = create_procurement_search_index(
688
+ timeout=TIMEOUT,
689
+ rate_limit_callback=_rate_limit,
690
+ **params,
691
+ )
692
+ return _make_json_response(output)
693
+
694
+ # ── progress requested — run indexing in worker thread, send notifications ──
695
+ progress_queue: queue.Queue = queue.Queue(maxsize=100)
696
+ _SENTINEL = object()
697
+
698
+ def _progress_callback(step: str, current: int, total: int | None,
699
+ message: str) -> None:
700
+ """Thread-safe: push progress data to queue (non-blocking)."""
701
+ try:
702
+ progress_queue.put_nowait((step, current, total, message))
703
+ except queue.Full:
704
+ pass # Non-critical; discard if queue is overwhelmed
705
+
706
+ result_container: list[dict] = []
707
+
708
+ async def _run_index() -> None:
709
+ """Run the synchronous indexing function in a worker thread."""
710
+ def _sync_index():
711
+ return create_procurement_search_index(
712
+ timeout=TIMEOUT,
713
+ rate_limit_callback=_rate_limit,
714
+ progress_callback=_progress_callback,
715
+ **params,
716
+ )
717
+ result = await anyio.to_thread.run_sync(_sync_index)
718
+ result_container.append(result)
719
+ progress_queue.put(_SENTINEL)
720
+
721
+ async def _drain_progress() -> None:
722
+ """Drain progress events from the queue and send MCP notifications."""
723
+ sentinel_seen = False
724
+ while not sentinel_seen:
725
+ # Drain all currently queued items without blocking
726
+ while True:
727
+ try:
728
+ item = progress_queue.get_nowait()
729
+ except queue.Empty:
730
+ break
731
+ if item is _SENTINEL:
732
+ sentinel_seen = True
733
+ break
734
+ _step, current, total, message = item
735
+ try:
736
+ await session.send_progress_notification(
737
+ progress_token=progress_token,
738
+ progress=float(current),
739
+ total=float(total) if total is not None else None,
740
+ message=message,
741
+ )
742
+ except Exception:
743
+ logger.debug(
744
+ "Failed to send progress notification", exc_info=True,
745
+ )
746
+ if not sentinel_seen:
747
+ await anyio.sleep(0.5)
748
+
749
+ async with anyio.create_task_group() as tg:
750
+ tg.start_soon(_drain_progress)
751
+ tg.start_soon(_run_index)
752
+
753
+ return _make_json_response(result_container[0])
674
754
 
675
755
 
676
756
  async def handle_search_procurement_index(
@@ -836,11 +916,14 @@ TOOL_DESCRIPTIONS = {
836
916
  "accessible. Useful before using a host with search or detail tools."
837
917
  ),
838
918
  "create_procurement_search_index": (
839
- "Create a bounded local SQLite full-text search index by downloading "
840
- "public package details for one LPSE host. Use only after the user "
841
- "chooses a broader local full-text search. This can make many SPSE "
842
- "requests, so keep max_packages small and prefer year/category/seed "
843
- "filters. Does not call the CLI."
919
+ "Create a local SQLite full-text search index by downloading "
920
+ "public package details for one LPSE host. By default downloads all "
921
+ "available packages (max_packages=0). Set a positive max_packages to "
922
+ "limit scope. Prefer year/category/seed filters to narrow results. "
923
+ "This can make many SPSE requests. "
924
+ "Progress notifications are sent during index creation when the "
925
+ "client supports the notifications/progress protocol. "
926
+ "Does not call the CLI."
844
927
  ),
845
928
  "search_procurement_index": (
846
929
  "Search a previously created local SQLite full-text procurement index. "
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "pyproc"
7
- version = "0.3"
7
+ version = "0.3.2"
8
8
  authors = [
9
9
  { name="Agung Pratama", email="workwithagung@gmail.com"}
10
10
  ]
@@ -250,6 +250,20 @@ class TestGetPaket(unittest.TestCase):
250
250
  'pl', 0, 5, False, None, None, None, By.KODE, None, False, None
251
251
  )
252
252
 
253
+ def test_get_paket_non_tender_accepts_rekanan(self):
254
+ self.lpse.get_paket = MagicMock(return_value={'data': []})
255
+ self.lpse.get_paket_non_tender(start=0, length=5, rekanan='PT A')
256
+ self.lpse.get_paket.assert_called_once_with(
257
+ 'pl', 0, 5, False, None, None, 'PT A', By.KODE, None, False, None
258
+ )
259
+
260
+ def test_get_paket_non_tender_accepts_legacy_nama_penyedia_alias(self):
261
+ self.lpse.get_paket = MagicMock(return_value={'data': []})
262
+ self.lpse.get_paket_non_tender(start=0, length=5, nama_penyedia='PT Legacy')
263
+ self.lpse.get_paket.assert_called_once_with(
264
+ 'pl', 0, 5, False, None, None, 'PT Legacy', By.KODE, None, False, None
265
+ )
266
+
253
267
  def test_get_paket_pencatatan_non_tender_calls_get_paket(self):
254
268
  self.lpse.get_paket = MagicMock(return_value={'data': []})
255
269
  self.lpse.get_paket_pencatatan_non_tender(start=0, length=5, rekanan='PT A')
@@ -329,7 +329,7 @@ class TestValidateSearchIndexParams(unittest.TestCase):
329
329
  })
330
330
  self.assertEqual(result["lpse_host"], "kemenkeu")
331
331
  self.assertEqual(result["package_type"], "tender")
332
- self.assertEqual(result["max_packages"], 100)
332
+ self.assertEqual(result["max_packages"], 0)
333
333
  self.assertTrue(result["confirm_download"])
334
334
 
335
335
  def test_create_index_params_full(self):
@@ -356,6 +356,33 @@ class TestValidateSearchIndexParams(unittest.TestCase):
356
356
  "confirm_download": True,
357
357
  })
358
358
 
359
+ def test_create_index_max_packages_large(self):
360
+ """max_packages above old 500 cap is no longer clamped."""
361
+ result = validate_search_index_create_params({
362
+ "lpse_host": "kemenkeu",
363
+ "max_packages": "5000",
364
+ "confirm_download": True,
365
+ })
366
+ self.assertEqual(result["max_packages"], 5000)
367
+
368
+ def test_create_index_max_packages_zero_means_all(self):
369
+ """max_packages=0 means download all."""
370
+ result = validate_search_index_create_params({
371
+ "lpse_host": "kemenkeu",
372
+ "max_packages": "0",
373
+ "confirm_download": True,
374
+ })
375
+ self.assertEqual(result["max_packages"], 0)
376
+
377
+ def test_create_index_max_packages_negative(self):
378
+ """Negative max_packages is floored to 0."""
379
+ result = validate_search_index_create_params({
380
+ "lpse_host": "kemenkeu",
381
+ "max_packages": "-5",
382
+ "confirm_download": True,
383
+ })
384
+ self.assertEqual(result["max_packages"], 0)
385
+
359
386
 
360
387
  class TestValidateMasterKlpdParams(unittest.TestCase):
361
388
 
@@ -0,0 +1,169 @@
1
+ """Tests for pyproc.mcp.search_index — local SQLite FTS indexes."""
2
+
3
+ import os
4
+ import tempfile
5
+ import unittest
6
+ from unittest.mock import MagicMock, patch
7
+
8
+
9
+ class TestSearchIndex(unittest.TestCase):
10
+
11
+ def test_create_search_and_delete_index(self):
12
+ from pyproc.mcp import search_index
13
+
14
+ with tempfile.TemporaryDirectory() as tmpdir:
15
+ mock_detail = MagicMock()
16
+ mock_detail.get_all_detil.return_value = {"error": False, "error_message": []}
17
+ mock_detail.todict.return_value = {
18
+ "id_paket": "100",
19
+ "pengumuman": {
20
+ "nama_tender": "Pengadaan Laptop",
21
+ "spesifikasi": "Laptop untuk pekerjaan kantor",
22
+ },
23
+ }
24
+
25
+ mock_lpse = MagicMock()
26
+ mock_lpse.get_paket_tender.return_value = [
27
+ ["100", "Pengadaan Laptop", "KEMENTERIAN KEUANGAN"],
28
+ ]
29
+ mock_lpse.detil_paket_tender.return_value = mock_detail
30
+ mock_lpse.__enter__ = MagicMock(return_value=mock_lpse)
31
+ mock_lpse.__exit__ = MagicMock(return_value=False)
32
+
33
+ with patch.dict(os.environ, {"PYPROC_MCP_INDEX_DIR": tmpdir}):
34
+ with patch("pyproc.mcp.search_index.Lpse", return_value=mock_lpse):
35
+ created = search_index.create_procurement_search_index(
36
+ lpse_host="kemenkeu",
37
+ package_type="tender",
38
+ tahun_anggaran=2025,
39
+ keyword_seed="laptop",
40
+ max_packages=1,
41
+ timeout=1,
42
+ )
43
+
44
+ self.assertEqual(created["indexed_packages"], 1)
45
+
46
+ result = search_index.search_procurement_index(
47
+ created["index_id"],
48
+ "laptop",
49
+ )
50
+ self.assertEqual(result["count"], 1)
51
+ self.assertEqual(result["matches"][0]["id_paket"], "100")
52
+
53
+ listed = search_index.list_procurement_indexes()
54
+ self.assertEqual(listed["count"], 1)
55
+
56
+ deleted = search_index.delete_procurement_index(created["index_id"])
57
+ self.assertTrue(deleted["deleted"])
58
+
59
+ def test_search_missing_index(self):
60
+ from pyproc.mcp import search_index
61
+
62
+ with tempfile.TemporaryDirectory() as tmpdir:
63
+ with patch.dict(os.environ, {"PYPROC_MCP_INDEX_DIR": tmpdir}):
64
+ with self.assertRaises(ValueError):
65
+ search_index.search_procurement_index("missing", "laptop")
66
+
67
+ def _make_mock_lpse(self, chunks, detail=None):
68
+ """Build a mock Lpse that returns paginated chunks from get_paket_tender."""
69
+ if detail is None:
70
+ detail = MagicMock()
71
+ detail.get_all_detil.return_value = {"error": False, "error_message": []}
72
+ detail.todict.return_value = {"id_paket": "100"}
73
+
74
+ mock_lpse = MagicMock()
75
+ mock_lpse.get_paket_tender.side_effect = list(chunks)
76
+ mock_lpse.detil_paket_tender.return_value = detail
77
+ mock_lpse.__enter__ = MagicMock(return_value=mock_lpse)
78
+ mock_lpse.__exit__ = MagicMock(return_value=False)
79
+ return mock_lpse
80
+
81
+ def _make_row(self, pkg_id, title="Test Package"):
82
+ return [str(pkg_id), title, "TEST INSTANSI"]
83
+
84
+ def test_pagination_multiple_chunks(self):
85
+ """All rows from multiple chunks are collected."""
86
+ from pyproc.mcp import search_index
87
+
88
+ chunk1 = [self._make_row(i) for i in range(1, 101)] # 100 rows
89
+ chunk2 = [self._make_row(i) for i in range(101, 201)] # 100 rows
90
+ chunk3 = [self._make_row(i) for i in range(201, 251)] # 50 rows (partial)
91
+ mock_lpse = self._make_mock_lpse([chunk1, chunk2, chunk3])
92
+
93
+ with tempfile.TemporaryDirectory() as tmpdir:
94
+ with patch.dict(os.environ, {"PYPROC_MCP_INDEX_DIR": tmpdir}):
95
+ with patch("pyproc.mcp.search_index.Lpse", return_value=mock_lpse):
96
+ created = search_index.create_procurement_search_index(
97
+ lpse_host="kemenkeu",
98
+ max_packages=0,
99
+ timeout=1,
100
+ )
101
+
102
+ self.assertEqual(created["indexed_packages"], 250)
103
+ self.assertEqual(created["failed_packages"], 0)
104
+ # Should have made exactly 3 requests
105
+ self.assertEqual(mock_lpse.get_paket_tender.call_count, 3)
106
+
107
+ def test_pagination_stops_on_empty(self):
108
+ """Pagination stops when SPSE returns an empty chunk after a full page."""
109
+ from pyproc.mcp import search_index
110
+
111
+ chunk1 = [self._make_row(i) for i in range(1, 101)] # 100 rows (full page)
112
+ chunk2 = [] # empty -> stop
113
+ mock_lpse = self._make_mock_lpse([chunk1, chunk2])
114
+
115
+ with tempfile.TemporaryDirectory() as tmpdir:
116
+ with patch.dict(os.environ, {"PYPROC_MCP_INDEX_DIR": tmpdir}):
117
+ with patch("pyproc.mcp.search_index.Lpse", return_value=mock_lpse):
118
+ created = search_index.create_procurement_search_index(
119
+ lpse_host="kemenkeu",
120
+ max_packages=0,
121
+ timeout=1,
122
+ )
123
+
124
+ self.assertEqual(created["indexed_packages"], 100)
125
+ self.assertEqual(mock_lpse.get_paket_tender.call_count, 2)
126
+
127
+ def test_pagination_respects_max_packages(self):
128
+ """max_packages > 0 caps total rows collected."""
129
+ from pyproc.mcp import search_index
130
+
131
+ chunk1 = [self._make_row(i) for i in range(1, 101)] # 100 rows
132
+ mock_lpse = self._make_mock_lpse([chunk1])
133
+
134
+ with tempfile.TemporaryDirectory() as tmpdir:
135
+ with patch.dict(os.environ, {"PYPROC_MCP_INDEX_DIR": tmpdir}):
136
+ with patch("pyproc.mcp.search_index.Lpse", return_value=mock_lpse):
137
+ created = search_index.create_procurement_search_index(
138
+ lpse_host="kemenkeu",
139
+ max_packages=30,
140
+ timeout=1,
141
+ )
142
+
143
+ # Should only index 30 (limited by max_packages)
144
+ self.assertEqual(created["indexed_packages"], 30)
145
+
146
+ def test_pagination_small_max_packages_single_request(self):
147
+ """max_packages <= CHUNK_SIZE makes a single request of that size."""
148
+ from pyproc.mcp import search_index
149
+
150
+ chunk = [self._make_row(i) for i in range(1, 51)]
151
+ mock_lpse = self._make_mock_lpse([chunk])
152
+
153
+ with tempfile.TemporaryDirectory() as tmpdir:
154
+ with patch.dict(os.environ, {"PYPROC_MCP_INDEX_DIR": tmpdir}):
155
+ with patch("pyproc.mcp.search_index.Lpse", return_value=mock_lpse):
156
+ created = search_index.create_procurement_search_index(
157
+ lpse_host="kemenkeu",
158
+ max_packages=50,
159
+ timeout=1,
160
+ )
161
+
162
+ self.assertEqual(created["indexed_packages"], 50)
163
+ # Single request with length=50
164
+ call_kwargs = mock_lpse.get_paket_tender.call_args[1]
165
+ self.assertEqual(call_kwargs["length"], 50)
166
+
167
+
168
+ if __name__ == "__main__":
169
+ unittest.main()
@@ -1,69 +0,0 @@
1
- """Tests for pyproc.mcp.search_index — local SQLite FTS indexes."""
2
-
3
- import os
4
- import tempfile
5
- import unittest
6
- from unittest.mock import MagicMock, patch
7
-
8
-
9
- class TestSearchIndex(unittest.TestCase):
10
-
11
- def test_create_search_and_delete_index(self):
12
- from pyproc.mcp import search_index
13
-
14
- with tempfile.TemporaryDirectory() as tmpdir:
15
- mock_detail = MagicMock()
16
- mock_detail.get_all_detil.return_value = {"error": False, "error_message": []}
17
- mock_detail.todict.return_value = {
18
- "id_paket": "100",
19
- "pengumuman": {
20
- "nama_tender": "Pengadaan Laptop",
21
- "spesifikasi": "Laptop untuk pekerjaan kantor",
22
- },
23
- }
24
-
25
- mock_lpse = MagicMock()
26
- mock_lpse.get_paket_tender.return_value = [
27
- ["100", "Pengadaan Laptop", "KEMENTERIAN KEUANGAN"],
28
- ]
29
- mock_lpse.detil_paket_tender.return_value = mock_detail
30
- mock_lpse.__enter__ = MagicMock(return_value=mock_lpse)
31
- mock_lpse.__exit__ = MagicMock(return_value=False)
32
-
33
- with patch.dict(os.environ, {"PYPROC_MCP_INDEX_DIR": tmpdir}):
34
- with patch("pyproc.mcp.search_index.Lpse", return_value=mock_lpse):
35
- created = search_index.create_procurement_search_index(
36
- lpse_host="kemenkeu",
37
- package_type="tender",
38
- tahun_anggaran=2025,
39
- keyword_seed="laptop",
40
- max_packages=1,
41
- timeout=1,
42
- )
43
-
44
- self.assertEqual(created["indexed_packages"], 1)
45
-
46
- result = search_index.search_procurement_index(
47
- created["index_id"],
48
- "laptop",
49
- )
50
- self.assertEqual(result["count"], 1)
51
- self.assertEqual(result["matches"][0]["id_paket"], "100")
52
-
53
- listed = search_index.list_procurement_indexes()
54
- self.assertEqual(listed["count"], 1)
55
-
56
- deleted = search_index.delete_procurement_index(created["index_id"])
57
- self.assertTrue(deleted["deleted"])
58
-
59
- def test_search_missing_index(self):
60
- from pyproc.mcp import search_index
61
-
62
- with tempfile.TemporaryDirectory() as tmpdir:
63
- with patch.dict(os.environ, {"PYPROC_MCP_INDEX_DIR": tmpdir}):
64
- with self.assertRaises(ValueError):
65
- search_index.search_procurement_index("missing", "laptop")
66
-
67
-
68
- if __name__ == "__main__":
69
- unittest.main()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes