pyproc 0.3.1__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. {pyproc-0.3.1 → pyproc-0.3.3}/.gitignore +1 -1
  2. {pyproc-0.3.1 → pyproc-0.3.3}/CHANGELOG.md +53 -0
  3. {pyproc-0.3.1 → pyproc-0.3.3}/PKG-INFO +105 -20
  4. {pyproc-0.3.1 → pyproc-0.3.3}/README.md +104 -19
  5. pyproc-0.3.3/plan/SEARCH_INDEX_LIMIT_REMOVAL_AND_PROGRESS.md +87 -0
  6. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/__init__.py +1 -1
  7. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/cli.py +342 -70
  8. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/lpse.py +101 -21
  9. pyproc-0.3.3/pyproc/mcp/parallel.py +183 -0
  10. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/mcp/schemas.py +150 -8
  11. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/mcp/search_index.py +169 -25
  12. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/mcp/server.py +4 -0
  13. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/mcp/tools.py +316 -48
  14. pyproc-0.3.3/pyproc/user_agents.py +123 -0
  15. {pyproc-0.3.1 → pyproc-0.3.3}/pyproject.toml +1 -1
  16. {pyproc-0.3.1 → pyproc-0.3.3}/tests/test_cli_unit.py +13 -0
  17. {pyproc-0.3.1 → pyproc-0.3.3}/tests/test_downloader.py +1 -1
  18. {pyproc-0.3.1 → pyproc-0.3.3}/tests/test_lpse_unit.py +181 -2
  19. {pyproc-0.3.1 → pyproc-0.3.3}/tests/test_mcp_schemas.py +107 -1
  20. pyproc-0.3.3/tests/test_mcp_search_index.py +214 -0
  21. {pyproc-0.3.1 → pyproc-0.3.3}/tests/test_mcp_tools.py +167 -37
  22. pyproc-0.3.3/tests/test_user_agents.py +63 -0
  23. pyproc-0.3.1/tests/test_mcp_search_index.py +0 -69
  24. {pyproc-0.3.1 → pyproc-0.3.3}/.github/workflows/pyproc-pypi.yml +0 -0
  25. {pyproc-0.3.1 → pyproc-0.3.3}/.github/workflows/test.yml +0 -0
  26. {pyproc-0.3.1 → pyproc-0.3.3}/LICENSE +0 -0
  27. {pyproc-0.3.1 → pyproc-0.3.3}/Makefile +0 -0
  28. {pyproc-0.3.1 → pyproc-0.3.3}/docs/assets/README_ASSET_PROMPTS.md +0 -0
  29. {pyproc-0.3.1 → pyproc-0.3.3}/docs/assets/logo.png +0 -0
  30. {pyproc-0.3.1 → pyproc-0.3.3}/docs/assets/pyproc-mcp-banner.png +0 -0
  31. {pyproc-0.3.1 → pyproc-0.3.3}/plan/API_AND_CLI_WRAPPER_OPTIMIZATION_PLAN.md +0 -0
  32. {pyproc-0.3.1 → pyproc-0.3.3}/plan/PYPROC_MCP_REBRAND_AND_IMPLEMENTATION_PLAN.md +0 -0
  33. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/cache.py +0 -0
  34. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/exceptions.py +0 -0
  35. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/mcp/__init__.py +0 -0
  36. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/mcp/errors.py +0 -0
  37. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/mcp/hosts.py +0 -0
  38. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/mcp/prompts.py +0 -0
  39. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/mcp/resources.py +0 -0
  40. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/text.py +0 -0
  41. {pyproc-0.3.1 → pyproc-0.3.3}/pyproc/utils.py +0 -0
  42. {pyproc-0.3.1 → pyproc-0.3.3}/tests/.gitignore +0 -0
  43. {pyproc-0.3.1 → pyproc-0.3.3}/tests/__init__.py +0 -0
  44. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/README.md +0 -0
  45. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/darurat_pemenang.html +0 -0
  46. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/darurat_pengumuman.html +0 -0
  47. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/dt_darurat_list.json +0 -0
  48. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/dt_lelang.json +0 -0
  49. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/dt_lelang_data_only.json +0 -0
  50. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/dt_nonspk.json +0 -0
  51. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/dt_swakelola.json +0 -0
  52. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/error_page.html +0 -0
  53. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/hasil_evaluasi.html +0 -0
  54. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/jadwal.html +0 -0
  55. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/lelang_page.html +0 -0
  56. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/nonspk_pemenang_berkontrak.html +0 -0
  57. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/nonspk_pemenang_bug_10941898000.html +0 -0
  58. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/nonspk_pengumuman.html +0 -0
  59. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/not_found_page.html +0 -0
  60. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/pemenang.html +0 -0
  61. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/pengumuman_lelang.html +0 -0
  62. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/peserta.html +0 -0
  63. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/swakelola_pelaksana.html +0 -0
  64. {pyproc-0.3.1 → pyproc-0.3.3}/tests/fixtures/swakelola_pengumuman.html +0 -0
  65. {pyproc-0.3.1 → pyproc-0.3.3}/tests/supporting_files/list-host-with-filename.txt +0 -0
  66. {pyproc-0.3.1 → pyproc-0.3.3}/tests/supporting_files/list-host.txt +0 -0
  67. {pyproc-0.3.1 → pyproc-0.3.3}/tests/test_cache.py +0 -0
  68. {pyproc-0.3.1 → pyproc-0.3.3}/tests/test_lpse.py +0 -0
  69. {pyproc-0.3.1 → pyproc-0.3.3}/tests/test_mcp_hosts.py +0 -0
  70. {pyproc-0.3.1 → pyproc-0.3.3}/tests/test_utils_unit.py +0 -0
@@ -54,4 +54,5 @@ coverage.xml
54
54
  .pytest_cache/
55
55
  *.csv
56
56
  *.idx
57
- statistic.txt
57
+ statistic.txt
58
+ .claude/
@@ -4,6 +4,59 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5
5
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## 0.3.2
8
+
9
+ ### Added
10
+ - [mcp] Tambah tool `get_master_klpd` untuk referensi K/L/PD dari LKPP ISB Satu Data
11
+ - [mcp] Tambah tool `get_master_lpse` untuk referensi LPSE dari LKPP ISB Satu Data
12
+ - [mcp] Tambah tool `get_tender_umum_publik` untuk unduh data tender dari ISB Satu Data (sumber alternatif)
13
+ - [mcp] Tambah tool `set_ssl_verify` untuk mengaktifkan/menonaktifkan verifikasi sertifikat TLS/SSL
14
+ - [mcp] Tambah tool `get_ssl_verify` untuk mengecek status verifikasi sertifikat TLS/SSL
15
+ - [mcp] Tambah notifikasi progress untuk `create_procurement_search_index`
16
+ - [cli] Subcommand `satudata` yang mengelompokkan `masterlpse` dan `tenderumum` untuk akses ISB Satu Data
17
+ - [env] Tambah variabel lingkungan `PYPROC_SSL_VERIFY` untuk konfigurasi SSL saat startup
18
+
19
+ ## 0.3.1
20
+
21
+ ### Changed
22
+ - [mcp] Hapus batas `max_packages` pada `create_procurement_search_index`, tambah pagination dan progress logging
23
+
24
+ ## 0.3
25
+
26
+ ### Added
27
+ - [mcp] Server MCP dengan dukungan penuh untuk MCP protocol (stdio transport)
28
+ - [mcp] Tool pencarian paket tender, non-tender, pencatatan, swakelola, dan darurat
29
+ - [mcp] Tool detil paket individu dan bulk (sampai 20 paket per panggilan)
30
+ - [mcp] Tool pencarian host LPSE berdasarkan nama instansi dan validasi host
31
+ - [mcp] Tool kategori pengadaan (tanpa panggilan jaringan)
32
+ - [mcp] Tool opsi pencarian (penjelasan strategi direct keyword vs local FTS index)
33
+ - [mcp] Local SQLite full-text search index (create, search, list, delete)
34
+ - [cli] Subcommand `masterklpd` untuk query referensi K/L/PD dari ISB API
35
+ - [cli] Subcommand `masterlpse` untuk browser LPSE interaktif dan unduh data tender
36
+ - [cli] Subcommand `tenderumum` untuk unduh data tender dari ISB API
37
+ - [library] Method `Lpse.get_master_klpd()`, `Lpse.get_master_lpse()`, `Lpse.get_tender_umum_publik()`
38
+ - [library] Dukungan context manager untuk class `Lpse`
39
+
40
+ ## 0.2
41
+
42
+ ### Added
43
+ - [cli] Subcommand `daftarhost` untuk unduh daftar host LPSE dalam format JSON dari GitHub Gist
44
+ - [cli] Argument `--instansi-id` untuk filter pencarian berdasarkan kode K/L/PD
45
+ - [cli] Argument `--tipe-swakelola-id` untuk filter tipe swakelola
46
+ - [library] Module `cache.py` untuk SQLite cache store dengan context manager
47
+ - [library] Parameter `verify` pada constructor `Lpse` untuk kontrol verifikasi SSL
48
+ - [test] Unit test framework dengan 102 test case (cache, CLI, API layer)
49
+
50
+ ### Changed
51
+ - [cli] Restruktur CLI dengan subcommand dispatch system, backward compatible
52
+ - [cli] Ganti flag `--non-tender` menjadi `--jenis-paket` dengan dukungan 5 tipe paket
53
+ - [build] Migrasi ke `pyproject.toml`, upgrade GitHub Actions
54
+
55
+ ### Fixed
56
+ - [downloader] Filter tahun anggaran pada infinite scrolling
57
+ - [downloader] Detil referer header
58
+ - [library] Method `get_paket` request method
59
+
7
60
  ## 0.1.7
8
61
 
9
62
  ### Fix
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyproc
3
- Version: 0.3.1
3
+ Version: 0.3.3
4
4
  Summary: MCP tools for real-time Indonesian public procurement data from SPSE/Inaproc
5
5
  Project-URL: Homepage, https://github.com/wakataw/pyproc
6
6
  Project-URL: Issues, https://github.com/wakataw/pyproc/issues
@@ -36,7 +36,7 @@ Description-Content-Type: text/markdown
36
36
  # PyProc MCP
37
37
 
38
38
  <p align="center">
39
- <img src="docs/assets/logo.png"
39
+ <img src="https://raw.githubusercontent.com/wakataw/pyproc/master/docs/assets/logo.png"
40
40
  alt="PyProc MCP — Real-time Indonesian procurement data for LLM agents"
41
41
  width="200">
42
42
  </p>
@@ -46,7 +46,7 @@ Description-Content-Type: text/markdown
46
46
  </p>
47
47
 
48
48
  <p align="center">
49
- <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/version-v0.3.1-blue" alt="PyPI version"></a>
49
+ <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/version-v0.3.3-blue" alt="PyPI version"></a>
50
50
  <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/python-≥3.9-yellow" alt="Python ≥3.9"></a>
51
51
  <a href="https://github.com/wakataw/pyproc/actions/workflows/test.yml?query=branch%3Amaster"><img src="https://github.com/wakataw/pyproc/actions/workflows/test.yml/badge.svg?branch=master" alt="Test Status"></a>
52
52
  <a href="https://github.com/wakataw/pyproc/blob/master/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="License: MIT"></a>
@@ -60,7 +60,7 @@ PyProc MCP turns public **SPSE/Inaproc** procurement data into **MCP tools** tha
60
60
  ## Why PyProc MCP?
61
61
 
62
62
  <p align="center">
63
- <img src="docs/assets/pyproc-mcp-banner.png"
63
+ <img src="https://raw.githubusercontent.com/wakataw/pyproc/master/docs/assets/pyproc-mcp-banner.png"
64
64
  alt="PyProc MCP — Real-time Indonesian procurement data for LLM agents"
65
65
  width="800">
66
66
  </p>
@@ -160,18 +160,20 @@ Download procurement data in bulk from the command line.
160
160
 
161
161
  ```bash
162
162
  # Download tender data from Kemenkeu LPSE, export as JSON
163
- pyproc kemenkeu --keyword "mobil dinas" --tahun-anggaran 2025 --output-format json
163
+ pyproc spse kemenkeu --keyword "mobil dinas" --tahun-anggaran 2025 --output-format json
164
164
 
165
165
  # Download non-tender data from multiple LPSE hosts
166
- pyproc jakarta,sumbarprov --jenis-paket non_tender --tahun-anggaran 2025
166
+ pyproc spse jakarta,sumbarprov --jenis-paket non_tender --tahun-anggaran 2025
167
167
 
168
168
  # Download pencatatan non-tender data. This is distinct from non_tender.
169
- pyproc nasional --jenis-paket pencatatan_non_tender --tahun-anggaran 2026
169
+ pyproc spse nasional --jenis-paket pencatatan_non_tender --tahun-anggaran 2026
170
170
 
171
171
  # Download with custom output filename
172
- pyproc "kemenkeu;output_kemenkeu" --output-format csv --separator ";"
172
+ pyproc spse "kemenkeu;output_kemenkeu" --output-format csv --separator ";"
173
173
  ```
174
174
 
175
+ The `spse` subcommand is the default, so `pyproc kemenkeu` is shorthand for `pyproc spse kemenkeu`.
176
+
175
177
  See [CLI Usage](#cli-usage) for the full argument reference.
176
178
 
177
179
  ---
@@ -201,6 +203,11 @@ The MCP server exposes the following tools:
201
203
  | `get_swakelola_details_bulk` | Get details for multiple swakelola packages |
202
204
  | `get_pengadaan_darurat_details_bulk` | Get details for multiple pengadaan darurat packages |
203
205
  | `get_procurement_categories` | List supported procurement categories (no network call) |
206
+ | `get_master_klpd` | Get LKPP Satu Data master K/L/PD references (alternative data source) |
207
+ | `get_master_lpse` | Get LKPP Satu Data master LPSE references (alternative data source) |
208
+ | `get_tender_umum_publik` | Get tender data from LKPP ISB Satu Data (alternative data source / fallback) |
209
+ | `set_ssl_verify` | Enable/disable TLS/SSL certificate verification for all SPSE requests |
210
+ | `get_ssl_verify` | Return current SSL verification setting |
204
211
  | `validate_lpse_host` | Check if an LPSE host is accessible |
205
212
  | `create_procurement_search_index` | Download packages (all available by default, or capped with `max_packages`) into a local SQLite full-text index |
206
213
  | `search_procurement_index` | Search a local SQLite full-text index |
@@ -253,6 +260,24 @@ The LLM should start with direct keyword search. If results are weak or the user
253
260
 
254
261
  ---
255
262
 
263
+ ## ISB Satu Data API (Alternative Data Source)
264
+
265
+ In addition to direct realtime SPSE/Inaproc queries, PyProc also integrates with the **LKPP ISB (Indonesia Satu Data) API**. This is a curated, government-maintained dataset that provides:
266
+
267
+ - **Master LPSE** — reference list of all LPSE units with their codes and names
268
+ - **Master K/L/PD** — reference list of all government institutions (Kementerian, Lembaga, Perangkat Daerah)
269
+ - **Tender Umum Publik** — public tender data indexed by LPSE code and budget year
270
+
271
+ The ISB Satu Data API serves as an **alternative data source** when:
272
+
273
+ - Direct SPSE/Inaproc queries fail or return errors (e.g., server timeouts, blocked hosts)
274
+ - You need reference data (LPSE codes, institution codes) before making SPSE queries
275
+ - The user explicitly wants the curated ISB dataset rather than realtime SPSE data
276
+
277
+ Use the `satudata` CLI subcommand or the `get_master_lpse`, `get_master_klpd`, and `get_tender_umum_publik` MCP tools to access ISB data.
278
+
279
+ ---
280
+
256
281
  ## Example LLM Workflows
257
282
 
258
283
  Here are examples of what you can ask an LLM when PyProc MCP tools are connected:
@@ -317,6 +342,7 @@ The MCP server is configured via environment variables:
317
342
  | `PYPROC_TIMEOUT` | `30` | HTTP request timeout in seconds |
318
343
  | `PYPROC_RATE_LIMIT_DELAY` | `1.0` | Minimum seconds between requests |
319
344
  | `PYPROC_LOG_LEVEL` | `INFO` | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) |
345
+ | `PYPROC_SSL_VERIFY` | `0` | SSL/TLS certificate verification; disabled by default. Set to `1`, `true`, `yes`, or `on` to enable |
320
346
 
321
347
  ---
322
348
 
@@ -468,7 +494,9 @@ JenisPengadaan.JASA_LAINNYA # Other Services
468
494
 
469
495
  ## CLI Usage
470
496
 
471
- ### Basic Download
497
+ ### SPSE (Direct Realtime Data)
498
+
499
+ Download procurement data directly from SPSE/Inaproc servers. The `spse` subcommand is the default.
472
500
 
473
501
  ```bash
474
502
  pyproc kemenkeu
@@ -480,7 +508,7 @@ This downloads tender data from the Kemenkeu LPSE and exports it as `kemenkeu.cs
480
508
 
481
509
  | Argument | Example | Default | Description |
482
510
  |---|---|---|---|
483
- | `lpse_host` | `pyproc kemenkeu` | Required | LPSE host or text file with host list |
511
+ | `lpse_host` | `pyproc spse kemenkeu` | Required | LPSE host or text file with host list |
484
512
  | `-k, --keyword` | `--keyword "mobil dinas"` | `""` | Search keyword filter |
485
513
  | `-t, --tahun-anggaran` | `--tahun-anggaran 2025` | Current year | Budget year (single, comma-separated, or range) |
486
514
  | `--kategori` | `--kategori PEKERJAAN_KONSTRUKSI` | None | Procurement category |
@@ -501,10 +529,10 @@ This downloads tender data from the Kemenkeu LPSE and exports it as `kemenkeu.cs
501
529
 
502
530
  ```bash
503
531
  # Download from multiple LPSE hosts
504
- pyproc jakarta,kemenkeu,sumbarprov
532
+ pyproc spse jakarta,kemenkeu,sumbarprov
505
533
 
506
534
  # With custom output filenames
507
- pyproc "jakarta;file_jakarta,kemenkeu;file_kemenkeu"
535
+ pyproc spse "jakarta;file_jakarta,kemenkeu;file_kemenkeu"
508
536
  ```
509
537
 
510
538
  ### Download Host List
@@ -517,6 +545,71 @@ pyproc daftarlpse
517
545
  pyproc daftarhost
518
546
  ```
519
547
 
548
+ ### Master KLPD
549
+
550
+ Query K/L/PD reference data from the LKPP ISB Satu Data API:
551
+
552
+ ```bash
553
+ # Show all K/L/PD references
554
+ pyproc masterklpd
555
+
556
+ # Search by institution name or code
557
+ pyproc masterklpd --query kemenkeu
558
+
559
+ # Filter by jenis (K, L, PD)
560
+ pyproc masterklpd --jenis K
561
+
562
+ # Get specific K/L/PD by code
563
+ pyproc masterklpd --kd-klpd K68
564
+
565
+ # Limit results
566
+ pyproc masterklpd --limit 5
567
+ ```
568
+
569
+ ### Satu Data (Alternative Source)
570
+
571
+ Access tender data from the LKPP ISB Satu Data API as an alternative to direct realtime SPSE queries.
572
+
573
+ #### Master LPSE (Interactive Browser)
574
+
575
+ ```bash
576
+ # Launch interactive LPSE browser
577
+ pyproc satudata masterlpse
578
+
579
+ # With custom timeout
580
+ pyproc satudata masterlpse --timeout 60
581
+
582
+ # Specify output format and file
583
+ pyproc satudata masterlpse --output csv --output-file hasil.csv
584
+ ```
585
+
586
+ #### Tender Umum Publik (Direct Download)
587
+
588
+ Download tender data for a specific LPSE code and budget year:
589
+
590
+ ```bash
591
+ # Download tender data for a specific LPSE and year
592
+ pyproc satudata tenderumum --kode-lpse 119 --tahun-anggaran 2026
593
+
594
+ # With custom options
595
+ pyproc satudata tenderumum --kode-lpse 119 --tahun-anggaran 2026 --timeout 60 --output json
596
+ ```
597
+
598
+ **Required arguments for `tenderumum`**:
599
+
600
+ | Argument | Example | Description |
601
+ |---|---|---|
602
+ | `--tahun-anggaran` | `2026` | Budget year |
603
+ | `--kode-lpse` | `119` | LPSE code from master LPSE |
604
+
605
+ **Optional arguments**:
606
+
607
+ | Argument | Default | Description |
608
+ |---|---|---|
609
+ | `--timeout` | `30` | Request timeout in seconds |
610
+ | `--output` | `json` | Output format: `json` or `csv` |
611
+ | `--output-file` | Auto-generated | Output file path |
612
+
520
613
  ---
521
614
 
522
615
  ## Responsible Use and Disclaimer
@@ -582,11 +675,3 @@ pyproc/
582
675
  ## License
583
676
 
584
677
  MIT License. See [LICENSE](LICENSE) for details.
585
-
586
- ---
587
-
588
- ## Donatur ☕️
589
-
590
- Orang-orang yang berjasa menyediakan kopi sehingga pengembangan paket tetap berjalan:
591
-
592
- - Angga Rinaldi Rizal (50 cangkir ☕️)
@@ -1,7 +1,7 @@
1
1
  # PyProc MCP
2
2
 
3
3
  <p align="center">
4
- <img src="docs/assets/logo.png"
4
+ <img src="https://raw.githubusercontent.com/wakataw/pyproc/master/docs/assets/logo.png"
5
5
  alt="PyProc MCP — Real-time Indonesian procurement data for LLM agents"
6
6
  width="200">
7
7
  </p>
@@ -11,7 +11,7 @@
11
11
  </p>
12
12
 
13
13
  <p align="center">
14
- <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/version-v0.3.1-blue" alt="PyPI version"></a>
14
+ <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/version-v0.3.3-blue" alt="PyPI version"></a>
15
15
  <a href="https://pypi.org/project/pyproc/"><img src="https://img.shields.io/badge/python-≥3.9-yellow" alt="Python ≥3.9"></a>
16
16
  <a href="https://github.com/wakataw/pyproc/actions/workflows/test.yml?query=branch%3Amaster"><img src="https://github.com/wakataw/pyproc/actions/workflows/test.yml/badge.svg?branch=master" alt="Test Status"></a>
17
17
  <a href="https://github.com/wakataw/pyproc/blob/master/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="License: MIT"></a>
@@ -25,7 +25,7 @@ PyProc MCP turns public **SPSE/Inaproc** procurement data into **MCP tools** tha
25
25
  ## Why PyProc MCP?
26
26
 
27
27
  <p align="center">
28
- <img src="docs/assets/pyproc-mcp-banner.png"
28
+ <img src="https://raw.githubusercontent.com/wakataw/pyproc/master/docs/assets/pyproc-mcp-banner.png"
29
29
  alt="PyProc MCP — Real-time Indonesian procurement data for LLM agents"
30
30
  width="800">
31
31
  </p>
@@ -125,18 +125,20 @@ Download procurement data in bulk from the command line.
125
125
 
126
126
  ```bash
127
127
  # Download tender data from Kemenkeu LPSE, export as JSON
128
- pyproc kemenkeu --keyword "mobil dinas" --tahun-anggaran 2025 --output-format json
128
+ pyproc spse kemenkeu --keyword "mobil dinas" --tahun-anggaran 2025 --output-format json
129
129
 
130
130
  # Download non-tender data from multiple LPSE hosts
131
- pyproc jakarta,sumbarprov --jenis-paket non_tender --tahun-anggaran 2025
131
+ pyproc spse jakarta,sumbarprov --jenis-paket non_tender --tahun-anggaran 2025
132
132
 
133
133
  # Download pencatatan non-tender data. This is distinct from non_tender.
134
- pyproc nasional --jenis-paket pencatatan_non_tender --tahun-anggaran 2026
134
+ pyproc spse nasional --jenis-paket pencatatan_non_tender --tahun-anggaran 2026
135
135
 
136
136
  # Download with custom output filename
137
- pyproc "kemenkeu;output_kemenkeu" --output-format csv --separator ";"
137
+ pyproc spse "kemenkeu;output_kemenkeu" --output-format csv --separator ";"
138
138
  ```
139
139
 
140
+ The `spse` subcommand is the default, so `pyproc kemenkeu` is shorthand for `pyproc spse kemenkeu`.
141
+
140
142
  See [CLI Usage](#cli-usage) for the full argument reference.
141
143
 
142
144
  ---
@@ -166,6 +168,11 @@ The MCP server exposes the following tools:
166
168
  | `get_swakelola_details_bulk` | Get details for multiple swakelola packages |
167
169
  | `get_pengadaan_darurat_details_bulk` | Get details for multiple pengadaan darurat packages |
168
170
  | `get_procurement_categories` | List supported procurement categories (no network call) |
171
+ | `get_master_klpd` | Get LKPP Satu Data master K/L/PD references (alternative data source) |
172
+ | `get_master_lpse` | Get LKPP Satu Data master LPSE references (alternative data source) |
173
+ | `get_tender_umum_publik` | Get tender data from LKPP ISB Satu Data (alternative data source / fallback) |
174
+ | `set_ssl_verify` | Enable/disable TLS/SSL certificate verification for all SPSE requests |
175
+ | `get_ssl_verify` | Return current SSL verification setting |
169
176
  | `validate_lpse_host` | Check if an LPSE host is accessible |
170
177
  | `create_procurement_search_index` | Download packages (all available by default, or capped with `max_packages`) into a local SQLite full-text index |
171
178
  | `search_procurement_index` | Search a local SQLite full-text index |
@@ -218,6 +225,24 @@ The LLM should start with direct keyword search. If results are weak or the user
218
225
 
219
226
  ---
220
227
 
228
+ ## ISB Satu Data API (Alternative Data Source)
229
+
230
+ In addition to direct realtime SPSE/Inaproc queries, PyProc also integrates with the **LKPP ISB (Indonesia Satu Data) API**. This is a curated, government-maintained dataset that provides:
231
+
232
+ - **Master LPSE** — reference list of all LPSE units with their codes and names
233
+ - **Master K/L/PD** — reference list of all government institutions (Kementerian, Lembaga, Perangkat Daerah)
234
+ - **Tender Umum Publik** — public tender data indexed by LPSE code and budget year
235
+
236
+ The ISB Satu Data API serves as an **alternative data source** when:
237
+
238
+ - Direct SPSE/Inaproc queries fail or return errors (e.g., server timeouts, blocked hosts)
239
+ - You need reference data (LPSE codes, institution codes) before making SPSE queries
240
+ - The user explicitly wants the curated ISB dataset rather than realtime SPSE data
241
+
242
+ Use the `satudata` CLI subcommand or the `get_master_lpse`, `get_master_klpd`, and `get_tender_umum_publik` MCP tools to access ISB data.
243
+
244
+ ---
245
+
221
246
  ## Example LLM Workflows
222
247
 
223
248
  Here are examples of what you can ask an LLM when PyProc MCP tools are connected:
@@ -282,6 +307,7 @@ The MCP server is configured via environment variables:
282
307
  | `PYPROC_TIMEOUT` | `30` | HTTP request timeout in seconds |
283
308
  | `PYPROC_RATE_LIMIT_DELAY` | `1.0` | Minimum seconds between requests |
284
309
  | `PYPROC_LOG_LEVEL` | `INFO` | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) |
310
+ | `PYPROC_SSL_VERIFY` | `0` | SSL/TLS certificate verification; disabled by default. Set to `1`, `true`, `yes`, or `on` to enable |
285
311
 
286
312
  ---
287
313
 
@@ -433,7 +459,9 @@ JenisPengadaan.JASA_LAINNYA # Other Services
433
459
 
434
460
  ## CLI Usage
435
461
 
436
- ### Basic Download
462
+ ### SPSE (Direct Realtime Data)
463
+
464
+ Download procurement data directly from SPSE/Inaproc servers. The `spse` subcommand is the default.
437
465
 
438
466
  ```bash
439
467
  pyproc kemenkeu
@@ -445,7 +473,7 @@ This downloads tender data from the Kemenkeu LPSE and exports it as `kemenkeu.cs
445
473
 
446
474
  | Argument | Example | Default | Description |
447
475
  |---|---|---|---|
448
- | `lpse_host` | `pyproc kemenkeu` | Required | LPSE host or text file with host list |
476
+ | `lpse_host` | `pyproc spse kemenkeu` | Required | LPSE host or text file with host list |
449
477
  | `-k, --keyword` | `--keyword "mobil dinas"` | `""` | Search keyword filter |
450
478
  | `-t, --tahun-anggaran` | `--tahun-anggaran 2025` | Current year | Budget year (single, comma-separated, or range) |
451
479
  | `--kategori` | `--kategori PEKERJAAN_KONSTRUKSI` | None | Procurement category |
@@ -466,10 +494,10 @@ This downloads tender data from the Kemenkeu LPSE and exports it as `kemenkeu.cs
466
494
 
467
495
  ```bash
468
496
  # Download from multiple LPSE hosts
469
- pyproc jakarta,kemenkeu,sumbarprov
497
+ pyproc spse jakarta,kemenkeu,sumbarprov
470
498
 
471
499
  # With custom output filenames
472
- pyproc "jakarta;file_jakarta,kemenkeu;file_kemenkeu"
500
+ pyproc spse "jakarta;file_jakarta,kemenkeu;file_kemenkeu"
473
501
  ```
474
502
 
475
503
  ### Download Host List
@@ -482,6 +510,71 @@ pyproc daftarlpse
482
510
  pyproc daftarhost
483
511
  ```
484
512
 
513
+ ### Master KLPD
514
+
515
+ Query K/L/PD reference data from the LKPP ISB Satu Data API:
516
+
517
+ ```bash
518
+ # Show all K/L/PD references
519
+ pyproc masterklpd
520
+
521
+ # Search by institution name or code
522
+ pyproc masterklpd --query kemenkeu
523
+
524
+ # Filter by jenis (K, L, PD)
525
+ pyproc masterklpd --jenis K
526
+
527
+ # Get specific K/L/PD by code
528
+ pyproc masterklpd --kd-klpd K68
529
+
530
+ # Limit results
531
+ pyproc masterklpd --limit 5
532
+ ```
533
+
534
+ ### Satu Data (Alternative Source)
535
+
536
+ Access tender data from the LKPP ISB Satu Data API as an alternative to direct realtime SPSE queries.
537
+
538
+ #### Master LPSE (Interactive Browser)
539
+
540
+ ```bash
541
+ # Launch interactive LPSE browser
542
+ pyproc satudata masterlpse
543
+
544
+ # With custom timeout
545
+ pyproc satudata masterlpse --timeout 60
546
+
547
+ # Specify output format and file
548
+ pyproc satudata masterlpse --output csv --output-file hasil.csv
549
+ ```
550
+
551
+ #### Tender Umum Publik (Direct Download)
552
+
553
+ Download tender data for a specific LPSE code and budget year:
554
+
555
+ ```bash
556
+ # Download tender data for a specific LPSE and year
557
+ pyproc satudata tenderumum --kode-lpse 119 --tahun-anggaran 2026
558
+
559
+ # With custom options
560
+ pyproc satudata tenderumum --kode-lpse 119 --tahun-anggaran 2026 --timeout 60 --output json
561
+ ```
562
+
563
+ **Required arguments for `tenderumum`**:
564
+
565
+ | Argument | Example | Description |
566
+ |---|---|---|
567
+ | `--tahun-anggaran` | `2026` | Budget year |
568
+ | `--kode-lpse` | `119` | LPSE code from master LPSE |
569
+
570
+ **Optional arguments**:
571
+
572
+ | Argument | Default | Description |
573
+ |---|---|---|
574
+ | `--timeout` | `30` | Request timeout in seconds |
575
+ | `--output` | `json` | Output format: `json` or `csv` |
576
+ | `--output-file` | Auto-generated | Output file path |
577
+
485
578
  ---
486
579
 
487
580
  ## Responsible Use and Disclaimer
@@ -547,11 +640,3 @@ pyproc/
547
640
  ## License
548
641
 
549
642
  MIT License. See [LICENSE](LICENSE) for details.
550
-
551
- ---
552
-
553
- ## Donatur ☕️
554
-
555
- Orang-orang yang berjasa menyediakan kopi sehingga pengembangan paket tetap berjalan:
556
-
557
- - Angga Rinaldi Rizal (50 cangkir ☕️)
@@ -0,0 +1,87 @@
1
+ # Plan: Remove max_packages cap, default to all data, paginate at 100/req, add progress
2
+
3
+ ## Context
4
+
5
+ The `create_procurement_search_index` MCP tool has `MAX_INDEX_PACKAGES = 500` capping the **total** number of packages that can be downloaded. Remove this cap so users can download all available data for a host/year. Default behavior becomes "download all available data." Per-request batch size stays fixed at 100 (internal, not exposed). Progress is reported as a running count via stderr logging — no percentage since we don't rely on `recordsTotal`/`recordsFiltered`.
6
+
7
+ ## Parameter design
8
+
9
+ | Parameter | Type | Default | Description |
10
+ |---|---|---|---|
11
+ | `max_packages` | int | `0` | `0` = download all (paginate until exhausted). Positive value caps the total. No upper bound. |
12
+ | Batch size | int (internal) | `100` | Fixed `CHUNK_SIZE`, not a user-facing parameter. Matches CLI `--chunk-size` default. |
13
+
14
+ ## Changes
15
+
16
+ ### 1. `pyproc/mcp/schemas.py`
17
+
18
+ - Delete `MAX_INDEX_PACKAGES = 500` (line 33)
19
+ - Change `DEFAULT_INDEX_MAX_PACKAGES = 100` → `DEFAULT_INDEX_MAX_PACKAGES = 0` (0 = all)
20
+ - Line 410: `max(1, min(max_packages, MAX_INDEX_PACKAGES))` → `max(0, max_packages)` (0 means unlimited)
21
+ - `CREATE_SEARCH_INDEX_SCHEMA` (lines 952-960):
22
+ - Remove `"maximum": MAX_INDEX_PACKAGES`
23
+ - Update description to `"Maximum packages to download. 0 = all available. Default 0."`
24
+
25
+ ### 2. `pyproc/mcp/search_index.py`
26
+
27
+ - Add `import logging` and a module-level `logger = logging.getLogger(__name__)`
28
+ - Add `CHUNK_SIZE = 100`
29
+
30
+ Replace the single-request block (lines 127-139) with a pagination loop:
31
+
32
+ ```python
33
+ CHUNK_SIZE = 100
34
+ all_rows = []
35
+ start = 0
36
+ limit = max_packages if max_packages > 0 else float('inf')
37
+
38
+ while len(all_rows) < limit:
39
+ remaining = limit - len(all_rows)
40
+ req_length = int(min(CHUNK_SIZE, remaining))  # min() first: int(float('inf')) raises OverflowError when max_packages == 0
41
+ chunk_kwargs = {**kwargs, "start": start, "length": req_length}
42
+ chunk = search_method(**chunk_kwargs)
43
+ if not chunk:
44
+ break
45
+ all_rows.extend(chunk)
46
+ logger.info("Scrolled: %d rows from %s (TA %s)", len(all_rows), lpse_host, tahun_anggaran)
47
+ start += len(chunk)
48
+ if len(chunk) < req_length:
49
+ break # partial page → end of data
50
+
51
+ to_process = len(all_rows)
52
+ logger.info("Will index %d packages from %s (%s, %s)", to_process, lpse_host, package_type, tahun_anggaran)
53
+ ```
54
+
55
+ Progress logging in the detail loop:
56
+
57
+ ```python
58
+ for idx, row in enumerate(all_rows, start=1):
59
+ title = _package_title(row)
60
+ logger.info("[%d/%d] Indexing: %s", idx, to_process, title)
61
+ # ... existing fetch + index + rate_limit ...
62
+ ```
63
+
64
+ ### 3. `pyproc/mcp/tools.py`
65
+
66
+ - Update tool description string to mention "0 = all" default
67
+ - No code changes needed — progress is handled in `search_index.py`'s logger
68
+
69
+ ### 4. Tests
70
+
71
+ **`tests/test_mcp_schemas.py`** (`TestValidateSearchIndexParams`):
72
+ - Default `max_packages` → 0 (all)
73
+ - `max_packages=5000` → 5000 (not clamped)
74
+ - `max_packages=-1` → 0 (floor at 0)
75
+
76
+ **`tests/test_mcp_search_index.py`** (`TestSearchIndex`):
77
+ - Pagination: mock returns [100, 100, 50] → all 250 collected
78
+ - Stops on empty chunk
79
+ - Stops on partial chunk (< requested length)
80
+ - Respects max_packages limit when positive
81
+
82
+ ## Verification
83
+
84
+ ```bash
85
+ python -m pytest tests/test_mcp_schemas.py::TestValidateSearchIndexParams tests/test_mcp_search_index.py -v
86
+ python -m pytest tests/ --ignore=tests/test_lpse.py --ignore=tests/test_downloader.py -v
87
+ ```
@@ -1,6 +1,6 @@
1
1
  from .lpse import Lpse, JenisPengadaan, TipeSwakelola, By, KontrakStatus
2
2
 
3
- __version__ = '0.3.1'
3
+ __version__ = '0.3.3'
4
4
  __author__ = 'Agung Pratama'
5
5
  __all__ = [
6
6
  'Lpse',