pyproc 0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyproc/__init__.py +10 -0
- pyproc/cli.py +807 -0
- pyproc/exceptions.py +18 -0
- pyproc/lpse.py +757 -0
- pyproc/text.py +43 -0
- pyproc/utils.py +90 -0
- pyproc-0.2.dist-info/METADATA +210 -0
- pyproc-0.2.dist-info/RECORD +11 -0
- pyproc-0.2.dist-info/WHEEL +4 -0
- pyproc-0.2.dist-info/entry_points.txt +2 -0
- pyproc-0.2.dist-info/licenses/LICENSE +21 -0
pyproc/cli.py
ADDED
|
@@ -0,0 +1,807 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import csv
|
|
3
|
+
import re
|
|
4
|
+
import logging
|
|
5
|
+
import signal
|
|
6
|
+
import sqlite3
|
|
7
|
+
import threading
|
|
8
|
+
import requests
|
|
9
|
+
import pyproc
|
|
10
|
+
import json
|
|
11
|
+
from time import sleep
|
|
12
|
+
from .exceptions import DownloaderContextException
|
|
13
|
+
from . import text
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from urllib3.exceptions import InsecureRequestWarning
|
|
17
|
+
from urllib3 import disable_warnings
|
|
18
|
+
|
|
19
|
+
disable_warnings(InsecureRequestWarning)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def set_up_log(level):
    """
    Configure the root logger from a level name supplied by the user.

    :param level: name of a ``logging`` level, case-insensitive (e.g. ``"info"``)
    :return: None
    :raises ValueError: if ``level`` does not resolve to an integer attribute of ``logging``
    """
    resolved = getattr(logging, level.upper(), None)

    if isinstance(resolved, int):
        logging.basicConfig(level=resolved, format='[%(asctime)s %(levelname)s] %(message)s')
        return

    raise ValueError('Invalid log level: {}'.format(level))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def check_new_version(timeout=10):
    """
    Compare the installed PyProc version with the latest release published on PyPI.

    :param timeout: seconds to wait for PyPI before giving up; without a timeout
        ``requests`` would wait indefinitely on an unresponsive server
    :return: tuple ``(status, current_version, pypi_version)``; ``status`` is True
        when the two version strings differ
    :raises requests.RequestException: on network failure, timeout or HTTP error status
    """
    response = requests.get('https://pypi.org/pypi/pyproc/json', timeout=timeout)
    # Fail on an HTTP error status instead of trying to parse an error page as JSON.
    response.raise_for_status()

    current_version = pyproc.__version__
    pypi_version = response.json()['info']['version']
    status = current_version != pypi_version

    return status, current_version, pypi_version
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class IWillFindYouAndIWillKillYou:
    """
    Install handlers so that SIGINT and SIGTERM stop the program with exit status 1.
    """

    def __init__(self):
        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, self.exit_gracefully)

    def exit_gracefully(self, *args):
        """
        Signal handler: log the cancellation and terminate the interpreter.

        :param args: arguments passed by the ``signal`` module to the handler
        :raises SystemExit: always, with exit code 1
        """
        logging.debug("Get {} signal".format(args))
        logging.error("Proses dibatalkan user")
        # ``exit`` is a helper injected by the ``site`` module for interactive use and
        # may be missing (``python -S``, frozen builds); raise SystemExit directly.
        raise SystemExit(1)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class LpseHost(object):
    """
    One LPSE host entry given as ``URL`` or ``URL;FILENAME``.

    Attributes set by the constructor:
    - is_valid: True when the entry could be parsed
    - error: error message when the entry is invalid, otherwise None
    - url: host part of the entry, None when the entry is invalid
    - filename: output path inside the current working directory, None when invalid
    """

    def __init__(self, args):
        self.is_valid = False
        self.error = None
        self.url, self.filename = self.parse_host(args)

    def parse_host(self, args):
        """
        Split a host entry into its URL and output filename.

        When no filename is supplied (or it is empty), one is generated by joining the
        alphanumeric runs of the lower-cased URL with underscores.

        :param args: string of the form ``URL`` or ``URL;FILENAME``
        :return: ``(url, path)`` on success, ``(None, None)`` when the entry is invalid
        """
        url_and_filename = [part.strip() for part in args.split(';')]
        logging.debug("Url dan Filename {}".format(url_and_filename))

        # ``str.split`` never returns an empty list, so an empty entry has to be
        # detected through its empty URL part rather than through the part count.
        if len(url_and_filename) > 2 or not url_and_filename[0]:
            self.error = text.ERROR_CTX_HOST_FORMAT.format(args)
            return None, None

        url = url_and_filename[0]
        filename = url_and_filename[1] if len(url_and_filename) == 2 else ''

        if not filename:
            # An empty filename would resolve to the working directory itself.
            filename = '_'.join(re.findall(r'([a-z0-9]+)', url.lower()))

        # mark the host as valid
        self.is_valid = True

        logging.debug("Hasil parsing {} & {}".format(url, filename))
        return url, Path.cwd() / filename

    def __str__(self):
        return str(self.__dict__)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class DownloaderContext(object):
    """
    Holds the downloader settings taken from the parsed command line arguments.
    """

    def __init__(self, args):
        """
        :param args: parsed argument namespace; every attribute read below must exist
        :raises DownloaderContextException: when ``args.tahun_anggaran`` is invalid
        """
        self.keyword = args.keyword
        self.tahun_anggaran = self.parse_tahun_anggaran(args.tahun_anggaran)
        self._kategori = args.kategori
        self.nama_penyedia = args.nama_penyedia
        self.chunk_size = args.chunk_size
        self.workers = 1  # hard coded worker to 1
        self.timeout = args.timeout
        self.non_tender = args.non_tender
        self.index_download_delay = args.index_download_delay
        self.keep_index = args.keep_index
        self.log_level = args.log
        self.output_format = args.output_format
        self.resume = args.resume
        self.separator = args.separator
        self.__lpse_host = args.lpse_host

    @property
    def kategori(self):
        """
        :return: the ``pyproc.JenisPengadaan`` member named by the category argument,
            or None when the name is not a member
        """
        try:
            return pyproc.JenisPengadaan[self._kategori]
        except KeyError:
            return None

    def parse_tahun_anggaran(self, tahun_anggaran):
        """
        Parse the budget-year argument into the list of budget years to download.

        :param tahun_anggaran: string of the form ``X-Y`` (range), ``A,B,X,Z`` (list),
            a mix of both, or ``all``
        :return: sorted list of unique years, or ``[None]`` for ``all``
        :raises DownloaderContextException: when an item is not an integer, when a year
            is outside ``2001 .. current year + 5``, or when no year was given
        """
        list_tahun_anggaran = []

        if tahun_anggaran.lower().strip() == 'all':
            return [None]

        tahun_anggaran = re.sub(r'\s+', '', tahun_anggaran)
        tahun_maksimal = datetime.now().year + 5

        # split the argument on commas
        for i in tahun_anggaran.split(','):
            try:
                # split every item on dashes and convert to integers; a failed
                # conversion raises ValueError, which is translated below
                range_tahun = list(map(int, i.split('-')))

                for tahun in range(min(range_tahun), max(range_tahun) + 1):
                    if not 2000 < tahun <= tahun_maksimal:
                        raise DownloaderContextException(text.ERROR_CTX_RANGE_TAHUN.format(tahun_maksimal))
                    list_tahun_anggaran.append(tahun)
            except ValueError:
                raise DownloaderContextException(text.ERROR_CTX_TAHUN_ANGGARAN)

        list_tahun_anggaran = sorted(set(list_tahun_anggaran))

        if not list_tahun_anggaran:
            raise DownloaderContextException(text.ERROR_CTX_TAHUN_ANGGARAN)

        return list_tahun_anggaran

    def __get_host_from_file(self, file):
        """
        Yield one ``LpseHost`` per non-blank line of ``file``.

        :param file: ``pathlib.Path`` of a text file with one host entry per line
        """
        logging.debug("List LPSE host dari file")
        with file.open('r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # blank lines are layout, not host entries
                    continue
                logging.debug("Parsing host {}".format(line))
                yield LpseHost(line)

    def __get_host_from_argumen(self, arg):
        """
        Yield one ``LpseHost`` per comma-separated entry of ``arg``.

        :param arg: comma-separated host entries; whitespace around entries is ignored
        """
        logging.debug("List LPSE host dari argumen {}".format(arg))
        for line in arg.strip().split(','):
            line = line.strip()
            logging.debug("Parsing host {}".format(line))
            yield LpseHost(line)

    @property
    def lpse_host_list(self):
        """
        Resolve the host argument. The value is first treated as a file name relative
        to the current working directory; when no such file exists it is treated as a
        comma-separated list of host entries.

        :return: generator of ``LpseHost`` objects
        """
        lpse_host_file = Path.cwd() / self.__lpse_host
        try:
            host_is_file = lpse_host_file.is_file()
        except OSError:
            # e.g. the argument is not usable as a path on this platform
            host_is_file = False

        if host_is_file:
            return self.__get_host_from_file(lpse_host_file)

        return self.__get_host_from_argumen(self.__lpse_host)

    def __str__(self):
        return str(self.__dict__)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class LpseIndex:
    """
    One row of the ``INDEX_PAKET`` table exposed as attributes.
    """

    def __init__(self, kwargs):
        """
        :param kwargs: mapping with the keys ``row_id``, ``id_paket``, ``jenis_paket``,
            ``kategori_tahun_anggaran``, ``status`` and ``detail``
        :raises KeyError: when one of those keys is missing
        """
        for column in ('row_id', 'id_paket', 'jenis_paket', 'kategori_tahun_anggaran', 'status'):
            setattr(self, column, kwargs[column])

        self.detail = self.parse_detail(kwargs['detail'])

    @staticmethod
    def parse_detail(detail):
        """
        Decode the JSON text stored in the ``DETAIL`` column.

        :param detail: JSON document, or a non-string such as None
        :return: decoded value; an empty dict when ``json.loads`` rejects the type
        """
        try:
            parsed = json.loads(detail)
        except TypeError:
            parsed = {}

        return parsed

    def __str__(self):
        return str(vars(self))
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class IndexDownloader(object):
    """
    Downloads the package index of one LPSE host into a local SQLite database.

    The database file is ``<output filename>.idx`` in the current working directory
    and contains a single table, ``INDEX_PAKET``.
    """

    # raw string: ``\d`` in a plain string literal is an invalid escape sequence
    __tahun_anggaran_pattern = re.compile(r'(\d+)')
    db = None
    db_status_for_resume = False
    db_file = None
    lpse = None

    def __init__(self, ctx, lpse_host):
        """
        :param ctx: downloader context (reads ``timeout``, ``resume``, ``non_tender``,
            ``tahun_anggaran`` and the search filters)
        :param lpse_host: host entry providing ``url`` and ``filename``
        """
        self.ctx = ctx
        self.lpse_host = lpse_host
        self.lpse = pyproc.Lpse(lpse_host.url, timeout=ctx.timeout)
        self.db = self.get_index_db(self.lpse_host.filename)

        logging.info("{} - Mulai pengunduhan data {} tahun {}".format(
            lpse_host.url, "Pengadaan Langsung" if self.ctx.non_tender else "Tender",
            ', '.join(map(str, self.ctx.tahun_anggaran)) if self.ctx.tahun_anggaran[0] is not None else 'ALL'
        ))

    def __check_index_db(self, db):
        """
        Check whether ``db`` already holds index rows from a previous run.

        :param db: open SQLite connection
        :return: True when ``INDEX_PAKET`` exists and has at least one row
        """
        status = False
        try:
            total = db.execute("SELECT COUNT(1) FROM INDEX_PAKET").fetchone()[0]
            logging.info("{} - total previous index {}".format(self.lpse_host.url, total))
            if total > 0:
                status = True
        except Exception as e:
            # typically the table does not exist yet; treated as "nothing to resume"
            logging.error("{} - check index db gagal, error: {}".format(self.lpse_host.url, e))
            status = False

        logging.info("{} - status previous index db {}".format(self.lpse_host.url, status))
        self.db_status_for_resume = status
        return status

    def get_index_db(self, filename):
        """
        Open the index database and (re)create the index table.

        Table columns:
        - ROW_ID, ``<tender|nontender>-<package id>``
        - ID_PAKET
        - JENIS_PAKET
        - KATEGORI_TAHUN_ANGGARAN
        - STATUS (0 not downloaded yet, 1 ok)
        - DETAIL, JSON text of the package detail

        When resuming and a previous index with rows exists, the table is kept as is.

        :param filename: ``pathlib.Path`` whose ``name`` is the base of the database file name
        :return: SQLite connection
        :raises sqlite3.OperationalError: when the schema cannot be created
        """
        db_filename = filename.name + ".idx"
        self.db_file = Path.cwd() / db_filename
        # the connection is also used from the detail downloader's worker threads
        db = sqlite3.connect(str(self.db_file), check_same_thread=False)

        if self.ctx.resume and self.__check_index_db(db):
            logging.info("{} - skip db init, melanjutkan proses".format(self.lpse_host.url))
            return db

        logging.debug("Generate index database: {}".format(self.db_file.name))
        logging.debug("Create index table")

        try:
            db.execute("DROP TABLE IF EXISTS INDEX_PAKET")
            db.execute("""CREATE TABLE INDEX_PAKET
                (
                    ROW_ID varchar(100) unique primary key,
                    ID_PAKET VARCHAR(50),
                    JENIS_PAKET VARCHAR(32),
                    KATEGORI_TAHUN_ANGGARAN varchar (100),
                    STATUS int default 0,
                    DETAIL text
                );""")
            db.execute("CREATE INDEX INDEX_PAKET_KATEGORI_TAHUN_ANGGARAN_IDX ON INDEX_PAKET(KATEGORI_TAHUN_ANGGARAN);")
            db.execute("CREATE INDEX INDEX_PAKET_ID_PAKET_IDX ON INDEX_PAKET(ID_PAKET);")
            db.execute("CREATE INDEX INDEX_PAKET_JENIS_PAKET ON INDEX_PAKET(JENIS_PAKET);")
            db.execute("CREATE INDEX INDEX_PAKET_STATUS_IDX ON INDEX_PAKET(STATUS);")
        except sqlite3.OperationalError as e:
            if 'INDEX_PAKET already exists' not in str(e):
                # do not leak the connection when the constructor is about to fail
                db.close()
                raise

        db.commit()

        return db

    def get_jenis_paket(self):
        """
        :return: ``'pl'`` when the context asks for non-tender packages, else ``'lelang'``
        """
        return 'pl' if self.ctx.non_tender else 'lelang'

    def get_total_package(self, tahun):
        """
        Ask the host how many packages match the current filters.

        :param tahun: budget year, or None for all years
        :return: value of ``recordsFiltered`` in the host's response
        """
        jenis_paket = self.get_jenis_paket()

        data = self.lpse.get_paket(jenis_paket=jenis_paket, kategori=self.ctx.kategori,
                                   nama_penyedia=self.ctx.nama_penyedia, search_keyword=self.ctx.keyword,
                                   tahun=tahun)

        logging.debug("Jumlah record {}".format(str(data)))
        return data['recordsFiltered']

    def start(self):
        """
        Download the index page by page for every budget year in the context.

        Does nothing when resuming on top of a usable previous index.
        """
        if self.ctx.resume and self.db_status_for_resume:
            return

        for tahun in self.ctx.tahun_anggaran:
            total = self.get_total_package(tahun=tahun)
            # ceiling division: number of pages of ``chunk_size`` rows
            batch_total = -(-total // self.ctx.chunk_size)
            data_count = 0

            for batch in range(batch_total):
                data = self.lpse.get_paket(jenis_paket=self.get_jenis_paket(), start=batch * self.ctx.chunk_size,
                                           length=self.ctx.chunk_size, kategori=self.ctx.kategori,
                                           search_keyword=self.ctx.keyword, nama_penyedia=self.ctx.nama_penyedia,
                                           data_only=True, tahun=tahun)

                if not data:
                    break

                self.db.executemany("INSERT OR IGNORE INTO INDEX_PAKET VALUES(?, ?, ?, ?, ?, ?)",
                                    self.convert_index_for_db(data))
                self.db.commit()

                # update data count
                data_count += len(data)
                logging.info(
                    "{host} - TA {tahun} - Indexing halaman ke-{batch}.".format(
                        host=self.lpse_host.url,
                        batch=batch + 1,
                        tahun=tahun if tahun is not None else 'ALL'
                    )
                )

                sleep(self.ctx.index_download_delay)

    def convert_index_for_db(self, data):
        """
        Convert index rows returned by the SPSE application into ``INDEX_PAKET`` rows.

        :param data: iterable of rows; element 0 is used as the package id and element
            6 (non-tender) or 8 (tender) as ``KATEGORI_TAHUN_ANGGARAN``
        :return: generator of 6-item lists matching the table's column order
        """
        jenis = 'nontender' if self.ctx.non_tender else 'tender'

        for row in data:
            yield [
                '{}-{}'.format(jenis, row[0]),
                row[0],
                jenis,
                row[6] if self.ctx.non_tender else row[8],
                0,
                None  # package detail is empty until the detail downloader fills it
            ]

    @staticmethod
    def index_factory(cursor, row):
        """
        Build an ``LpseIndex`` from a result row, keyed by lower-cased column names.

        :param cursor: cursor whose ``description`` names the columns of ``row``
        :param row: sequence of column values
        """
        d = {col[0].lower(): row[idx] for idx, col in enumerate(cursor.description)}

        return LpseIndex(d)

    def get_index(self):
        """
        :return: generator of ``LpseIndex`` for every row whose ``STATUS`` is 0
        """
        logging.debug("[SQL] get index from database")
        result = self.db.execute("SELECT * FROM INDEX_PAKET WHERE STATUS = 0")

        for row in result.fetchall():
            row = self.index_factory(result, row)

            logging.debug("row data {}".format(row))
            yield row

    def resume(self):
        """
        Placeholder for resuming an interrupted index download; currently does nothing.
        """
        pass

    def __del__(self):
        """
        Make sure the database connection is closed when the object is garbage collected.
        """
        if self.db:
            self.db.close()
            del self.db

        if self.lpse is not None:
            del self.lpse
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
class DetailDownloader(object):
    """
    Downloads the detail of every pending index row and stores it in the index database.
    """

    def __init__(self, index_downloader):
        """
        :param index_downloader: object providing ``db``, ``lpse``, ``lpse_host``,
            ``ctx`` and ``get_index()``
        """
        self.index_downloader = index_downloader
        # serialises database writes issued from the worker threads
        self.lock = threading.Lock()

        logging.info("{} - Mulai pengunduhan detail data".format(self.index_downloader.lpse_host.url))

    def __pre_process_index_db(self):
        """
        :return: ``(total, deleted)``: number of rows with ``STATUS = 0`` and a deleted
            count that is always 0
        """
        cursor = self.index_downloader.db.execute(
            """SELECT COUNT(1) FROM INDEX_PAKET WHERE STATUS = 0"""
        )

        return cursor.fetchone()[0], 0

    def get_detail(self, lpse_index):
        """
        Download the detail of one package and persist it.

        :param lpse_index: index row; ``id_paket`` is read and ``detail`` is replaced
            by the downloaded detail object
        """
        source = self.index_downloader
        logging.debug("[DETAIL DOWNLOADER] download detail for {}".format(lpse_index))

        fetch = source.lpse.detil_paket_non_tender if source.ctx.non_tender else source.lpse.detil_paket_tender
        package_detail = fetch(lpse_index.id_paket)

        info = package_detail.get_all_detil()

        if info['error']:
            logging.error('{} - Terjadi kesalahan untuk paket {}'.format(
                source.lpse_host.url, info['error_message']
            ))

        # the detail is stored (and the row marked done) even when an error was reported
        lpse_index.detail = package_detail

        logging.debug("[DETAIL DOWNLOADER] update database detail data")
        self.update_detail(lpse_index)

    def update_detail(self, lpse_index):
        """
        Write the JSON-encoded detail to the row and set its ``STATUS`` to 1.

        :param lpse_index: index row with ``row_id`` and a ``detail`` offering ``todict()``
        """
        payload = json.dumps(lpse_index.detail.todict())

        with self.lock:
            logging.debug("[DETAIL DOWNLOADER] update detail data {}".format(lpse_index))
            self.index_downloader.db.execute(
                "UPDATE INDEX_PAKET SET DETAIL = ?, STATUS = 1 WHERE ROW_ID = ?",
                (payload, lpse_index.row_id)
            )
            self.index_downloader.db.commit()

    def start(self):
        """
        Process all pending index rows in batches of ``ctx.workers`` threads.

        Progress is printed on one line when the log level is ``INFO``.
        """
        ctx = self.index_downloader.ctx
        pending, removed = self.__pre_process_index_db()
        total_to_download = pending - removed
        pending_rows = self.index_downloader.get_index()
        total_downloaded = 0

        while True:
            # take up to ``workers`` rows; fewer means the generator is exhausted
            batch = []
            for _ in range(ctx.workers):
                try:
                    batch.append(next(pending_rows))
                except StopIteration:
                    pass

            logging.debug("[DETAIL DOWNLOADER] starting batch for {}".format(batch))

            workers = []
            for number, entry in enumerate(batch):
                worker = threading.Thread(target=self.get_detail, args=(entry,),
                                          name='detail-thread-{}'.format(number))
                worker.start()
                logging.debug("[DETAIL DOWNLOADER] {} started".format(worker.name))
                workers.append(worker)

            for worker in workers:
                logging.debug("[DETAIL DOWNLOADER] thread {} join".format(worker.name))
                worker.join()

            for worker in workers:
                logging.debug("[DETAIL DOWNLOADER] thread {} deleted".format(worker.name))

            total_downloaded += len(batch)

            if ctx.log_level == 'INFO':
                if total_to_download > 0:
                    percentage = total_downloaded/total_to_download*100
                else:
                    percentage = 0.0
                print(
                    "\rMemproses {}/{} ({:,.2f}%) data".format(total_downloaded, total_to_download, percentage),
                    end=' '
                )

            if len(batch) != ctx.workers:
                break

        print()
        logging.info("{} - {} data selesai diproses".format(self.index_downloader.lpse_host.url, total_downloaded))
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
class Exporter:
    """
    Exports the downloaded package details of one host to a CSV or JSON file.

    The output file is ``<output filename>.<ext>`` in the current working directory.
    """

    def __init__(self, index_downloader):
        """
        :param index_downloader: object providing ``db``, ``lpse_host``, ``ctx`` and
            ``index_factory(cursor, row)``
        """
        self.index_downloader = index_downloader

    def get_detail(self):
        """
        Query the details of all rows whose ``STATUS`` is 1.

        :return: generator of decoded detail values, one per row
        """
        logging.info("{} - Export Data".format(self.index_downloader.lpse_host.url))
        result = self.index_downloader.db.execute("SELECT * from INDEX_PAKET WHERE STATUS = 1")
        for data in result.fetchall():
            data = self.index_downloader.index_factory(result, data)
            yield data.detail

    def get_file_obj(self, ext):
        """
        Build the path of the export file.

        :param ext: file extension without the leading dot
        :return: ``pathlib.Path`` of the export file (not opened)
        """
        filename = self.index_downloader.lpse_host.filename.name + '.' + ext

        return Path.cwd() / filename

    def to_csv(self, delimiter):
        """
        Export the details to CSV.

        Columns: ``url``, the announcement fields listed in ``header`` and five
        JSON-encoded columns (``hasil_evaluasi``, ``pemenang``, ``pemenang_berkontrak``,
        ``jadwal``, ``peserta``).

        :param delimiter: CSV field delimiter
        """
        is_tender = not self.index_downloader.ctx.non_tender
        header = [
            'id_paket',
            'nama_tender',
            'tanggal_pembuatan',
            'tahap_tender_saat_ini',
            'k/l/pd',
            'satuan_kerja',
            'jenis_pengadaan',
            'metode_pengadaan',
            'tahun_anggaran',
            'nilai_pagu_paket',
            'nilai_hps_paket',
            'jenis_kontrak',
            'kualifikasi_usaha',
            'peserta_tender',
            'khusus_pelaku_usaha_oap',
            'lokasi_pekerjaan',
            'label_paket',
        ]

        if not is_tender:
            # non-tender announcements use different names for these fields
            header[1] = 'nama_paket'
            header[3] = 'tahap_paket_saat_ini'
            header[-4] = 'peserta_non_tender'

        json_data_header = ['hasil_evaluasi', 'pemenang', 'pemenang_berkontrak', 'jadwal', 'peserta']

        with self.get_file_obj('csv').open('w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f, delimiter=delimiter)
            writer.writerow(['url'] + header + json_data_header)

            for item in self.get_detail():
                pengumuman = item.get('pengumuman')

                if pengumuman:
                    # ``id_paket`` comes from the item itself, hence ``header[1:]``
                    base_data = [pengumuman.get(i) for i in header[1:]]
                    # the last two fields are JSON-encoded rather than written as-is
                    base_data[-1] = json.dumps(base_data[-1])
                    base_data[-2] = json.dumps(base_data[-2])
                else:
                    base_data = [None]*len(header[1:])

                # Same order as ``json_data_header``: ``jadwal`` before ``peserta``.
                # Writing them the other way round put each under the wrong heading.
                writer.writerow(
                    [self.index_downloader.lpse_host.url, item.get('id_paket')] +
                    base_data +
                    [
                        json.dumps(item.get('hasil')),
                        json.dumps(item.get('pemenang')),
                        json.dumps(item.get('pemenang_berkontrak')),
                        json.dumps(item.get('jadwal')),
                        json.dumps(item.get('peserta')),
                    ],
                )

    def to_json(self):
        """
        Export the details as one JSON array, written item by item.

        An export without rows produces ``[]``.
        """
        with self.get_file_obj('json').open('w', encoding='utf-8') as f:
            f.write("[")
            for position, item in enumerate(self.get_detail()):
                # Separator goes before every item but the first, so no trailing comma
                # has to be removed by seeking backwards (which, with no items, erased
                # the opening bracket).
                if position:
                    f.write(",")
                f.write(json.dumps(item))
            f.write("]")
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
class QualityAssurance:
    """
    Summarises how many index rows were downloaded successfully.
    """

    def __init__(self, index_downloader):
        """
        :param index_downloader: object exposing the index database as ``db``
        """
        self.index_downloader = index_downloader

    def check(self):
        """
        Count the rows of ``INDEX_PAKET`` per ``STATUS`` value.

        :return: ``(total, success, fail)``: all rows, rows with status 1 and rows
            with status 0
        """
        cursor = self.index_downloader.db.execute("SELECT STATUS, COUNT(1) FROM INDEX_PAKET GROUP BY STATUS")

        per_status = {}
        for status, count in cursor.fetchall():
            per_status[status] = count

        return sum(per_status.values()), per_status.get(1, 0), per_status.get(0, 0)
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
class Downloader(object):
    """
    Command line front end: parses the arguments and runs index download, detail
    download, export and a final check for every requested LPSE host.
    """

    ctx = None

    @staticmethod
    def get_args_from_interactive_menu():
        """
        Ask the user for the main options and return them as an argument list.

        :return: argument list suitable for ``get_ctx``
        :raises SystemExit: with code 1 when the procurement type is not ``tender`` or ``pl``
        """
        args = [
            input("Alamat LPSE: "),
            "--tahun-anggaran",
            ''.join(input("Tahun Anggaran [X atau X,Y,Z atau X-Z]: ").strip().split()),
            "--keyword",
            input("Kata kunci pencarian [default kosong]: ")
        ]
        is_tender = input("Jenis pengadan [tender/pl]: ").lower().strip()

        if is_tender not in ['tender', 'pl']:
            print("Pilihan {} tidak valid".format(is_tender))
            # ``exit`` is a ``site`` helper meant for interactive sessions and is not
            # guaranteed to exist; raise SystemExit directly.
            raise SystemExit(1)

        if is_tender == 'pl':
            args.append('--non-tender')

        return args

    def get_ctx(self, sys_args):
        """
        Parse the command line arguments into a downloader context.

        When ``sys_args`` is empty the interactive menu is shown instead.

        -h, --help                  : show the help message
        -k, --keyword               : filter the package index by keyword
        -t, --tahun-anggaran        : budget year filter, ``X-Y`` for a range, ``X,Y,Z``
                                      for a list, or ``all``; defaults to the current year
        --kategori                  : filter the package index by category
        --nama-penyedia             : filter the package index by provider name
        -c, --chunk-size            : number of index rows fetched per page (default 100)
        -w, --workers               : number of parallel detail workers; accepted, but the
                                      context currently hard-codes one worker
        -x, --timeout               : server response timeout in seconds (default 30)
        -n, --non-tender            : download non-tender (direct procurement) packages
        -d, --index-download-delay  : delay between index pages in seconds (default 1)
        -o, --output-format         : ``json`` or ``csv`` (default ``csv``)
        --keep-index                : keep the index database after a successful run
        -r, --resume                : continue from an existing index database
        -s, --separator             : CSV delimiter (default ``;``)
        --log                       : log level (default ``INFO``)
        LPSE_HOST                   : LPSE host, or a text file with a list of hosts. If a
                                      file with the given name exists it takes priority.

        :param sys_args: list of command line arguments without the program name
        :return: the downloader context, also stored as ``self.ctx``
        """

        # if there is no argument, show interactive menu
        if len(sys_args) == 0:
            sys_args = self.get_args_from_interactive_menu()

        parser = argparse.ArgumentParser()
        parser.add_argument('lpse_host', type=str, help=text.HELP_LPSE_HOST)
        parser.add_argument('-k', '--keyword', type=str, default="", help=text.HELP_KEYWORD)
        parser.add_argument('-t', '--tahun-anggaran', type=str, default="{}".format(datetime.now().year),
                            help=text.HELP_TAHUN_ANGGARAN)
        parser.add_argument('--kategori',
                            choices=[
                                "PENGADAAN_BARANG",
                                "JASA_KONSULTANSI_BADAN_USAHA_NON_KONSTRUKSI",
                                "PEKERJAAN_KONSTRUKSI",
                                "JASA_LAINNYA",
                                "JASA_KONSULTANSI_PERORANGAN",
                                "JASA_KONSULTANSI_BADAN_USAHA_KONSTRUKSI",
                                None
                            ],
                            help=text.HELP_KATEGORI, default=None)
        parser.add_argument('--nama-penyedia', type=str, default=None, help=text.HELP_PENYEDIA)
        parser.add_argument('-c', '--chunk-size', type=int, default=100, help=text.HELP_CHUNK_SIZE)
        parser.add_argument('-w', '--workers', type=int, default=8, help=text.HELP_WORKERS)
        parser.add_argument('-x', '--timeout', type=int, default=30, help=text.HELP_TIMEOUT)
        parser.add_argument('-n', '--non-tender', action='store_true', help=text.HELP_NONTENDER)
        parser.add_argument('-d', '--index-download-delay', type=int, default=1, help=text.HELP_INDEX_DOWNLOAD_DELAY)
        parser.add_argument('-o', '--output-format', choices=['json', 'csv'], default='csv', help=text.HELP_OUTPUT)
        parser.add_argument('--keep-index', action='store_true', help=text.HELP_KEEP)
        parser.add_argument('-r', '--resume', action='store_true', help=text.HELP_RESUME)
        parser.add_argument('-s', '--separator', type=str, default=";", help=text.HELP_CSV_SEPARATOR)
        parser.add_argument('--log', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], default='INFO',
                            help=text.HELP_LOG_LEVEL)

        args = parser.parse_args(sys_args)

        set_up_log(args.log)

        logging.debug('Parsing context')

        self.ctx = DownloaderContext(args)

        return self.ctx

    def start(self):
        """
        Run the full pipeline for every host of the context.

        A host whose entry is invalid, or whose index or detail download raises, is
        logged and skipped. For every processed host a line is appended to
        ``statistic.txt`` and, unless ``keep_index`` is set or rows failed, the index
        database file is removed.
        """
        for lpse_host in self.ctx.lpse_host_list:
            if not lpse_host.is_valid:
                logging.error("{} - {}".format(lpse_host.url, lpse_host.error))
                continue

            try:
                index_downloader = IndexDownloader(self.ctx, lpse_host)
                index_downloader.start()
            except Exception as e:
                logging.error("{} - Index Downloader Error {} {}".format(lpse_host.url, e.__class__, str(e)))
                continue

            try:
                detail_downloader = DetailDownloader(index_downloader)
                detail_downloader.start()
            except Exception as e:
                logging.error("{} - Detail Downloader Error {} {}".format(lpse_host.url, e.__class__, str(e)))
                continue

            exporter = Exporter(index_downloader)

            if self.ctx.output_format == 'json':
                exporter.to_json()
            elif self.ctx.output_format == 'csv':
                exporter.to_csv(delimiter=self.ctx.separator)

            qa = QualityAssurance(index_downloader)
            total, success, fail = qa.check()

            with open('statistic.txt', 'a') as f:
                f.write("{} total={} success={} fail={} tahun={}\n".format(
                    lpse_host.url, total, success, fail, self.ctx.tahun_anggaran
                ))

            if total == 0:
                logging.info("Proses selesai, tidak ada data yang ditemukan.")
            elif fail == 0:
                logging.info("Proses selesai: {}/{} ({:,.2f}%) terunduh".format(success, total, success/total*100))
            else:
                logging.error("Proses gagal: {}/{} ({:,.2f}%).".format(fail, total, fail/total*100))
                logging.info("Jalankan perintah dengan parameter --resume / -r untuk mengunduh ulang paket yang gagal")

            # the index is kept after failures so that --resume can pick it up
            if not index_downloader.ctx.keep_index and fail == 0:
                logging.info("{} - membersihkan direktori".format(lpse_host.url))
                index_downloader.db.close()
                try:
                    index_downloader.db_file.unlink()
                except FileNotFoundError:
                    pass

            del index_downloader
            del detail_downloader
            del exporter
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def main():
    """
    Command line entry point.

    Handles the ``daftarlpse`` and ``daftarhost`` subcommands, otherwise parses the
    arguments, reports whether a newer PyProc release exists and runs the downloader.
    """
    import sys

    IWillFindYouAndIWillKillYou()

    print(text.INFO)

    subcommand = sys.argv[1] if len(sys.argv) > 1 else None

    # Subcommand: daftarlpse
    if subcommand == 'daftarlpse':
        set_up_log('INFO')
        pyproc.utils.download_host(logging)
        sys.exit(0)

    # Subcommand: daftarhost
    if subcommand == 'daftarhost':
        set_up_log('INFO')
        directory = sys.argv[2] if len(sys.argv) > 2 else '.'
        pyproc.utils.download_host_json(logging, directory=directory)
        sys.exit(0)

    downloader = Downloader()

    try:
        downloader.get_ctx(sys.argv[1:])
    except DownloaderContextException as e:
        # invalid user input: show the message instead of a traceback
        logging.error(e)
        sys.exit(1)

    # The version check is best effort. It has its own ``try`` so that an unreachable
    # PyPI does not prevent the download from starting.
    try:
        status, current, new = check_new_version()
        if status:
            logging.info(f"Anda menggunakan PyProc versi {current}, "
                         f"tersedia versi baru {new}. "
                         f"Mohon untuk memperbarui aplikasi.")
    except Exception as e:
        logging.warning(f"Gagal memeriksa versi terbaru: {e}")

    try:
        downloader.start()
    except Exception as e:
        logging.error(f"Terjadi galat {e}")
    finally:
        del downloader


# run the command line interface when the module is executed as a script
if __name__ == '__main__':
    main()
|