ominfra 0.0.0.dev102__py3-none-any.whl → 0.0.0.dev104__py3-none-any.whl

This diff shows the content changes between two package versions publicly released to one of the supported registries, and is provided for informational purposes only.
@@ -4,38 +4,6 @@
4
4
  # @omlish-script
5
5
  # @omlish-amalg-output ../clouds/aws/journald2aws/main.py
6
6
  # ruff: noqa: N802 UP006 UP007 UP036
7
- """
8
- TODO:
9
- - create log group
10
- - log stats - chunk sizes, byte count, num calls, etc
11
-
12
- ==
13
-
14
- https://www.freedesktop.org/software/systemd/man/latest/journalctl.html
15
-
16
- journalctl:
17
- -o json
18
- --show-cursor
19
-
20
- --since "2012-10-30 18:17:16"
21
- --until "2012-10-30 18:17:16"
22
-
23
- --after-cursor <cursor>
24
-
25
- ==
26
-
27
- https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html
28
-
29
- ==
30
-
31
- @dc.dataclass(frozen=True)
32
- class Journald2AwsConfig:
33
- log_group_name: str
34
- log_stream_name: str
35
-
36
- aws_batch_size: int = 1_000
37
- aws_flush_interval_s: float = 1.
38
- """
39
7
  import abc
40
8
  import argparse
41
9
  import base64
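The docstring removed above (its content reappears under ../driver.py later in this diff) lists the journalctl flags the tool drives. For reference only, a minimal sketch of that invocation as a Python argv list; the real command is assembled by JournalctlTailerWorker and may differ:

    # Sketch, not package code: journalctl flags named in the removed docstring.
    cmd = [
        'journalctl',
        '-o', 'json',        # one JSON object per line
        '--show-cursor',     # emit a cursor so tailing can resume later
        '--since', '2012-10-30 18:17:16',
        # '--after-cursor', '<cursor>',   # alternative: resume from a saved cursor
    ]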
@@ -81,15 +49,21 @@ if sys.version_info < (3, 8):
81
49
  ########################################
82
50
 
83
51
 
84
- # ../../../../../omlish/lite/check.py
52
+ # ../../../../../omlish/lite/cached.py
85
53
  T = ta.TypeVar('T')
86
54
 
55
+ # ../../../../../omlish/lite/contextmanagers.py
56
+ ExitStackedT = ta.TypeVar('ExitStackedT', bound='ExitStacked')
57
+
58
+ # ../../../../threadworkers.py
59
+ ThreadWorkerT = ta.TypeVar('ThreadWorkerT', bound='ThreadWorker')
60
+
87
61
 
88
62
  ########################################
89
63
  # ../../../../../omlish/lite/cached.py
90
64
 
91
65
 
92
- class cached_nullary: # noqa
66
+ class _cached_nullary: # noqa
93
67
  def __init__(self, fn):
94
68
  super().__init__()
95
69
  self._fn = fn
@@ -106,6 +80,10 @@ class cached_nullary: # noqa
106
80
  return bound
107
81
 
108
82
 
83
+ def cached_nullary(fn: ta.Callable[..., T]) -> ta.Callable[..., T]:
84
+ return _cached_nullary(fn)
85
+
86
+
109
87
  ########################################
110
88
  # ../../../../../omlish/lite/check.py
111
89
 
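The two hunks above split the old cached_nullary class into a private _cached_nullary implementation plus a cached_nullary() function, so call sites keep decorating zero-argument callables unchanged. A minimal usage sketch (load_config is a hypothetical function, not from the package):

    @cached_nullary
    def load_config() -> dict:
        print('loading')                   # runs only on the first call
        return {'region': 'us-west-1'}

    load_config()   # computes and caches the value
    load_config()   # returns the cached dict without recomputing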
@@ -222,7 +200,7 @@ class Pidfile:
222
200
  return self
223
201
 
224
202
  def __exit__(self, exc_type, exc_val, exc_tb):
225
- if self._f is not None:
203
+ if hasattr(self, '_f'):
226
204
  self._f.close()
227
205
  del self._f
228
206
 
@@ -736,6 +714,52 @@ class AwsDataclassMeta:
736
714
  return AwsDataclassMeta.Converters(d2a, a2d)
737
715
 
738
716
 
717
+ ########################################
718
+ # ../../../../../omlish/lite/contextmanagers.py
719
+
720
+
721
+ ##
722
+
723
+
724
+ class ExitStacked:
725
+ _exit_stack: ta.Optional[contextlib.ExitStack] = None
726
+
727
+ def __enter__(self: ExitStackedT) -> ExitStackedT:
728
+ check_state(self._exit_stack is None)
729
+ es = self._exit_stack = contextlib.ExitStack()
730
+ es.__enter__()
731
+ return self
732
+
733
+ def __exit__(self, exc_type, exc_val, exc_tb):
734
+ if (es := self._exit_stack) is None:
735
+ return None
736
+ return es.__exit__(exc_type, exc_val, exc_tb)
737
+
738
+ def _enter_context(self, cm: ta.ContextManager[T]) -> T:
739
+ es = check_not_none(self._exit_stack)
740
+ return es.enter_context(cm)
741
+
742
+
743
+ ##
744
+
745
+
746
+ @contextlib.contextmanager
747
+ def attr_setting(obj, attr, val, *, default=None): # noqa
748
+ not_set = object()
749
+ orig = getattr(obj, attr, not_set)
750
+ try:
751
+ setattr(obj, attr, val)
752
+ if orig is not not_set:
753
+ yield orig
754
+ else:
755
+ yield default
756
+ finally:
757
+ if orig is not_set:
758
+ delattr(obj, attr)
759
+ else:
760
+ setattr(obj, attr, orig)
761
+
762
+
739
763
  ########################################
740
764
  # ../../../../../omlish/lite/io.py
741
765
 
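The new ExitStacked mixin gives a class a contextlib.ExitStack that is opened in __enter__ and unwound in __exit__, with _enter_context() registering child context managers on it; ThreadWorker and JournalctlToAwsDriver adopt it later in this diff. A minimal sketch of the pattern (Holder is a hypothetical subclass):

    import tempfile

    class Holder(ExitStacked):                       # hypothetical example
        def open_scratch(self):
            # closed automatically when the Holder context exits
            return self._enter_context(tempfile.TemporaryFile())

    with Holder() as h:
        f = h.open_scratch()
        f.write(b'data')
    # the stacked TemporaryFile is closed here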
@@ -1259,7 +1283,7 @@ class DataclassObjMarshaler(ObjMarshaler):
1259
1283
  return {k: m.marshal(getattr(o, k)) for k, m in self.fs.items()}
1260
1284
 
1261
1285
  def unmarshal(self, o: ta.Any) -> ta.Any:
1262
- return self.ty(**{k: self.fs[k].unmarshal(v) for k, v in o.items() if self.nonstrict or k in self.fs})
1286
+ return self.ty(**{k: self.fs[k].unmarshal(v) for k, v in o.items() if not self.nonstrict or k in self.fs})
1263
1287
 
1264
1288
 
1265
1289
  @dc.dataclass(frozen=True)
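The one-line change above fixes the nonstrict filter in DataclassObjMarshaler.unmarshal: previously a nonstrict marshaler kept unknown keys (and then failed on the field lookup) while a strict one silently dropped them; the new condition gives each mode its intended behavior. A standalone sketch of the corrected semantics, using plain callables in place of field marshalers:

    fs = {'a': int, 'b': str}                       # known fields
    o = {'a': '1', 'b': 'x', 'extra_key': 0}        # input with an unknown key

    def unmarshal_fields(o, fs, nonstrict):
        # mirrors the corrected comprehension
        return {k: fs[k](v) for k, v in o.items() if not nonstrict or k in fs}

    unmarshal_fields(o, fs, nonstrict=True)    # drops 'extra_key': {'a': 1, 'b': 'x'}
    unmarshal_fields(o, fs, nonstrict=False)   # strict: raises KeyError('extra_key')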
@@ -1319,7 +1343,10 @@ class UuidObjMarshaler(ObjMarshaler):
1319
1343
  return uuid.UUID(o)
1320
1344
 
1321
1345
 
1322
- _OBJ_MARSHALERS: ta.Dict[ta.Any, ObjMarshaler] = {
1346
+ ##
1347
+
1348
+
1349
+ _DEFAULT_OBJ_MARSHALERS: ta.Dict[ta.Any, ObjMarshaler] = {
1323
1350
  **{t: NopObjMarshaler() for t in (type(None),)},
1324
1351
  **{t: CastObjMarshaler(t) for t in (int, float, str, bool)},
1325
1352
  **{t: Base64ObjMarshaler(t) for t in (bytes, bytearray)},
@@ -1348,20 +1375,19 @@ _OBJ_MARSHALER_GENERIC_ITERABLE_TYPES: ta.Dict[ta.Any, type] = {
1348
1375
  }
1349
1376
 
1350
1377
 
1351
- def register_opj_marshaler(ty: ta.Any, m: ObjMarshaler) -> None:
1352
- if ty in _OBJ_MARSHALERS:
1353
- raise KeyError(ty)
1354
- _OBJ_MARSHALERS[ty] = m
1355
-
1356
-
1357
- def _make_obj_marshaler(ty: ta.Any) -> ObjMarshaler:
1378
+ def _make_obj_marshaler(
1379
+ ty: ta.Any,
1380
+ rec: ta.Callable[[ta.Any], ObjMarshaler],
1381
+ *,
1382
+ nonstrict_dataclasses: bool = False,
1383
+ ) -> ObjMarshaler:
1358
1384
  if isinstance(ty, type):
1359
1385
  if abc.ABC in ty.__bases__:
1360
1386
  impls = [ # type: ignore
1361
1387
  PolymorphicObjMarshaler.Impl(
1362
1388
  ity,
1363
1389
  ity.__qualname__,
1364
- get_obj_marshaler(ity),
1390
+ rec(ity),
1365
1391
  )
1366
1392
  for ity in deep_subclasses(ty)
1367
1393
  if abc.ABC not in ity.__bases__
@@ -1377,7 +1403,8 @@ def _make_obj_marshaler(ty: ta.Any) -> ObjMarshaler:
1377
1403
  if dc.is_dataclass(ty):
1378
1404
  return DataclassObjMarshaler(
1379
1405
  ty,
1380
- {f.name: get_obj_marshaler(f.type) for f in dc.fields(ty)},
1406
+ {f.name: rec(f.type) for f in dc.fields(ty)},
1407
+ nonstrict=nonstrict_dataclasses,
1381
1408
  )
1382
1409
 
1383
1410
  if is_generic_alias(ty):
@@ -1387,7 +1414,7 @@ def _make_obj_marshaler(ty: ta.Any) -> ObjMarshaler:
1387
1414
  pass
1388
1415
  else:
1389
1416
  k, v = ta.get_args(ty)
1390
- return MappingObjMarshaler(mt, get_obj_marshaler(k), get_obj_marshaler(v))
1417
+ return MappingObjMarshaler(mt, rec(k), rec(v))
1391
1418
 
1392
1419
  try:
1393
1420
  st = _OBJ_MARSHALER_GENERIC_ITERABLE_TYPES[ta.get_origin(ty)]
@@ -1395,33 +1422,71 @@ def _make_obj_marshaler(ty: ta.Any) -> ObjMarshaler:
1395
1422
  pass
1396
1423
  else:
1397
1424
  [e] = ta.get_args(ty)
1398
- return IterableObjMarshaler(st, get_obj_marshaler(e))
1425
+ return IterableObjMarshaler(st, rec(e))
1399
1426
 
1400
1427
  if is_union_alias(ty):
1401
- return OptionalObjMarshaler(get_obj_marshaler(get_optional_alias_arg(ty)))
1428
+ return OptionalObjMarshaler(rec(get_optional_alias_arg(ty)))
1402
1429
 
1403
1430
  raise TypeError(ty)
1404
1431
 
1405
1432
 
1406
- def get_obj_marshaler(ty: ta.Any) -> ObjMarshaler:
1407
- try:
1408
- return _OBJ_MARSHALERS[ty]
1409
- except KeyError:
1410
- pass
1433
+ ##
1411
1434
 
1412
- p = ProxyObjMarshaler()
1413
- _OBJ_MARSHALERS[ty] = p
1414
- try:
1415
- m = _make_obj_marshaler(ty)
1416
- except Exception:
1417
- del _OBJ_MARSHALERS[ty]
1418
- raise
1419
- else:
1420
- p.m = m
1435
+
1436
+ _OBJ_MARSHALERS_LOCK = threading.RLock()
1437
+
1438
+ _OBJ_MARSHALERS: ta.Dict[ta.Any, ObjMarshaler] = dict(_DEFAULT_OBJ_MARSHALERS)
1439
+
1440
+ _OBJ_MARSHALER_PROXIES: ta.Dict[ta.Any, ProxyObjMarshaler] = {}
1441
+
1442
+
1443
+ def register_opj_marshaler(ty: ta.Any, m: ObjMarshaler) -> None:
1444
+ with _OBJ_MARSHALERS_LOCK:
1445
+ if ty in _OBJ_MARSHALERS:
1446
+ raise KeyError(ty)
1421
1447
  _OBJ_MARSHALERS[ty] = m
1448
+
1449
+
1450
+ def get_obj_marshaler(
1451
+ ty: ta.Any,
1452
+ *,
1453
+ no_cache: bool = False,
1454
+ **kwargs: ta.Any,
1455
+ ) -> ObjMarshaler:
1456
+ with _OBJ_MARSHALERS_LOCK:
1457
+ if not no_cache:
1458
+ try:
1459
+ return _OBJ_MARSHALERS[ty]
1460
+ except KeyError:
1461
+ pass
1462
+
1463
+ try:
1464
+ return _OBJ_MARSHALER_PROXIES[ty]
1465
+ except KeyError:
1466
+ pass
1467
+
1468
+ rec = functools.partial(
1469
+ get_obj_marshaler,
1470
+ no_cache=no_cache,
1471
+ **kwargs,
1472
+ )
1473
+
1474
+ p = ProxyObjMarshaler()
1475
+ _OBJ_MARSHALER_PROXIES[ty] = p
1476
+ try:
1477
+ m = _make_obj_marshaler(ty, rec, **kwargs)
1478
+ finally:
1479
+ del _OBJ_MARSHALER_PROXIES[ty]
1480
+ p.m = m
1481
+
1482
+ if not no_cache:
1483
+ _OBJ_MARSHALERS[ty] = m
1422
1484
  return m
1423
1485
 
1424
1486
 
1487
+ ##
1488
+
1489
+
1425
1490
  def marshal_obj(o: ta.Any, ty: ta.Any = None) -> ta.Any:
1426
1491
  return get_obj_marshaler(ty if ty is not None else type(o)).marshal(o)
1427
1492
 
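The refactor above keeps the defaults in _DEFAULT_OBJ_MARSHALERS, guards the live cache with an RLock, and threads recursion through a rec callback so options like nonstrict_dataclasses and no_cache propagate to nested types. A minimal usage sketch based on the signatures shown in this hunk (Point is a hypothetical dataclass):

    @dc.dataclass(frozen=True)
    class Point:                    # hypothetical example type
        x: int
        y: int

    marshal_obj(Point(1, 2))                          # -> {'x': 1, 'y': 2}
    unmarshal_obj({'x': 1, 'y': 2}, Point)            # -> Point(x=1, y=2)

    # Uncached, lenient marshaler that ignores unknown keys on unmarshal:
    m = get_obj_marshaler(Point, no_cache=True, nonstrict_dataclasses=True)
    m.unmarshal({'x': 1, 'y': 2, 'zz': 3})            # -> Point(x=1, y=2)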
@@ -1448,6 +1513,52 @@ def check_runtime_version() -> None:
1448
1513
  f'Requires python {REQUIRED_PYTHON_VERSION}, got {sys.version_info} from {sys.executable}') # noqa
1449
1514
 
1450
1515
 
1516
+ ########################################
1517
+ # ../cursor.py
1518
+
1519
+
1520
+ class JournalctlToAwsCursor:
1521
+ def __init__(
1522
+ self,
1523
+ cursor_file: ta.Optional[str] = None,
1524
+ *,
1525
+ ensure_locked: ta.Optional[ta.Callable[[], None]] = None,
1526
+ ) -> None:
1527
+ super().__init__()
1528
+ self._cursor_file = cursor_file
1529
+ self._ensure_locked = ensure_locked
1530
+
1531
+ #
1532
+
1533
+ def get(self) -> ta.Optional[str]:
1534
+ if self._ensure_locked is not None:
1535
+ self._ensure_locked()
1536
+
1537
+ if not (cf := self._cursor_file):
1538
+ return None
1539
+ cf = os.path.expanduser(cf)
1540
+
1541
+ try:
1542
+ with open(cf) as f:
1543
+ return f.read().strip()
1544
+ except FileNotFoundError:
1545
+ return None
1546
+
1547
+ def set(self, cursor: str) -> None:
1548
+ if self._ensure_locked is not None:
1549
+ self._ensure_locked()
1550
+
1551
+ if not (cf := self._cursor_file):
1552
+ return
1553
+ cf = os.path.expanduser(cf)
1554
+
1555
+ log.info('Writing cursor file %s : %s', cf, cursor)
1556
+ with open(ncf := cf + '.next', 'w') as f:
1557
+ f.write(cursor)
1558
+
1559
+ os.rename(ncf, cf)
1560
+
1561
+
1451
1562
  ########################################
1452
1563
  # ../../logs.py
1453
1564
  """
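The new JournalctlToAwsCursor persists the last journal cursor by writing a side file and renaming it over the target, so an interrupted write cannot leave a truncated cursor file behind. A minimal usage sketch (path and cursor value are hypothetical):

    cursor = JournalctlToAwsCursor('~/.journald2aws.cursor')
    cursor.set('s=abc123;i=42')   # writes '<file>.next', then os.rename()s it into place
    cursor.get()                  # -> 's=abc123;i=42', or None if no file exists yet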
@@ -1502,7 +1613,7 @@ class AwsPutLogEventsResponse(AwsDataclass):
1502
1613
  ##
1503
1614
 
1504
1615
 
1505
- class AwsLogMessagePoster:
1616
+ class AwsLogMessageBuilder:
1506
1617
  """
1507
1618
  TODO:
1508
1619
  - max_items
@@ -1528,7 +1639,7 @@ class AwsLogMessagePoster:
1528
1639
  log_group_name: str,
1529
1640
  log_stream_name: str,
1530
1641
  region_name: str,
1531
- credentials: AwsSigner.Credentials,
1642
+ credentials: ta.Optional[AwsSigner.Credentials],
1532
1643
 
1533
1644
  url: ta.Optional[str] = None,
1534
1645
  service_name: str = DEFAULT_SERVICE_NAME,
@@ -1550,11 +1661,16 @@ class AwsLogMessagePoster:
1550
1661
  headers = {**headers, **extra_headers}
1551
1662
  self._headers = {k: [v] for k, v in headers.items()}
1552
1663
 
1553
- self._signer = V4AwsSigner(
1554
- credentials,
1555
- region_name,
1556
- service_name,
1557
- )
1664
+ signer: ta.Optional[V4AwsSigner]
1665
+ if credentials is not None:
1666
+ signer = V4AwsSigner(
1667
+ credentials,
1668
+ region_name,
1669
+ service_name,
1670
+ )
1671
+ else:
1672
+ signer = None
1673
+ self._signer = signer
1558
1674
 
1559
1675
  #
1560
1676
 
@@ -1598,13 +1714,14 @@ class AwsLogMessagePoster:
1598
1714
  payload=body,
1599
1715
  )
1600
1716
 
1601
- sig_headers = self._signer.sign(
1602
- sig_req,
1603
- sign_payload=False,
1604
- )
1605
- sig_req = dc.replace(sig_req, headers={**sig_req.headers, **sig_headers})
1717
+ if (signer := self._signer) is not None:
1718
+ sig_headers = signer.sign(
1719
+ sig_req,
1720
+ sign_payload=False,
1721
+ )
1722
+ sig_req = dc.replace(sig_req, headers={**sig_req.headers, **sig_headers})
1606
1723
 
1607
- post = AwsLogMessagePoster.Post(
1724
+ post = AwsLogMessageBuilder.Post(
1608
1725
  url=self._url,
1609
1726
  headers={k: check_single(v) for k, v in sig_req.headers.items()},
1610
1727
  data=sig_req.payload,
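With credentials now optional, the renamed AwsLogMessageBuilder only signs the prepared request when a V4AwsSigner was built; passing credentials=None produces unsigned posts, which pairs with the new aws_dry_run config flag. A minimal construction sketch based on the signature shown above (group and stream names are hypothetical):

    builder = AwsLogMessageBuilder(
        log_group_name='my-group',        # hypothetical
        log_stream_name='my-stream',      # hypothetical
        region_name='us-west-1',
        credentials=None,                 # skip SigV4 signing, e.g. for a dry run
    )
    posts = builder.feed([AwsLogMessageBuilder.Message(message='hello', ts_ms=0)])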
@@ -1687,15 +1804,20 @@ class JournalctlMessageBuilder:
1687
1804
 
1688
1805
 
1689
1806
  ########################################
1690
- # ../../../../threadworker.py
1807
+ # ../../../../threadworkers.py
1691
1808
  """
1692
1809
  TODO:
1693
1810
  - implement stop lol
1694
1811
  - collective heartbeat monitoring - ThreadWorkerGroups
1812
+ - group -> 'context'? :|
1813
+ - shared stop_event?
1695
1814
  """
1696
1815
 
1697
1816
 
1698
- class ThreadWorker(abc.ABC):
1817
+ ##
1818
+
1819
+
1820
+ class ThreadWorker(ExitStacked, abc.ABC):
1699
1821
  def __init__(
1700
1822
  self,
1701
1823
  *,
@@ -1707,46 +1829,107 @@ class ThreadWorker(abc.ABC):
1707
1829
  stop_event = threading.Event()
1708
1830
  self._stop_event = stop_event
1709
1831
 
1832
+ self._lock = threading.RLock()
1710
1833
  self._thread: ta.Optional[threading.Thread] = None
1711
-
1712
1834
  self._last_heartbeat: ta.Optional[float] = None
1713
1835
 
1714
1836
  #
1715
1837
 
1838
+ def __enter__(self: ThreadWorkerT) -> ThreadWorkerT:
1839
+ with self._lock:
1840
+ return super().__enter__() # noqa
1841
+
1842
+ #
1843
+
1716
1844
  def should_stop(self) -> bool:
1717
1845
  return self._stop_event.is_set()
1718
1846
 
1847
+ class Stopping(Exception): # noqa
1848
+ pass
1849
+
1719
1850
  #
1720
1851
 
1721
1852
  @property
1722
1853
  def last_heartbeat(self) -> ta.Optional[float]:
1723
1854
  return self._last_heartbeat
1724
1855
 
1725
- def _heartbeat(self) -> bool:
1856
+ def _heartbeat(
1857
+ self,
1858
+ *,
1859
+ no_stop_check: bool = False,
1860
+ ) -> None:
1726
1861
  self._last_heartbeat = time.time()
1727
1862
 
1728
- if self.should_stop():
1863
+ if not no_stop_check and self.should_stop():
1729
1864
  log.info('Stopping: %s', self)
1730
- return False
1731
-
1732
- return True
1865
+ raise ThreadWorker.Stopping
1733
1866
 
1734
1867
  #
1735
1868
 
1869
+ def has_started(self) -> bool:
1870
+ return self._thread is not None
1871
+
1736
1872
  def is_alive(self) -> bool:
1737
1873
  return (thr := self._thread) is not None and thr.is_alive()
1738
1874
 
1739
1875
  def start(self) -> None:
1740
- thr = threading.Thread(target=self._run)
1741
- self._thread = thr
1742
- thr.start()
1876
+ with self._lock:
1877
+ if self._thread is not None:
1878
+ raise RuntimeError('Thread already started: %r', self)
1879
+
1880
+ thr = threading.Thread(target=self.__run)
1881
+ self._thread = thr
1882
+ thr.start()
1883
+
1884
+ #
1885
+
1886
+ def __run(self) -> None:
1887
+ try:
1888
+ self._run()
1889
+ except ThreadWorker.Stopping:
1890
+ log.exception('Thread worker stopped: %r', self)
1891
+ except Exception: # noqa
1892
+ log.exception('Error in worker thread: %r', self)
1893
+ raise
1743
1894
 
1744
1895
  @abc.abstractmethod
1745
1896
  def _run(self) -> None:
1746
1897
  raise NotImplementedError
1747
1898
 
1899
+ #
1900
+
1748
1901
  def stop(self) -> None:
1749
- raise NotImplementedError
1902
+ self._stop_event.set()
1903
+
1904
+ def join(self, timeout: ta.Optional[float] = None) -> None:
1905
+ with self._lock:
1906
+ if self._thread is None:
1907
+ raise RuntimeError('Thread not started: %r', self)
1908
+ self._thread.join(timeout)
1909
+
1910
+
1911
+ ##
1912
+
1913
+
1914
+ class ThreadWorkerGroup:
1915
+ @dc.dataclass()
1916
+ class State:
1917
+ worker: ThreadWorker
1918
+
1919
+ def __init__(self) -> None:
1920
+ super().__init__()
1921
+
1922
+ self._lock = threading.RLock()
1923
+ self._states: ta.Dict[ThreadWorker, ThreadWorkerGroup.State] = {}
1924
+
1925
+ def add(self, *workers: ThreadWorker) -> 'ThreadWorkerGroup':
1926
+ with self._lock:
1927
+ for w in workers:
1928
+ if w in self._states:
1929
+ raise KeyError(w)
1930
+ self._states[w] = ThreadWorkerGroup.State(w)
1931
+
1932
+ return self
1750
1933
 
1751
1934
 
1752
1935
  ########################################
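ThreadWorker now carries its own lifecycle: stop() sets the event, _heartbeat() raises ThreadWorker.Stopping once it is set, and __run() catches Stopping (and logs other exceptions), with start() and join() guarded by the new lock. A minimal subclass sketch based on the methods shown in this hunk (TickWorker is hypothetical):

    class TickWorker(ThreadWorker):        # hypothetical worker
        def _run(self) -> None:
            while True:
                self._heartbeat()          # raises ThreadWorker.Stopping after stop()
                time.sleep(.1)             # one unit of work per iteration

    w = TickWorker()
    w.start()
    time.sleep(1.)
    w.stop()                               # sets the stop event
    w.join()                               # waits for _run to observe Stopping and exit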
@@ -1855,6 +2038,103 @@ def subprocess_try_output_str(*args: str, **kwargs: ta.Any) -> ta.Optional[str]:
1855
2038
  return out.decode().strip() if out is not None else None
1856
2039
 
1857
2040
 
2041
+ ##
2042
+
2043
+
2044
+ def subprocess_close(
2045
+ proc: subprocess.Popen,
2046
+ timeout: ta.Optional[float] = None,
2047
+ ) -> None:
2048
+ # TODO: terminate, sleep, kill
2049
+ if proc.stdout:
2050
+ proc.stdout.close()
2051
+ if proc.stderr:
2052
+ proc.stderr.close()
2053
+ if proc.stdin:
2054
+ proc.stdin.close()
2055
+
2056
+ proc.wait(timeout)
2057
+
2058
+
2059
+ ########################################
2060
+ # ../poster.py
2061
+ """
2062
+ TODO:
2063
+ - retries
2064
+ """
2065
+
2066
+
2067
+ class JournalctlToAwsPosterWorker(ThreadWorker):
2068
+ def __init__(
2069
+ self,
2070
+ queue, # type: queue.Queue[ta.Sequence[JournalctlMessage]] # noqa
2071
+ builder: AwsLogMessageBuilder,
2072
+ cursor: JournalctlToAwsCursor,
2073
+ *,
2074
+ ensure_locked: ta.Optional[ta.Callable[[], None]] = None,
2075
+ dry_run: bool = False,
2076
+ queue_timeout_s: float = 1.,
2077
+ **kwargs: ta.Any,
2078
+ ) -> None:
2079
+ super().__init__(**kwargs)
2080
+ self._queue = queue
2081
+ self._builder = builder
2082
+ self._cursor = cursor
2083
+ self._ensure_locked = ensure_locked
2084
+ self._dry_run = dry_run
2085
+ self._queue_timeout_s = queue_timeout_s
2086
+ #
2087
+
2088
+ def _run(self) -> None:
2089
+ if self._ensure_locked is not None:
2090
+ self._ensure_locked()
2091
+
2092
+ last_cursor: ta.Optional[str] = None # noqa
2093
+ while True:
2094
+ self._heartbeat()
2095
+
2096
+ try:
2097
+ msgs: ta.Sequence[JournalctlMessage] = self._queue.get(timeout=self._queue_timeout_s)
2098
+ except queue.Empty:
2099
+ msgs = []
2100
+
2101
+ if not msgs:
2102
+ log.debug('Empty queue chunk')
2103
+ continue
2104
+
2105
+ log.debug('%r', msgs)
2106
+
2107
+ cur_cursor: ta.Optional[str] = None
2108
+ for m in reversed(msgs):
2109
+ if m.cursor is not None:
2110
+ cur_cursor = m.cursor
2111
+ break
2112
+
2113
+ feed_msgs = []
2114
+ for m in msgs:
2115
+ feed_msgs.append(AwsLogMessageBuilder.Message(
2116
+ message=json.dumps(m.dct, sort_keys=True),
2117
+ ts_ms=int((m.ts_us / 1000.) if m.ts_us is not None else (time.time() * 1000.)),
2118
+ ))
2119
+
2120
+ for post in self._builder.feed(feed_msgs):
2121
+ log.debug('%r', post)
2122
+
2123
+ if not self._dry_run:
2124
+ with urllib.request.urlopen(urllib.request.Request( # noqa
2125
+ post.url,
2126
+ method='POST',
2127
+ headers=dict(post.headers),
2128
+ data=post.data,
2129
+ )) as resp:
2130
+ response = AwsPutLogEventsResponse.from_aws(json.loads(resp.read().decode('utf-8')))
2131
+ log.debug('%r', response)
2132
+
2133
+ if cur_cursor is not None:
2134
+ self._cursor.set(cur_cursor)
2135
+ last_cursor = cur_cursor # noqa
2136
+
2137
+
1858
2138
  ########################################
1859
2139
  # ../../../../journald/tailer.py
1860
2140
  """
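subprocess_close(), added above, closes a Popen's pipes before waiting so the child cannot block on a full pipe; JournalctlTailerWorker calls it from a finally block in the next hunk. A minimal usage sketch (the echo command is a stand-in):

    proc = subprocess.Popen(['echo', 'hi'], stdout=subprocess.PIPE)
    try:
        out = proc.stdout.read()
    finally:
        subprocess_close(proc, timeout=5.)   # close stdout/stderr/stdin, then wait(timeout)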
@@ -2227,7 +2507,7 @@ class JournalctlTailerWorker(ThreadWorker):
2227
2507
  self._read_size = read_size
2228
2508
  self._sleep_s = sleep_s
2229
2509
 
2230
- self._mb = JournalctlMessageBuilder()
2510
+ self._builder = JournalctlMessageBuilder()
2231
2511
 
2232
2512
  self._proc: ta.Optional[subprocess.Popen] = None
2233
2513
 
@@ -2251,69 +2531,103 @@ class JournalctlTailerWorker(ThreadWorker):
2251
2531
 
2252
2532
  return cmd
2253
2533
 
2534
+ def _read_loop(self, stdout: ta.IO) -> None:
2535
+ while stdout.readable():
2536
+ self._heartbeat()
2537
+
2538
+ buf = stdout.read(self._read_size)
2539
+ if not buf:
2540
+ log.debug('Journalctl empty read')
2541
+ break
2542
+
2543
+ log.debug('Journalctl read buffer: %r', buf)
2544
+ msgs = self._builder.feed(buf)
2545
+ if msgs:
2546
+ while True:
2547
+ try:
2548
+ self._output.put(msgs, timeout=1.)
2549
+ except queue.Full:
2550
+ self._heartbeat()
2551
+ else:
2552
+ break
2553
+
2254
2554
  def _run(self) -> None:
2255
2555
  with subprocess.Popen(
2256
2556
  self._full_cmd(),
2257
2557
  stdout=subprocess.PIPE,
2258
2558
  ) as self._proc:
2259
- stdout = check_not_none(self._proc.stdout)
2559
+ try:
2560
+ stdout = check_not_none(self._proc.stdout)
2260
2561
 
2261
- fd = stdout.fileno()
2262
- fl = fcntl.fcntl(fd, fcntl.F_GETFL)
2263
- fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
2562
+ fd = stdout.fileno()
2563
+ fl = fcntl.fcntl(fd, fcntl.F_GETFL)
2564
+ fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
2264
2565
 
2265
- while True:
2266
- if not self._heartbeat():
2267
- return
2566
+ while True:
2567
+ self._heartbeat()
2268
2568
 
2269
- while stdout.readable():
2270
- if not self._heartbeat():
2271
- return
2569
+ self._read_loop(stdout)
2272
2570
 
2273
- buf = stdout.read(self._read_size)
2274
- if not buf:
2275
- log.debug('Journalctl empty read')
2276
- break
2571
+ log.debug('Journalctl not readable')
2277
2572
 
2278
- log.debug('Journalctl read buffer: %r', buf)
2279
- msgs = self._mb.feed(buf)
2280
- if msgs:
2281
- while True:
2282
- try:
2283
- self._output.put(msgs, timeout=1.)
2284
- except queue.Full:
2285
- if not self._heartbeat():
2286
- return
2287
- else:
2288
- break
2573
+ if self._proc.poll() is not None:
2574
+ log.critical('Journalctl process terminated')
2575
+ return
2289
2576
 
2290
- if self._proc.poll() is not None:
2291
- log.critical('Journalctl process terminated')
2292
- return
2577
+ time.sleep(self._sleep_s)
2293
2578
 
2294
- log.debug('Journalctl readable')
2295
- time.sleep(self._sleep_s)
2579
+ finally:
2580
+ subprocess_close(self._proc)
2296
2581
 
2297
2582
 
2298
2583
  ########################################
2299
- # main.py
2584
+ # ../driver.py
2585
+ """
2586
+ TODO:
2587
+ - create log group
2588
+ - log stats - chunk sizes, byte count, num calls, etc
2589
+
2590
+ ==
2591
+
2592
+ https://www.freedesktop.org/software/systemd/man/latest/journalctl.html
2593
+
2594
+ journalctl:
2595
+ -o json
2596
+ --show-cursor
2597
+
2598
+ --since "2012-10-30 18:17:16"
2599
+ --until "2012-10-30 18:17:16"
2300
2600
 
2601
+ --after-cursor <cursor>
2602
+
2603
+ ==
2604
+
2605
+ https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html
2606
+
2607
+ ==
2301
2608
 
2302
2609
  @dc.dataclass(frozen=True)
2303
- class JournalctlOpts:
2304
- after_cursor: ta.Optional[str] = None
2610
+ class Journald2AwsConfig:
2611
+ log_group_name: str
2612
+ log_stream_name: str
2613
+
2614
+ aws_batch_size: int = 1_000
2615
+ aws_flush_interval_s: float = 1.
2616
+ """
2305
2617
 
2306
- since: ta.Optional[str] = None
2307
- until: ta.Optional[str] = None
2618
+
2619
+ ##
2308
2620
 
2309
2621
 
2310
- class JournalctlToAws:
2622
+ class JournalctlToAwsDriver(ExitStacked):
2311
2623
  @dc.dataclass(frozen=True)
2312
2624
  class Config:
2313
2625
  pid_file: ta.Optional[str] = None
2314
2626
 
2315
2627
  cursor_file: ta.Optional[str] = None
2316
2628
 
2629
+ runtime_limit: ta.Optional[float] = None
2630
+
2317
2631
  #
2318
2632
 
2319
2633
  aws_log_group_name: str = 'omlish'
@@ -2324,6 +2638,8 @@ class JournalctlToAws:
2324
2638
 
2325
2639
  aws_region_name: str = 'us-west-1'
2326
2640
 
2641
+ aws_dry_run: bool = False
2642
+
2327
2643
  #
2328
2644
 
2329
2645
  journalctl_cmd: ta.Optional[ta.Sequence[str]] = None
@@ -2331,24 +2647,10 @@ class JournalctlToAws:
2331
2647
  journalctl_after_cursor: ta.Optional[str] = None
2332
2648
  journalctl_since: ta.Optional[str] = None
2333
2649
 
2334
- #
2335
-
2336
- dry_run: bool = False
2337
-
2338
2650
  def __init__(self, config: Config) -> None:
2339
2651
  super().__init__()
2340
- self._config = config
2341
2652
 
2342
- #
2343
-
2344
- _es: contextlib.ExitStack
2345
-
2346
- def __enter__(self) -> 'JournalctlToAws':
2347
- self._es = contextlib.ExitStack().__enter__()
2348
- return self
2349
-
2350
- def __exit__(self, exc_type, exc_val, exc_tb):
2351
- return self._es.__exit__(exc_type, exc_val, exc_tb)
2653
+ self._config = config
2352
2654
 
2353
2655
  #
2354
2656
 
@@ -2361,7 +2663,7 @@ class JournalctlToAws:
2361
2663
 
2362
2664
  log.info('Opening pidfile %s', pfp)
2363
2665
 
2364
- pf = self._es.enter_context(Pidfile(pfp))
2666
+ pf = self._enter_context(Pidfile(pfp))
2365
2667
  pf.write()
2366
2668
  return pf
2367
2669
 
@@ -2371,48 +2673,32 @@ class JournalctlToAws:
2371
2673
 
2372
2674
  #
2373
2675
 
2374
- def _read_cursor_file(self) -> ta.Optional[str]:
2375
- self._ensure_locked()
2376
-
2377
- if not (cf := self._config.cursor_file):
2378
- return None
2379
- cf = os.path.expanduser(cf)
2380
-
2381
- try:
2382
- with open(cf) as f:
2383
- return f.read().strip()
2384
- except FileNotFoundError:
2385
- return None
2386
-
2387
- def _write_cursor_file(self, cursor: str) -> None:
2388
- self._ensure_locked()
2389
-
2390
- if not (cf := self._config.cursor_file):
2391
- return
2392
- cf = os.path.expanduser(cf)
2393
-
2394
- log.info('Writing cursor file %s : %s', cf, cursor)
2395
- with open(ncf := cf + '.next', 'w') as f:
2396
- f.write(cursor)
2397
-
2398
- os.rename(ncf, cf)
2676
+ @cached_nullary
2677
+ def _cursor(self) -> JournalctlToAwsCursor:
2678
+ return JournalctlToAwsCursor(
2679
+ self._config.cursor_file,
2680
+ ensure_locked=self._ensure_locked,
2681
+ )
2399
2682
 
2400
2683
  #
2401
2684
 
2402
2685
  @cached_nullary
2403
- def _aws_credentials(self) -> AwsSigner.Credentials:
2686
+ def _aws_credentials(self) -> ta.Optional[AwsSigner.Credentials]:
2687
+ if self._config.aws_access_key_id is None and self._config.aws_secret_access_key is None:
2688
+ return None
2689
+
2404
2690
  return AwsSigner.Credentials(
2405
2691
  access_key_id=check_non_empty_str(self._config.aws_access_key_id),
2406
2692
  secret_access_key=check_non_empty_str(self._config.aws_secret_access_key),
2407
2693
  )
2408
2694
 
2409
2695
  @cached_nullary
2410
- def _aws_log_message_poster(self) -> AwsLogMessagePoster:
2411
- return AwsLogMessagePoster(
2696
+ def _aws_log_message_builder(self) -> AwsLogMessageBuilder:
2697
+ return AwsLogMessageBuilder(
2412
2698
  log_group_name=self._config.aws_log_group_name,
2413
2699
  log_stream_name=check_non_empty_str(self._config.aws_log_stream_name),
2414
2700
  region_name=self._config.aws_region_name,
2415
- credentials=check_not_none(self._aws_credentials()),
2701
+ credentials=self._aws_credentials(),
2416
2702
  )
2417
2703
 
2418
2704
  #
@@ -2431,7 +2717,7 @@ class JournalctlToAws:
2431
2717
  else:
2432
2718
  ac = self._config.journalctl_after_cursor
2433
2719
  if ac is None:
2434
- ac = self._read_cursor_file()
2720
+ ac = self._cursor().get()
2435
2721
  if ac is not None:
2436
2722
  log.info('Starting from cursor %s', ac)
2437
2723
 
@@ -2447,63 +2733,49 @@ class JournalctlToAws:
2447
2733
 
2448
2734
  #
2449
2735
 
2450
- def run(self) -> None:
2451
- self._ensure_locked()
2736
+ @cached_nullary
2737
+ def _aws_poster_worker(self) -> JournalctlToAwsPosterWorker:
2738
+ return JournalctlToAwsPosterWorker(
2739
+ self._journalctl_message_queue(),
2740
+ self._aws_log_message_builder(),
2741
+ self._cursor(),
2452
2742
 
2453
- q = self._journalctl_message_queue() # type: queue.Queue[ta.Sequence[JournalctlMessage]]
2454
- jtw = self._journalctl_tailer_worker() # type: JournalctlTailerWorker
2455
- mp = self._aws_log_message_poster() # type: AwsLogMessagePoster
2743
+ ensure_locked=self._ensure_locked,
2744
+ dry_run=self._config.aws_dry_run,
2745
+ )
2456
2746
 
2457
- jtw.start()
2747
+ #
2458
2748
 
2459
- last_cursor: ta.Optional[str] = None # noqa
2460
- while True:
2461
- if not jtw.is_alive():
2462
- log.critical('Journalctl tailer worker died')
2463
- break
2749
+ def run(self) -> None:
2750
+ pw: JournalctlToAwsPosterWorker = self._aws_poster_worker()
2751
+ tw: JournalctlTailerWorker = self._journalctl_tailer_worker()
2464
2752
 
2465
- try:
2466
- msgs: ta.Sequence[JournalctlMessage] = q.get(timeout=1.)
2467
- except queue.Empty:
2468
- msgs = []
2469
- if not msgs:
2470
- continue
2753
+ ws = [pw, tw]
2471
2754
 
2472
- log.debug('%r', msgs)
2755
+ for w in ws:
2756
+ w.start()
2473
2757
 
2474
- cur_cursor: ta.Optional[str] = None
2475
- for m in reversed(msgs):
2476
- if m.cursor is not None:
2477
- cur_cursor = m.cursor
2758
+ start = time.time()
2759
+
2760
+ while True:
2761
+ for w in ws:
2762
+ if not w.is_alive():
2763
+ log.critical('Worker died: %r', w)
2478
2764
  break
2479
2765
 
2480
- if not msgs:
2481
- log.warning('Empty queue chunk')
2482
- continue
2766
+ if (rl := self._config.runtime_limit) is not None and time.time() - start >= rl:
2767
+ log.warning('Runtime limit reached')
2768
+ break
2483
2769
 
2484
- feed_msgs = []
2485
- for m in msgs:
2486
- feed_msgs.append(mp.Message(
2487
- message=json.dumps(m.dct, sort_keys=True),
2488
- ts_ms=int((m.ts_us / 1000.) if m.ts_us is not None else (time.time() * 1000.)),
2489
- ))
2770
+ time.sleep(1.)
2490
2771
 
2491
- [post] = mp.feed(feed_msgs)
2492
- log.debug('%r', post)
2772
+ for w in reversed(ws):
2773
+ w.stop()
2774
+ w.join()
2493
2775
 
2494
- if not self._config.dry_run:
2495
- with urllib.request.urlopen(urllib.request.Request( # noqa
2496
- post.url,
2497
- method='POST',
2498
- headers=dict(post.headers),
2499
- data=post.data,
2500
- )) as resp:
2501
- response = AwsPutLogEventsResponse.from_aws(json.loads(resp.read().decode('utf-8')))
2502
- log.debug('%r', response)
2503
2776
 
2504
- if cur_cursor is not None:
2505
- self._write_cursor_file(cur_cursor)
2506
- last_cursor = cur_cursor # noqa
2777
+ ########################################
2778
+ # main.py
2507
2779
 
2508
2780
 
2509
2781
  def _main() -> None:
@@ -2518,6 +2790,8 @@ def _main() -> None:
2518
2790
 
2519
2791
  parser.add_argument('--message', nargs='?')
2520
2792
  parser.add_argument('--real', action='store_true')
2793
+ parser.add_argument('--num-messages', type=int)
2794
+ parser.add_argument('--runtime-limit', type=float)
2521
2795
 
2522
2796
  args = parser.parse_args()
2523
2797
 
@@ -2527,13 +2801,13 @@ def _main() -> None:
2527
2801
 
2528
2802
  #
2529
2803
 
2530
- config: JournalctlToAws.Config
2804
+ config: JournalctlToAwsDriver.Config
2531
2805
  if args.config_file:
2532
2806
  with open(os.path.expanduser(args.config_file)) as cf:
2533
2807
  config_dct = json.load(cf)
2534
- config = unmarshal_obj(config_dct, JournalctlToAws.Config)
2808
+ config = unmarshal_obj(config_dct, JournalctlToAwsDriver.Config)
2535
2809
  else:
2536
- config = JournalctlToAws.Config()
2810
+ config = JournalctlToAwsDriver.Config()
2537
2811
 
2538
2812
  #
2539
2813
 
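The config file is plain JSON unmarshaled into JournalctlToAwsDriver.Config. A hypothetical example with keys taken from the Config fields shown in this diff; the values are illustrative only:

    config_dct = {
        'pid_file': '~/.journald2aws.pid',
        'cursor_file': '~/.journald2aws.cursor',
        'runtime_limit': 3600.0,
        'aws_log_group_name': 'omlish',
        'aws_log_stream_name': 'my-host',
        'aws_region_name': 'us-west-1',
        'aws_dry_run': True,
    }
    config = unmarshal_obj(config_dct, JournalctlToAwsDriver.Config)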
@@ -2550,7 +2824,7 @@ def _main() -> None:
2550
2824
  '--sleep-n', '2',
2551
2825
  '--sleep-s', '.5',
2552
2826
  *(['--message', args.message] if args.message else []),
2553
- '100000',
2827
+ str(args.num_messages or 100_000),
2554
2828
  ])
2555
2829
 
2556
2830
  #
@@ -2558,14 +2832,14 @@ def _main() -> None:
2558
2832
  for ca, pa in [
2559
2833
  ('journalctl_after_cursor', 'after_cursor'),
2560
2834
  ('journalctl_since', 'since'),
2561
- ('dry_run', 'dry_run'),
2835
+ ('aws_dry_run', 'dry_run'),
2562
2836
  ]:
2563
2837
  if (av := getattr(args, pa)):
2564
2838
  config = dc.replace(config, **{ca: av})
2565
2839
 
2566
2840
  #
2567
2841
 
2568
- with JournalctlToAws(config) as jta:
2842
+ with JournalctlToAwsDriver(config) as jta:
2569
2843
  jta.run()
2570
2844
 
2571
2845