ominfra 0.0.0.dev102__py3-none-any.whl → 0.0.0.dev104__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,38 +4,6 @@
4
4
  # @omlish-script
5
5
  # @omlish-amalg-output ../clouds/aws/journald2aws/main.py
6
6
  # ruff: noqa: N802 UP006 UP007 UP036
7
- """
8
- TODO:
9
- - create log group
10
- - log stats - chunk sizes, byte count, num calls, etc
11
-
12
- ==
13
-
14
- https://www.freedesktop.org/software/systemd/man/latest/journalctl.html
15
-
16
- journalctl:
17
- -o json
18
- --show-cursor
19
-
20
- --since "2012-10-30 18:17:16"
21
- --until "2012-10-30 18:17:16"
22
-
23
- --after-cursor <cursor>
24
-
25
- ==
26
-
27
- https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html
28
-
29
- ==
30
-
31
- @dc.dataclass(frozen=True)
32
- class Journald2AwsConfig:
33
- log_group_name: str
34
- log_stream_name: str
35
-
36
- aws_batch_size: int = 1_000
37
- aws_flush_interval_s: float = 1.
38
- """
39
7
  import abc
40
8
  import argparse
41
9
  import base64
@@ -81,15 +49,21 @@ if sys.version_info < (3, 8):
81
49
  ########################################
82
50
 
83
51
 
84
- # ../../../../../omlish/lite/check.py
52
+ # ../../../../../omlish/lite/cached.py
85
53
  T = ta.TypeVar('T')
86
54
 
55
+ # ../../../../../omlish/lite/contextmanagers.py
56
+ ExitStackedT = ta.TypeVar('ExitStackedT', bound='ExitStacked')
57
+
58
+ # ../../../../threadworkers.py
59
+ ThreadWorkerT = ta.TypeVar('ThreadWorkerT', bound='ThreadWorker')
60
+
87
61
 
88
62
  ########################################
89
63
  # ../../../../../omlish/lite/cached.py
90
64
 
91
65
 
92
- class cached_nullary: # noqa
66
+ class _cached_nullary: # noqa
93
67
  def __init__(self, fn):
94
68
  super().__init__()
95
69
  self._fn = fn
@@ -106,6 +80,10 @@ class cached_nullary: # noqa
106
80
  return bound
107
81
 
108
82
 
83
+ def cached_nullary(fn: ta.Callable[..., T]) -> ta.Callable[..., T]:
84
+ return _cached_nullary(fn)
85
+
86
+
109
87
  ########################################
110
88
  # ../../../../../omlish/lite/check.py
111
89
 
@@ -222,7 +200,7 @@ class Pidfile:
222
200
  return self
223
201
 
224
202
  def __exit__(self, exc_type, exc_val, exc_tb):
225
- if self._f is not None:
203
+ if hasattr(self, '_f'):
226
204
  self._f.close()
227
205
  del self._f
228
206
 
@@ -736,6 +714,52 @@ class AwsDataclassMeta:
736
714
  return AwsDataclassMeta.Converters(d2a, a2d)
737
715
 
738
716
 
717
+ ########################################
718
+ # ../../../../../omlish/lite/contextmanagers.py
719
+
720
+
721
+ ##
722
+
723
+
724
+ class ExitStacked:
725
+ _exit_stack: ta.Optional[contextlib.ExitStack] = None
726
+
727
+ def __enter__(self: ExitStackedT) -> ExitStackedT:
728
+ check_state(self._exit_stack is None)
729
+ es = self._exit_stack = contextlib.ExitStack()
730
+ es.__enter__()
731
+ return self
732
+
733
+ def __exit__(self, exc_type, exc_val, exc_tb):
734
+ if (es := self._exit_stack) is None:
735
+ return None
736
+ return es.__exit__(exc_type, exc_val, exc_tb)
737
+
738
+ def _enter_context(self, cm: ta.ContextManager[T]) -> T:
739
+ es = check_not_none(self._exit_stack)
740
+ return es.enter_context(cm)
741
+
742
+
743
+ ##
744
+
745
+
746
+ @contextlib.contextmanager
747
+ def attr_setting(obj, attr, val, *, default=None): # noqa
748
+ not_set = object()
749
+ orig = getattr(obj, attr, not_set)
750
+ try:
751
+ setattr(obj, attr, val)
752
+ if orig is not not_set:
753
+ yield orig
754
+ else:
755
+ yield default
756
+ finally:
757
+ if orig is not_set:
758
+ delattr(obj, attr)
759
+ else:
760
+ setattr(obj, attr, orig)
761
+
762
+
739
763
  ########################################
740
764
  # ../../../../../omlish/lite/io.py
741
765
 
@@ -1259,7 +1283,7 @@ class DataclassObjMarshaler(ObjMarshaler):
1259
1283
  return {k: m.marshal(getattr(o, k)) for k, m in self.fs.items()}
1260
1284
 
1261
1285
  def unmarshal(self, o: ta.Any) -> ta.Any:
1262
- return self.ty(**{k: self.fs[k].unmarshal(v) for k, v in o.items() if self.nonstrict or k in self.fs})
1286
+ return self.ty(**{k: self.fs[k].unmarshal(v) for k, v in o.items() if not self.nonstrict or k in self.fs})
1263
1287
 
1264
1288
 
1265
1289
  @dc.dataclass(frozen=True)
@@ -1319,7 +1343,10 @@ class UuidObjMarshaler(ObjMarshaler):
1319
1343
  return uuid.UUID(o)
1320
1344
 
1321
1345
 
1322
- _OBJ_MARSHALERS: ta.Dict[ta.Any, ObjMarshaler] = {
1346
+ ##
1347
+
1348
+
1349
+ _DEFAULT_OBJ_MARSHALERS: ta.Dict[ta.Any, ObjMarshaler] = {
1323
1350
  **{t: NopObjMarshaler() for t in (type(None),)},
1324
1351
  **{t: CastObjMarshaler(t) for t in (int, float, str, bool)},
1325
1352
  **{t: Base64ObjMarshaler(t) for t in (bytes, bytearray)},
@@ -1348,20 +1375,19 @@ _OBJ_MARSHALER_GENERIC_ITERABLE_TYPES: ta.Dict[ta.Any, type] = {
1348
1375
  }
1349
1376
 
1350
1377
 
1351
- def register_opj_marshaler(ty: ta.Any, m: ObjMarshaler) -> None:
1352
- if ty in _OBJ_MARSHALERS:
1353
- raise KeyError(ty)
1354
- _OBJ_MARSHALERS[ty] = m
1355
-
1356
-
1357
- def _make_obj_marshaler(ty: ta.Any) -> ObjMarshaler:
1378
+ def _make_obj_marshaler(
1379
+ ty: ta.Any,
1380
+ rec: ta.Callable[[ta.Any], ObjMarshaler],
1381
+ *,
1382
+ nonstrict_dataclasses: bool = False,
1383
+ ) -> ObjMarshaler:
1358
1384
  if isinstance(ty, type):
1359
1385
  if abc.ABC in ty.__bases__:
1360
1386
  impls = [ # type: ignore
1361
1387
  PolymorphicObjMarshaler.Impl(
1362
1388
  ity,
1363
1389
  ity.__qualname__,
1364
- get_obj_marshaler(ity),
1390
+ rec(ity),
1365
1391
  )
1366
1392
  for ity in deep_subclasses(ty)
1367
1393
  if abc.ABC not in ity.__bases__
@@ -1377,7 +1403,8 @@ def _make_obj_marshaler(ty: ta.Any) -> ObjMarshaler:
1377
1403
  if dc.is_dataclass(ty):
1378
1404
  return DataclassObjMarshaler(
1379
1405
  ty,
1380
- {f.name: get_obj_marshaler(f.type) for f in dc.fields(ty)},
1406
+ {f.name: rec(f.type) for f in dc.fields(ty)},
1407
+ nonstrict=nonstrict_dataclasses,
1381
1408
  )
1382
1409
 
1383
1410
  if is_generic_alias(ty):
@@ -1387,7 +1414,7 @@ def _make_obj_marshaler(ty: ta.Any) -> ObjMarshaler:
1387
1414
  pass
1388
1415
  else:
1389
1416
  k, v = ta.get_args(ty)
1390
- return MappingObjMarshaler(mt, get_obj_marshaler(k), get_obj_marshaler(v))
1417
+ return MappingObjMarshaler(mt, rec(k), rec(v))
1391
1418
 
1392
1419
  try:
1393
1420
  st = _OBJ_MARSHALER_GENERIC_ITERABLE_TYPES[ta.get_origin(ty)]
@@ -1395,33 +1422,71 @@ def _make_obj_marshaler(ty: ta.Any) -> ObjMarshaler:
1395
1422
  pass
1396
1423
  else:
1397
1424
  [e] = ta.get_args(ty)
1398
- return IterableObjMarshaler(st, get_obj_marshaler(e))
1425
+ return IterableObjMarshaler(st, rec(e))
1399
1426
 
1400
1427
  if is_union_alias(ty):
1401
- return OptionalObjMarshaler(get_obj_marshaler(get_optional_alias_arg(ty)))
1428
+ return OptionalObjMarshaler(rec(get_optional_alias_arg(ty)))
1402
1429
 
1403
1430
  raise TypeError(ty)
1404
1431
 
1405
1432
 
1406
- def get_obj_marshaler(ty: ta.Any) -> ObjMarshaler:
1407
- try:
1408
- return _OBJ_MARSHALERS[ty]
1409
- except KeyError:
1410
- pass
1433
+ ##
1411
1434
 
1412
- p = ProxyObjMarshaler()
1413
- _OBJ_MARSHALERS[ty] = p
1414
- try:
1415
- m = _make_obj_marshaler(ty)
1416
- except Exception:
1417
- del _OBJ_MARSHALERS[ty]
1418
- raise
1419
- else:
1420
- p.m = m
1435
+
1436
+ _OBJ_MARSHALERS_LOCK = threading.RLock()
1437
+
1438
+ _OBJ_MARSHALERS: ta.Dict[ta.Any, ObjMarshaler] = dict(_DEFAULT_OBJ_MARSHALERS)
1439
+
1440
+ _OBJ_MARSHALER_PROXIES: ta.Dict[ta.Any, ProxyObjMarshaler] = {}
1441
+
1442
+
1443
+ def register_opj_marshaler(ty: ta.Any, m: ObjMarshaler) -> None:
1444
+ with _OBJ_MARSHALERS_LOCK:
1445
+ if ty in _OBJ_MARSHALERS:
1446
+ raise KeyError(ty)
1421
1447
  _OBJ_MARSHALERS[ty] = m
1448
+
1449
+
1450
+ def get_obj_marshaler(
1451
+ ty: ta.Any,
1452
+ *,
1453
+ no_cache: bool = False,
1454
+ **kwargs: ta.Any,
1455
+ ) -> ObjMarshaler:
1456
+ with _OBJ_MARSHALERS_LOCK:
1457
+ if not no_cache:
1458
+ try:
1459
+ return _OBJ_MARSHALERS[ty]
1460
+ except KeyError:
1461
+ pass
1462
+
1463
+ try:
1464
+ return _OBJ_MARSHALER_PROXIES[ty]
1465
+ except KeyError:
1466
+ pass
1467
+
1468
+ rec = functools.partial(
1469
+ get_obj_marshaler,
1470
+ no_cache=no_cache,
1471
+ **kwargs,
1472
+ )
1473
+
1474
+ p = ProxyObjMarshaler()
1475
+ _OBJ_MARSHALER_PROXIES[ty] = p
1476
+ try:
1477
+ m = _make_obj_marshaler(ty, rec, **kwargs)
1478
+ finally:
1479
+ del _OBJ_MARSHALER_PROXIES[ty]
1480
+ p.m = m
1481
+
1482
+ if not no_cache:
1483
+ _OBJ_MARSHALERS[ty] = m
1422
1484
  return m
1423
1485
 
1424
1486
 
1487
+ ##
1488
+
1489
+
1425
1490
  def marshal_obj(o: ta.Any, ty: ta.Any = None) -> ta.Any:
1426
1491
  return get_obj_marshaler(ty if ty is not None else type(o)).marshal(o)
1427
1492
 
@@ -1448,6 +1513,52 @@ def check_runtime_version() -> None:
1448
1513
  f'Requires python {REQUIRED_PYTHON_VERSION}, got {sys.version_info} from {sys.executable}') # noqa
1449
1514
 
1450
1515
 
1516
+ ########################################
1517
+ # ../cursor.py
1518
+
1519
+
1520
+ class JournalctlToAwsCursor:
1521
+ def __init__(
1522
+ self,
1523
+ cursor_file: ta.Optional[str] = None,
1524
+ *,
1525
+ ensure_locked: ta.Optional[ta.Callable[[], None]] = None,
1526
+ ) -> None:
1527
+ super().__init__()
1528
+ self._cursor_file = cursor_file
1529
+ self._ensure_locked = ensure_locked
1530
+
1531
+ #
1532
+
1533
+ def get(self) -> ta.Optional[str]:
1534
+ if self._ensure_locked is not None:
1535
+ self._ensure_locked()
1536
+
1537
+ if not (cf := self._cursor_file):
1538
+ return None
1539
+ cf = os.path.expanduser(cf)
1540
+
1541
+ try:
1542
+ with open(cf) as f:
1543
+ return f.read().strip()
1544
+ except FileNotFoundError:
1545
+ return None
1546
+
1547
+ def set(self, cursor: str) -> None:
1548
+ if self._ensure_locked is not None:
1549
+ self._ensure_locked()
1550
+
1551
+ if not (cf := self._cursor_file):
1552
+ return
1553
+ cf = os.path.expanduser(cf)
1554
+
1555
+ log.info('Writing cursor file %s : %s', cf, cursor)
1556
+ with open(ncf := cf + '.next', 'w') as f:
1557
+ f.write(cursor)
1558
+
1559
+ os.rename(ncf, cf)
1560
+
1561
+
1451
1562
  ########################################
1452
1563
  # ../../logs.py
1453
1564
  """
@@ -1502,7 +1613,7 @@ class AwsPutLogEventsResponse(AwsDataclass):
1502
1613
  ##
1503
1614
 
1504
1615
 
1505
- class AwsLogMessagePoster:
1616
+ class AwsLogMessageBuilder:
1506
1617
  """
1507
1618
  TODO:
1508
1619
  - max_items
@@ -1528,7 +1639,7 @@ class AwsLogMessagePoster:
1528
1639
  log_group_name: str,
1529
1640
  log_stream_name: str,
1530
1641
  region_name: str,
1531
- credentials: AwsSigner.Credentials,
1642
+ credentials: ta.Optional[AwsSigner.Credentials],
1532
1643
 
1533
1644
  url: ta.Optional[str] = None,
1534
1645
  service_name: str = DEFAULT_SERVICE_NAME,
@@ -1550,11 +1661,16 @@ class AwsLogMessagePoster:
1550
1661
  headers = {**headers, **extra_headers}
1551
1662
  self._headers = {k: [v] for k, v in headers.items()}
1552
1663
 
1553
- self._signer = V4AwsSigner(
1554
- credentials,
1555
- region_name,
1556
- service_name,
1557
- )
1664
+ signer: ta.Optional[V4AwsSigner]
1665
+ if credentials is not None:
1666
+ signer = V4AwsSigner(
1667
+ credentials,
1668
+ region_name,
1669
+ service_name,
1670
+ )
1671
+ else:
1672
+ signer = None
1673
+ self._signer = signer
1558
1674
 
1559
1675
  #
1560
1676
 
@@ -1598,13 +1714,14 @@ class AwsLogMessagePoster:
1598
1714
  payload=body,
1599
1715
  )
1600
1716
 
1601
- sig_headers = self._signer.sign(
1602
- sig_req,
1603
- sign_payload=False,
1604
- )
1605
- sig_req = dc.replace(sig_req, headers={**sig_req.headers, **sig_headers})
1717
+ if (signer := self._signer) is not None:
1718
+ sig_headers = signer.sign(
1719
+ sig_req,
1720
+ sign_payload=False,
1721
+ )
1722
+ sig_req = dc.replace(sig_req, headers={**sig_req.headers, **sig_headers})
1606
1723
 
1607
- post = AwsLogMessagePoster.Post(
1724
+ post = AwsLogMessageBuilder.Post(
1608
1725
  url=self._url,
1609
1726
  headers={k: check_single(v) for k, v in sig_req.headers.items()},
1610
1727
  data=sig_req.payload,
@@ -1687,15 +1804,20 @@ class JournalctlMessageBuilder:
1687
1804
 
1688
1805
 
1689
1806
  ########################################
1690
- # ../../../../threadworker.py
1807
+ # ../../../../threadworkers.py
1691
1808
  """
1692
1809
  TODO:
1693
1810
  - implement stop lol
1694
1811
  - collective heartbeat monitoring - ThreadWorkerGroups
1812
+ - group -> 'context'? :|
1813
+ - shared stop_event?
1695
1814
  """
1696
1815
 
1697
1816
 
1698
- class ThreadWorker(abc.ABC):
1817
+ ##
1818
+
1819
+
1820
+ class ThreadWorker(ExitStacked, abc.ABC):
1699
1821
  def __init__(
1700
1822
  self,
1701
1823
  *,
@@ -1707,46 +1829,107 @@ class ThreadWorker(abc.ABC):
1707
1829
  stop_event = threading.Event()
1708
1830
  self._stop_event = stop_event
1709
1831
 
1832
+ self._lock = threading.RLock()
1710
1833
  self._thread: ta.Optional[threading.Thread] = None
1711
-
1712
1834
  self._last_heartbeat: ta.Optional[float] = None
1713
1835
 
1714
1836
  #
1715
1837
 
1838
+ def __enter__(self: ThreadWorkerT) -> ThreadWorkerT:
1839
+ with self._lock:
1840
+ return super().__enter__() # noqa
1841
+
1842
+ #
1843
+
1716
1844
  def should_stop(self) -> bool:
1717
1845
  return self._stop_event.is_set()
1718
1846
 
1847
+ class Stopping(Exception): # noqa
1848
+ pass
1849
+
1719
1850
  #
1720
1851
 
1721
1852
  @property
1722
1853
  def last_heartbeat(self) -> ta.Optional[float]:
1723
1854
  return self._last_heartbeat
1724
1855
 
1725
- def _heartbeat(self) -> bool:
1856
+ def _heartbeat(
1857
+ self,
1858
+ *,
1859
+ no_stop_check: bool = False,
1860
+ ) -> None:
1726
1861
  self._last_heartbeat = time.time()
1727
1862
 
1728
- if self.should_stop():
1863
+ if not no_stop_check and self.should_stop():
1729
1864
  log.info('Stopping: %s', self)
1730
- return False
1731
-
1732
- return True
1865
+ raise ThreadWorker.Stopping
1733
1866
 
1734
1867
  #
1735
1868
 
1869
+ def has_started(self) -> bool:
1870
+ return self._thread is not None
1871
+
1736
1872
  def is_alive(self) -> bool:
1737
1873
  return (thr := self._thread) is not None and thr.is_alive()
1738
1874
 
1739
1875
  def start(self) -> None:
1740
- thr = threading.Thread(target=self._run)
1741
- self._thread = thr
1742
- thr.start()
1876
+ with self._lock:
1877
+ if self._thread is not None:
1878
+ raise RuntimeError('Thread already started: %r', self)
1879
+
1880
+ thr = threading.Thread(target=self.__run)
1881
+ self._thread = thr
1882
+ thr.start()
1883
+
1884
+ #
1885
+
1886
+ def __run(self) -> None:
1887
+ try:
1888
+ self._run()
1889
+ except ThreadWorker.Stopping:
1890
+ log.exception('Thread worker stopped: %r', self)
1891
+ except Exception: # noqa
1892
+ log.exception('Error in worker thread: %r', self)
1893
+ raise
1743
1894
 
1744
1895
  @abc.abstractmethod
1745
1896
  def _run(self) -> None:
1746
1897
  raise NotImplementedError
1747
1898
 
1899
+ #
1900
+
1748
1901
  def stop(self) -> None:
1749
- raise NotImplementedError
1902
+ self._stop_event.set()
1903
+
1904
+ def join(self, timeout: ta.Optional[float] = None) -> None:
1905
+ with self._lock:
1906
+ if self._thread is None:
1907
+ raise RuntimeError('Thread not started: %r', self)
1908
+ self._thread.join(timeout)
1909
+
1910
+
1911
+ ##
1912
+
1913
+
1914
+ class ThreadWorkerGroup:
1915
+ @dc.dataclass()
1916
+ class State:
1917
+ worker: ThreadWorker
1918
+
1919
+ def __init__(self) -> None:
1920
+ super().__init__()
1921
+
1922
+ self._lock = threading.RLock()
1923
+ self._states: ta.Dict[ThreadWorker, ThreadWorkerGroup.State] = {}
1924
+
1925
+ def add(self, *workers: ThreadWorker) -> 'ThreadWorkerGroup':
1926
+ with self._lock:
1927
+ for w in workers:
1928
+ if w in self._states:
1929
+ raise KeyError(w)
1930
+ self._states[w] = ThreadWorkerGroup.State(w)
1931
+
1932
+ return self
1750
1933
 
1751
1934
 
1752
1935
  ########################################
@@ -1855,6 +2038,103 @@ def subprocess_try_output_str(*args: str, **kwargs: ta.Any) -> ta.Optional[str]:
1855
2038
  return out.decode().strip() if out is not None else None
1856
2039
 
1857
2040
 
2041
+ ##
2042
+
2043
+
2044
+ def subprocess_close(
2045
+ proc: subprocess.Popen,
2046
+ timeout: ta.Optional[float] = None,
2047
+ ) -> None:
2048
+ # TODO: terminate, sleep, kill
2049
+ if proc.stdout:
2050
+ proc.stdout.close()
2051
+ if proc.stderr:
2052
+ proc.stderr.close()
2053
+ if proc.stdin:
2054
+ proc.stdin.close()
2055
+
2056
+ proc.wait(timeout)
2057
+
2058
+
2059
+ ########################################
2060
+ # ../poster.py
2061
+ """
2062
+ TODO:
2063
+ - retries
2064
+ """
2065
+
2066
+
2067
+ class JournalctlToAwsPosterWorker(ThreadWorker):
2068
+ def __init__(
2069
+ self,
2070
+ queue, # type: queue.Queue[ta.Sequence[JournalctlMessage]] # noqa
2071
+ builder: AwsLogMessageBuilder,
2072
+ cursor: JournalctlToAwsCursor,
2073
+ *,
2074
+ ensure_locked: ta.Optional[ta.Callable[[], None]] = None,
2075
+ dry_run: bool = False,
2076
+ queue_timeout_s: float = 1.,
2077
+ **kwargs: ta.Any,
2078
+ ) -> None:
2079
+ super().__init__(**kwargs)
2080
+ self._queue = queue
2081
+ self._builder = builder
2082
+ self._cursor = cursor
2083
+ self._ensure_locked = ensure_locked
2084
+ self._dry_run = dry_run
2085
+ self._queue_timeout_s = queue_timeout_s
2086
+ #
2087
+
2088
+ def _run(self) -> None:
2089
+ if self._ensure_locked is not None:
2090
+ self._ensure_locked()
2091
+
2092
+ last_cursor: ta.Optional[str] = None # noqa
2093
+ while True:
2094
+ self._heartbeat()
2095
+
2096
+ try:
2097
+ msgs: ta.Sequence[JournalctlMessage] = self._queue.get(timeout=self._queue_timeout_s)
2098
+ except queue.Empty:
2099
+ msgs = []
2100
+
2101
+ if not msgs:
2102
+ log.debug('Empty queue chunk')
2103
+ continue
2104
+
2105
+ log.debug('%r', msgs)
2106
+
2107
+ cur_cursor: ta.Optional[str] = None
2108
+ for m in reversed(msgs):
2109
+ if m.cursor is not None:
2110
+ cur_cursor = m.cursor
2111
+ break
2112
+
2113
+ feed_msgs = []
2114
+ for m in msgs:
2115
+ feed_msgs.append(AwsLogMessageBuilder.Message(
2116
+ message=json.dumps(m.dct, sort_keys=True),
2117
+ ts_ms=int((m.ts_us / 1000.) if m.ts_us is not None else (time.time() * 1000.)),
2118
+ ))
2119
+
2120
+ for post in self._builder.feed(feed_msgs):
2121
+ log.debug('%r', post)
2122
+
2123
+ if not self._dry_run:
2124
+ with urllib.request.urlopen(urllib.request.Request( # noqa
2125
+ post.url,
2126
+ method='POST',
2127
+ headers=dict(post.headers),
2128
+ data=post.data,
2129
+ )) as resp:
2130
+ response = AwsPutLogEventsResponse.from_aws(json.loads(resp.read().decode('utf-8')))
2131
+ log.debug('%r', response)
2132
+
2133
+ if cur_cursor is not None:
2134
+ self._cursor.set(cur_cursor)
2135
+ last_cursor = cur_cursor # noqa
2136
+
2137
+
1858
2138
  ########################################
1859
2139
  # ../../../../journald/tailer.py
1860
2140
  """
@@ -2227,7 +2507,7 @@ class JournalctlTailerWorker(ThreadWorker):
2227
2507
  self._read_size = read_size
2228
2508
  self._sleep_s = sleep_s
2229
2509
 
2230
- self._mb = JournalctlMessageBuilder()
2510
+ self._builder = JournalctlMessageBuilder()
2231
2511
 
2232
2512
  self._proc: ta.Optional[subprocess.Popen] = None
2233
2513
 
@@ -2251,69 +2531,103 @@ class JournalctlTailerWorker(ThreadWorker):
2251
2531
 
2252
2532
  return cmd
2253
2533
 
2534
+ def _read_loop(self, stdout: ta.IO) -> None:
2535
+ while stdout.readable():
2536
+ self._heartbeat()
2537
+
2538
+ buf = stdout.read(self._read_size)
2539
+ if not buf:
2540
+ log.debug('Journalctl empty read')
2541
+ break
2542
+
2543
+ log.debug('Journalctl read buffer: %r', buf)
2544
+ msgs = self._builder.feed(buf)
2545
+ if msgs:
2546
+ while True:
2547
+ try:
2548
+ self._output.put(msgs, timeout=1.)
2549
+ except queue.Full:
2550
+ self._heartbeat()
2551
+ else:
2552
+ break
2553
+
2254
2554
  def _run(self) -> None:
2255
2555
  with subprocess.Popen(
2256
2556
  self._full_cmd(),
2257
2557
  stdout=subprocess.PIPE,
2258
2558
  ) as self._proc:
2259
- stdout = check_not_none(self._proc.stdout)
2559
+ try:
2560
+ stdout = check_not_none(self._proc.stdout)
2260
2561
 
2261
- fd = stdout.fileno()
2262
- fl = fcntl.fcntl(fd, fcntl.F_GETFL)
2263
- fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
2562
+ fd = stdout.fileno()
2563
+ fl = fcntl.fcntl(fd, fcntl.F_GETFL)
2564
+ fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
2264
2565
 
2265
- while True:
2266
- if not self._heartbeat():
2267
- return
2566
+ while True:
2567
+ self._heartbeat()
2268
2568
 
2269
- while stdout.readable():
2270
- if not self._heartbeat():
2271
- return
2569
+ self._read_loop(stdout)
2272
2570
 
2273
- buf = stdout.read(self._read_size)
2274
- if not buf:
2275
- log.debug('Journalctl empty read')
2276
- break
2571
+ log.debug('Journalctl not readable')
2277
2572
 
2278
- log.debug('Journalctl read buffer: %r', buf)
2279
- msgs = self._mb.feed(buf)
2280
- if msgs:
2281
- while True:
2282
- try:
2283
- self._output.put(msgs, timeout=1.)
2284
- except queue.Full:
2285
- if not self._heartbeat():
2286
- return
2287
- else:
2288
- break
2573
+ if self._proc.poll() is not None:
2574
+ log.critical('Journalctl process terminated')
2575
+ return
2289
2576
 
2290
- if self._proc.poll() is not None:
2291
- log.critical('Journalctl process terminated')
2292
- return
2577
+ time.sleep(self._sleep_s)
2293
2578
 
2294
- log.debug('Journalctl readable')
2295
- time.sleep(self._sleep_s)
2579
+ finally:
2580
+ subprocess_close(self._proc)
2296
2581
 
2297
2582
 
2298
2583
  ########################################
2299
- # main.py
2584
+ # ../driver.py
2585
+ """
2586
+ TODO:
2587
+ - create log group
2588
+ - log stats - chunk sizes, byte count, num calls, etc
2589
+
2590
+ ==
2591
+
2592
+ https://www.freedesktop.org/software/systemd/man/latest/journalctl.html
2593
+
2594
+ journalctl:
2595
+ -o json
2596
+ --show-cursor
2597
+
2598
+ --since "2012-10-30 18:17:16"
2599
+ --until "2012-10-30 18:17:16"
2300
2600
 
2601
+ --after-cursor <cursor>
2602
+
2603
+ ==
2604
+
2605
+ https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html
2606
+
2607
+ ==
2301
2608
 
2302
2609
  @dc.dataclass(frozen=True)
2303
- class JournalctlOpts:
2304
- after_cursor: ta.Optional[str] = None
2610
+ class Journald2AwsConfig:
2611
+ log_group_name: str
2612
+ log_stream_name: str
2613
+
2614
+ aws_batch_size: int = 1_000
2615
+ aws_flush_interval_s: float = 1.
2616
+ """
2305
2617
 
2306
- since: ta.Optional[str] = None
2307
- until: ta.Optional[str] = None
2618
+
2619
+ ##
2308
2620
 
2309
2621
 
2310
- class JournalctlToAws:
2622
+ class JournalctlToAwsDriver(ExitStacked):
2311
2623
  @dc.dataclass(frozen=True)
2312
2624
  class Config:
2313
2625
  pid_file: ta.Optional[str] = None
2314
2626
 
2315
2627
  cursor_file: ta.Optional[str] = None
2316
2628
 
2629
+ runtime_limit: ta.Optional[float] = None
2630
+
2317
2631
  #
2318
2632
 
2319
2633
  aws_log_group_name: str = 'omlish'
@@ -2324,6 +2638,8 @@ class JournalctlToAws:
2324
2638
 
2325
2639
  aws_region_name: str = 'us-west-1'
2326
2640
 
2641
+ aws_dry_run: bool = False
2642
+
2327
2643
  #
2328
2644
 
2329
2645
  journalctl_cmd: ta.Optional[ta.Sequence[str]] = None
@@ -2331,24 +2647,10 @@ class JournalctlToAws:
2331
2647
  journalctl_after_cursor: ta.Optional[str] = None
2332
2648
  journalctl_since: ta.Optional[str] = None
2333
2649
 
2334
- #
2335
-
2336
- dry_run: bool = False
2337
-
2338
2650
  def __init__(self, config: Config) -> None:
2339
2651
  super().__init__()
2340
- self._config = config
2341
2652
 
2342
- #
2343
-
2344
- _es: contextlib.ExitStack
2345
-
2346
- def __enter__(self) -> 'JournalctlToAws':
2347
- self._es = contextlib.ExitStack().__enter__()
2348
- return self
2349
-
2350
- def __exit__(self, exc_type, exc_val, exc_tb):
2351
- return self._es.__exit__(exc_type, exc_val, exc_tb)
2653
+ self._config = config
2352
2654
 
2353
2655
  #
2354
2656
 
@@ -2361,7 +2663,7 @@ class JournalctlToAws:
2361
2663
 
2362
2664
  log.info('Opening pidfile %s', pfp)
2363
2665
 
2364
- pf = self._es.enter_context(Pidfile(pfp))
2666
+ pf = self._enter_context(Pidfile(pfp))
2365
2667
  pf.write()
2366
2668
  return pf
2367
2669
 
@@ -2371,48 +2673,32 @@ class JournalctlToAws:
2371
2673
 
2372
2674
  #
2373
2675
 
2374
- def _read_cursor_file(self) -> ta.Optional[str]:
2375
- self._ensure_locked()
2376
-
2377
- if not (cf := self._config.cursor_file):
2378
- return None
2379
- cf = os.path.expanduser(cf)
2380
-
2381
- try:
2382
- with open(cf) as f:
2383
- return f.read().strip()
2384
- except FileNotFoundError:
2385
- return None
2386
-
2387
- def _write_cursor_file(self, cursor: str) -> None:
2388
- self._ensure_locked()
2389
-
2390
- if not (cf := self._config.cursor_file):
2391
- return
2392
- cf = os.path.expanduser(cf)
2393
-
2394
- log.info('Writing cursor file %s : %s', cf, cursor)
2395
- with open(ncf := cf + '.next', 'w') as f:
2396
- f.write(cursor)
2397
-
2398
- os.rename(ncf, cf)
2676
+ @cached_nullary
2677
+ def _cursor(self) -> JournalctlToAwsCursor:
2678
+ return JournalctlToAwsCursor(
2679
+ self._config.cursor_file,
2680
+ ensure_locked=self._ensure_locked,
2681
+ )
2399
2682
 
2400
2683
  #
2401
2684
 
2402
2685
  @cached_nullary
2403
- def _aws_credentials(self) -> AwsSigner.Credentials:
2686
+ def _aws_credentials(self) -> ta.Optional[AwsSigner.Credentials]:
2687
+ if self._config.aws_access_key_id is None and self._config.aws_secret_access_key is None:
2688
+ return None
2689
+
2404
2690
  return AwsSigner.Credentials(
2405
2691
  access_key_id=check_non_empty_str(self._config.aws_access_key_id),
2406
2692
  secret_access_key=check_non_empty_str(self._config.aws_secret_access_key),
2407
2693
  )
2408
2694
 
2409
2695
  @cached_nullary
2410
- def _aws_log_message_poster(self) -> AwsLogMessagePoster:
2411
- return AwsLogMessagePoster(
2696
+ def _aws_log_message_builder(self) -> AwsLogMessageBuilder:
2697
+ return AwsLogMessageBuilder(
2412
2698
  log_group_name=self._config.aws_log_group_name,
2413
2699
  log_stream_name=check_non_empty_str(self._config.aws_log_stream_name),
2414
2700
  region_name=self._config.aws_region_name,
2415
- credentials=check_not_none(self._aws_credentials()),
2701
+ credentials=self._aws_credentials(),
2416
2702
  )
2417
2703
 
2418
2704
  #
@@ -2431,7 +2717,7 @@ class JournalctlToAws:
2431
2717
  else:
2432
2718
  ac = self._config.journalctl_after_cursor
2433
2719
  if ac is None:
2434
- ac = self._read_cursor_file()
2720
+ ac = self._cursor().get()
2435
2721
  if ac is not None:
2436
2722
  log.info('Starting from cursor %s', ac)
2437
2723
 
@@ -2447,63 +2733,49 @@ class JournalctlToAws:
2447
2733
 
2448
2734
  #
2449
2735
 
2450
- def run(self) -> None:
2451
- self._ensure_locked()
2736
+ @cached_nullary
2737
+ def _aws_poster_worker(self) -> JournalctlToAwsPosterWorker:
2738
+ return JournalctlToAwsPosterWorker(
2739
+ self._journalctl_message_queue(),
2740
+ self._aws_log_message_builder(),
2741
+ self._cursor(),
2452
2742
 
2453
- q = self._journalctl_message_queue() # type: queue.Queue[ta.Sequence[JournalctlMessage]]
2454
- jtw = self._journalctl_tailer_worker() # type: JournalctlTailerWorker
2455
- mp = self._aws_log_message_poster() # type: AwsLogMessagePoster
2743
+ ensure_locked=self._ensure_locked,
2744
+ dry_run=self._config.aws_dry_run,
2745
+ )
2456
2746
 
2457
- jtw.start()
2747
+ #
2458
2748
 
2459
- last_cursor: ta.Optional[str] = None # noqa
2460
- while True:
2461
- if not jtw.is_alive():
2462
- log.critical('Journalctl tailer worker died')
2463
- break
2749
+ def run(self) -> None:
2750
+ pw: JournalctlToAwsPosterWorker = self._aws_poster_worker()
2751
+ tw: JournalctlTailerWorker = self._journalctl_tailer_worker()
2464
2752
 
2465
- try:
2466
- msgs: ta.Sequence[JournalctlMessage] = q.get(timeout=1.)
2467
- except queue.Empty:
2468
- msgs = []
2469
- if not msgs:
2470
- continue
2753
+ ws = [pw, tw]
2471
2754
 
2472
- log.debug('%r', msgs)
2755
+ for w in ws:
2756
+ w.start()
2473
2757
 
2474
- cur_cursor: ta.Optional[str] = None
2475
- for m in reversed(msgs):
2476
- if m.cursor is not None:
2477
- cur_cursor = m.cursor
2758
+ start = time.time()
2759
+
2760
+ while True:
2761
+ for w in ws:
2762
+ if not w.is_alive():
2763
+ log.critical('Worker died: %r', w)
2478
2764
  break
2479
2765
 
2480
- if not msgs:
2481
- log.warning('Empty queue chunk')
2482
- continue
2766
+ if (rl := self._config.runtime_limit) is not None and time.time() - start >= rl:
2767
+ log.warning('Runtime limit reached')
2768
+ break
2483
2769
 
2484
- feed_msgs = []
2485
- for m in msgs:
2486
- feed_msgs.append(mp.Message(
2487
- message=json.dumps(m.dct, sort_keys=True),
2488
- ts_ms=int((m.ts_us / 1000.) if m.ts_us is not None else (time.time() * 1000.)),
2489
- ))
2770
+ time.sleep(1.)
2490
2771
 
2491
- [post] = mp.feed(feed_msgs)
2492
- log.debug('%r', post)
2772
+ for w in reversed(ws):
2773
+ w.stop()
2774
+ w.join()
2493
2775
 
2494
- if not self._config.dry_run:
2495
- with urllib.request.urlopen(urllib.request.Request( # noqa
2496
- post.url,
2497
- method='POST',
2498
- headers=dict(post.headers),
2499
- data=post.data,
2500
- )) as resp:
2501
- response = AwsPutLogEventsResponse.from_aws(json.loads(resp.read().decode('utf-8')))
2502
- log.debug('%r', response)
2503
2776
 
2504
- if cur_cursor is not None:
2505
- self._write_cursor_file(cur_cursor)
2506
- last_cursor = cur_cursor # noqa
2777
+ ########################################
2778
+ # main.py
2507
2779
 
2508
2780
 
2509
2781
  def _main() -> None:
@@ -2518,6 +2790,8 @@ def _main() -> None:
2518
2790
 
2519
2791
  parser.add_argument('--message', nargs='?')
2520
2792
  parser.add_argument('--real', action='store_true')
2793
+ parser.add_argument('--num-messages', type=int)
2794
+ parser.add_argument('--runtime-limit', type=float)
2521
2795
 
2522
2796
  args = parser.parse_args()
2523
2797
 
@@ -2527,13 +2801,13 @@ def _main() -> None:
2527
2801
 
2528
2802
  #
2529
2803
 
2530
- config: JournalctlToAws.Config
2804
+ config: JournalctlToAwsDriver.Config
2531
2805
  if args.config_file:
2532
2806
  with open(os.path.expanduser(args.config_file)) as cf:
2533
2807
  config_dct = json.load(cf)
2534
- config = unmarshal_obj(config_dct, JournalctlToAws.Config)
2808
+ config = unmarshal_obj(config_dct, JournalctlToAwsDriver.Config)
2535
2809
  else:
2536
- config = JournalctlToAws.Config()
2810
+ config = JournalctlToAwsDriver.Config()
2537
2811
 
2538
2812
  #
2539
2813
 
@@ -2550,7 +2824,7 @@ def _main() -> None:
2550
2824
  '--sleep-n', '2',
2551
2825
  '--sleep-s', '.5',
2552
2826
  *(['--message', args.message] if args.message else []),
2553
- '100000',
2827
+ str(args.num_messages or 100_000),
2554
2828
  ])
2555
2829
 
2556
2830
  #
@@ -2558,14 +2832,14 @@ def _main() -> None:
2558
2832
  for ca, pa in [
2559
2833
  ('journalctl_after_cursor', 'after_cursor'),
2560
2834
  ('journalctl_since', 'since'),
2561
- ('dry_run', 'dry_run'),
2835
+ ('aws_dry_run', 'dry_run'),
2562
2836
  ]:
2563
2837
  if (av := getattr(args, pa)):
2564
2838
  config = dc.replace(config, **{ca: av})
2565
2839
 
2566
2840
  #
2567
2841
 
2568
- with JournalctlToAws(config) as jta:
2842
+ with JournalctlToAwsDriver(config) as jta:
2569
2843
  jta.run()
2570
2844
 
2571
2845