lightman_ai 0.20.2__py3-none-any.whl → 0.21.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lightman_ai might be problematic. Click here for more details.

@@ -0,0 +1,6 @@
1
+ class ArticleBaseError(Exception):
2
+ """Base class for article-related models errors."""
3
+
4
+
5
+ class NoTimeZoneError(ArticleBaseError):
6
+ """Exception class for when there is no timezone associated to a date."""
@@ -1,14 +1,24 @@
1
1
  from abc import ABC
2
- from typing import override
2
+ from datetime import datetime
3
+ from typing import Self, override
3
4
 
4
- from pydantic import BaseModel
5
+ from lightman_ai.article.exceptions import NoTimeZoneError
6
+ from pydantic import BaseModel, Field, field_validator
5
7
 
6
8
 
7
9
  class BaseArticle(BaseModel, ABC):
8
10
  """Base abstract class for all Articles."""
9
11
 
10
- title: str
11
- link: str
12
+ title: str = Field(..., min_length=1)
13
+ link: str = Field(..., min_length=1)
14
+ published_at: datetime = Field(..., description="Must be timezone aware")
15
+
16
+ @field_validator("published_at", mode="after")
17
+ @classmethod
18
+ def validate_timezone_aware(cls, v: datetime) -> datetime:
19
+ if v.tzinfo is None:
20
+ raise NoTimeZoneError("published_at must be timezone aware")
21
+ return v
12
22
 
13
23
  @override
14
24
  def __eq__(self, value: object) -> bool:
@@ -28,8 +38,7 @@ class SelectedArticle(BaseArticle):
28
38
 
29
39
 
30
40
  class Article(BaseArticle):
31
- title: str
32
- description: str
41
+ description: str = Field(..., min_length=1)
33
42
 
34
43
 
35
44
  class BaseArticlesList[TArticle: BaseArticle](BaseModel):
@@ -40,11 +49,18 @@ class BaseArticlesList[TArticle: BaseArticle](BaseModel):
40
49
 
41
50
  @property
42
51
  def titles(self) -> list[str]:
43
- return [new.title for new in self.articles]
52
+ return [article.title for article in self.articles]
44
53
 
45
54
  @property
46
55
  def links(self) -> list[str]:
47
- return [new.link for new in self.articles]
56
+ return [article.link for article in self.articles]
57
+
58
+ @classmethod
59
+ def get_articles_from_date_onwards(cls, articles: list[TArticle], start_date: datetime) -> Self:
60
+ if not start_date.tzinfo:
61
+ raise NoTimeZoneError("A timezone is needed for filtering articles")
62
+ articles = [article for article in articles if article.published_at >= start_date]
63
+ return cls(articles=articles)
48
64
 
49
65
 
50
66
  class SelectedArticlesList(BaseArticlesList[SelectedArticle]):
lightman_ai/cli.py CHANGED
@@ -1,5 +1,7 @@
1
1
  import logging
2
+ from datetime import date, datetime, time
2
3
  from importlib import metadata
4
+ from zoneinfo import ZoneInfo
3
5
 
4
6
  import click
5
7
  from dotenv import load_dotenv
@@ -8,6 +10,7 @@ from lightman_ai.constants import DEFAULT_CONFIG_FILE, DEFAULT_CONFIG_SECTION, D
8
10
  from lightman_ai.core.config import FileConfig, FinalConfig, PromptConfig
9
11
  from lightman_ai.core.exceptions import ConfigNotFoundError, InvalidConfigError, PromptNotFoundError
10
12
  from lightman_ai.core.sentry import configure_sentry
13
+ from lightman_ai.core.settings import settings
11
14
  from lightman_ai.main import lightman
12
15
 
13
16
  logger = logging.getLogger("lightman")
@@ -65,6 +68,8 @@ def entry_point() -> None:
65
68
  "When set, runs the script without publishing the results to the integrated services, just shows them in stdout."
66
69
  ),
67
70
  )
71
+ @click.option("--start-date", type=click.DateTime(formats=["%Y-%m-%d"]), help="Start date to retrieve articles")
72
+ @click.option("--today", is_flag=True, help="Retrieve articles from today.")
68
73
  def run(
69
74
  agent: str,
70
75
  prompt: str,
@@ -75,6 +80,8 @@ def run(
75
80
  config: str,
76
81
  env_file: str,
77
82
  dry_run: bool,
83
+ start_date: date | None,
84
+ today: bool,
78
85
  ) -> int:
79
86
  """
80
87
  Entrypoint of the application.
@@ -83,6 +90,17 @@ def run(
83
90
  """
84
91
  load_dotenv(env_file)
85
92
  configure_sentry()
93
+
94
+ if start_date and today:
95
+ raise click.UsageError("--today and --start-date cannot be set at the same time.")
96
+ elif today:
97
+ now = datetime.now(ZoneInfo(settings.TIME_ZONE))
98
+ start_datetime = datetime.combine(now, time(0, 0), tzinfo=ZoneInfo(settings.TIME_ZONE))
99
+ elif isinstance(start_date, date):
100
+ start_datetime = datetime.combine(start_date, time(0, 0), tzinfo=ZoneInfo(settings.TIME_ZONE))
101
+ else:
102
+ start_datetime = None
103
+
86
104
  try:
87
105
  prompt_config = PromptConfig.get_config_from_file(path=prompt_file)
88
106
  config_from_file = FileConfig.get_config_from_file(config_section=config, path=config_file)
@@ -107,6 +125,7 @@ def run(
107
125
  project_key=config_from_file.service_desk_project_key,
108
126
  request_id_type=config_from_file.service_desk_request_id_type,
109
127
  model=final_config.model,
128
+ start_date=start_datetime,
110
129
  )
111
130
  relevant_articles_metadata = [f"{article.title} ({article.link})" for article in relevant_articles]
112
131
  logger.warning("Found these articles: \n- %s", "\n- ".join(relevant_articles_metadata))
@@ -2,20 +2,31 @@ import logging
2
2
  import os
3
3
  from importlib import metadata
4
4
 
5
- import sentry_sdk
6
- from sentry_sdk.integrations.logging import LoggingIntegration
5
+ logger = logging.getLogger("lightman")
7
6
 
8
7
 
9
8
  def configure_sentry() -> None:
10
- """Configure Sentry for error tracking and performance monitoring using env vars with fallbacks."""
9
+ """Configure Sentry for error tracking."""
10
+ try:
11
+ import sentry_sdk
12
+ from sentry_sdk.integrations.logging import LoggingIntegration
13
+ except ImportError:
14
+ logger.warning(
15
+ "Could not initialize sentry, it is not installed! Add it by installing the project with `lightman-ai[sentry]`."
16
+ )
17
+ return
18
+
11
19
  try:
12
20
  if not os.getenv("SENTRY_DSN"):
13
- logging.getLogger("lightman").info("SENTRY_DSN not configured, skipping Sentry initialization")
21
+ logger.info("SENTRY_DSN not configured, skipping Sentry initialization")
14
22
  return
15
23
 
16
- # Logging level from ENV
17
24
  logging_level_str = os.getenv("LOGGING_LEVEL", "ERROR").upper()
18
- logging_level = getattr(logging, logging_level_str, logging.ERROR)
25
+ try:
26
+ logging_level = getattr(logging, logging_level_str, logging.ERROR)
27
+ except AttributeError:
28
+ logger.warning("The specified logging level `%s` does not exist. Defaulting to ERROR.", logging_level_str)
29
+ logging_level = logging.ERROR
19
30
 
20
31
  # Set up logging integration
21
32
  sentry_logging = LoggingIntegration(level=logging.INFO, event_level=logging_level)
@@ -25,4 +36,4 @@ def configure_sentry() -> None:
25
36
  integrations=[sentry_logging],
26
37
  )
27
38
  except Exception as e:
28
- logging.getLogger("lightman").warning("Could not instantiate Sentry! %s.\nContinuing with the execution.", e)
39
+ logger.warning("Could not instantiate Sentry! %s.\nContinuing with the execution.", e)
@@ -7,8 +7,8 @@ class Settings(BaseSettings):
7
7
  def __init__(self, *args: Any, **kwargs: Any) -> None:
8
8
  super().__init__(*args, **kwargs)
9
9
 
10
- OPENAI_ENCODING: str = "cl100k_base"
11
10
  PARALLEL_WORKERS: int = 5
11
+ TIME_ZONE: str = "UTC"
12
12
 
13
13
 
14
14
  settings = Settings()
lightman_ai/main.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import logging
3
+ from datetime import datetime
3
4
 
4
5
  from lightman_ai.ai.base.agent import BaseAgent
5
6
  from lightman_ai.ai.utils import get_agent_class_from_agent_name
@@ -12,8 +13,8 @@ from lightman_ai.sources.the_hacker_news import TheHackerNewsSource
12
13
  logger = logging.getLogger("lightman")
13
14
 
14
15
 
15
- def _get_articles() -> ArticlesList:
16
- return TheHackerNewsSource().get_articles()
16
+ def _get_articles_from_source(start_date: datetime | None = None) -> ArticlesList:
17
+ return TheHackerNewsSource().get_articles(start_date)
17
18
 
18
19
 
19
20
  def _classify_articles(articles: ArticlesList, agent: BaseAgent) -> SelectedArticlesList:
@@ -60,8 +61,9 @@ def lightman(
60
61
  request_id_type: str | None = None,
61
62
  dry_run: bool = False,
62
63
  model: str | None = None,
64
+ start_date: datetime | None = None,
63
65
  ) -> list[SelectedArticle]:
64
- articles: ArticlesList = _get_articles()
66
+ articles: ArticlesList = _get_articles_from_source(start_date)
65
67
 
66
68
  agent_class = get_agent_class_from_agent_name(agent)
67
69
  agent_instance = agent_class(prompt, model, logger=logger)
@@ -1,8 +1,9 @@
1
1
  from abc import ABC, abstractmethod
2
+ from datetime import datetime
2
3
 
3
4
  from lightman_ai.article.models import ArticlesList
4
5
 
5
6
 
6
7
  class BaseSource(ABC):
7
8
  @abstractmethod
8
- def get_articles(self) -> ArticlesList: ...
9
+ def get_articles(self, date: datetime | None = None) -> ArticlesList: ...
@@ -0,0 +1,10 @@
1
+ class BaseSourceError(Exception):
2
+ """Base Exception class for errors."""
3
+
4
+
5
+ class MalformedSourceResponseError(BaseSourceError):
6
+ """Exception for when the response format does not match the expected one."""
7
+
8
+
9
+ class IncompleteArticleFromSourceError(MalformedSourceResponseError):
10
+ """Exception for when all the mandatory fields could not be retrieved from an article."""
@@ -1,3 +1,5 @@
1
+ import logging
2
+ from datetime import datetime
1
3
  from typing import override
2
4
  from xml.etree import ElementTree
3
5
 
@@ -6,6 +8,10 @@ import stamina
6
8
  from httpx import Client
7
9
  from lightman_ai.article.models import Article, ArticlesList
8
10
  from lightman_ai.sources.base import BaseSource
11
+ from lightman_ai.sources.exceptions import IncompleteArticleFromSourceError, MalformedSourceResponseError
12
+ from pydantic import ValidationError
13
+
14
+ logger = logging.getLogger("lightman")
9
15
 
10
16
  _RETRY_ON = httpx.TransportError
11
17
  _ATTEMPTS = 5
@@ -17,11 +23,14 @@ THN_URL = "https://feeds.feedburner.com/TheHackersNews"
17
23
 
18
24
  class TheHackerNewsSource(BaseSource):
19
25
  @override
20
- def get_articles(self) -> ArticlesList:
26
+ def get_articles(self, date: datetime | None = None) -> ArticlesList:
21
27
  """Return the articles that are present in THN feed."""
22
28
  feed = self.get_feed()
23
29
  articles = self._xml_to_list_of_articles(feed)
24
- return ArticlesList(articles=articles)
30
+ if date:
31
+ return ArticlesList.get_articles_from_date_onwards(articles=articles, start_date=date)
32
+ else:
33
+ return ArticlesList(articles=articles)
25
34
 
26
35
  def get_feed(self) -> str:
27
36
  """Retrieve the TheHackerNews' RSS Feed."""
@@ -36,22 +45,37 @@ class TheHackerNewsSource(BaseSource):
36
45
  return hacker_news_feed.text
37
46
 
38
47
  def _xml_to_list_of_articles(self, xml: str) -> list[Article]:
39
- root = ElementTree.fromstring(xml)
48
+ try:
49
+ root = ElementTree.fromstring(xml)
50
+ except ElementTree.ParseError as e:
51
+ raise MalformedSourceResponseError(f"Invalid XML format: {e}") from e
40
52
  channel = root.find("channel")
41
- assert channel
53
+
54
+ if channel is None:
55
+ raise MalformedSourceResponseError("No channel element found in RSS feed")
42
56
  items = channel.findall("item")
43
57
 
44
58
  parsed = []
45
59
 
46
60
  for item in items:
47
- title = item.findtext("title", default="").strip()
48
- description = self._clean(item.findtext("description", default="").strip())
49
- link = item.findtext("link", default="").strip()
61
+ try:
62
+ title = item.findtext("title", default="").strip()
63
+ description = self._clean(item.findtext("description", default="").strip())
64
+ link = item.findtext("link", default="").strip()
65
+ published_at_str = item.findtext("pubDate", default="").strip()
66
+
67
+ if not published_at_str:
68
+ logger.exception("Missing publication date. link: `%s`", link)
69
+ raise IncompleteArticleFromSourceError()
70
+ published_at = datetime.strptime(published_at_str, "%a, %d %b %Y %H:%M:%S %z")
71
+
72
+ parsed.append(Article(title=title, description=description, link=link, published_at=published_at))
73
+ except (ValidationError, ValueError) as e:
74
+ raise IncompleteArticleFromSourceError from e
50
75
 
51
- parsed.append(Article(title=title, description=description, link=link))
52
76
  return parsed
53
77
 
54
78
  @staticmethod
55
79
  def _clean(text: str) -> str:
56
- """Remove non-useful characters."""
80
+ """Remove non-useful characters. Helps cleaning the fields that will be sent to the Agent."""
57
81
  return text.replace("\\n", "").replace(" ", "")
@@ -1,33 +1,19 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lightman_ai
3
- Version: 0.20.2
4
- Summary: Cybersecurity news aggregator.
3
+ Version: 0.21.1
4
+ Summary: LLM-powered cybersecurity news aggregator.
5
5
  Author-email: sdn4z <imsdn4z@gmail.com>
6
6
  License-File: LICENSE
7
7
  Requires-Python: <4,>=3.13
8
8
  Requires-Dist: click<9.0.0,>=8.1.7
9
9
  Requires-Dist: httpx<1.0.0,>=0.28.0
10
- Requires-Dist: logfire>=3.25.0
11
10
  Requires-Dist: pydantic-ai-slim[google,openai]>=0.4.4
12
- Requires-Dist: pydantic-settings[dotenv]<3.0.0,>=2.9.1
11
+ Requires-Dist: pydantic-settings<3.0.0,>=2.9.1
13
12
  Requires-Dist: python-dotenv<2.0.0,>=1.1.1
14
- Requires-Dist: sentry-sdk<3.0.0,>=2.21.0
15
13
  Requires-Dist: stamina<26.0.0,>=25.1.0
16
- Requires-Dist: tiktoken<1.0.0,>=0.9.0
17
14
  Requires-Dist: tomlkit<1.0.0,>=0.13.3
18
- Provides-Extra: lint
19
- Requires-Dist: mypy<2.0.0,>=1.1.1; extra == 'lint'
20
- Requires-Dist: ruff<1.0.0,>=0.11.0; extra == 'lint'
21
- Provides-Extra: local
22
- Requires-Dist: codespell<3.0.0,>=2.2.4; extra == 'local'
23
- Requires-Dist: commitizen<5.0.0,>=4.8.3; extra == 'local'
24
- Requires-Dist: ipdb<1.0.0,>=0.13.13; extra == 'local'
25
- Requires-Dist: pdbpp<1.0.0,>=0.11.6; extra == 'local'
26
- Requires-Dist: pre-commit<4.0.0,>=3.2.2; extra == 'local'
27
- Provides-Extra: test
28
- Requires-Dist: pytest-asyncio<1.0.0,>=0.26.0; extra == 'test'
29
- Requires-Dist: pytest-cov<6.0.0,>=5.0.0; extra == 'test'
30
- Requires-Dist: pytest<9.0.0,>=8.0.0; extra == 'test'
15
+ Provides-Extra: sentry
16
+ Requires-Dist: sentry-sdk<3.0.0,>=2.21.0; extra == 'sentry'
31
17
  Description-Content-Type: text/markdown
32
18
 
33
19
  # 🔍 Lightman AI
@@ -1,7 +1,7 @@
1
1
  lightman_ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- lightman_ai/cli.py,sha256=Y3vl4xsJTBUBHRAR4zlMhjK5Cr2HNWRokWaGGuxFCQU,3775
2
+ lightman_ai/cli.py,sha256=cxaDI1ibyHMGLxWZAm-lYzWKc9LknLRedSJQeOAwA_Y,4638
3
3
  lightman_ai/constants.py,sha256=qfZgcTLK51l--JDhns-uRANjccFEPN6iTFsJKn8T4vs,101
4
- lightman_ai/main.py,sha256=Av4Jr8MhwVLUrVeCg24SCnpaT49hPYFlQJD0ABGfJGc,3379
4
+ lightman_ai/main.py,sha256=_RY05r1dd-q4L0BTIoKl6yC7lWrpQto0W3W72btGvQA,3527
5
5
  lightman_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  lightman_ai/ai/utils.py,sha256=vFTN8Tto7QHMV4DpTzn8Dz06niHm_bbgPivyc-rD1aE,509
7
7
  lightman_ai/ai/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -14,21 +14,23 @@ lightman_ai/ai/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
14
14
  lightman_ai/ai/openai/agent.py,sha256=bMZS-F58WaXDR6f3j5DhtZ4077Gy80_oGYJczB7nV74,1043
15
15
  lightman_ai/ai/openai/exceptions.py,sha256=V41fLkhkDguKU9_Wy0vMn0UXHmuAkqqXTmrcNKTNRZE,2414
16
16
  lightman_ai/article/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- lightman_ai/article/models.py,sha256=Zyx4CKBNGDcK9IWnPOb6CK6nKH2s40UaqsoZlIjSidU,1598
17
+ lightman_ai/article/exceptions.py,sha256=dvAN2bhFKPytgbaAOM-2ucNjH3lLhlbi5MMlLtNuO1E,212
18
+ lightman_ai/article/models.py,sha256=JZaBsqzw7MxC3qORUSJqxru9iSHsbLrgNW5nFc2N6Ks,2491
18
19
  lightman_ai/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
20
  lightman_ai/core/config.py,sha256=ppWNoNivNqxGi4TOZIMaTGo46W7Ic1a8cO8hWVAkkps,3031
20
21
  lightman_ai/core/exceptions.py,sha256=XKxA-EmLvd9cmbkUfdrBd27gsEFyIkSCQBf4pD-1AJk,239
21
- lightman_ai/core/sentry.py,sha256=yJYqZWhLH1OR11gVr1YPF0gPUSLjm8klnSmXFdDpyEo,1039
22
- lightman_ai/core/settings.py,sha256=pVhA0naxk7NRnJXRQB-CHVeIvYCEC2jP4mv8cbdfJYY,296
22
+ lightman_ai/core/sentry.py,sha256=kqO2sBX_29v4qpmcYyhOBuG0hv82OkDBWh1rcyiKVWk,1375
23
+ lightman_ai/core/settings.py,sha256=Z8mpGxBn00IZ84yEXPad82BdCsIyznp5Sgb5HkrWfS4,282
23
24
  lightman_ai/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
25
  lightman_ai/integrations/service_desk/__init__.py,sha256=Ijs7_ysJVwiT_Y1U8iPOV17E7dtkVzNXL00d8cgyaCc,66
25
26
  lightman_ai/integrations/service_desk/constants.py,sha256=ropNDMengLp96pXD6wq2voWPeBESt6f2Wnfk7845mck,306
26
27
  lightman_ai/integrations/service_desk/exceptions.py,sha256=RjH9Jam7ONDJQ0uhWu3rtJApNybsqfi7TGtT0dlN490,2517
27
28
  lightman_ai/integrations/service_desk/integration.py,sha256=5r3bipao_apaDWLRiH4oiBJFb4c1mvo7Sj2AOQUWiFY,2895
28
- lightman_ai/sources/base.py,sha256=i544j1ubqhx7uKRrbuXq3yhRuRbwJgCQxCIavjznn5s,182
29
- lightman_ai/sources/the_hacker_news.py,sha256=qyLF4wVHJ7C2O01MGYrKej-KvPI4mHd8ntntfMksX6s,1809
30
- lightman_ai-0.20.2.dist-info/METADATA,sha256=_aJvzpnwAfwFgvhg-d2Sfi2n-GQd_Mqph_QLyMHnhVE,12558
31
- lightman_ai-0.20.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
32
- lightman_ai-0.20.2.dist-info/entry_points.txt,sha256=9H7Ji-zxbCWAaVL5Yg4fG5va0H_5Tr2kMGGMsghaAas,60
33
- lightman_ai-0.20.2.dist-info/licenses/LICENSE,sha256=NhxDmY3AGgeEsYHIfDGLNkzBVX94pARRDS8H46JZ1zQ,1076
34
- lightman_ai-0.20.2.dist-info/RECORD,,
29
+ lightman_ai/sources/base.py,sha256=M_n9H9GdcX2tbBcYkiakDxHCnuwP9fV4BQhNAadBubQ,242
30
+ lightman_ai/sources/exceptions.py,sha256=X43BZ6hx-lZAEyM2q5PQXBw3vKPDx5nmY_uRiuITK9s,379
31
+ lightman_ai/sources/the_hacker_news.py,sha256=JTqbAJ1Pf8QKxvwFg6vWmymgj8LtUpIV6hD0KMOwMG8,3034
32
+ lightman_ai-0.21.1.dist-info/METADATA,sha256=RoCKSRzEAkHyHpXsP7EvKFt1pA3blcForYdxBDEfOdM,11915
33
+ lightman_ai-0.21.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
34
+ lightman_ai-0.21.1.dist-info/entry_points.txt,sha256=9H7Ji-zxbCWAaVL5Yg4fG5va0H_5Tr2kMGGMsghaAas,60
35
+ lightman_ai-0.21.1.dist-info/licenses/LICENSE,sha256=NhxDmY3AGgeEsYHIfDGLNkzBVX94pARRDS8H46JZ1zQ,1076
36
+ lightman_ai-0.21.1.dist-info/RECORD,,