lightman_ai 0.20.1__tar.gz → 0.21.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lightman_ai might be problematic. Click here for more details.

Files changed (40) hide show
  1. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/PKG-INFO +5 -19
  2. lightman_ai-0.21.0/VERSION +1 -0
  3. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/pyproject.toml +23 -24
  4. lightman_ai-0.21.0/src/lightman_ai/article/exceptions.py +6 -0
  5. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/article/models.py +24 -8
  6. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/cli.py +18 -0
  7. lightman_ai-0.21.0/src/lightman_ai/core/sentry.py +39 -0
  8. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/core/settings.py +1 -1
  9. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/main.py +5 -3
  10. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/sources/base.py +2 -1
  11. lightman_ai-0.21.0/src/lightman_ai/sources/exceptions.py +10 -0
  12. lightman_ai-0.21.0/src/lightman_ai/sources/the_hacker_news.py +81 -0
  13. lightman_ai-0.20.1/VERSION +0 -1
  14. lightman_ai-0.20.1/src/lightman_ai/core/sentry.py +0 -28
  15. lightman_ai-0.20.1/src/lightman_ai/sources/the_hacker_news.py +0 -57
  16. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/.gitignore +0 -0
  17. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/LICENSE +0 -0
  18. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/README.md +0 -0
  19. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/__init__.py +0 -0
  20. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/ai/base/__init__.py +0 -0
  21. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/ai/base/agent.py +0 -0
  22. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/ai/base/exceptions.py +0 -0
  23. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/ai/gemini/__init__.py +0 -0
  24. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/ai/gemini/agent.py +0 -0
  25. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/ai/gemini/exceptions.py +0 -0
  26. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/ai/openai/__init__.py +0 -0
  27. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/ai/openai/agent.py +0 -0
  28. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/ai/openai/exceptions.py +0 -0
  29. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/ai/utils.py +0 -0
  30. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/article/__init__.py +0 -0
  31. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/constants.py +0 -0
  32. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/core/__init__.py +0 -0
  33. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/core/config.py +0 -0
  34. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/core/exceptions.py +0 -0
  35. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/integrations/__init__.py +0 -0
  36. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/integrations/service_desk/__init__.py +0 -0
  37. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/integrations/service_desk/constants.py +0 -0
  38. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/integrations/service_desk/exceptions.py +0 -0
  39. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/integrations/service_desk/integration.py +0 -0
  40. {lightman_ai-0.20.1 → lightman_ai-0.21.0}/src/lightman_ai/py.typed +0 -0
@@ -1,33 +1,19 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lightman_ai
3
- Version: 0.20.1
4
- Summary: Cybersecurity news aggregator.
3
+ Version: 0.21.0
4
+ Summary: LLM-powered cybersecurity news aggregator.
5
5
  Author-email: sdn4z <imsdn4z@gmail.com>
6
6
  License-File: LICENSE
7
7
  Requires-Python: <4,>=3.13
8
8
  Requires-Dist: click<9.0.0,>=8.1.7
9
9
  Requires-Dist: httpx<1.0.0,>=0.28.0
10
- Requires-Dist: logfire>=3.25.0
11
10
  Requires-Dist: pydantic-ai-slim[google,openai]>=0.4.4
12
- Requires-Dist: pydantic-settings[dotenv]<3.0.0,>=2.9.1
11
+ Requires-Dist: pydantic-settings<3.0.0,>=2.9.1
13
12
  Requires-Dist: python-dotenv<2.0.0,>=1.1.1
14
- Requires-Dist: sentry-sdk<3.0.0,>=2.21.0
15
13
  Requires-Dist: stamina<26.0.0,>=25.1.0
16
- Requires-Dist: tiktoken<1.0.0,>=0.9.0
17
14
  Requires-Dist: tomlkit<1.0.0,>=0.13.3
18
- Provides-Extra: lint
19
- Requires-Dist: mypy<2.0.0,>=1.1.1; extra == 'lint'
20
- Requires-Dist: ruff<1.0.0,>=0.11.0; extra == 'lint'
21
- Provides-Extra: local
22
- Requires-Dist: codespell<3.0.0,>=2.2.4; extra == 'local'
23
- Requires-Dist: commitizen<5.0.0,>=4.8.3; extra == 'local'
24
- Requires-Dist: ipdb<1.0.0,>=0.13.13; extra == 'local'
25
- Requires-Dist: pdbpp<1.0.0,>=0.11.6; extra == 'local'
26
- Requires-Dist: pre-commit<4.0.0,>=3.2.2; extra == 'local'
27
- Provides-Extra: test
28
- Requires-Dist: pytest-asyncio<1.0.0,>=0.26.0; extra == 'test'
29
- Requires-Dist: pytest-cov<6.0.0,>=5.0.0; extra == 'test'
30
- Requires-Dist: pytest<9.0.0,>=8.0.0; extra == 'test'
15
+ Provides-Extra: sentry
16
+ Requires-Dist: sentry-sdk<3.0.0,>=2.21.0; extra == 'sentry'
31
17
  Description-Content-Type: text/markdown
32
18
 
33
19
  # 🔍 Lightman AI
@@ -0,0 +1 @@
1
+ v0.21.0
@@ -9,41 +9,23 @@ dependencies = [
9
9
  "python-dotenv<2.0.0,>=1.1.1",
10
10
  "click<9.0.0,>=8.1.7",
11
11
  "stamina<26.0.0,>=25.1.0",
12
- "pydantic-settings[dotenv]<3.0.0,>=2.9.1",
13
- "tiktoken<1.0.0,>=0.9.0",
12
+ "pydantic-settings<3.0.0,>=2.9.1",
14
13
  "tomlkit<1.0.0,>=0.13.3",
15
14
  "pydantic-ai-slim[google,openai]>=0.4.4",
16
- "logfire>=3.25.0",
17
- "sentry-sdk>=2.21.0,<3.0.0",
18
15
  ]
19
16
  name = "lightman_ai"
20
- description = "Cybersecurity news aggregator."
17
+ description = "LLM-powered cybersecurity news aggregator."
21
18
  readme = "README.md"
22
19
  dynamic = ["version"]
23
20
 
24
21
  [project.scripts]
25
22
  lightman-ai = "lightman_ai.cli:entry_point"
26
23
 
27
-
28
24
  [project.optional-dependencies]
29
- test = [
30
- "pytest<9.0.0,>=8.0.0",
31
- "pytest-cov<6.0.0,>=5.0.0",
32
- "pytest-asyncio<1.0.0,>=0.26.0",
33
- ]
34
- lint = [
35
- "mypy<2.0.0,>=1.1.1",
36
- "ruff<1.0.0,>=0.11.0",
37
- ]
38
- local = [
39
- "ipdb<1.0.0,>=0.13.13",
40
- "pdbpp<1.0.0,>=0.11.6",
41
- "pre-commit<4.0.0,>=3.2.2",
42
- "commitizen<5.0.0,>=4.8.3",
43
- "codespell<3.0.0,>=2.2.4",
25
+ sentry = [
26
+ "sentry-sdk>=2.21.0,<3.0.0",
44
27
  ]
45
28
 
46
-
47
29
  [build-system]
48
30
  requires = ["hatchling"]
49
31
  build-backend = "hatchling.build"
@@ -75,8 +57,6 @@ addopts = """
75
57
  asyncio_mode = "auto"
76
58
  asyncio_default_fixture_loop_scope = "session"
77
59
 
78
- [tool.logfire]
79
- ignore_no_config=true
80
60
 
81
61
  [tool.ruff]
82
62
  target-version = "py312"
@@ -159,3 +139,22 @@ exclude_lines = [
159
139
  "pragma: no cover",
160
140
  "raise NotImplementedError",
161
141
  ]
142
+
143
+ [dependency-groups]
144
+ test = [
145
+ "pytest<9.0.0,>=8.0.0",
146
+ "pytest-cov<6.0.0,>=5.0.0",
147
+ "pytest-asyncio<1.0.0,>=0.26.0",
148
+ "freezegun>=1.5.3",
149
+ ]
150
+ lint = [
151
+ "mypy<2.0.0,>=1.1.1",
152
+ "ruff<1.0.0,>=0.11.0",
153
+ ]
154
+ local = [
155
+ "ipdb<1.0.0,>=0.13.13",
156
+ "pdbpp<1.0.0,>=0.11.6",
157
+ "pre-commit<4.0.0,>=3.2.2",
158
+ "commitizen<5.0.0,>=4.8.3",
159
+ "codespell<3.0.0,>=2.2.4",
160
+ ]
@@ -0,0 +1,6 @@
1
+ class ArticleBaseError(Exception):
2
+ """Base class for article-related models errors."""
3
+
4
+
5
+ class NoTimeZoneError(ArticleBaseError):
6
+ """Exception class for when there is no timezone associated to a date."""
@@ -1,14 +1,24 @@
1
1
  from abc import ABC
2
- from typing import override
2
+ from datetime import datetime
3
+ from typing import Self, override
3
4
 
4
- from pydantic import BaseModel
5
+ from lightman_ai.article.exceptions import NoTimeZoneError
6
+ from pydantic import BaseModel, Field, field_validator
5
7
 
6
8
 
7
9
  class BaseArticle(BaseModel, ABC):
8
10
  """Base abstract class for all Articles."""
9
11
 
10
- title: str
11
- link: str
12
+ title: str = Field(..., min_length=1)
13
+ link: str = Field(..., min_length=1)
14
+ published_at: datetime = Field(..., description="Must be timezone aware")
15
+
16
+ @field_validator("published_at", mode="after")
17
+ @classmethod
18
+ def validate_timezone_aware(cls, v: datetime) -> datetime:
19
+ if v.tzinfo is None:
20
+ raise NoTimeZoneError("published_at must be timezone aware")
21
+ return v
12
22
 
13
23
  @override
14
24
  def __eq__(self, value: object) -> bool:
@@ -28,8 +38,7 @@ class SelectedArticle(BaseArticle):
28
38
 
29
39
 
30
40
  class Article(BaseArticle):
31
- title: str
32
- description: str
41
+ description: str = Field(..., min_length=1)
33
42
 
34
43
 
35
44
  class BaseArticlesList[TArticle: BaseArticle](BaseModel):
@@ -40,11 +49,18 @@ class BaseArticlesList[TArticle: BaseArticle](BaseModel):
40
49
 
41
50
  @property
42
51
  def titles(self) -> list[str]:
43
- return [new.title for new in self.articles]
52
+ return [article.title for article in self.articles]
44
53
 
45
54
  @property
46
55
  def links(self) -> list[str]:
47
- return [new.link for new in self.articles]
56
+ return [article.link for article in self.articles]
57
+
58
+ @classmethod
59
+ def get_articles_from_date_onwards(cls, articles: list[TArticle], start_date: datetime) -> Self:
60
+ if not start_date.tzinfo:
61
+ raise NoTimeZoneError("A timezone is needed for filtering articles")
62
+ articles = [article for article in articles if article.published_at >= start_date]
63
+ return cls(articles=articles)
48
64
 
49
65
 
50
66
  class SelectedArticlesList(BaseArticlesList[SelectedArticle]):
@@ -1,5 +1,7 @@
1
1
  import logging
2
+ from datetime import date, datetime, time
2
3
  from importlib import metadata
4
+ from zoneinfo import ZoneInfo
3
5
 
4
6
  import click
5
7
  from dotenv import load_dotenv
@@ -8,6 +10,7 @@ from lightman_ai.constants import DEFAULT_CONFIG_FILE, DEFAULT_CONFIG_SECTION, D
8
10
  from lightman_ai.core.config import FileConfig, FinalConfig, PromptConfig
9
11
  from lightman_ai.core.exceptions import ConfigNotFoundError, InvalidConfigError, PromptNotFoundError
10
12
  from lightman_ai.core.sentry import configure_sentry
13
+ from lightman_ai.core.settings import settings
11
14
  from lightman_ai.main import lightman
12
15
 
13
16
  logger = logging.getLogger("lightman")
@@ -65,6 +68,8 @@ def entry_point() -> None:
65
68
  "When set, runs the script without publishing the results to the integrated services, just shows them in stdout."
66
69
  ),
67
70
  )
71
+ @click.option("--start-date", type=click.DateTime(formats=["%Y-%m-%d"]), help="Start date to retrieve articles")
72
+ @click.option("--today", is_flag=True, help="Retrieve articles from today.")
68
73
  def run(
69
74
  agent: str,
70
75
  prompt: str,
@@ -75,6 +80,8 @@ def run(
75
80
  config: str,
76
81
  env_file: str,
77
82
  dry_run: bool,
83
+ start_date: date | None,
84
+ today: bool,
78
85
  ) -> int:
79
86
  """
80
87
  Entrypoint of the application.
@@ -83,6 +90,16 @@ def run(
83
90
  """
84
91
  load_dotenv(env_file)
85
92
  configure_sentry()
93
+
94
+ if start_date and today:
95
+ raise click.UsageError("--today and --start-date cannot be set at the same time.")
96
+ elif today:
97
+ start_datetime = datetime.now(ZoneInfo(settings.TIME_ZONE))
98
+ elif isinstance(start_date, date):
99
+ start_datetime = datetime.combine(start_date, time(0, 0), tzinfo=ZoneInfo(settings.TIME_ZONE))
100
+ else:
101
+ start_datetime = None
102
+
86
103
  try:
87
104
  prompt_config = PromptConfig.get_config_from_file(path=prompt_file)
88
105
  config_from_file = FileConfig.get_config_from_file(config_section=config, path=config_file)
@@ -107,6 +124,7 @@ def run(
107
124
  project_key=config_from_file.service_desk_project_key,
108
125
  request_id_type=config_from_file.service_desk_request_id_type,
109
126
  model=final_config.model,
127
+ start_date=start_datetime,
110
128
  )
111
129
  relevant_articles_metadata = [f"{article.title} ({article.link})" for article in relevant_articles]
112
130
  logger.warning("Found these articles: \n- %s", "\n- ".join(relevant_articles_metadata))
@@ -0,0 +1,39 @@
1
+ import logging
2
+ import os
3
+ from importlib import metadata
4
+
5
+ logger = logging.getLogger("lightman")
6
+
7
+
8
def configure_sentry() -> None:
    """Configure Sentry for error tracking.

    This is a best-effort setup step: every failure is logged as a warning
    and swallowed so that the application keeps running without Sentry.

    Environment variables read:
        SENTRY_DSN: when unset or empty, Sentry initialization is skipped.
        LOGGING_LEVEL: name of the ``logging`` level (case-insensitive) used
            as the Sentry ``event_level``. Defaults to ``ERROR``; unknown
            names fall back to ``ERROR`` with a warning.
    """
    # sentry-sdk is an optional extra, so import it lazily and bail out
    # gracefully when it is not installed.
    try:
        import sentry_sdk
        from sentry_sdk.integrations.logging import LoggingIntegration
    except ImportError:
        logger.warning(
            "Could not initialize sentry, it is not installed! Add it by installing the project with `lightman-ai[sentry]`."
        )
        return

    try:
        if not os.getenv("SENTRY_DSN"):
            logger.info("SENTRY_DSN not configured, skipping Sentry initialization")
            return

        logging_level_str = os.getenv("LOGGING_LEVEL", "ERROR").upper()
        # `getattr` with a default never raises AttributeError, so look the name
        # up with a `None` default and validate the result explicitly. The
        # `int` check also rejects upper-case module attributes that are not
        # levels (e.g. `logging.BASIC_FORMAT`, which is a string).
        logging_level = getattr(logging, logging_level_str, None)
        if not isinstance(logging_level, int):
            logger.warning("The specified logging level `%s` does not exist. Defaulting to ERROR.", logging_level_str)
            logging_level = logging.ERROR

        # Set up logging integration
        sentry_logging = LoggingIntegration(level=logging.INFO, event_level=logging_level)

        sentry_sdk.init(
            release=metadata.version("lightman-ai"),
            integrations=[sentry_logging],
        )
    except Exception as e:
        # Deliberate catch-all: Sentry is optional and must never abort the run.
        logger.warning("Could not instantiate Sentry! %s.\nContinuing with the execution.", e)
@@ -7,8 +7,8 @@ class Settings(BaseSettings):
7
7
  def __init__(self, *args: Any, **kwargs: Any) -> None:
8
8
  super().__init__(*args, **kwargs)
9
9
 
10
- OPENAI_ENCODING: str = "cl100k_base"
11
10
  PARALLEL_WORKERS: int = 5
11
+ TIME_ZONE: str = "UTC"
12
12
 
13
13
 
14
14
  settings = Settings()
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
2
  import logging
3
+ from datetime import datetime
3
4
 
4
5
  from lightman_ai.ai.base.agent import BaseAgent
5
6
  from lightman_ai.ai.utils import get_agent_class_from_agent_name
@@ -12,8 +13,8 @@ from lightman_ai.sources.the_hacker_news import TheHackerNewsSource
12
13
  logger = logging.getLogger("lightman")
13
14
 
14
15
 
15
- def _get_articles() -> ArticlesList:
16
- return TheHackerNewsSource().get_articles()
16
+ def _get_articles_from_source(start_date: datetime | None = None) -> ArticlesList:
17
+ return TheHackerNewsSource().get_articles(start_date)
17
18
 
18
19
 
19
20
  def _classify_articles(articles: ArticlesList, agent: BaseAgent) -> SelectedArticlesList:
@@ -60,8 +61,9 @@ def lightman(
60
61
  request_id_type: str | None = None,
61
62
  dry_run: bool = False,
62
63
  model: str | None = None,
64
+ start_date: datetime | None = None,
63
65
  ) -> list[SelectedArticle]:
64
- articles: ArticlesList = _get_articles()
66
+ articles: ArticlesList = _get_articles_from_source(start_date)
65
67
 
66
68
  agent_class = get_agent_class_from_agent_name(agent)
67
69
  agent_instance = agent_class(prompt, model, logger=logger)
@@ -1,8 +1,9 @@
1
1
  from abc import ABC, abstractmethod
2
+ from datetime import datetime
2
3
 
3
4
  from lightman_ai.article.models import ArticlesList
4
5
 
5
6
 
6
7
  class BaseSource(ABC):
7
8
  @abstractmethod
8
- def get_articles(self) -> ArticlesList: ...
9
+ def get_articles(self, date: datetime | None = None) -> ArticlesList: ...
@@ -0,0 +1,10 @@
1
+ class BaseSourceError(Exception):
2
+ """Base Exception class for errors."""
3
+
4
+
5
+ class MalformedSourceResponseError(BaseSourceError):
6
+ """Exception for when the response format does not match the expected one."""
7
+
8
+
9
+ class IncompleteArticleFromSourceError(MalformedSourceResponseError):
10
+ """Exception for when all the mandatory fields could not be retrieved from an article."""
@@ -0,0 +1,81 @@
1
+ import logging
2
+ from datetime import datetime
3
+ from typing import override
4
+ from xml.etree import ElementTree
5
+
6
+ import httpx
7
+ import stamina
8
+ from httpx import Client
9
+ from lightman_ai.article.models import Article, ArticlesList
10
+ from lightman_ai.sources.base import BaseSource
11
+ from lightman_ai.sources.exceptions import IncompleteArticleFromSourceError, MalformedSourceResponseError
12
+ from pydantic import ValidationError
13
+
14
+ logger = logging.getLogger("lightman")
15
+
16
+ _RETRY_ON = httpx.TransportError
17
+ _ATTEMPTS = 5
18
+ _TIMEOUT = 5
19
+
20
+
21
+ THN_URL = "https://feeds.feedburner.com/TheHackersNews"
22
+
23
+
24
class TheHackerNewsSource(BaseSource):
    """Article source that reads The Hacker News RSS feed at ``THN_URL``."""

    @override
    def get_articles(self, date: datetime | None = None) -> ArticlesList:
        """Return the articles that are present in THN feed.

        Args:
            date: Optional start date. When given, only articles published at
                or after it are returned (filtering is delegated to
                ``ArticlesList.get_articles_from_date_onwards``).

        Returns:
            The parsed articles, filtered by ``date`` when provided.
        """
        feed = self.get_feed()
        articles = self._xml_to_list_of_articles(feed)
        if date is not None:
            return ArticlesList.get_articles_from_date_onwards(articles=articles, start_date=date)
        return ArticlesList(articles=articles)

    def get_feed(self) -> str:
        """Retrieve the TheHackerNews' RSS Feed.

        The request runs inside ``stamina.retry_context`` configured with the
        module-level ``_RETRY_ON``, ``_ATTEMPTS`` and ``_TIMEOUT`` values.

        Returns:
            The body of the feed response as text.
        """
        for attempt in stamina.retry_context(
            on=_RETRY_ON,
            attempts=_ATTEMPTS,
            timeout=_TIMEOUT,
        ):
            with Client() as http_client, attempt:
                hacker_news_feed = http_client.get(THN_URL)
                hacker_news_feed.raise_for_status()
                return hacker_news_feed.text

    def _xml_to_list_of_articles(self, xml: str) -> list[Article]:
        """Parse the RSS document into a list of ``Article`` objects.

        Args:
            xml: Raw RSS feed content.

        Returns:
            One ``Article`` per ``<item>`` found under ``<channel>``.

        Raises:
            MalformedSourceResponseError: If the XML cannot be parsed or has no
                ``channel`` element.
            IncompleteArticleFromSourceError: If an item has no publication
                date, its date cannot be parsed, or the article fails
                validation.
        """
        # NOTE(review): the feed is external input and the stdlib XML parser is
        # documented as not hardened against maliciously constructed data.
        try:
            root = ElementTree.fromstring(xml)
        except ElementTree.ParseError as e:
            raise MalformedSourceResponseError(f"Invalid XML format: {e}") from e

        channel = root.find("channel")
        if channel is None:
            raise MalformedSourceResponseError("No channel element found in RSS feed")

        parsed: list[Article] = []
        for item in channel.findall("item"):
            title = item.findtext("title", default="").strip()
            description = self._clean(item.findtext("description", default="").strip())
            link = item.findtext("link", default="").strip()
            published_at_str = item.findtext("pubDate", default="").strip()

            if not published_at_str:
                # No exception is active here, so use `error`: `logger.exception`
                # outside an `except` block would log a bogus "NoneType: None"
                # traceback.
                logger.error("Missing publication date. link: `%s`", link)
                raise IncompleteArticleFromSourceError(f"Missing publication date. link: `{link}`")

            # Only the date parsing and the model validation can fail here.
            try:
                published_at = datetime.strptime(published_at_str, "%a, %d %b %Y %H:%M:%S %z")
                article = Article(title=title, description=description, link=link, published_at=published_at)
            except (ValidationError, ValueError) as e:
                raise IncompleteArticleFromSourceError(
                    f"Could not build an article from the feed item. link: `{link}`"
                ) from e
            parsed.append(article)

        return parsed

    @staticmethod
    def _clean(text: str) -> str:
        """Remove non-useful characters. Helps cleaning the fields that will be sent to the Agent."""
        # NOTE(review): the second replacement target shows as a single space in
        # this view, which would strip every space from the text. Confirm
        # against the upstream file whether it is a different whitespace
        # character (e.g. a non-breaking or doubled space).
        return text.replace("\\n", "").replace(" ", "")
@@ -1 +0,0 @@
1
- v0.20.1
@@ -1,28 +0,0 @@
1
- import logging
2
- import os
3
- from importlib import metadata
4
-
5
- import sentry_sdk
6
- from sentry_sdk.integrations.logging import LoggingIntegration
7
-
8
-
9
- def configure_sentry() -> None:
10
- """Configure Sentry for error tracking and performance monitoring using env vars with fallbacks."""
11
- try:
12
- if not os.getenv("SENTRY_DSN"):
13
- logging.getLogger("lightman").info("SENTRY_DSN not configured, skipping Sentry initialization")
14
- return
15
-
16
- # Logging level from ENV
17
- logging_level_str = os.getenv("LOGGING_LEVEL", "ERROR").upper()
18
- logging_level = getattr(logging, logging_level_str, logging.ERROR)
19
-
20
- # Set up logging integration
21
- sentry_logging = LoggingIntegration(level=logging.INFO, event_level=logging_level)
22
-
23
- sentry_sdk.init(
24
- release=metadata.version("lightman-ai"),
25
- integrations=[sentry_logging],
26
- )
27
- except Exception as e:
28
- logging.getLogger("lightman").warning("Could not instantiate Sentry! %s.\nContinuing with the execution.", e)
@@ -1,57 +0,0 @@
1
- from typing import override
2
- from xml.etree import ElementTree
3
-
4
- import httpx
5
- import stamina
6
- from httpx import Client
7
- from lightman_ai.article.models import Article, ArticlesList
8
- from lightman_ai.sources.base import BaseSource
9
-
10
- _RETRY_ON = httpx.TransportError
11
- _ATTEMPTS = 5
12
- _TIMEOUT = 5
13
-
14
-
15
- THN_URL = "https://feeds.feedburner.com/TheHackersNews"
16
-
17
-
18
- class TheHackerNewsSource(BaseSource):
19
- @override
20
- def get_articles(self) -> ArticlesList:
21
- """Return the articles that are present in THN feed."""
22
- feed = self.get_feed()
23
- articles = self._xml_to_list_of_articles(feed)
24
- return ArticlesList(articles=articles)
25
-
26
- def get_feed(self) -> str:
27
- """Retrieve the TheHackerNews' RSS Feed."""
28
- for attempt in stamina.retry_context(
29
- on=_RETRY_ON,
30
- attempts=_ATTEMPTS,
31
- timeout=_TIMEOUT,
32
- ):
33
- with Client() as http_client, attempt:
34
- hacker_news_feed = http_client.get(THN_URL)
35
- hacker_news_feed.raise_for_status()
36
- return hacker_news_feed.text
37
-
38
- def _xml_to_list_of_articles(self, xml: str) -> list[Article]:
39
- root = ElementTree.fromstring(xml)
40
- channel = root.find("channel")
41
- assert channel
42
- items = channel.findall("item")
43
-
44
- parsed = []
45
-
46
- for item in items:
47
- title = item.findtext("title", default="").strip()
48
- description = self._clean(item.findtext("description", default="").strip())
49
- link = item.findtext("link", default="").strip()
50
-
51
- parsed.append(Article(title=title, description=description, link=link))
52
- return parsed
53
-
54
- @staticmethod
55
- def _clean(text: str) -> str:
56
- """Remove non-useful characters."""
57
- return text.replace("\\n", "").replace(" ", "")
File without changes
File without changes
File without changes