feedship 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. feedship-1.0.0/PKG-INFO +44 -0
  2. feedship-1.0.0/README.md +75 -0
  3. feedship-1.0.0/feedship.egg-info/PKG-INFO +44 -0
  4. feedship-1.0.0/feedship.egg-info/SOURCES.txt +51 -0
  5. feedship-1.0.0/feedship.egg-info/dependency_links.txt +1 -0
  6. feedship-1.0.0/feedship.egg-info/entry_points.txt +2 -0
  7. feedship-1.0.0/feedship.egg-info/requires.txt +42 -0
  8. feedship-1.0.0/feedship.egg-info/top_level.txt +1 -0
  9. feedship-1.0.0/pyproject.toml +106 -0
  10. feedship-1.0.0/setup.cfg +4 -0
  11. feedship-1.0.0/src/__init__.py +0 -0
  12. feedship-1.0.0/src/application/__init__.py +1 -0
  13. feedship-1.0.0/src/application/articles.py +128 -0
  14. feedship-1.0.0/src/application/combine.py +63 -0
  15. feedship-1.0.0/src/application/config.py +36 -0
  16. feedship-1.0.0/src/application/feed.py +314 -0
  17. feedship-1.0.0/src/application/fetch.py +225 -0
  18. feedship-1.0.0/src/application/related.py +57 -0
  19. feedship-1.0.0/src/application/rerank.py +80 -0
  20. feedship-1.0.0/src/application/search.py +188 -0
  21. feedship-1.0.0/src/cli/__init__.py +41 -0
  22. feedship-1.0.0/src/cli/__main__.py +6 -0
  23. feedship-1.0.0/src/cli/article.py +283 -0
  24. feedship-1.0.0/src/cli/discover.py +112 -0
  25. feedship-1.0.0/src/cli/feed.py +452 -0
  26. feedship-1.0.0/src/cli/ui.py +230 -0
  27. feedship-1.0.0/src/constants.py +6 -0
  28. feedship-1.0.0/src/discovery/__init__.py +70 -0
  29. feedship-1.0.0/src/discovery/common_paths.py +82 -0
  30. feedship-1.0.0/src/discovery/deep_crawl.py +306 -0
  31. feedship-1.0.0/src/discovery/models.py +64 -0
  32. feedship-1.0.0/src/discovery/parser.py +132 -0
  33. feedship-1.0.0/src/models.py +89 -0
  34. feedship-1.0.0/src/providers/__init__.py +173 -0
  35. feedship-1.0.0/src/providers/base.py +117 -0
  36. feedship-1.0.0/src/providers/default_provider.py +121 -0
  37. feedship-1.0.0/src/providers/github_release_provider.py +232 -0
  38. feedship-1.0.0/src/providers/rss_provider.py +504 -0
  39. feedship-1.0.0/src/providers/webpage_provider.py +457 -0
  40. feedship-1.0.0/src/storage/__init__.py +29 -0
  41. feedship-1.0.0/src/storage/sqlite/__init__.py +51 -0
  42. feedship-1.0.0/src/storage/sqlite/impl.py +853 -0
  43. feedship-1.0.0/src/storage/sqlite/init.py +92 -0
  44. feedship-1.0.0/src/storage/vector.py +512 -0
  45. feedship-1.0.0/src/utils/__init__.py +44 -0
  46. feedship-1.0.0/src/utils/asyncio_utils.py +45 -0
  47. feedship-1.0.0/src/utils/github.py +54 -0
  48. feedship-1.0.0/src/utils/scraping_utils.py +125 -0
  49. feedship-1.0.0/tests/test_cli.py +522 -0
  50. feedship-1.0.0/tests/test_config.py +21 -0
  51. feedship-1.0.0/tests/test_fetch.py +152 -0
  52. feedship-1.0.0/tests/test_providers.py +475 -0
  53. feedship-1.0.0/tests/test_storage.py +708 -0
@@ -0,0 +1,44 @@
1
+ Metadata-Version: 2.4
2
+ Name: feedship
3
+ Version: 1.0.0
4
+ Summary: A personal information delivery system - collect, subscribe to, and organize information sources from the internet
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: feedparser>=6.0.0
7
+ Requires-Dist: httpx>=0.28.0
8
+ Requires-Dist: click>=8.1.0
9
+ Requires-Dist: beautifulsoup4>=4.12.0
10
+ Requires-Dist: lxml>=6.0.0
11
+ Requires-Dist: rich>=13.0.0
12
+ Requires-Dist: PyGithub>=2.0.0
13
+ Requires-Dist: dynaconf>=3.2.13
14
+ Requires-Dist: trafilatura>=1.0.0
15
+ Requires-Dist: robotexclusionrulesparser>=1.7.1
16
+ Requires-Dist: platformdirs>=4.9.4
17
+ Requires-Dist: numpy<2,>=1.26.0
18
+ Requires-Dist: scikit-learn>=1.7.2
19
+ Requires-Dist: pyyaml>=6.0.3
20
+ Requires-Dist: uvloop>=0.22.0
21
+ Requires-Dist: nanoid>=2.0.0
22
+ Requires-Dist: scrapling>=0.4.0
23
+ Requires-Dist: msgspec>=0.20.0
24
+ Provides-Extra: test
25
+ Requires-Dist: pytest>=9.0.2; extra == "test"
26
+ Requires-Dist: pytest-asyncio>=1.0.0; extra == "test"
27
+ Requires-Dist: pytest-cov>=7.0.0; extra == "test"
28
+ Requires-Dist: pytest-mock>=3.15.0; extra == "test"
29
+ Requires-Dist: pytest-click>=1.1.0; extra == "test"
30
+ Requires-Dist: pytest-httpx>=0.36.0; extra == "test"
31
+ Requires-Dist: pytest-xdist>=3.8.0; extra == "test"
32
+ Requires-Dist: ruff>=0.6.0; extra == "test"
33
+ Requires-Dist: pre-commit>=3.0.0; extra == "test"
34
+ Provides-Extra: ml
35
+ Requires-Dist: sentence-transformers>=3.0.0; extra == "ml"
36
+ Requires-Dist: torch>=2.0.0; extra == "ml"
37
+ Requires-Dist: safetensors>=0.4.3; extra == "ml"
38
+ Requires-Dist: transformers>=4.40.0; extra == "ml"
39
+ Provides-Extra: cloudflare
40
+ Requires-Dist: scrapling>=0.4.0; extra == "cloudflare"
41
+ Requires-Dist: playwright>=1.49.0; extra == "cloudflare"
42
+ Requires-Dist: curl-cffi>=0.14.0; extra == "cloudflare"
43
+ Requires-Dist: socksio>=1.0.0; extra == "cloudflare"
44
+ Requires-Dist: browserforge>=1.2.0; extra == "cloudflare"
@@ -0,0 +1,75 @@
1
+ # feedship
2
+
3
+ Personal information system for collecting, subscribing to, and organizing information sources from the internet.
4
+
5
+ ## Features
6
+
7
+ - **Feed subscription** - RSS/Atom feeds and GitHub releases
8
+ - **Web article extraction** - Crawl webpages with Readability
9
+ - **Full-text search** - FTS5-powered search across all content
10
+ - **Semantic search** - Vector embeddings with ChromaDB
11
+ - **CLI tool** - Full-featured command-line interface
12
+
13
+ ## Tech Stack
14
+
15
+ Python 3.10+ | click | feedparser | httpx | BeautifulSoup4 | sqlite3 | ChromaDB | sentence-transformers
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install feedship
21
+ # or
22
+ uv pip install feedship
23
+ ```
24
+
25
+ ### Optional Dependencies
26
+
27
+ ```bash
28
+ # ML/AI features for auto-tagging
29
+ pip install feedship[ml]
30
+
31
+ # All features
32
+ pip install feedship[ml,cloudflare]
33
+ ```
34
+
35
+ ## Quick Start
36
+
37
+ ### Add a Feed
38
+
39
+ ```bash
40
+ feedship feed add <url> [options]
41
+
42
+ # Examples:
43
+ feedship feed add https://example.com/feed.xml
44
+ feedship feed add https://github.com/python/cpython
45
+
46
+ # Options:
47
+ --discover [on|off] Enable feed discovery (default: on)
48
+ --automatic [on|off] Auto-add all discovered feeds (default: off)
49
+ --discover-depth N Discovery depth 1-10 (default: 1)
50
+ --weight FLOAT Feed weight for semantic search (default: 0.3)
51
+ ```
52
+
53
+ ### Fetch & List
54
+
55
+ ```bash
56
+ feedship fetch --all # Fetch all feeds
57
+ feedship feed list # List all feeds
58
+ feedship article list # List articles
59
+ feedship article list --limit 50
60
+ ```
61
+
62
+ ### Search
63
+
64
+ ```bash
65
+ feedship search "machine learning"
66
+ feedship search "python" --limit 10
67
+ ```
68
+
69
+ ## Documentation
70
+
71
+ - @docs/feed.md - Feed provider architecture, fetch flow, refactoring status
72
+ - @docs/providers.md - Provider/TagParser interfaces, registration
73
+ - @docs/structure.md - Application structure, source files, structural rules
74
+ - @docs/cli.md - CLI command reference
75
+ - @docs/Automatic Discovery Feed.md - Automatic feed discovery system
@@ -0,0 +1,44 @@
1
+ Metadata-Version: 2.4
2
+ Name: feedship
3
+ Version: 1.0.0
4
+ Summary: A personal information delivery system - collect, subscribe to, and organize information sources from the internet
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: feedparser>=6.0.0
7
+ Requires-Dist: httpx>=0.28.0
8
+ Requires-Dist: click>=8.1.0
9
+ Requires-Dist: beautifulsoup4>=4.12.0
10
+ Requires-Dist: lxml>=6.0.0
11
+ Requires-Dist: rich>=13.0.0
12
+ Requires-Dist: PyGithub>=2.0.0
13
+ Requires-Dist: dynaconf>=3.2.13
14
+ Requires-Dist: trafilatura>=1.0.0
15
+ Requires-Dist: robotexclusionrulesparser>=1.7.1
16
+ Requires-Dist: platformdirs>=4.9.4
17
+ Requires-Dist: numpy<2,>=1.26.0
18
+ Requires-Dist: scikit-learn>=1.7.2
19
+ Requires-Dist: pyyaml>=6.0.3
20
+ Requires-Dist: uvloop>=0.22.0
21
+ Requires-Dist: nanoid>=2.0.0
22
+ Requires-Dist: scrapling>=0.4.0
23
+ Requires-Dist: msgspec>=0.20.0
24
+ Provides-Extra: test
25
+ Requires-Dist: pytest>=9.0.2; extra == "test"
26
+ Requires-Dist: pytest-asyncio>=1.0.0; extra == "test"
27
+ Requires-Dist: pytest-cov>=7.0.0; extra == "test"
28
+ Requires-Dist: pytest-mock>=3.15.0; extra == "test"
29
+ Requires-Dist: pytest-click>=1.1.0; extra == "test"
30
+ Requires-Dist: pytest-httpx>=0.36.0; extra == "test"
31
+ Requires-Dist: pytest-xdist>=3.8.0; extra == "test"
32
+ Requires-Dist: ruff>=0.6.0; extra == "test"
33
+ Requires-Dist: pre-commit>=3.0.0; extra == "test"
34
+ Provides-Extra: ml
35
+ Requires-Dist: sentence-transformers>=3.0.0; extra == "ml"
36
+ Requires-Dist: torch>=2.0.0; extra == "ml"
37
+ Requires-Dist: safetensors>=0.4.3; extra == "ml"
38
+ Requires-Dist: transformers>=4.40.0; extra == "ml"
39
+ Provides-Extra: cloudflare
40
+ Requires-Dist: scrapling>=0.4.0; extra == "cloudflare"
41
+ Requires-Dist: playwright>=1.49.0; extra == "cloudflare"
42
+ Requires-Dist: curl-cffi>=0.14.0; extra == "cloudflare"
43
+ Requires-Dist: socksio>=1.0.0; extra == "cloudflare"
44
+ Requires-Dist: browserforge>=1.2.0; extra == "cloudflare"
@@ -0,0 +1,51 @@
1
+ README.md
2
+ pyproject.toml
3
+ feedship.egg-info/PKG-INFO
4
+ feedship.egg-info/SOURCES.txt
5
+ feedship.egg-info/dependency_links.txt
6
+ feedship.egg-info/entry_points.txt
7
+ feedship.egg-info/requires.txt
8
+ feedship.egg-info/top_level.txt
9
+ src/__init__.py
10
+ src/constants.py
11
+ src/models.py
12
+ src/application/__init__.py
13
+ src/application/articles.py
14
+ src/application/combine.py
15
+ src/application/config.py
16
+ src/application/feed.py
17
+ src/application/fetch.py
18
+ src/application/related.py
19
+ src/application/rerank.py
20
+ src/application/search.py
21
+ src/cli/__init__.py
22
+ src/cli/__main__.py
23
+ src/cli/article.py
24
+ src/cli/discover.py
25
+ src/cli/feed.py
26
+ src/cli/ui.py
27
+ src/discovery/__init__.py
28
+ src/discovery/common_paths.py
29
+ src/discovery/deep_crawl.py
30
+ src/discovery/models.py
31
+ src/discovery/parser.py
32
+ src/providers/__init__.py
33
+ src/providers/base.py
34
+ src/providers/default_provider.py
35
+ src/providers/github_release_provider.py
36
+ src/providers/rss_provider.py
37
+ src/providers/webpage_provider.py
38
+ src/storage/__init__.py
39
+ src/storage/vector.py
40
+ src/storage/sqlite/__init__.py
41
+ src/storage/sqlite/impl.py
42
+ src/storage/sqlite/init.py
43
+ src/utils/__init__.py
44
+ src/utils/asyncio_utils.py
45
+ src/utils/github.py
46
+ src/utils/scraping_utils.py
47
+ tests/test_cli.py
48
+ tests/test_config.py
49
+ tests/test_fetch.py
50
+ tests/test_providers.py
51
+ tests/test_storage.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ feedship = src.cli:cli
@@ -0,0 +1,42 @@
1
+ feedparser>=6.0.0
2
+ httpx>=0.28.0
3
+ click>=8.1.0
4
+ beautifulsoup4>=4.12.0
5
+ lxml>=6.0.0
6
+ rich>=13.0.0
7
+ PyGithub>=2.0.0
8
+ dynaconf>=3.2.13
9
+ trafilatura>=1.0.0
10
+ robotexclusionrulesparser>=1.7.1
11
+ platformdirs>=4.9.4
12
+ numpy<2,>=1.26.0
13
+ scikit-learn>=1.7.2
14
+ pyyaml>=6.0.3
15
+ uvloop>=0.22.0
16
+ nanoid>=2.0.0
17
+ scrapling>=0.4.0
18
+ msgspec>=0.20.0
19
+
20
+ [cloudflare]
21
+ scrapling>=0.4.0
22
+ playwright>=1.49.0
23
+ curl-cffi>=0.14.0
24
+ socksio>=1.0.0
25
+ browserforge>=1.2.0
26
+
27
+ [ml]
28
+ sentence-transformers>=3.0.0
29
+ torch>=2.0.0
30
+ safetensors>=0.4.3
31
+ transformers>=4.40.0
32
+
33
+ [test]
34
+ pytest>=9.0.2
35
+ pytest-asyncio>=1.0.0
36
+ pytest-cov>=7.0.0
37
+ pytest-mock>=3.15.0
38
+ pytest-click>=1.1.0
39
+ pytest-httpx>=0.36.0
40
+ pytest-xdist>=3.8.0
41
+ ruff>=0.6.0
42
+ pre-commit>=3.0.0
@@ -0,0 +1 @@
1
+ src
@@ -0,0 +1,106 @@
1
+ [project]
2
+ name = "feedship"
3
+ version = "1.0.0"
4
+ description = "A personal information delivery system - collect, subscribe to, and organize information sources from the internet"
5
+ requires-python = ">=3.10"
6
+ dependencies = [
7
+ "feedparser>=6.0.0",
8
+ "httpx>=0.28.0",
9
+ "click>=8.1.0",
10
+ "beautifulsoup4>=4.12.0",
11
+ "lxml>=6.0.0",
12
+ "rich>=13.0.0",
13
+ "PyGithub>=2.0.0",
14
+ "dynaconf>=3.2.13",
15
+ "trafilatura>=1.0.0",
16
+ "robotexclusionrulesparser>=1.7.1",
17
+ "platformdirs>=4.9.4",
18
+ "numpy>=1.26.0,<2",
19
+ "scikit-learn>=1.7.2",
20
+ "pyyaml>=6.0.3",
21
+ "uvloop>=0.22.0", # async event loop
22
+ "nanoid>=2.0.0", # URL-safe ID generation
23
+ "scrapling>=0.4.0", # CSS-selector HTML parsing
24
+ "msgspec>=0.20.0", # Required by scrapling browser engine
25
+ ]
26
+
27
+ [project.optional-dependencies]
28
+ test = [
29
+ "pytest>=9.0.2",
30
+ "pytest-asyncio>=1.0.0",
31
+ "pytest-cov>=7.0.0",
32
+ "pytest-mock>=3.15.0",
33
+ "pytest-click>=1.1.0",
34
+ "pytest-httpx>=0.36.0",
35
+ "pytest-xdist>=3.8.0",
36
+ "ruff>=0.6.0",
37
+ "pre-commit>=3.0.0",
38
+ ]
39
+ ml = [
40
+ "sentence-transformers>=3.0.0",
41
+ "torch>=2.0.0",
42
+ "safetensors>=0.4.3",
43
+ "transformers>=4.40.0",
44
+ ]
45
+ cloudflare = [
46
+ "scrapling>=0.4.0",
47
+ "playwright>=1.49.0",
48
+ "curl-cffi>=0.14.0",
49
+ "socksio>=1.0.0",
50
+ "browserforge>=1.2.0",
51
+ ]
52
+
53
+ [project.scripts]
54
+ feedship = "src.cli:cli"
55
+
56
+ [build-system]
57
+ requires = ["setuptools>=61.0"]
58
+ build-backend = "setuptools.build_meta"
59
+
60
+ [tool.setuptools.packages.find]
61
+ where = ["."]
62
+ include = ["src*"]
63
+
64
+ [tool.pytest.ini_options]
65
+ minversion = "9.0"
66
+ testpaths = ["tests"]
67
+ asyncio_mode = "auto"
68
+ addopts = [
69
+ "-v",
70
+ "--tb=short",
71
+ "--strict-markers",
72
+ ]
73
+ markers = [
74
+ "asyncio: mark test as async",
75
+ "integration: integration test requiring full app",
76
+ "slow: tests that take significant time",
77
+ ]
78
+ filterwarnings = [
79
+ "ignore::DeprecationWarning",
80
+ ]
81
+
82
+ [tool.ruff]
83
+ target-version = "py310"
84
+ line-length = 88
85
+
86
+ [tool.ruff.lint]
87
+ select = [
88
+ "E", "W", # pycodestyle errors & warnings
89
+ "F", # Pyflakes
90
+ "I", # isort import sorting
91
+ "UP", # pyupgrade modernization
92
+ "B", # flake8-bugbear common traps
93
+ "C4", # flake8-comprehensions comprehension improvements
94
+ "SIM", # flake8-simplify simplify code
95
+ ]
96
+ ignore = ["E501"] # line too long
97
+
98
+ [tool.ruff.lint.per-file-ignores]
99
+ "__init__.py" = ["F401", "E402"]
100
+ "src/cli/*.py" = ["E402"]
101
+ "tests/**/*.py" = ["E402", "F401"]
102
+
103
+ [tool.ruff.format]
104
+ quote-style = "double"
105
+ indent-style = "space"
106
+ docstring-code-format = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes
@@ -0,0 +1 @@
1
+ """Application layer - use cases and orchestration."""
@@ -0,0 +1,128 @@
1
+ """Article operations for RSS reader.
2
+
3
+ Provides functions for listing and retrieving articles from the database.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+
10
+ from src.storage import (
11
+ get_article as storage_get_article,
12
+ )
13
+ from src.storage import (
14
+ get_article_detail as storage_get_article_detail,
15
+ )
16
+ from src.storage import (
17
+ list_articles as storage_list_articles,
18
+ )
19
+ from src.storage import (
20
+ search_articles as storage_search_articles,
21
+ )
22
+
23
+
24
@dataclass
class ArticleListItem:
    """An article row joined with its feed name, plus ranking signals.

    Identity / display attributes:
        id: Unique identifier for the article.
        feed_id: ID of the feed this article belongs to.
        feed_name: Name of the feed.
        title: Title of the article, or None when the feed omitted it.
        link: URL to the full article, or None when the feed omitted it.
        guid: Global unique identifier from the feed.
        pub_date: Publication date string from the feed, or None.
        description: Short description or summary, or None.

    Ranking attributes (defaults mean "no signal yet"; presumably filled
    in by the search/rerank pipeline — confirm against combine/rerank):
        vec_sim: Vector-similarity score.
        bm25_score: BM25 full-text-search score.
        freshness: Time-decay freshness score.
        source_weight: Per-feed weight used in ranking (default 0.3).
        ce_score: Cross-encoder rerank score; 0.0 means not reranked.
        final_score: Combined score after weighting all signals.
        score: Generic score, default 1.0 — semantics depend on the
            caller; TODO confirm.
    """

    id: str
    feed_id: str
    feed_name: str
    title: str | None
    link: str | None
    guid: str
    pub_date: str | None
    description: str | None
    vec_sim: float = 0.0
    bm25_score: float = 0.0
    freshness: float = 0.0
    source_weight: float = 0.3
    ce_score: float = 0.0
    final_score: float = 0.0
    score: float = 1.0
54
+
55
+
56
def list_articles(
    limit: int = 20,
    feed_id: str | None = None,
    since: str | None = None,
    until: str | None = None,
    on: list[str] | None = None,
) -> list[ArticleListItem]:
    """Return articles ordered by publication date.

    Thin application-layer wrapper: every filter is forwarded to the
    storage layer unchanged.

    Args:
        limit: Maximum number of articles to return (default 20).
        feed_id: Restrict results to a single feed when given.
        since: Inclusive lower-bound date, format YYYY-MM-DD.
        until: Inclusive upper-bound date, format YYYY-MM-DD.
        on: Specific dates to match.

    Returns:
        Matching articles as ArticleListItem objects.
    """
    filters = {
        "limit": limit,
        "feed_id": feed_id,
        "since": since,
        "until": until,
        "on": on,
    }
    return storage_list_articles(**filters)
78
+
79
+
80
def get_article(article_id: str) -> ArticleListItem | None:
    """Look up a single article by its ID.

    Args:
        article_id: Identifier of the article to fetch.

    Returns:
        The matching ArticleListItem, or None when no article exists.
    """
    article = storage_get_article(article_id)
    return article
90
+
91
+
92
def get_article_detail(article_id: str) -> dict | None:
    """Fetch the full record for one article, including content.

    Args:
        article_id: Article ID; either the truncated 8-char form or the
            full 32-char form is accepted.

    Returns:
        A dict with all article fields, or None when nothing matches.
    """
    detail = storage_get_article_detail(article_id)
    return detail
103
+
104
+
105
def search_articles(
    query: str,
    limit: int = 20,
    feed_id: str | None = None,
    since: str | None = None,
    until: str | None = None,
    on: list[str] | None = None,
) -> list[ArticleListItem]:
    """Search articles via FTS5 full-text search.

    Thin application-layer wrapper: the query and every filter are
    forwarded to the storage layer unchanged.

    Args:
        query: FTS5 query string (space-separated terms are ANDed;
            quote a phrase to match it exactly).
        limit: Maximum number of results (default 20).
        feed_id: Restrict results to a single feed when given.
        since: Inclusive lower-bound date, format YYYY-MM-DD.
        until: Inclusive upper-bound date, format YYYY-MM-DD.
        on: Specific dates to match.

    Returns:
        Matching articles as ArticleListItem objects.
    """
    filters = {
        "query": query,
        "limit": limit,
        "feed_id": feed_id,
        "since": since,
        "until": until,
        "on": on,
    }
    return storage_search_articles(**filters)
@@ -0,0 +1,63 @@
1
+ """Unified score combination using Newton's cooling law for freshness.
2
+
3
+ This module provides combine_scores() which merges multiple scoring signals
4
+ into a final ranking score using weighted combination with time-decay freshness.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import math
10
+ from datetime import datetime, timezone
11
+
12
+ from src.application.articles import ArticleListItem
13
+ from src.storage.vector import _pub_date_to_timestamp
14
+
15
+
16
def combine_scores(
    candidates: list[ArticleListItem],
    alpha: float = 0.3,
    beta: float = 0.3,
    gamma: float = 0.2,
    delta: float = 0.2,
) -> list[ArticleListItem]:
    """Combine multiple scoring signals into final_score via a weighted sum.

    Freshness decays exponentially with article age:
        freshness = exp(-days_ago / 7)
    NOTE(review): 7 is the e-folding time (a week-old article scores
    ~0.37), not a true half-life despite the variable name.

    Args:
        candidates: List of ArticleListItem candidates to score.
        alpha: Weight for Cross-Encoder score (ce_score).
        beta: Weight for freshness (time decay).
        gamma: Weight for vector similarity (vec_sim).
        delta: Weight for BM25 score (bm25_score).

    Returns:
        The same list of candidates, mutated in place and sorted by
        final_score descending.
    """
    half_life_days = 7
    now = datetime.now(timezone.utc)

    for c in candidates:
        # Freshness defaults to 0.0 when the publication date is missing
        # or unparseable.
        c.freshness = 0.0
        if c.pub_date:
            timestamp = _pub_date_to_timestamp(c.pub_date)
            if timestamp is not None:
                pub_dt = datetime.fromtimestamp(timestamp, tz=timezone.utc)
                # Clamp to >= 0: future-dated items (feed clock skew or
                # scheduled posts) would otherwise get exp(positive) > 1,
                # an unbounded boost that can dominate the ranking.
                days_ago = max((now - pub_dt).days, 0)
                c.freshness = math.exp(-days_ago / half_life_days)

        # ce_score == 0 means "not reranked": contribute nothing rather
        # than a misleading zero-confidence signal.
        ce = c.ce_score if c.ce_score > 0 else 0.0

        # Final score = weighted combination of the 4 signals.
        c.final_score = (
            alpha * ce + beta * c.freshness + gamma * c.vec_sim + delta * c.bm25_score
        )

    candidates.sort(key=lambda x: x.final_score, reverse=True)
    return candidates
@@ -0,0 +1,36 @@
1
+ """Application configuration loaded from config.yaml via dynaconf."""
2
+
3
+ from pathlib import Path
4
+ from zoneinfo import ZoneInfo
5
+
6
+ from dynaconf import Dynaconf
7
+
8
+ _settings: Dynaconf | None = None
9
+
10
+
11
def _get_settings() -> Dynaconf:
    """Return the process-wide Dynaconf settings, creating them on first use."""
    global _settings
    if _settings is not None:
        return _settings
    config_path = Path(__file__).parent.parent / "config.yaml"
    _settings = Dynaconf(
        envvar_prefix="RADAR",
        settings_files=[config_path],
    )
    return _settings
21
+
22
+
23
def get_timezone() -> ZoneInfo:
    """Return the configured timezone as a ZoneInfo (defaults to Asia/Shanghai)."""
    name = _get_settings().get("timezone", "Asia/Shanghai")
    return ZoneInfo(name)
27
+
28
+
29
def get_default_feed_weight() -> float:
    """Return the default feed weight used for semantic-search ranking.

    Reads ``feed.default.weight`` from the settings, defaulting to 0.3.
    """
    settings = _get_settings()
    return settings.get("feed.default.weight", 0.3)
32
+
33
+
34
def get_bm25_factor() -> float:
    """Return the BM25 sigmoid normalization factor.

    Reads ``bm25_factor`` from the settings, defaulting to 0.5.
    """
    factor = _get_settings().get("bm25_factor", 0.5)
    return factor