feedship 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- feedship-1.0.0/PKG-INFO +44 -0
- feedship-1.0.0/README.md +75 -0
- feedship-1.0.0/feedship.egg-info/PKG-INFO +44 -0
- feedship-1.0.0/feedship.egg-info/SOURCES.txt +51 -0
- feedship-1.0.0/feedship.egg-info/dependency_links.txt +1 -0
- feedship-1.0.0/feedship.egg-info/entry_points.txt +2 -0
- feedship-1.0.0/feedship.egg-info/requires.txt +42 -0
- feedship-1.0.0/feedship.egg-info/top_level.txt +1 -0
- feedship-1.0.0/pyproject.toml +106 -0
- feedship-1.0.0/setup.cfg +4 -0
- feedship-1.0.0/src/__init__.py +0 -0
- feedship-1.0.0/src/application/__init__.py +1 -0
- feedship-1.0.0/src/application/articles.py +128 -0
- feedship-1.0.0/src/application/combine.py +63 -0
- feedship-1.0.0/src/application/config.py +36 -0
- feedship-1.0.0/src/application/feed.py +314 -0
- feedship-1.0.0/src/application/fetch.py +225 -0
- feedship-1.0.0/src/application/related.py +57 -0
- feedship-1.0.0/src/application/rerank.py +80 -0
- feedship-1.0.0/src/application/search.py +188 -0
- feedship-1.0.0/src/cli/__init__.py +41 -0
- feedship-1.0.0/src/cli/__main__.py +6 -0
- feedship-1.0.0/src/cli/article.py +283 -0
- feedship-1.0.0/src/cli/discover.py +112 -0
- feedship-1.0.0/src/cli/feed.py +452 -0
- feedship-1.0.0/src/cli/ui.py +230 -0
- feedship-1.0.0/src/constants.py +6 -0
- feedship-1.0.0/src/discovery/__init__.py +70 -0
- feedship-1.0.0/src/discovery/common_paths.py +82 -0
- feedship-1.0.0/src/discovery/deep_crawl.py +306 -0
- feedship-1.0.0/src/discovery/models.py +64 -0
- feedship-1.0.0/src/discovery/parser.py +132 -0
- feedship-1.0.0/src/models.py +89 -0
- feedship-1.0.0/src/providers/__init__.py +173 -0
- feedship-1.0.0/src/providers/base.py +117 -0
- feedship-1.0.0/src/providers/default_provider.py +121 -0
- feedship-1.0.0/src/providers/github_release_provider.py +232 -0
- feedship-1.0.0/src/providers/rss_provider.py +504 -0
- feedship-1.0.0/src/providers/webpage_provider.py +457 -0
- feedship-1.0.0/src/storage/__init__.py +29 -0
- feedship-1.0.0/src/storage/sqlite/__init__.py +51 -0
- feedship-1.0.0/src/storage/sqlite/impl.py +853 -0
- feedship-1.0.0/src/storage/sqlite/init.py +92 -0
- feedship-1.0.0/src/storage/vector.py +512 -0
- feedship-1.0.0/src/utils/__init__.py +44 -0
- feedship-1.0.0/src/utils/asyncio_utils.py +45 -0
- feedship-1.0.0/src/utils/github.py +54 -0
- feedship-1.0.0/src/utils/scraping_utils.py +125 -0
- feedship-1.0.0/tests/test_cli.py +522 -0
- feedship-1.0.0/tests/test_config.py +21 -0
- feedship-1.0.0/tests/test_fetch.py +152 -0
- feedship-1.0.0/tests/test_providers.py +475 -0
- feedship-1.0.0/tests/test_storage.py +708 -0
feedship-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: feedship
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A personal information delivery system - collect, subscribe to, and organize information sources from the internet
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: feedparser>=6.0.0
|
|
7
|
+
Requires-Dist: httpx>=0.28.0
|
|
8
|
+
Requires-Dist: click>=8.1.0
|
|
9
|
+
Requires-Dist: beautifulsoup4>=4.12.0
|
|
10
|
+
Requires-Dist: lxml>=6.0.0
|
|
11
|
+
Requires-Dist: rich>=13.0.0
|
|
12
|
+
Requires-Dist: PyGithub>=2.0.0
|
|
13
|
+
Requires-Dist: dynaconf>=3.2.13
|
|
14
|
+
Requires-Dist: trafilatura>=1.0.0
|
|
15
|
+
Requires-Dist: robotexclusionrulesparser>=1.7.1
|
|
16
|
+
Requires-Dist: platformdirs>=4.9.4
|
|
17
|
+
Requires-Dist: numpy<2,>=1.26.0
|
|
18
|
+
Requires-Dist: scikit-learn>=1.7.2
|
|
19
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
20
|
+
Requires-Dist: uvloop>=0.22.0
|
|
21
|
+
Requires-Dist: nanoid>=2.0.0
|
|
22
|
+
Requires-Dist: scrapling>=0.4.0
|
|
23
|
+
Requires-Dist: msgspec>=0.20.0
|
|
24
|
+
Provides-Extra: test
|
|
25
|
+
Requires-Dist: pytest>=9.0.2; extra == "test"
|
|
26
|
+
Requires-Dist: pytest-asyncio>=1.0.0; extra == "test"
|
|
27
|
+
Requires-Dist: pytest-cov>=7.0.0; extra == "test"
|
|
28
|
+
Requires-Dist: pytest-mock>=3.15.0; extra == "test"
|
|
29
|
+
Requires-Dist: pytest-click>=1.1.0; extra == "test"
|
|
30
|
+
Requires-Dist: pytest-httpx>=0.36.0; extra == "test"
|
|
31
|
+
Requires-Dist: pytest-xdist>=3.8.0; extra == "test"
|
|
32
|
+
Requires-Dist: ruff>=0.6.0; extra == "test"
|
|
33
|
+
Requires-Dist: pre-commit>=3.0.0; extra == "test"
|
|
34
|
+
Provides-Extra: ml
|
|
35
|
+
Requires-Dist: sentence-transformers>=3.0.0; extra == "ml"
|
|
36
|
+
Requires-Dist: torch>=2.0.0; extra == "ml"
|
|
37
|
+
Requires-Dist: safetensors>=0.4.3; extra == "ml"
|
|
38
|
+
Requires-Dist: transformers>=4.40.0; extra == "ml"
|
|
39
|
+
Provides-Extra: cloudflare
|
|
40
|
+
Requires-Dist: scrapling>=0.4.0; extra == "cloudflare"
|
|
41
|
+
Requires-Dist: playwright>=1.49.0; extra == "cloudflare"
|
|
42
|
+
Requires-Dist: curl-cffi>=0.14.0; extra == "cloudflare"
|
|
43
|
+
Requires-Dist: socksio>=1.0.0; extra == "cloudflare"
|
|
44
|
+
Requires-Dist: browserforge>=1.2.0; extra == "cloudflare"
|
feedship-1.0.0/README.md
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# feedship
|
|
2
|
+
|
|
3
|
+
Personal information system for collecting, subscribing to, and organizing information sources from the internet.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Feed subscription** - RSS/Atom feeds and GitHub releases
|
|
8
|
+
- **Web article extraction** - Crawl webpages with Readability
|
|
9
|
+
- **Full-text search** - FTS5-powered search across all content
|
|
10
|
+
- **Semantic search** - Vector embeddings with ChromaDB
|
|
11
|
+
- **CLI tool** - Full-featured command-line interface
|
|
12
|
+
|
|
13
|
+
## Tech Stack
|
|
14
|
+
|
|
15
|
+
Python 3.10+ | click | feedparser | httpx | BeautifulSoup4 | sqlite3 | ChromaDB | sentence-transformers
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install feedship
|
|
21
|
+
# or
|
|
22
|
+
uv pip install feedship
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### Optional Dependencies
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# ML/AI features for auto-tagging
|
|
29
|
+
pip install feedship[ml]
|
|
30
|
+
|
|
31
|
+
# All features
|
|
32
|
+
pip install feedship[ml,cloudflare]
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Quick Start
|
|
36
|
+
|
|
37
|
+
### Add a Feed
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
feedship feed add <url> [options]
|
|
41
|
+
|
|
42
|
+
# Examples:
|
|
43
|
+
feedship feed add https://example.com/feed.xml
|
|
44
|
+
feedship feed add https://github.com/python/cpython
|
|
45
|
+
|
|
46
|
+
# Options:
|
|
47
|
+
--discover [on|off] Enable feed discovery (default: on)
|
|
48
|
+
--automatic [on|off] Auto-add all discovered feeds (default: off)
|
|
49
|
+
--discover-depth N Discovery depth 1-10 (default: 1)
|
|
50
|
+
--weight FLOAT Feed weight for semantic search (default: 0.3)
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Fetch & List
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
feedship fetch --all # Fetch all feeds
|
|
57
|
+
feedship feed list # List all feeds
|
|
58
|
+
feedship article list # List articles
|
|
59
|
+
feedship article list --limit 50
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Search
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
feedship search "machine learning"
|
|
66
|
+
feedship search "python" --limit 10
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Documentation
|
|
70
|
+
|
|
71
|
+
- @docs/feed.md - Feed provider architecture, fetch flow, refactoring status
|
|
72
|
+
- @docs/providers.md - Provider/TagParser interfaces, registration
|
|
73
|
+
- @docs/structure.md - Application structure, source files, structural rules
|
|
74
|
+
- @docs/cli.md - CLI command reference
|
|
75
|
+
- @docs/Automatic Discovery Feed.md - Automatic feed discovery system
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: feedship
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A personal information delivery system - collect, subscribe to, and organize information sources from the internet
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: feedparser>=6.0.0
|
|
7
|
+
Requires-Dist: httpx>=0.28.0
|
|
8
|
+
Requires-Dist: click>=8.1.0
|
|
9
|
+
Requires-Dist: beautifulsoup4>=4.12.0
|
|
10
|
+
Requires-Dist: lxml>=6.0.0
|
|
11
|
+
Requires-Dist: rich>=13.0.0
|
|
12
|
+
Requires-Dist: PyGithub>=2.0.0
|
|
13
|
+
Requires-Dist: dynaconf>=3.2.13
|
|
14
|
+
Requires-Dist: trafilatura>=1.0.0
|
|
15
|
+
Requires-Dist: robotexclusionrulesparser>=1.7.1
|
|
16
|
+
Requires-Dist: platformdirs>=4.9.4
|
|
17
|
+
Requires-Dist: numpy<2,>=1.26.0
|
|
18
|
+
Requires-Dist: scikit-learn>=1.7.2
|
|
19
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
20
|
+
Requires-Dist: uvloop>=0.22.0
|
|
21
|
+
Requires-Dist: nanoid>=2.0.0
|
|
22
|
+
Requires-Dist: scrapling>=0.4.0
|
|
23
|
+
Requires-Dist: msgspec>=0.20.0
|
|
24
|
+
Provides-Extra: test
|
|
25
|
+
Requires-Dist: pytest>=9.0.2; extra == "test"
|
|
26
|
+
Requires-Dist: pytest-asyncio>=1.0.0; extra == "test"
|
|
27
|
+
Requires-Dist: pytest-cov>=7.0.0; extra == "test"
|
|
28
|
+
Requires-Dist: pytest-mock>=3.15.0; extra == "test"
|
|
29
|
+
Requires-Dist: pytest-click>=1.1.0; extra == "test"
|
|
30
|
+
Requires-Dist: pytest-httpx>=0.36.0; extra == "test"
|
|
31
|
+
Requires-Dist: pytest-xdist>=3.8.0; extra == "test"
|
|
32
|
+
Requires-Dist: ruff>=0.6.0; extra == "test"
|
|
33
|
+
Requires-Dist: pre-commit>=3.0.0; extra == "test"
|
|
34
|
+
Provides-Extra: ml
|
|
35
|
+
Requires-Dist: sentence-transformers>=3.0.0; extra == "ml"
|
|
36
|
+
Requires-Dist: torch>=2.0.0; extra == "ml"
|
|
37
|
+
Requires-Dist: safetensors>=0.4.3; extra == "ml"
|
|
38
|
+
Requires-Dist: transformers>=4.40.0; extra == "ml"
|
|
39
|
+
Provides-Extra: cloudflare
|
|
40
|
+
Requires-Dist: scrapling>=0.4.0; extra == "cloudflare"
|
|
41
|
+
Requires-Dist: playwright>=1.49.0; extra == "cloudflare"
|
|
42
|
+
Requires-Dist: curl-cffi>=0.14.0; extra == "cloudflare"
|
|
43
|
+
Requires-Dist: socksio>=1.0.0; extra == "cloudflare"
|
|
44
|
+
Requires-Dist: browserforge>=1.2.0; extra == "cloudflare"
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
feedship.egg-info/PKG-INFO
|
|
4
|
+
feedship.egg-info/SOURCES.txt
|
|
5
|
+
feedship.egg-info/dependency_links.txt
|
|
6
|
+
feedship.egg-info/entry_points.txt
|
|
7
|
+
feedship.egg-info/requires.txt
|
|
8
|
+
feedship.egg-info/top_level.txt
|
|
9
|
+
src/__init__.py
|
|
10
|
+
src/constants.py
|
|
11
|
+
src/models.py
|
|
12
|
+
src/application/__init__.py
|
|
13
|
+
src/application/articles.py
|
|
14
|
+
src/application/combine.py
|
|
15
|
+
src/application/config.py
|
|
16
|
+
src/application/feed.py
|
|
17
|
+
src/application/fetch.py
|
|
18
|
+
src/application/related.py
|
|
19
|
+
src/application/rerank.py
|
|
20
|
+
src/application/search.py
|
|
21
|
+
src/cli/__init__.py
|
|
22
|
+
src/cli/__main__.py
|
|
23
|
+
src/cli/article.py
|
|
24
|
+
src/cli/discover.py
|
|
25
|
+
src/cli/feed.py
|
|
26
|
+
src/cli/ui.py
|
|
27
|
+
src/discovery/__init__.py
|
|
28
|
+
src/discovery/common_paths.py
|
|
29
|
+
src/discovery/deep_crawl.py
|
|
30
|
+
src/discovery/models.py
|
|
31
|
+
src/discovery/parser.py
|
|
32
|
+
src/providers/__init__.py
|
|
33
|
+
src/providers/base.py
|
|
34
|
+
src/providers/default_provider.py
|
|
35
|
+
src/providers/github_release_provider.py
|
|
36
|
+
src/providers/rss_provider.py
|
|
37
|
+
src/providers/webpage_provider.py
|
|
38
|
+
src/storage/__init__.py
|
|
39
|
+
src/storage/vector.py
|
|
40
|
+
src/storage/sqlite/__init__.py
|
|
41
|
+
src/storage/sqlite/impl.py
|
|
42
|
+
src/storage/sqlite/init.py
|
|
43
|
+
src/utils/__init__.py
|
|
44
|
+
src/utils/asyncio_utils.py
|
|
45
|
+
src/utils/github.py
|
|
46
|
+
src/utils/scraping_utils.py
|
|
47
|
+
tests/test_cli.py
|
|
48
|
+
tests/test_config.py
|
|
49
|
+
tests/test_fetch.py
|
|
50
|
+
tests/test_providers.py
|
|
51
|
+
tests/test_storage.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
feedparser>=6.0.0
|
|
2
|
+
httpx>=0.28.0
|
|
3
|
+
click>=8.1.0
|
|
4
|
+
beautifulsoup4>=4.12.0
|
|
5
|
+
lxml>=6.0.0
|
|
6
|
+
rich>=13.0.0
|
|
7
|
+
PyGithub>=2.0.0
|
|
8
|
+
dynaconf>=3.2.13
|
|
9
|
+
trafilatura>=1.0.0
|
|
10
|
+
robotexclusionrulesparser>=1.7.1
|
|
11
|
+
platformdirs>=4.9.4
|
|
12
|
+
numpy<2,>=1.26.0
|
|
13
|
+
scikit-learn>=1.7.2
|
|
14
|
+
pyyaml>=6.0.3
|
|
15
|
+
uvloop>=0.22.0
|
|
16
|
+
nanoid>=2.0.0
|
|
17
|
+
scrapling>=0.4.0
|
|
18
|
+
msgspec>=0.20.0
|
|
19
|
+
|
|
20
|
+
[cloudflare]
|
|
21
|
+
scrapling>=0.4.0
|
|
22
|
+
playwright>=1.49.0
|
|
23
|
+
curl-cffi>=0.14.0
|
|
24
|
+
socksio>=1.0.0
|
|
25
|
+
browserforge>=1.2.0
|
|
26
|
+
|
|
27
|
+
[ml]
|
|
28
|
+
sentence-transformers>=3.0.0
|
|
29
|
+
torch>=2.0.0
|
|
30
|
+
safetensors>=0.4.3
|
|
31
|
+
transformers>=4.40.0
|
|
32
|
+
|
|
33
|
+
[test]
|
|
34
|
+
pytest>=9.0.2
|
|
35
|
+
pytest-asyncio>=1.0.0
|
|
36
|
+
pytest-cov>=7.0.0
|
|
37
|
+
pytest-mock>=3.15.0
|
|
38
|
+
pytest-click>=1.1.0
|
|
39
|
+
pytest-httpx>=0.36.0
|
|
40
|
+
pytest-xdist>=3.8.0
|
|
41
|
+
ruff>=0.6.0
|
|
42
|
+
pre-commit>=3.0.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
src
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "feedship"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "A personal information delivery system - collect, subscribe to, and organize information sources from the internet"
|
|
5
|
+
requires-python = ">=3.10"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"feedparser>=6.0.0",
|
|
8
|
+
"httpx>=0.28.0",
|
|
9
|
+
"click>=8.1.0",
|
|
10
|
+
"beautifulsoup4>=4.12.0",
|
|
11
|
+
"lxml>=6.0.0",
|
|
12
|
+
"rich>=13.0.0",
|
|
13
|
+
"PyGithub>=2.0.0",
|
|
14
|
+
"dynaconf>=3.2.13",
|
|
15
|
+
"trafilatura>=1.0.0",
|
|
16
|
+
"robotexclusionrulesparser>=1.7.1",
|
|
17
|
+
"platformdirs>=4.9.4",
|
|
18
|
+
"numpy>=1.26.0,<2",
|
|
19
|
+
"scikit-learn>=1.7.2",
|
|
20
|
+
"pyyaml>=6.0.3",
|
|
21
|
+
"uvloop>=0.22.0", # async event loop
|
|
22
|
+
"nanoid>=2.0.0", # URL-safe ID generation
|
|
23
|
+
"scrapling>=0.4.0", # CSS-selector HTML parsing
|
|
24
|
+
"msgspec>=0.20.0", # Required by scrapling browser engine
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
test = [
|
|
29
|
+
"pytest>=9.0.2",
|
|
30
|
+
"pytest-asyncio>=1.0.0",
|
|
31
|
+
"pytest-cov>=7.0.0",
|
|
32
|
+
"pytest-mock>=3.15.0",
|
|
33
|
+
"pytest-click>=1.1.0",
|
|
34
|
+
"pytest-httpx>=0.36.0",
|
|
35
|
+
"pytest-xdist>=3.8.0",
|
|
36
|
+
"ruff>=0.6.0",
|
|
37
|
+
"pre-commit>=3.0.0",
|
|
38
|
+
]
|
|
39
|
+
ml = [
|
|
40
|
+
"sentence-transformers>=3.0.0",
|
|
41
|
+
"torch>=2.0.0",
|
|
42
|
+
"safetensors>=0.4.3",
|
|
43
|
+
"transformers>=4.40.0",
|
|
44
|
+
]
|
|
45
|
+
cloudflare = [
|
|
46
|
+
"scrapling>=0.4.0",
|
|
47
|
+
"playwright>=1.49.0",
|
|
48
|
+
"curl-cffi>=0.14.0",
|
|
49
|
+
"socksio>=1.0.0",
|
|
50
|
+
"browserforge>=1.2.0",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[project.scripts]
|
|
54
|
+
feedship = "src.cli:cli"
|
|
55
|
+
|
|
56
|
+
[build-system]
|
|
57
|
+
requires = ["setuptools>=61.0"]
|
|
58
|
+
build-backend = "setuptools.build_meta"
|
|
59
|
+
|
|
60
|
+
[tool.setuptools.packages.find]
|
|
61
|
+
where = ["."]
|
|
62
|
+
include = ["src*"]
|
|
63
|
+
|
|
64
|
+
[tool.pytest.ini_options]
|
|
65
|
+
minversion = "9.0"
|
|
66
|
+
testpaths = ["tests"]
|
|
67
|
+
asyncio_mode = "auto"
|
|
68
|
+
addopts = [
|
|
69
|
+
"-v",
|
|
70
|
+
"--tb=short",
|
|
71
|
+
"--strict-markers",
|
|
72
|
+
]
|
|
73
|
+
markers = [
|
|
74
|
+
"asyncio: mark test as async",
|
|
75
|
+
"integration: integration test requiring full app",
|
|
76
|
+
"slow: tests that take significant time",
|
|
77
|
+
]
|
|
78
|
+
filterwarnings = [
|
|
79
|
+
"ignore::DeprecationWarning",
|
|
80
|
+
]
|
|
81
|
+
|
|
82
|
+
[tool.ruff]
|
|
83
|
+
target-version = "py310"
|
|
84
|
+
line-length = 88
|
|
85
|
+
|
|
86
|
+
[tool.ruff.lint]
|
|
87
|
+
select = [
|
|
88
|
+
"E", "W", # pycodestyle errors & warnings
|
|
89
|
+
"F", # Pyflakes
|
|
90
|
+
"I", # isort import sorting
|
|
91
|
+
"UP", # pyupgrade modernization
|
|
92
|
+
"B", # flake8-bugbear common traps
|
|
93
|
+
"C4", # flake8-comprehensions comprehension improvements
|
|
94
|
+
"SIM", # flake8-simplify simplify code
|
|
95
|
+
]
|
|
96
|
+
ignore = ["E501"] # line too long
|
|
97
|
+
|
|
98
|
+
[tool.ruff.lint.per-file-ignores]
|
|
99
|
+
"__init__.py" = ["F401", "E402"]
|
|
100
|
+
"src/cli/*.py" = ["E402"]
|
|
101
|
+
"tests/**/*.py" = ["E402", "F401"]
|
|
102
|
+
|
|
103
|
+
[tool.ruff.format]
|
|
104
|
+
quote-style = "double"
|
|
105
|
+
indent-style = "space"
|
|
106
|
+
docstring-code-format = true
|
feedship-1.0.0/setup.cfg
ADDED
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Application layer - use cases and orchestration."""
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Article operations for RSS reader.
|
|
2
|
+
|
|
3
|
+
Provides functions for listing and retrieving articles from the database.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
|
|
10
|
+
from src.storage import (
|
|
11
|
+
get_article as storage_get_article,
|
|
12
|
+
)
|
|
13
|
+
from src.storage import (
|
|
14
|
+
get_article_detail as storage_get_article_detail,
|
|
15
|
+
)
|
|
16
|
+
from src.storage import (
|
|
17
|
+
list_articles as storage_list_articles,
|
|
18
|
+
)
|
|
19
|
+
from src.storage import (
|
|
20
|
+
search_articles as storage_search_articles,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class ArticleListItem:
    """An article row enriched with its feed name and ranking signals.

    The first eight fields identify and describe the article; they have no
    defaults and must be supplied by the caller. The remaining fields carry
    per-article scoring signals and all have defaults, so a plain listing
    can leave them untouched.

    Attributes:
        id: Unique identifier for the article.
        feed_id: ID of the feed this article belongs to.
        feed_name: Name of the feed.
        title: Title of the article, if any.
        link: URL of the full article, if any.
        guid: Global unique identifier taken from the feed.
        pub_date: Publication date as a string, if any.
        description: Short description or summary, if any.
        vec_sim: Vector-similarity signal (default 0.0).
        bm25_score: BM25 signal (default 0.0).
        freshness: Time-decay freshness signal (default 0.0).
        source_weight: Weight of the originating feed (default 0.3).
        ce_score: Cross-Encoder signal (default 0.0).
        final_score: Combined ranking score (default 0.0).
        score: Generic score (default 1.0).
    """

    # --- identity / display -------------------------------------------------
    id: str
    feed_id: str
    feed_name: str
    title: str | None
    link: str | None
    guid: str
    pub_date: str | None
    description: str | None

    # --- ranking signals ----------------------------------------------------
    vec_sim: float = 0.0
    bm25_score: float = 0.0
    freshness: float = 0.0
    source_weight: float = 0.3
    ce_score: float = 0.0
    final_score: float = 0.0
    score: float = 1.0
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def list_articles(
    limit: int = 20,
    feed_id: str | None = None,
    since: str | None = None,
    until: str | None = None,
    on: list[str] | None = None,
) -> list[ArticleListItem]:
    """Return stored articles by delegating to the storage layer.

    This is a pass-through: every argument is forwarded by keyword to
    ``storage_list_articles`` and its result is returned unchanged. Ordering
    (by publication date, per the storage contract) and filtering happen
    there, not here.

    Args:
        limit: Maximum number of articles to return (default 20).
        feed_id: Restrict results to a single feed when given.
        since: Start date (inclusive), format YYYY-MM-DD.
        until: End date (inclusive), format YYYY-MM-DD.
        on: Specific dates to match.

    Returns:
        List of ArticleListItem objects.
    """
    date_filters = {"since": since, "until": until, "on": on}
    return storage_list_articles(limit=limit, feed_id=feed_id, **date_filters)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_article(article_id: str) -> ArticleListItem | None:
    """Look up one article by its ID via the storage layer.

    Args:
        article_id: The ID of the article to retrieve.

    Returns:
        The matching ArticleListItem, or None when no article is found.
    """
    article = storage_get_article(article_id)
    return article
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def get_article_detail(article_id: str) -> dict | None:
    """Fetch the full record for an article, including its content.

    The lookup itself is performed by ``storage_get_article_detail``; this
    function only forwards the ID and hands the result back.

    Args:
        article_id: The article ID. Per the storage contract this may be a
            truncated 8-character ID or the full 32-character ID.

    Returns:
        Dict with all article fields, or None if the article is not found.
    """
    detail = storage_get_article_detail(article_id)
    return detail
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def search_articles(
    query: str,
    limit: int = 20,
    feed_id: str | None = None,
    since: str | None = None,
    until: str | None = None,
    on: list[str] | None = None,
) -> list[ArticleListItem]:
    """Run a full-text (FTS5) search by delegating to the storage layer.

    All arguments are forwarded by keyword to ``storage_search_articles``;
    the query string is passed through as-is.

    Args:
        query: FTS5 query string (space-separated = AND, use quotes for phrases).
        limit: Maximum number of results (default 20).
        feed_id: Restrict results to a single feed when given.
        since: Start date (inclusive), format YYYY-MM-DD.
        until: End date (inclusive), format YYYY-MM-DD.
        on: Specific dates to match.

    Returns:
        List of ArticleListItem objects.
    """
    date_filters = {"since": since, "until": until, "on": on}
    return storage_search_articles(
        query=query, limit=limit, feed_id=feed_id, **date_filters
    )
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Unified score combination using Newton's cooling law for freshness.
|
|
2
|
+
|
|
3
|
+
This module provides combine_scores() which merges multiple scoring signals
|
|
4
|
+
into a final ranking score using weighted combination with time-decay freshness.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import math
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
|
|
12
|
+
from src.application.articles import ArticleListItem
|
|
13
|
+
from src.storage.vector import _pub_date_to_timestamp
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def combine_scores(
    candidates: list[ArticleListItem],
    alpha: float = 0.3,
    beta: float = 0.3,
    gamma: float = 0.2,
    delta: float = 0.2,
) -> list[ArticleListItem]:
    """Combine multiple scoring signals into final_score using weighted combination.

    Freshness follows an exponential (Newton's-cooling style) decay:
    ``freshness = exp(-days_ago / half_life_days)`` with ``half_life_days``
    fixed at 7. Note that with this formula 7 days is the e-folding time
    (freshness drops to 1/e, about 0.37, after one week), not a strict
    half-life. Freshness is always in the range (0, 1] for dated articles
    and 0.0 for articles without a usable publication date.

    Each candidate is updated in place (``freshness`` and ``final_score``)
    and the input list itself is sorted in place and returned.

    Args:
        candidates: List of ArticleListItem candidates to score.
        alpha: Weight for Cross-Encoder score (ce_score).
        beta: Weight for freshness (time decay).
        gamma: Weight for vector similarity (vec_sim).
        delta: Weight for BM25 score (bm25_score).

    Returns:
        The same list, sorted by final_score descending.
    """
    half_life_days = 7
    now = datetime.now(timezone.utc)

    for c in candidates:
        # A missing date, or one the helper cannot convert, means no freshness.
        timestamp = _pub_date_to_timestamp(c.pub_date) if c.pub_date else None
        if timestamp is None:
            c.freshness = 0.0
        else:
            pub_dt = datetime.fromtimestamp(timestamp, tz=timezone.utc)
            # Clamp to zero: a publication date even slightly in the future
            # (clock skew, scheduled posts) yields a negative ``.days`` and
            # would otherwise push freshness above 1 without bound.
            days_ago = max(0, (now - pub_dt).days)
            c.freshness = math.exp(-days_ago / half_life_days)

        # ce_score <= 0 means not reranked, treat as no contribution
        ce = c.ce_score if c.ce_score > 0 else 0.0

        # Final score = weighted combination of 4 signals
        c.final_score = (
            alpha * ce + beta * c.freshness + gamma * c.vec_sim + delta * c.bm25_score
        )

    candidates.sort(key=lambda x: x.final_score, reverse=True)
    return candidates
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Application configuration loaded from config.yaml via dynaconf."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from zoneinfo import ZoneInfo
|
|
5
|
+
|
|
6
|
+
from dynaconf import Dynaconf
|
|
7
|
+
|
|
8
|
+
_settings: Dynaconf | None = None
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _get_settings() -> Dynaconf:
    """Return the shared Dynaconf settings object, building it on first use.

    The instance is cached in the module-level ``_settings`` variable, so the
    configuration is constructed at most once per process. It reads
    ``config.yaml`` located one directory above this module's directory and
    uses ``RADAR`` as the environment-variable prefix.
    """
    global _settings
    if _settings is not None:
        return _settings

    config_file = Path(__file__).parent.parent / "config.yaml"
    _settings = Dynaconf(envvar_prefix="RADAR", settings_files=[config_file])
    return _settings
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_timezone() -> ZoneInfo:
    """Return the configured timezone as a ZoneInfo object.

    Reads the ``timezone`` setting and falls back to ``"Asia/Shanghai"`` when
    it is not configured. The name is handed to ``ZoneInfo`` unvalidated.
    """
    settings = _get_settings()
    return ZoneInfo(settings.get("timezone", "Asia/Shanghai"))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_default_feed_weight() -> float:
    """Return the default feed weight for semantic search ranking.

    Reads the ``feed.default.weight`` setting; 0.3 is used when it is absent.
    """
    settings = _get_settings()
    weight = settings.get("feed.default.weight", 0.3)
    return weight
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_bm25_factor() -> float:
    """Return the BM25 sigmoid normalization factor.

    Reads the ``bm25_factor`` setting; 0.5 is used when it is absent.
    """
    settings = _get_settings()
    factor = settings.get("bm25_factor", 0.5)
    return factor
|