recent-state-summarizer 0.0.2 (tar.gz) → 0.0.3 (tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recent-state-summarizer might be problematic. Click here for more details.
- {recent_state_summarizer-0.0.2/recent_state_summarizer.egg-info → recent_state_summarizer-0.0.3}/PKG-INFO +1 -1
- recent_state_summarizer-0.0.3/recent_state_summarizer/__init__.py +1 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer/fetch.py +7 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3/recent_state_summarizer.egg-info}/PKG-INFO +1 -1
- recent_state_summarizer-0.0.3/tests/test_fetch.py +150 -0
- recent_state_summarizer-0.0.2/recent_state_summarizer/__init__.py +0 -1
- recent_state_summarizer-0.0.2/tests/test_fetch.py +0 -64
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/LICENSE +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/README.md +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/pyproject.toml +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer/__main__.py +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer/summarize.py +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/SOURCES.txt +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/dependency_links.txt +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/entry_points.txt +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/requires.txt +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/top_level.txt +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/setup.cfg +0 -0
- {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/setup.py +0 -0
|
recent_state_summarizer-0.0.3/recent_state_summarizer/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.3"
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer/fetch.py
RENAMED
|
@@ -31,6 +31,13 @@ def _fetch_titles(url: str) -> Generator[TitleTag, None, None]:
|
|
|
31
31
|
raw_html = _fetch(url)
|
|
32
32
|
yield from _parse_titles(raw_html)
|
|
33
33
|
|
|
34
|
+
soup = BeautifulSoup(raw_html, "html.parser")
|
|
35
|
+
next_link = soup.find("a", class_="test-pager-next")
|
|
36
|
+
if next_link and "href" in next_link.attrs:
|
|
37
|
+
next_url = next_link["href"]
|
|
38
|
+
print(f"Next page found, fetching... {next_url}")
|
|
39
|
+
yield from _fetch_titles(next_url)
|
|
40
|
+
|
|
34
41
|
|
|
35
42
|
def _fetch(url: str) -> str:
|
|
36
43
|
with urlopen(url) as res:
|
|
recent_state_summarizer-0.0.3/tests/test_fetch.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from recent_state_summarizer.fetch import _main
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@pytest.fixture
|
|
7
|
+
def blog_server(httpserver):
|
|
8
|
+
httpserver.expect_request("/archive/2025/06").respond_with_data(
|
|
9
|
+
f"""\
|
|
10
|
+
<!DOCTYPE html>
|
|
11
|
+
<html>
|
|
12
|
+
<head><title>Archive</title></head>
|
|
13
|
+
<body>
|
|
14
|
+
<h1>Archive</h1>
|
|
15
|
+
<div id="content">
|
|
16
|
+
<div id="content-inner">
|
|
17
|
+
<div id="wrapper">
|
|
18
|
+
<div id="main">
|
|
19
|
+
<div id="main-inner">
|
|
20
|
+
<div class="archive-entries">
|
|
21
|
+
<section class="archive-entry">
|
|
22
|
+
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/03">Title 3</a>
|
|
23
|
+
</section>
|
|
24
|
+
<section class="archive-entry">
|
|
25
|
+
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/02">Title 2</a>
|
|
26
|
+
</section>
|
|
27
|
+
<section class="archive-entry">
|
|
28
|
+
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/01">Title 1</a>
|
|
29
|
+
</section>
|
|
30
|
+
</div>
|
|
31
|
+
</div>
|
|
32
|
+
</div>
|
|
33
|
+
</div>
|
|
34
|
+
</div>
|
|
35
|
+
</div>
|
|
36
|
+
</body>
|
|
37
|
+
</html>"""
|
|
38
|
+
)
|
|
39
|
+
return httpserver
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_fetch_as_bullet_list(blog_server, tmp_path):
|
|
43
|
+
_main(
|
|
44
|
+
blog_server.url_for("/archive/2025/06"),
|
|
45
|
+
tmp_path / "titles.txt",
|
|
46
|
+
save_as_json=False,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
expected = """\
|
|
50
|
+
- Title 3
|
|
51
|
+
- Title 2
|
|
52
|
+
- Title 1"""
|
|
53
|
+
assert (tmp_path / "titles.txt").read_text(encoding="utf8") == expected
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_fetch_as_json(blog_server, tmp_path):
|
|
57
|
+
_main(
|
|
58
|
+
blog_server.url_for("/archive/2025/06"),
|
|
59
|
+
tmp_path / "titles.json",
|
|
60
|
+
save_as_json=True,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
expected = f"""\
|
|
64
|
+
{{"title": "Title 3", "url": "{blog_server.url_for('/archive/2025/06/03')}"}}
|
|
65
|
+
{{"title": "Title 2", "url": "{blog_server.url_for('/archive/2025/06/02')}"}}
|
|
66
|
+
{{"title": "Title 1", "url": "{blog_server.url_for('/archive/2025/06/01')}"}}"""
|
|
67
|
+
assert (tmp_path / "titles.json").read_text(encoding="utf8") == expected
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@pytest.fixture
|
|
71
|
+
def multi_page_blog_server(httpserver):
|
|
72
|
+
httpserver.expect_request(
|
|
73
|
+
"/archive/2025/07", query_string="page=2"
|
|
74
|
+
).respond_with_data(
|
|
75
|
+
f"""\
|
|
76
|
+
<!DOCTYPE html>
|
|
77
|
+
<html>
|
|
78
|
+
<head><title>Archive (Page 2)</title></head>
|
|
79
|
+
<body>
|
|
80
|
+
<h1>Archive</h1>
|
|
81
|
+
<div id="content">
|
|
82
|
+
<div id="content-inner">
|
|
83
|
+
<div id="wrapper">
|
|
84
|
+
<div id="main">
|
|
85
|
+
<div id="main-inner">
|
|
86
|
+
<div class="archive-entries">
|
|
87
|
+
<section class="archive-entry">
|
|
88
|
+
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/07/01">Title 1</a>
|
|
89
|
+
</section>
|
|
90
|
+
</div>
|
|
91
|
+
<div class="pager">
|
|
92
|
+
<span class="pager-prev">
|
|
93
|
+
<a href="{httpserver.url_for('/')}archive/2025/07" class="test-pager-prev" rel="prev">前のページ</a>
|
|
94
|
+
</span>
|
|
95
|
+
</div>
|
|
96
|
+
</div>
|
|
97
|
+
</div>
|
|
98
|
+
</div>
|
|
99
|
+
</div>
|
|
100
|
+
</div>
|
|
101
|
+
</body>
|
|
102
|
+
</html>"""
|
|
103
|
+
)
|
|
104
|
+
httpserver.expect_request("/archive/2025/07").respond_with_data(
|
|
105
|
+
f"""\
|
|
106
|
+
<!DOCTYPE html>
|
|
107
|
+
<html>
|
|
108
|
+
<head><title>Archive</title></head>
|
|
109
|
+
<body>
|
|
110
|
+
<h1>Archive</h1>
|
|
111
|
+
<div id="content">
|
|
112
|
+
<div id="content-inner">
|
|
113
|
+
<div id="wrapper">
|
|
114
|
+
<div id="main">
|
|
115
|
+
<div id="main-inner">
|
|
116
|
+
<div class="archive-entries">
|
|
117
|
+
<section class="archive-entry">
|
|
118
|
+
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/07/03">Title 3</a>
|
|
119
|
+
</section>
|
|
120
|
+
<section class="archive-entry">
|
|
121
|
+
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/07/02">Title 2</a>
|
|
122
|
+
</section>
|
|
123
|
+
</div>
|
|
124
|
+
</div>
|
|
125
|
+
<div class="pager">
|
|
126
|
+
<span class="pager-next">
|
|
127
|
+
<a href="{httpserver.url_for('/')}archive/2025/07?page=2" class="test-pager-next" rel="next">次のページ</a>
|
|
128
|
+
</span>
|
|
129
|
+
</div>
|
|
130
|
+
</div>
|
|
131
|
+
</div>
|
|
132
|
+
</div>
|
|
133
|
+
</div>
|
|
134
|
+
</body>
|
|
135
|
+
</html>"""
|
|
136
|
+
)
|
|
137
|
+
return httpserver
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_fetch_multiple_archive_page(multi_page_blog_server, tmp_path):
|
|
141
|
+
_main(
|
|
142
|
+
multi_page_blog_server.url_for("/archive/2025/07"),
|
|
143
|
+
tmp_path / "titles.txt",
|
|
144
|
+
save_as_json=False,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
expected = """- Title 3
|
|
148
|
+
- Title 2
|
|
149
|
+
- Title 1"""
|
|
150
|
+
assert (tmp_path / "titles.txt").read_text(encoding="utf8") == expected
|
|
recent_state_summarizer-0.0.2/recent_state_summarizer/__init__.py
REMOVED
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.2"
|
|
recent_state_summarizer-0.0.2/tests/test_fetch.py
REMOVED
@@ -1,64 +0,0 @@
|
|
|
1
|
-
from textwrap import dedent
|
|
2
|
-
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
from recent_state_summarizer.fetch import _main
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
@pytest.fixture
|
|
9
|
-
def blog_server(httpserver):
|
|
10
|
-
httpserver.expect_request("/archive/2025/06").respond_with_data(
|
|
11
|
-
dedent(
|
|
12
|
-
f"""
|
|
13
|
-
<!DOCTYPE html>
|
|
14
|
-
<html>
|
|
15
|
-
<head><title>Archive</title></head>
|
|
16
|
-
<body>
|
|
17
|
-
<h1>Archive</h1>
|
|
18
|
-
<div id="content">
|
|
19
|
-
<div class="archive-entries">
|
|
20
|
-
<section class="archive-entry">
|
|
21
|
-
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/03">Title 3</a>
|
|
22
|
-
</section>
|
|
23
|
-
<section class="archive-entry">
|
|
24
|
-
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/02">Title 2</a>
|
|
25
|
-
</section>
|
|
26
|
-
<section class="archive-entry">
|
|
27
|
-
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/01">Title 1</a>
|
|
28
|
-
</section>
|
|
29
|
-
</div>
|
|
30
|
-
</div>
|
|
31
|
-
</body>
|
|
32
|
-
</html>
|
|
33
|
-
"""
|
|
34
|
-
)
|
|
35
|
-
)
|
|
36
|
-
return httpserver
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def test_fetch_as_bullet_list(blog_server, tmp_path):
|
|
40
|
-
_main(
|
|
41
|
-
blog_server.url_for("/archive/2025/06"),
|
|
42
|
-
tmp_path / "titles.txt",
|
|
43
|
-
save_as_json=False,
|
|
44
|
-
)
|
|
45
|
-
|
|
46
|
-
expected = """\
|
|
47
|
-
- Title 3
|
|
48
|
-
- Title 2
|
|
49
|
-
- Title 1"""
|
|
50
|
-
assert (tmp_path / "titles.txt").read_text(encoding="utf8") == expected
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def test_fetch_as_json(blog_server, tmp_path):
|
|
54
|
-
_main(
|
|
55
|
-
blog_server.url_for("/archive/2025/06"),
|
|
56
|
-
tmp_path / "titles.json",
|
|
57
|
-
save_as_json=True,
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
expected = f"""\
|
|
61
|
-
{{"title": "Title 3", "url": "{blog_server.url_for('/archive/2025/06/03')}"}}
|
|
62
|
-
{{"title": "Title 2", "url": "{blog_server.url_for('/archive/2025/06/02')}"}}
|
|
63
|
-
{{"title": "Title 1", "url": "{blog_server.url_for('/archive/2025/06/01')}"}}"""
|
|
64
|
-
assert (tmp_path / "titles.json").read_text(encoding="utf8") == expected
|
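{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/LICENSE
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/README.md
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/pyproject.toml
RENAMED
|
File without changes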
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer/__main__.py
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer/summarize.py
RENAMED
|
File without changes
|
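{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/requires.txt
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/top_level.txt
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/setup.cfg
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/setup.py
RENAMED
|
File without changes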
|