recent-state-summarizer 0.0.2__tar.gz → 0.0.3__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recent-state-summarizer might be problematic. Click here for more details.

Files changed (19)
  1. {recent_state_summarizer-0.0.2/recent_state_summarizer.egg-info → recent_state_summarizer-0.0.3}/PKG-INFO +1 -1
  2. recent_state_summarizer-0.0.3/recent_state_summarizer/__init__.py +1 -0
  3. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer/fetch.py +7 -0
  4. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3/recent_state_summarizer.egg-info}/PKG-INFO +1 -1
  5. recent_state_summarizer-0.0.3/tests/test_fetch.py +150 -0
  6. recent_state_summarizer-0.0.2/recent_state_summarizer/__init__.py +0 -1
  7. recent_state_summarizer-0.0.2/tests/test_fetch.py +0 -64
  8. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/LICENSE +0 -0
  9. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/README.md +0 -0
  10. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/pyproject.toml +0 -0
  11. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer/__main__.py +0 -0
  12. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer/summarize.py +0 -0
  13. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/SOURCES.txt +0 -0
  14. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/dependency_links.txt +0 -0
  15. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/entry_points.txt +0 -0
  16. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/requires.txt +0 -0
  17. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/top_level.txt +0 -0
  18. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/setup.cfg +0 -0
  19. {recent_state_summarizer-0.0.2 → recent_state_summarizer-0.0.3}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: recent-state-summarizer
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: Summarize a list of entry titles using LLM
5
5
  Author-email: nikkie <takuyafjp+develop@gmail.com>
6
6
  License: MIT
@@ -0,0 +1 @@
1
+ __version__ = "0.0.3"
@@ -31,6 +31,13 @@ def _fetch_titles(url: str) -> Generator[TitleTag, None, None]:
31
31
  raw_html = _fetch(url)
32
32
  yield from _parse_titles(raw_html)
33
33
 
34
+ soup = BeautifulSoup(raw_html, "html.parser")
35
+ next_link = soup.find("a", class_="test-pager-next")
36
+ if next_link and "href" in next_link.attrs:
37
+ next_url = next_link["href"]
38
+ print(f"Next page found, fetching... {next_url}")
39
+ yield from _fetch_titles(next_url)
40
+
34
41
 
35
42
  def _fetch(url: str) -> str:
36
43
  with urlopen(url) as res:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: recent-state-summarizer
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: Summarize a list of entry titles using LLM
5
5
  Author-email: nikkie <takuyafjp+develop@gmail.com>
6
6
  License: MIT
@@ -0,0 +1,150 @@
1
+ import pytest
2
+
3
+ from recent_state_summarizer.fetch import _main
4
+
5
+
6
+ @pytest.fixture
7
+ def blog_server(httpserver):
8
+ httpserver.expect_request("/archive/2025/06").respond_with_data(
9
+ f"""\
10
+ <!DOCTYPE html>
11
+ <html>
12
+ <head><title>Archive</title></head>
13
+ <body>
14
+ <h1>Archive</h1>
15
+ <div id="content">
16
+ <div id="content-inner">
17
+ <div id="wrapper">
18
+ <div id="main">
19
+ <div id="main-inner">
20
+ <div class="archive-entries">
21
+ <section class="archive-entry">
22
+ <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/03">Title 3</a>
23
+ </section>
24
+ <section class="archive-entry">
25
+ <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/02">Title 2</a>
26
+ </section>
27
+ <section class="archive-entry">
28
+ <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/01">Title 1</a>
29
+ </section>
30
+ </div>
31
+ </div>
32
+ </div>
33
+ </div>
34
+ </div>
35
+ </div>
36
+ </body>
37
+ </html>"""
38
+ )
39
+ return httpserver
40
+
41
+
42
+ def test_fetch_as_bullet_list(blog_server, tmp_path):
43
+ _main(
44
+ blog_server.url_for("/archive/2025/06"),
45
+ tmp_path / "titles.txt",
46
+ save_as_json=False,
47
+ )
48
+
49
+ expected = """\
50
+ - Title 3
51
+ - Title 2
52
+ - Title 1"""
53
+ assert (tmp_path / "titles.txt").read_text(encoding="utf8") == expected
54
+
55
+
56
+ def test_fetch_as_json(blog_server, tmp_path):
57
+ _main(
58
+ blog_server.url_for("/archive/2025/06"),
59
+ tmp_path / "titles.json",
60
+ save_as_json=True,
61
+ )
62
+
63
+ expected = f"""\
64
+ {{"title": "Title 3", "url": "{blog_server.url_for('/archive/2025/06/03')}"}}
65
+ {{"title": "Title 2", "url": "{blog_server.url_for('/archive/2025/06/02')}"}}
66
+ {{"title": "Title 1", "url": "{blog_server.url_for('/archive/2025/06/01')}"}}"""
67
+ assert (tmp_path / "titles.json").read_text(encoding="utf8") == expected
68
+
69
+
70
+ @pytest.fixture
71
+ def multi_page_blog_server(httpserver):
72
+ httpserver.expect_request(
73
+ "/archive/2025/07", query_string="page=2"
74
+ ).respond_with_data(
75
+ f"""\
76
+ <!DOCTYPE html>
77
+ <html>
78
+ <head><title>Archive (Page 2)</title></head>
79
+ <body>
80
+ <h1>Archive</h1>
81
+ <div id="content">
82
+ <div id="content-inner">
83
+ <div id="wrapper">
84
+ <div id="main">
85
+ <div id="main-inner">
86
+ <div class="archive-entries">
87
+ <section class="archive-entry">
88
+ <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/07/01">Title 1</a>
89
+ </section>
90
+ </div>
91
+ <div class="pager">
92
+ <span class="pager-prev">
93
+ <a href="{httpserver.url_for('/')}archive/2025/07" class="test-pager-prev" rel="prev">前のページ</a>
94
+ </span>
95
+ </div>
96
+ </div>
97
+ </div>
98
+ </div>
99
+ </div>
100
+ </div>
101
+ </body>
102
+ </html>"""
103
+ )
104
+ httpserver.expect_request("/archive/2025/07").respond_with_data(
105
+ f"""\
106
+ <!DOCTYPE html>
107
+ <html>
108
+ <head><title>Archive</title></head>
109
+ <body>
110
+ <h1>Archive</h1>
111
+ <div id="content">
112
+ <div id="content-inner">
113
+ <div id="wrapper">
114
+ <div id="main">
115
+ <div id="main-inner">
116
+ <div class="archive-entries">
117
+ <section class="archive-entry">
118
+ <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/07/03">Title 3</a>
119
+ </section>
120
+ <section class="archive-entry">
121
+ <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/07/02">Title 2</a>
122
+ </section>
123
+ </div>
124
+ </div>
125
+ <div class="pager">
126
+ <span class="pager-next">
127
+ <a href="{httpserver.url_for('/')}archive/2025/07?page=2" class="test-pager-next" rel="next">次のページ</a>
128
+ </span>
129
+ </div>
130
+ </div>
131
+ </div>
132
+ </div>
133
+ </div>
134
+ </body>
135
+ </html>"""
136
+ )
137
+ return httpserver
138
+
139
+
140
+ def test_fetch_multiple_archive_page(multi_page_blog_server, tmp_path):
141
+ _main(
142
+ multi_page_blog_server.url_for("/archive/2025/07"),
143
+ tmp_path / "titles.txt",
144
+ save_as_json=False,
145
+ )
146
+
147
+ expected = """- Title 3
148
+ - Title 2
149
+ - Title 1"""
150
+ assert (tmp_path / "titles.txt").read_text(encoding="utf8") == expected
@@ -1 +0,0 @@
1
- __version__ = "0.0.2"
@@ -1,64 +0,0 @@
1
- from textwrap import dedent
2
-
3
- import pytest
4
-
5
- from recent_state_summarizer.fetch import _main
6
-
7
-
8
- @pytest.fixture
9
- def blog_server(httpserver):
10
- httpserver.expect_request("/archive/2025/06").respond_with_data(
11
- dedent(
12
- f"""
13
- <!DOCTYPE html>
14
- <html>
15
- <head><title>Archive</title></head>
16
- <body>
17
- <h1>Archive</h1>
18
- <div id="content">
19
- <div class="archive-entries">
20
- <section class="archive-entry">
21
- <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/03">Title 3</a>
22
- </section>
23
- <section class="archive-entry">
24
- <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/02">Title 2</a>
25
- </section>
26
- <section class="archive-entry">
27
- <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/01">Title 1</a>
28
- </section>
29
- </div>
30
- </div>
31
- </body>
32
- </html>
33
- """
34
- )
35
- )
36
- return httpserver
37
-
38
-
39
- def test_fetch_as_bullet_list(blog_server, tmp_path):
40
- _main(
41
- blog_server.url_for("/archive/2025/06"),
42
- tmp_path / "titles.txt",
43
- save_as_json=False,
44
- )
45
-
46
- expected = """\
47
- - Title 3
48
- - Title 2
49
- - Title 1"""
50
- assert (tmp_path / "titles.txt").read_text(encoding="utf8") == expected
51
-
52
-
53
- def test_fetch_as_json(blog_server, tmp_path):
54
- _main(
55
- blog_server.url_for("/archive/2025/06"),
56
- tmp_path / "titles.json",
57
- save_as_json=True,
58
- )
59
-
60
- expected = f"""\
61
- {{"title": "Title 3", "url": "{blog_server.url_for('/archive/2025/06/03')}"}}
62
- {{"title": "Title 2", "url": "{blog_server.url_for('/archive/2025/06/02')}"}}
63
- {{"title": "Title 1", "url": "{blog_server.url_for('/archive/2025/06/01')}"}}"""
64
- assert (tmp_path / "titles.json").read_text(encoding="utf8") == expected