recent-state-summarizer 0.0.1.tar.gz → 0.0.3.tar.gz

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: this version of recent-state-summarizer might be problematic.

Files changed (18)
  1. {recent-state-summarizer-0.0.1/recent_state_summarizer.egg-info → recent_state_summarizer-0.0.3}/PKG-INFO +17 -5
  2. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/pyproject.toml +5 -4
  3. recent_state_summarizer-0.0.3/recent_state_summarizer/__init__.py +1 -0
  4. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer/fetch.py +37 -9
  5. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer/summarize.py +5 -3
  6. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3/recent_state_summarizer.egg-info}/PKG-INFO +17 -5
  7. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/SOURCES.txt +2 -1
  8. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/requires.txt +2 -1
  9. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/top_level.txt +0 -1
  10. recent_state_summarizer-0.0.3/tests/test_fetch.py +150 -0
  11. recent-state-summarizer-0.0.1/recent_state_summarizer/__init__.py +0 -1
  12. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/LICENSE +0 -0
  13. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/README.md +0 -0
  14. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer/__main__.py +0 -0
  15. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/dependency_links.txt +0 -0
  16. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/entry_points.txt +0 -0
  17. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/setup.cfg +0 -0
  18. {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/setup.py +0 -0
{recent-state-summarizer-0.0.1/recent_state_summarizer.egg-info → recent_state_summarizer-0.0.3}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: recent-state-summarizer
- Version: 0.0.1
+ Version: 0.0.3
  Summary: Summarize a list of entry titles using LLM
  Author-email: nikkie <takuyafjp+develop@gmail.com>
  License: MIT
@@ -8,16 +8,28 @@ Classifier: Development Status :: 1 - Planning
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Programming Language :: Python
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Requires-Python: >=3.8
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Python: >=3.9
  Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: beautifulsoup4
+ Requires-Dist: openai<1
  Provides-Extra: testing
+ Requires-Dist: pytest; extra == "testing"
+ Requires-Dist: pytest_httpserver; extra == "testing"
  Provides-Extra: lint
+ Requires-Dist: flake8; extra == "lint"
+ Requires-Dist: black; extra == "lint"
+ Requires-Dist: isort; extra == "lint"
  Provides-Extra: dev
- License-File: LICENSE
+ Requires-Dist: wheel; extra == "dev"
+ Requires-Dist: build; extra == "dev"
+ Requires-Dist: twine; extra == "dev"
+ Dynamic: license-file

  # recent-state-summarizer

{recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/pyproject.toml
@@ -9,26 +9,27 @@ authors = [
  ]
  description = "Summarize a list of entry titles using LLM"
  readme = {file = "README.md", content-type = "text/markdown"}
- requires-python = ">=3.8"
+ requires-python = ">=3.9"
  license = {text = "MIT"}
  classifiers = [
      "Development Status :: 1 - Planning",
      "License :: OSI Approved :: MIT License",
      "Programming Language :: Python",
      "Programming Language :: Python :: 3",
-     "Programming Language :: Python :: 3.8",
      "Programming Language :: Python :: 3.9",
      "Programming Language :: Python :: 3.10",
      "Programming Language :: Python :: 3.11",
+     "Programming Language :: Python :: 3.12",
+     "Programming Language :: Python :: 3.13",
  ]
  dependencies = [
      "beautifulsoup4",
-     "openai",
+     "openai<1",
  ]
  dynamic = ["version"]

  [project.optional-dependencies]
- testing = ["pytest"]
+ testing = ["pytest", "pytest_httpserver"]
  lint = ["flake8", "black", "isort"]
  dev = ["wheel", "build", "twine"]

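Note on the openai<1 pin: summarize.py (below) calls the pre-1.0 module-level openai.ChatCompletion API, which the openai 1.x SDK removed in favor of a client object, so the upper bound keeps the published code running against a compatible SDK. A minimal sketch of the call style the pin preserves (the API key and model name are placeholders, not values from this package):

import openai  # requires openai<1, as pinned above

openai.api_key = "sk-..."  # placeholder; typically read from the environment
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",  # placeholder model name
    messages=[{"role": "user", "content": "Say hello"}],
    temperature=0.0,
)
# openai<1 responses support dict-style access
print(response["choices"][0]["message"]["content"])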
recent_state_summarizer-0.0.3/recent_state_summarizer/__init__.py
@@ -0,0 +1 @@
+ __version__ = "0.0.3"
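With dynamic = ["version"] in pyproject.toml, this new package-level __version__ is presumably the attribute setuptools reads the version from (the setup.py/setup.cfg wiring is unchanged and not shown in this diff, so that is an assumption). Once installed, the same string is available through the standard metadata API; a small sketch:

# Querying the installed version via the stdlib:
from importlib.metadata import version

print(version("recent-state-summarizer"))  # expected: "0.0.3"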
{recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer/fetch.py
@@ -1,7 +1,9 @@
  from __future__ import annotations

+ import json
  from collections.abc import Generator, Iterable
  from pathlib import Path
+ from typing import TypedDict
  from urllib.request import urlopen

  from bs4 import BeautifulSoup
@@ -9,37 +11,57 @@ from bs4 import BeautifulSoup
  PARSE_HATENABLOG_KWARGS = {"name": "a", "attrs": {"class": "entry-title-link"}}


- def _main(url: str, save_path: str | Path) -> None:
-     contents = fetch_titles_as_bullet_list(url)
-     _save(save_path, contents)
+ class TitleTag(TypedDict):
+     title: str
+     url: str


- def fetch_titles_as_bullet_list(url: str) -> str:
-     return _as_bullet_list(_fetch_titles(url))
+ def _main(url: str, save_path: str | Path, save_as_json: bool) -> None:
+     title_tags = _fetch_titles(url)
+     if not save_as_json:
+         contents = _as_bullet_list(
+             title_tag["title"] for title_tag in title_tags
+         )
+     else:
+         contents = _as_json(title_tags)
+     _save(save_path, contents)


- def _fetch_titles(url: str) -> Generator[str, None, None]:
+ def _fetch_titles(url: str) -> Generator[TitleTag, None, None]:
      raw_html = _fetch(url)
      yield from _parse_titles(raw_html)

+     soup = BeautifulSoup(raw_html, "html.parser")
+     next_link = soup.find("a", class_="test-pager-next")
+     if next_link and "href" in next_link.attrs:
+         next_url = next_link["href"]
+         print(f"Next page found, fetching... {next_url}")
+         yield from _fetch_titles(next_url)
+

  def _fetch(url: str) -> str:
      with urlopen(url) as res:
          return res.read()


- def _parse_titles(raw_html: str) -> Generator[str, None, None]:
+ def _parse_titles(raw_html: str) -> Generator[TitleTag, None, None]:
      soup = BeautifulSoup(raw_html, "html.parser")
      body = soup.body
      title_tags = body.find_all(**PARSE_HATENABLOG_KWARGS)
      for title_tag in title_tags:
-         yield title_tag.text
+         yield {"title": title_tag.text, "url": title_tag["href"]}


  def _as_bullet_list(titles: Iterable[str]) -> str:
      return "\n".join(f"- {title}" for title in titles)


+ def _as_json(title_tags: Iterable[TitleTag]) -> str:
+     return "\n".join(
+         json.dumps(title_tag, ensure_ascii=False) for title_tag in title_tags
+     )
+
+
  def _save(path: str | Path, contents: str) -> None:
      with open(path, "w", encoding="utf8", newline="") as f:
          f.write(contents)
@@ -66,6 +88,12 @@ if __name__ == "__main__":
      )
      parser.add_argument("url", help="URL of archive page")
      parser.add_argument("save_path", help="Local file path")
+     parser.add_argument(
+         "--as-json",
+         action="store_true",
+         default=False,
+         help="Save as JSON format instead of bullet list",
+     )
      args = parser.parse_args()

-     _main(args.url, args.save_path)
+     _main(args.url, args.save_path, args.as_json)
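The pagination change rewrites _fetch_titles as a recursive generator: it yields the current page's entries, then, if a Hatena Blog "next page" pager link (class test-pager-next) is present, follows it with yield from, so callers see one flat stream of TitleTag dicts regardless of page count. A self-contained sketch of that pattern, with a made-up in-memory "site" standing in for the HTTP fetch:

from collections.abc import Generator

# Hypothetical two-page archive: url -> (titles on that page, next url or None)
PAGES = {
    "/archive?page=1": (["Title 3", "Title 2"], "/archive?page=2"),
    "/archive?page=2": (["Title 1"], None),
}


def fetch_all_titles(url: str) -> Generator[str, None, None]:
    titles, next_url = PAGES[url]
    yield from titles            # entries on the current page, in order
    if next_url is not None:     # pager link found: recurse into the next page
        yield from fetch_all_titles(next_url)


assert list(fetch_all_titles("/archive?page=1")) == ["Title 3", "Title 2", "Title 1"]

The output side changes in step: _parse_titles now yields title/URL pairs, and the new --as-json flag writes them as one JSON object per line via _as_json instead of a bullet list.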
{recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer/summarize.py
@@ -25,17 +25,19 @@ def _build_prompts(titles: str):

  def _build_summarize_prompt_text(titles_as_list: str) -> str:
      return f"""\
- 以下は同一人物が最近書いたブログ記事のタイトルの一覧です。
+ 3つのバッククォートで囲まれた以下は、同一人物が最近書いたブログ記事のタイトルの一覧です。
  それを読み、この人物が最近何をやっているかを詳しく教えてください。
  応答は文ごとに改行して区切ってください。

+ ```
  {titles_as_list}
+ ```
  """


- def _complete_chat(prompts):
+ def _complete_chat(prompts, temperature=0.0):
      return openai.ChatCompletion.create(
-         model=MODEL, messages=prompts, temperature=0.8
+         model=MODEL, messages=prompts, temperature=temperature
      )


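For non-Japanese readers, the revised prompt says roughly: "The following, enclosed in three backquotes, is a list of titles of blog posts recently written by the same person. Read it and tell me in detail what this person has been doing recently. Separate your response with a line break after each sentence." Two deliberate changes here: the titles are now fenced so the model can separate instructions from input, and the default temperature drops from 0.8 to 0.0 for more reproducible summaries. A sketch of what the new template renders to, using placeholder titles:

titles_as_list = "- Title 2\n- Title 1"  # placeholder input
prompt = f"""\
3つのバッククォートで囲まれた以下は、同一人物が最近書いたブログ記事のタイトルの一覧です。
それを読み、この人物が最近何をやっているかを詳しく教えてください。
応答は文ごとに改行して区切ってください。

```
{titles_as_list}
```
"""
print(prompt)  # the title list arrives inside a clearly delimited fence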
{recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3/recent_state_summarizer.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: recent-state-summarizer
- Version: 0.0.1
+ Version: 0.0.3
  Summary: Summarize a list of entry titles using LLM
  Author-email: nikkie <takuyafjp+develop@gmail.com>
  License: MIT
@@ -8,16 +8,28 @@ Classifier: Development Status :: 1 - Planning
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Programming Language :: Python
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
- Requires-Python: >=3.8
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Python: >=3.9
  Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: beautifulsoup4
+ Requires-Dist: openai<1
  Provides-Extra: testing
+ Requires-Dist: pytest; extra == "testing"
+ Requires-Dist: pytest_httpserver; extra == "testing"
  Provides-Extra: lint
+ Requires-Dist: flake8; extra == "lint"
+ Requires-Dist: black; extra == "lint"
+ Requires-Dist: isort; extra == "lint"
  Provides-Extra: dev
- License-File: LICENSE
+ Requires-Dist: wheel; extra == "dev"
+ Requires-Dist: build; extra == "dev"
+ Requires-Dist: twine; extra == "dev"
+ Dynamic: license-file

  # recent-state-summarizer

{recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/SOURCES.txt
@@ -11,4 +11,5 @@ recent_state_summarizer.egg-info/SOURCES.txt
  recent_state_summarizer.egg-info/dependency_links.txt
  recent_state_summarizer.egg-info/entry_points.txt
  recent_state_summarizer.egg-info/requires.txt
- recent_state_summarizer.egg-info/top_level.txt
+ recent_state_summarizer.egg-info/top_level.txt
+ tests/test_fetch.py
{recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.3}/recent_state_summarizer.egg-info/requires.txt
@@ -1,5 +1,5 @@
  beautifulsoup4
- openai
+ openai<1

  [dev]
  wheel
@@ -13,3 +13,4 @@ isort

  [testing]
  pytest
+ pytest_httpserver
recent_state_summarizer-0.0.3/tests/test_fetch.py
@@ -0,0 +1,150 @@
+ import pytest
+
+ from recent_state_summarizer.fetch import _main
+
+
+ @pytest.fixture
+ def blog_server(httpserver):
+     httpserver.expect_request("/archive/2025/06").respond_with_data(
+         f"""\
+ <!DOCTYPE html>
+ <html>
+ <head><title>Archive</title></head>
+ <body>
+ <h1>Archive</h1>
+ <div id="content">
+   <div id="content-inner">
+     <div id="wrapper">
+       <div id="main">
+         <div id="main-inner">
+           <div class="archive-entries">
+             <section class="archive-entry">
+               <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/03">Title 3</a>
+             </section>
+             <section class="archive-entry">
+               <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/02">Title 2</a>
+             </section>
+             <section class="archive-entry">
+               <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/01">Title 1</a>
+             </section>
+           </div>
+         </div>
+       </div>
+     </div>
+   </div>
+ </div>
+ </body>
+ </html>"""
+     )
+     return httpserver
+
+
+ def test_fetch_as_bullet_list(blog_server, tmp_path):
+     _main(
+         blog_server.url_for("/archive/2025/06"),
+         tmp_path / "titles.txt",
+         save_as_json=False,
+     )
+
+     expected = """\
+ - Title 3
+ - Title 2
+ - Title 1"""
+     assert (tmp_path / "titles.txt").read_text(encoding="utf8") == expected
+
+
+ def test_fetch_as_json(blog_server, tmp_path):
+     _main(
+         blog_server.url_for("/archive/2025/06"),
+         tmp_path / "titles.json",
+         save_as_json=True,
+     )
+
+     expected = f"""\
+ {{"title": "Title 3", "url": "{blog_server.url_for('/archive/2025/06/03')}"}}
+ {{"title": "Title 2", "url": "{blog_server.url_for('/archive/2025/06/02')}"}}
+ {{"title": "Title 1", "url": "{blog_server.url_for('/archive/2025/06/01')}"}}"""
+     assert (tmp_path / "titles.json").read_text(encoding="utf8") == expected
+
+
+ @pytest.fixture
+ def multi_page_blog_server(httpserver):
+     httpserver.expect_request(
+         "/archive/2025/07", query_string="page=2"
+     ).respond_with_data(
+         f"""\
+ <!DOCTYPE html>
+ <html>
+ <head><title>Archive (Page 2)</title></head>
+ <body>
+ <h1>Archive</h1>
+ <div id="content">
+   <div id="content-inner">
+     <div id="wrapper">
+       <div id="main">
+         <div id="main-inner">
+           <div class="archive-entries">
+             <section class="archive-entry">
+               <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/07/01">Title 1</a>
+             </section>
+           </div>
+           <div class="pager">
+             <span class="pager-prev">
+               <a href="{httpserver.url_for('/')}archive/2025/07" class="test-pager-prev" rel="prev">前のページ</a>
+             </span>
+           </div>
+         </div>
+       </div>
+     </div>
+   </div>
+ </div>
+ </body>
+ </html>"""
+     )
+     httpserver.expect_request("/archive/2025/07").respond_with_data(
+         f"""\
+ <!DOCTYPE html>
+ <html>
+ <head><title>Archive</title></head>
+ <body>
+ <h1>Archive</h1>
+ <div id="content">
+   <div id="content-inner">
+     <div id="wrapper">
+       <div id="main">
+         <div id="main-inner">
+           <div class="archive-entries">
+             <section class="archive-entry">
+               <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/07/03">Title 3</a>
+             </section>
+             <section class="archive-entry">
+               <a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/07/02">Title 2</a>
+             </section>
+           </div>
+         </div>
+         <div class="pager">
+           <span class="pager-next">
+             <a href="{httpserver.url_for('/')}archive/2025/07?page=2" class="test-pager-next" rel="next">次のページ</a>
+           </span>
+         </div>
+       </div>
+     </div>
+   </div>
+ </div>
+ </body>
+ </html>"""
+     )
+     return httpserver
+
+
+ def test_fetch_multiple_archive_page(multi_page_blog_server, tmp_path):
+     _main(
+         multi_page_blog_server.url_for("/archive/2025/07"),
+         tmp_path / "titles.txt",
+         save_as_json=False,
+     )
+
+     expected = """- Title 3
+ - Title 2
+ - Title 1"""
+     assert (tmp_path / "titles.txt").read_text(encoding="utf8") == expected
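These tests lean on pytest_httpserver (added to the testing extra above): its httpserver fixture runs a real local HTTP server, expect_request(...).respond_with_data(...) registers canned responses, and url_for() returns the corresponding localhost URL, so _main is exercised through genuine urlopen calls without touching the network. A minimal sketch of the pattern in isolation, with a hypothetical path and body:

from urllib.request import urlopen


def test_served_body_roundtrip(httpserver):
    # Register a canned response, then fetch it over real local HTTP.
    httpserver.expect_request("/hello").respond_with_data("hi")
    with urlopen(httpserver.url_for("/hello")) as res:
        assert res.read() == b"hi"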
recent-state-summarizer-0.0.1/recent_state_summarizer/__init__.py
@@ -1 +0,0 @@
- __version__ = "0.0.1"