recent-state-summarizer 0.0.1__tar.gz → 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recent-state-summarizer might be problematic. Click here for more details.
- {recent-state-summarizer-0.0.1/recent_state_summarizer.egg-info → recent_state_summarizer-0.0.2}/PKG-INFO +17 -5
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/pyproject.toml +5 -4
- recent_state_summarizer-0.0.2/recent_state_summarizer/__init__.py +1 -0
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer/fetch.py +30 -9
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer/summarize.py +5 -3
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2/recent_state_summarizer.egg-info}/PKG-INFO +17 -5
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer.egg-info/SOURCES.txt +2 -1
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer.egg-info/requires.txt +2 -1
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer.egg-info/top_level.txt +0 -1
- recent_state_summarizer-0.0.2/tests/test_fetch.py +64 -0
- recent-state-summarizer-0.0.1/recent_state_summarizer/__init__.py +0 -1
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/LICENSE +0 -0
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/README.md +0 -0
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer/__main__.py +0 -0
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer.egg-info/dependency_links.txt +0 -0
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer.egg-info/entry_points.txt +0 -0
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/setup.cfg +0 -0
- {recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: recent-state-summarizer
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 0.0.2
|
|
4
4
|
Summary: Summarize a list of entry titles using LLM
|
|
5
5
|
Author-email: nikkie <takuyafjp+develop@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -8,16 +8,28 @@ Classifier: Development Status :: 1 - Planning
|
|
|
8
8
|
Classifier: License :: OSI Approved :: MIT License
|
|
9
9
|
Classifier: Programming Language :: Python
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.9
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
-
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Python: >=3.9
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: beautifulsoup4
|
|
20
|
+
Requires-Dist: openai<1
|
|
17
21
|
Provides-Extra: testing
|
|
22
|
+
Requires-Dist: pytest; extra == "testing"
|
|
23
|
+
Requires-Dist: pytest_httpserver; extra == "testing"
|
|
18
24
|
Provides-Extra: lint
|
|
25
|
+
Requires-Dist: flake8; extra == "lint"
|
|
26
|
+
Requires-Dist: black; extra == "lint"
|
|
27
|
+
Requires-Dist: isort; extra == "lint"
|
|
19
28
|
Provides-Extra: dev
|
|
20
|
-
|
|
29
|
+
Requires-Dist: wheel; extra == "dev"
|
|
30
|
+
Requires-Dist: build; extra == "dev"
|
|
31
|
+
Requires-Dist: twine; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
21
33
|
|
|
22
34
|
# recent-state-summarizer
|
|
23
35
|
|
|
@@ -9,26 +9,27 @@ authors = [
|
|
|
9
9
|
]
|
|
10
10
|
description = "Summarize a list of entry titles using LLM"
|
|
11
11
|
readme = {file = "README.md", content-type = "text/markdown"}
|
|
12
|
-
requires-python = ">=3.8"
|
|
12
|
+
requires-python = ">=3.9"
|
|
13
13
|
license = {text = "MIT"}
|
|
14
14
|
classifiers = [
|
|
15
15
|
"Development Status :: 1 - Planning",
|
|
16
16
|
"License :: OSI Approved :: MIT License",
|
|
17
17
|
"Programming Language :: Python",
|
|
18
18
|
"Programming Language :: Python :: 3",
|
|
19
|
-
"Programming Language :: Python :: 3.8",
|
|
20
19
|
"Programming Language :: Python :: 3.9",
|
|
21
20
|
"Programming Language :: Python :: 3.10",
|
|
22
21
|
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
23
24
|
]
|
|
24
25
|
dependencies = [
|
|
25
26
|
"beautifulsoup4",
|
|
26
|
-
"openai",
|
|
27
|
+
"openai<1",
|
|
27
28
|
]
|
|
28
29
|
dynamic = ["version"]
|
|
29
30
|
|
|
30
31
|
[project.optional-dependencies]
|
|
31
|
-
testing = ["pytest"]
|
|
32
|
+
testing = ["pytest", "pytest_httpserver"]
|
|
32
33
|
lint = ["flake8", "black", "isort"]
|
|
33
34
|
dev = ["wheel", "build", "twine"]
|
|
34
35
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.2"
|
{recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer/fetch.py
RENAMED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
from collections.abc import Generator, Iterable
|
|
4
5
|
from pathlib import Path
|
|
6
|
+
from typing import TypedDict
|
|
5
7
|
from urllib.request import urlopen
|
|
6
8
|
|
|
7
9
|
from bs4 import BeautifulSoup
|
|
@@ -9,16 +11,23 @@ from bs4 import BeautifulSoup
|
|
|
9
11
|
PARSE_HATENABLOG_KWARGS = {"name": "a", "attrs": {"class": "entry-title-link"}}
|
|
10
12
|
|
|
11
13
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
class TitleTag(TypedDict):
|
|
15
|
+
title: str
|
|
16
|
+
url: str
|
|
15
17
|
|
|
16
18
|
|
|
17
|
-
def
|
|
18
|
-
|
|
19
|
+
def _main(url: str, save_path: str | Path, save_as_json: bool) -> None:
|
|
20
|
+
title_tags = _fetch_titles(url)
|
|
21
|
+
if not save_as_json:
|
|
22
|
+
contents = _as_bullet_list(
|
|
23
|
+
title_tag["title"] for title_tag in title_tags
|
|
24
|
+
)
|
|
25
|
+
else:
|
|
26
|
+
contents = _as_json(title_tags)
|
|
27
|
+
_save(save_path, contents)
|
|
19
28
|
|
|
20
29
|
|
|
21
|
-
def _fetch_titles(url: str) -> Generator[str, None, None]:
|
|
30
|
+
def _fetch_titles(url: str) -> Generator[TitleTag, None, None]:
|
|
22
31
|
raw_html = _fetch(url)
|
|
23
32
|
yield from _parse_titles(raw_html)
|
|
24
33
|
|
|
@@ -28,18 +37,24 @@ def _fetch(url: str) -> str:
|
|
|
28
37
|
return res.read()
|
|
29
38
|
|
|
30
39
|
|
|
31
|
-
def _parse_titles(raw_html: str) -> Generator[str, None, None]:
|
|
40
|
+
def _parse_titles(raw_html: str) -> Generator[TitleTag, None, None]:
|
|
32
41
|
soup = BeautifulSoup(raw_html, "html.parser")
|
|
33
42
|
body = soup.body
|
|
34
43
|
title_tags = body.find_all(**PARSE_HATENABLOG_KWARGS)
|
|
35
44
|
for title_tag in title_tags:
|
|
36
|
-
yield title_tag.text
|
|
45
|
+
yield {"title": title_tag.text, "url": title_tag["href"]}
|
|
37
46
|
|
|
38
47
|
|
|
39
48
|
def _as_bullet_list(titles: Iterable[str]) -> str:
|
|
40
49
|
return "\n".join(f"- {title}" for title in titles)
|
|
41
50
|
|
|
42
51
|
|
|
52
|
+
def _as_json(title_tags: Iterable[TitleTag]) -> str:
|
|
53
|
+
return "\n".join(
|
|
54
|
+
json.dumps(title_tag, ensure_ascii=False) for title_tag in title_tags
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
|
|
43
58
|
def _save(path: str | Path, contents: str) -> None:
|
|
44
59
|
with open(path, "w", encoding="utf8", newline="") as f:
|
|
45
60
|
f.write(contents)
|
|
@@ -66,6 +81,12 @@ if __name__ == "__main__":
|
|
|
66
81
|
)
|
|
67
82
|
parser.add_argument("url", help="URL of archive page")
|
|
68
83
|
parser.add_argument("save_path", help="Local file path")
|
|
84
|
+
parser.add_argument(
|
|
85
|
+
"--as-json",
|
|
86
|
+
action="store_true",
|
|
87
|
+
default=False,
|
|
88
|
+
help="Save as JSON format instead of bullet list",
|
|
89
|
+
)
|
|
69
90
|
args = parser.parse_args()
|
|
70
91
|
|
|
71
|
-
_main(args.url, args.save_path)
|
|
92
|
+
_main(args.url, args.save_path, args.as_json)
|
{recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer/summarize.py
RENAMED
|
@@ -25,17 +25,19 @@ def _build_prompts(titles: str):
|
|
|
25
25
|
|
|
26
26
|
def _build_summarize_prompt_text(titles_as_list: str) -> str:
|
|
27
27
|
return f"""\
|
|
28
|
-
|
|
28
|
+
3つのバッククォートで囲まれた以下は、同一人物が最近書いたブログ記事のタイトルの一覧です。
|
|
29
29
|
それを読み、この人物が最近何をやっているかを詳しく教えてください。
|
|
30
30
|
応答は文ごとに改行して区切ってください。
|
|
31
31
|
|
|
32
|
+
```
|
|
32
33
|
{titles_as_list}
|
|
34
|
+
```
|
|
33
35
|
"""
|
|
34
36
|
|
|
35
37
|
|
|
36
|
-
def _complete_chat(prompts):
|
|
38
|
+
def _complete_chat(prompts, temperature=0.0):
|
|
37
39
|
return openai.ChatCompletion.create(
|
|
38
|
-
model=MODEL, messages=prompts, temperature=
|
|
40
|
+
model=MODEL, messages=prompts, temperature=temperature
|
|
39
41
|
)
|
|
40
42
|
|
|
41
43
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: recent-state-summarizer
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 0.0.2
|
|
4
4
|
Summary: Summarize a list of entry titles using LLM
|
|
5
5
|
Author-email: nikkie <takuyafjp+develop@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -8,16 +8,28 @@ Classifier: Development Status :: 1 - Planning
|
|
|
8
8
|
Classifier: License :: OSI Approved :: MIT License
|
|
9
9
|
Classifier: Programming Language :: Python
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.9
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
-
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Python: >=3.9
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: beautifulsoup4
|
|
20
|
+
Requires-Dist: openai<1
|
|
17
21
|
Provides-Extra: testing
|
|
22
|
+
Requires-Dist: pytest; extra == "testing"
|
|
23
|
+
Requires-Dist: pytest_httpserver; extra == "testing"
|
|
18
24
|
Provides-Extra: lint
|
|
25
|
+
Requires-Dist: flake8; extra == "lint"
|
|
26
|
+
Requires-Dist: black; extra == "lint"
|
|
27
|
+
Requires-Dist: isort; extra == "lint"
|
|
19
28
|
Provides-Extra: dev
|
|
20
|
-
|
|
29
|
+
Requires-Dist: wheel; extra == "dev"
|
|
30
|
+
Requires-Dist: build; extra == "dev"
|
|
31
|
+
Requires-Dist: twine; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
21
33
|
|
|
22
34
|
# recent-state-summarizer
|
|
23
35
|
|
|
@@ -11,4 +11,5 @@ recent_state_summarizer.egg-info/SOURCES.txt
|
|
|
11
11
|
recent_state_summarizer.egg-info/dependency_links.txt
|
|
12
12
|
recent_state_summarizer.egg-info/entry_points.txt
|
|
13
13
|
recent_state_summarizer.egg-info/requires.txt
|
|
14
|
-
recent_state_summarizer.egg-info/top_level.txt
|
|
14
|
+
recent_state_summarizer.egg-info/top_level.txt
|
|
15
|
+
tests/test_fetch.py
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from textwrap import dedent
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from recent_state_summarizer.fetch import _main
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@pytest.fixture
|
|
9
|
+
def blog_server(httpserver):
|
|
10
|
+
httpserver.expect_request("/archive/2025/06").respond_with_data(
|
|
11
|
+
dedent(
|
|
12
|
+
f"""
|
|
13
|
+
<!DOCTYPE html>
|
|
14
|
+
<html>
|
|
15
|
+
<head><title>Archive</title></head>
|
|
16
|
+
<body>
|
|
17
|
+
<h1>Archive</h1>
|
|
18
|
+
<div id="content">
|
|
19
|
+
<div class="archive-entries">
|
|
20
|
+
<section class="archive-entry">
|
|
21
|
+
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/03">Title 3</a>
|
|
22
|
+
</section>
|
|
23
|
+
<section class="archive-entry">
|
|
24
|
+
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/02">Title 2</a>
|
|
25
|
+
</section>
|
|
26
|
+
<section class="archive-entry">
|
|
27
|
+
<a class="entry-title-link" href="{httpserver.url_for('/')}archive/2025/06/01">Title 1</a>
|
|
28
|
+
</section>
|
|
29
|
+
</div>
|
|
30
|
+
</div>
|
|
31
|
+
</body>
|
|
32
|
+
</html>
|
|
33
|
+
"""
|
|
34
|
+
)
|
|
35
|
+
)
|
|
36
|
+
return httpserver
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_fetch_as_bullet_list(blog_server, tmp_path):
|
|
40
|
+
_main(
|
|
41
|
+
blog_server.url_for("/archive/2025/06"),
|
|
42
|
+
tmp_path / "titles.txt",
|
|
43
|
+
save_as_json=False,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
expected = """\
|
|
47
|
+
- Title 3
|
|
48
|
+
- Title 2
|
|
49
|
+
- Title 1"""
|
|
50
|
+
assert (tmp_path / "titles.txt").read_text(encoding="utf8") == expected
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_fetch_as_json(blog_server, tmp_path):
|
|
54
|
+
_main(
|
|
55
|
+
blog_server.url_for("/archive/2025/06"),
|
|
56
|
+
tmp_path / "titles.json",
|
|
57
|
+
save_as_json=True,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
expected = f"""\
|
|
61
|
+
{{"title": "Title 3", "url": "{blog_server.url_for('/archive/2025/06/03')}"}}
|
|
62
|
+
{{"title": "Title 2", "url": "{blog_server.url_for('/archive/2025/06/02')}"}}
|
|
63
|
+
{{"title": "Title 1", "url": "{blog_server.url_for('/archive/2025/06/01')}"}}"""
|
|
64
|
+
assert (tmp_path / "titles.json").read_text(encoding="utf8") == expected
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.1"
|
|
File without changes
|
|
File without changes
|
{recent-state-summarizer-0.0.1 → recent_state_summarizer-0.0.2}/recent_state_summarizer/__main__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|