recent-state-summarizer 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recent-state-summarizer might be problematic. Click here for more details.
- recent_state_summarizer/__init__.py +1 -1
- recent_state_summarizer/fetch.py +37 -9
- recent_state_summarizer/summarize.py +5 -3
- {recent_state_summarizer-0.0.1.dist-info → recent_state_summarizer-0.0.3.dist-info}/METADATA +17 -14
- recent_state_summarizer-0.0.3.dist-info/RECORD +10 -0
- {recent_state_summarizer-0.0.1.dist-info → recent_state_summarizer-0.0.3.dist-info}/WHEEL +1 -1
- recent_state_summarizer-0.0.1.dist-info/RECORD +0 -10
- {recent_state_summarizer-0.0.1.dist-info → recent_state_summarizer-0.0.3.dist-info}/entry_points.txt +0 -0
- {recent_state_summarizer-0.0.1.dist-info → recent_state_summarizer-0.0.3.dist-info/licenses}/LICENSE +0 -0
- {recent_state_summarizer-0.0.1.dist-info → recent_state_summarizer-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.1"
|
|
1
|
+
__version__ = "0.0.3"
|
recent_state_summarizer/fetch.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
from collections.abc import Generator, Iterable
|
|
4
5
|
from pathlib import Path
|
|
6
|
+
from typing import TypedDict
|
|
5
7
|
from urllib.request import urlopen
|
|
6
8
|
|
|
7
9
|
from bs4 import BeautifulSoup
|
|
@@ -9,37 +11,57 @@ from bs4 import BeautifulSoup
|
|
|
9
11
|
PARSE_HATENABLOG_KWARGS = {"name": "a", "attrs": {"class": "entry-title-link"}}
|
|
10
12
|
|
|
11
13
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
class TitleTag(TypedDict):
|
|
15
|
+
title: str
|
|
16
|
+
url: str
|
|
15
17
|
|
|
16
18
|
|
|
17
|
-
def
|
|
18
|
-
|
|
19
|
+
def _main(url: str, save_path: str | Path, save_as_json: bool) -> None:
|
|
20
|
+
title_tags = _fetch_titles(url)
|
|
21
|
+
if not save_as_json:
|
|
22
|
+
contents = _as_bullet_list(
|
|
23
|
+
title_tag["title"] for title_tag in title_tags
|
|
24
|
+
)
|
|
25
|
+
else:
|
|
26
|
+
contents = _as_json(title_tags)
|
|
27
|
+
_save(save_path, contents)
|
|
19
28
|
|
|
20
29
|
|
|
21
|
-
def _fetch_titles(url: str) -> Generator[str, None, None]:
|
|
30
|
+
def _fetch_titles(url: str) -> Generator[TitleTag, None, None]:
|
|
22
31
|
raw_html = _fetch(url)
|
|
23
32
|
yield from _parse_titles(raw_html)
|
|
24
33
|
|
|
34
|
+
soup = BeautifulSoup(raw_html, "html.parser")
|
|
35
|
+
next_link = soup.find("a", class_="test-pager-next")
|
|
36
|
+
if next_link and "href" in next_link.attrs:
|
|
37
|
+
next_url = next_link["href"]
|
|
38
|
+
print(f"Next page found, fetching... {next_url}")
|
|
39
|
+
yield from _fetch_titles(next_url)
|
|
40
|
+
|
|
25
41
|
|
|
26
42
|
def _fetch(url: str) -> str:
|
|
27
43
|
with urlopen(url) as res:
|
|
28
44
|
return res.read()
|
|
29
45
|
|
|
30
46
|
|
|
31
|
-
def _parse_titles(raw_html: str) -> Generator[str, None, None]:
|
|
47
|
+
def _parse_titles(raw_html: str) -> Generator[TitleTag, None, None]:
|
|
32
48
|
soup = BeautifulSoup(raw_html, "html.parser")
|
|
33
49
|
body = soup.body
|
|
34
50
|
title_tags = body.find_all(**PARSE_HATENABLOG_KWARGS)
|
|
35
51
|
for title_tag in title_tags:
|
|
36
|
-
yield title_tag.text
|
|
52
|
+
yield {"title": title_tag.text, "url": title_tag["href"]}
|
|
37
53
|
|
|
38
54
|
|
|
39
55
|
def _as_bullet_list(titles: Iterable[str]) -> str:
|
|
40
56
|
return "\n".join(f"- {title}" for title in titles)
|
|
41
57
|
|
|
42
58
|
|
|
59
|
+
def _as_json(title_tags: Iterable[TitleTag]) -> str:
|
|
60
|
+
return "\n".join(
|
|
61
|
+
json.dumps(title_tag, ensure_ascii=False) for title_tag in title_tags
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
|
|
43
65
|
def _save(path: str | Path, contents: str) -> None:
|
|
44
66
|
with open(path, "w", encoding="utf8", newline="") as f:
|
|
45
67
|
f.write(contents)
|
|
@@ -66,6 +88,12 @@ if __name__ == "__main__":
|
|
|
66
88
|
)
|
|
67
89
|
parser.add_argument("url", help="URL of archive page")
|
|
68
90
|
parser.add_argument("save_path", help="Local file path")
|
|
91
|
+
parser.add_argument(
|
|
92
|
+
"--as-json",
|
|
93
|
+
action="store_true",
|
|
94
|
+
default=False,
|
|
95
|
+
help="Save as JSON format instead of bullet list",
|
|
96
|
+
)
|
|
69
97
|
args = parser.parse_args()
|
|
70
98
|
|
|
71
|
-
_main(args.url, args.save_path)
|
|
99
|
+
_main(args.url, args.save_path, args.as_json)
|
|
@@ -25,17 +25,19 @@ def _build_prompts(titles: str):
|
|
|
25
25
|
|
|
26
26
|
def _build_summarize_prompt_text(titles_as_list: str) -> str:
|
|
27
27
|
return f"""\
|
|
28
|
-
|
|
28
|
+
3つのバッククォートで囲まれた以下は、同一人物が最近書いたブログ記事のタイトルの一覧です。
|
|
29
29
|
それを読み、この人物が最近何をやっているかを詳しく教えてください。
|
|
30
30
|
応答は文ごとに改行して区切ってください。
|
|
31
31
|
|
|
32
|
+
```
|
|
32
33
|
{titles_as_list}
|
|
34
|
+
```
|
|
33
35
|
"""
|
|
34
36
|
|
|
35
37
|
|
|
36
|
-
def _complete_chat(prompts):
|
|
38
|
+
def _complete_chat(prompts, temperature=0.0):
|
|
37
39
|
return openai.ChatCompletion.create(
|
|
38
|
-
model=MODEL, messages=prompts, temperature=0.0
|
|
40
|
+
model=MODEL, messages=prompts, temperature=temperature
|
|
39
41
|
)
|
|
40
42
|
|
|
41
43
|
|
{recent_state_summarizer-0.0.1.dist-info → recent_state_summarizer-0.0.3.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: recent-state-summarizer
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 0.0.3
|
|
4
4
|
Summary: Summarize a list of entry titles using LLM
|
|
5
5
|
Author-email: nikkie <takuyafjp+develop@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -8,25 +8,28 @@ Classifier: Development Status :: 1 - Planning
|
|
|
8
8
|
Classifier: License :: OSI Approved :: MIT License
|
|
9
9
|
Classifier: Programming Language :: Python
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.9
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
-
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Requires-Python: >=3.9
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
17
18
|
License-File: LICENSE
|
|
18
19
|
Requires-Dist: beautifulsoup4
|
|
19
|
-
Requires-Dist: openai
|
|
20
|
-
Provides-Extra: dev
|
|
21
|
-
Requires-Dist: wheel ; extra == 'dev'
|
|
22
|
-
Requires-Dist: build ; extra == 'dev'
|
|
23
|
-
Requires-Dist: twine ; extra == 'dev'
|
|
24
|
-
Provides-Extra: lint
|
|
25
|
-
Requires-Dist: flake8 ; extra == 'lint'
|
|
26
|
-
Requires-Dist: black ; extra == 'lint'
|
|
27
|
-
Requires-Dist: isort ; extra == 'lint'
|
|
20
|
+
Requires-Dist: openai<1
|
|
28
21
|
Provides-Extra: testing
|
|
29
|
-
Requires-Dist: pytest
|
|
22
|
+
Requires-Dist: pytest; extra == "testing"
|
|
23
|
+
Requires-Dist: pytest_httpserver; extra == "testing"
|
|
24
|
+
Provides-Extra: lint
|
|
25
|
+
Requires-Dist: flake8; extra == "lint"
|
|
26
|
+
Requires-Dist: black; extra == "lint"
|
|
27
|
+
Requires-Dist: isort; extra == "lint"
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: wheel; extra == "dev"
|
|
30
|
+
Requires-Dist: build; extra == "dev"
|
|
31
|
+
Requires-Dist: twine; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
30
33
|
|
|
31
34
|
# recent-state-summarizer
|
|
32
35
|
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
recent_state_summarizer/__init__.py,sha256=4GZKi13lDTD25YBkGakhZyEQZWTER_OWQMNPoH_UM2c,22
|
|
2
|
+
recent_state_summarizer/__main__.py,sha256=mpPX-XZ6Ggrs41Lilc657by7ori5JwjRU4ek_eKK49k,966
|
|
3
|
+
recent_state_summarizer/fetch.py,sha256=GtlmJvHsxgqtgqi258nOxdzrc7zl1xIYYBzY8CEVcUE,2808
|
|
4
|
+
recent_state_summarizer/summarize.py,sha256=0CgVNkY2m8qN0n1G9V2zvqghO3abUYlLIYEpJErdUTw,1989
|
|
5
|
+
recent_state_summarizer-0.0.3.dist-info/licenses/LICENSE,sha256=kB7ZyyVGuCxC6lFG2Yts8vznxcztjtI_0dsKVa08Tg8,1063
|
|
6
|
+
recent_state_summarizer-0.0.3.dist-info/METADATA,sha256=NvQqKKvZySn0uWGluPad-JcVvOdl3FyCSL5IfoU0h9o,2262
|
|
7
|
+
recent_state_summarizer-0.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
8
|
+
recent_state_summarizer-0.0.3.dist-info/entry_points.txt,sha256=DSCCRVear3M5V-kOSQi0O08zsyb4MRm8lft6yPUz5Yc,69
|
|
9
|
+
recent_state_summarizer-0.0.3.dist-info/top_level.txt,sha256=ZDx-fDfnpDSCeaxWI4rJq0otLE56owkWHOHKmj7zfZg,24
|
|
10
|
+
recent_state_summarizer-0.0.3.dist-info/RECORD,,
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
recent_state_summarizer/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
|
|
2
|
-
recent_state_summarizer/__main__.py,sha256=mpPX-XZ6Ggrs41Lilc657by7ori5JwjRU4ek_eKK49k,966
|
|
3
|
-
recent_state_summarizer/fetch.py,sha256=g4kQN9x3qXF8_p4y-nJNNiu3mcuuDJ4WbsV8b-D02KQ,1942
|
|
4
|
-
recent_state_summarizer/summarize.py,sha256=8Adyo_qhJLrOxkYIWZWlEVJ1WACpBGf-37rvhqu_BJk,1910
|
|
5
|
-
recent_state_summarizer-0.0.1.dist-info/LICENSE,sha256=kB7ZyyVGuCxC6lFG2Yts8vznxcztjtI_0dsKVa08Tg8,1063
|
|
6
|
-
recent_state_summarizer-0.0.1.dist-info/METADATA,sha256=kDsfcAiVp-y_uRNkyUe1zXh8HFM9M6aetSC4QsizlYI,2140
|
|
7
|
-
recent_state_summarizer-0.0.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
|
8
|
-
recent_state_summarizer-0.0.1.dist-info/entry_points.txt,sha256=DSCCRVear3M5V-kOSQi0O08zsyb4MRm8lft6yPUz5Yc,69
|
|
9
|
-
recent_state_summarizer-0.0.1.dist-info/top_level.txt,sha256=ZDx-fDfnpDSCeaxWI4rJq0otLE56owkWHOHKmj7zfZg,24
|
|
10
|
-
recent_state_summarizer-0.0.1.dist-info/RECORD,,
|
{recent_state_summarizer-0.0.1.dist-info → recent_state_summarizer-0.0.3.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.1.dist-info → recent_state_summarizer-0.0.3.dist-info/licenses}/LICENSE
RENAMED
|
File without changes
|
{recent_state_summarizer-0.0.1.dist-info → recent_state_summarizer-0.0.3.dist-info}/top_level.txt
RENAMED
|
File without changes
|