recent-state-summarizer 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recent-state-summarizer might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "0.0.1"
1
+ __version__ = "0.0.3"
@@ -1,7 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import json
3
4
  from collections.abc import Generator, Iterable
4
5
  from pathlib import Path
6
+ from typing import TypedDict
5
7
  from urllib.request import urlopen
6
8
 
7
9
  from bs4 import BeautifulSoup
@@ -9,37 +11,57 @@ from bs4 import BeautifulSoup
9
11
  PARSE_HATENABLOG_KWARGS = {"name": "a", "attrs": {"class": "entry-title-link"}}
10
12
 
11
13
 
12
- def _main(url: str, save_path: str | Path) -> None:
13
- contents = fetch_titles_as_bullet_list(url)
14
- _save(save_path, contents)
14
+ class TitleTag(TypedDict):
15
+ title: str
16
+ url: str
15
17
 
16
18
 
17
- def fetch_titles_as_bullet_list(url: str) -> str:
18
- return _as_bullet_list(_fetch_titles(url))
19
+ def _main(url: str, save_path: str | Path, save_as_json: bool) -> None:
20
+ title_tags = _fetch_titles(url)
21
+ if not save_as_json:
22
+ contents = _as_bullet_list(
23
+ title_tag["title"] for title_tag in title_tags
24
+ )
25
+ else:
26
+ contents = _as_json(title_tags)
27
+ _save(save_path, contents)
19
28
 
20
29
 
21
- def _fetch_titles(url: str) -> Generator[str, None, None]:
30
+ def _fetch_titles(url: str) -> Generator[TitleTag, None, None]:
22
31
  raw_html = _fetch(url)
23
32
  yield from _parse_titles(raw_html)
24
33
 
34
+ soup = BeautifulSoup(raw_html, "html.parser")
35
+ next_link = soup.find("a", class_="test-pager-next")
36
+ if next_link and "href" in next_link.attrs:
37
+ next_url = next_link["href"]
38
+ print(f"Next page found, fetching... {next_url}")
39
+ yield from _fetch_titles(next_url)
40
+
25
41
 
26
42
  def _fetch(url: str) -> str:
27
43
  with urlopen(url) as res:
28
44
  return res.read()
29
45
 
30
46
 
31
- def _parse_titles(raw_html: str) -> Generator[str, None, None]:
47
+ def _parse_titles(raw_html: str) -> Generator[TitleTag, None, None]:
32
48
  soup = BeautifulSoup(raw_html, "html.parser")
33
49
  body = soup.body
34
50
  title_tags = body.find_all(**PARSE_HATENABLOG_KWARGS)
35
51
  for title_tag in title_tags:
36
- yield title_tag.text
52
+ yield {"title": title_tag.text, "url": title_tag["href"]}
37
53
 
38
54
 
39
55
  def _as_bullet_list(titles: Iterable[str]) -> str:
40
56
  return "\n".join(f"- {title}" for title in titles)
41
57
 
42
58
 
59
+ def _as_json(title_tags: Iterable[TitleTag]) -> str:
60
+ return "\n".join(
61
+ json.dumps(title_tag, ensure_ascii=False) for title_tag in title_tags
62
+ )
63
+
64
+
43
65
  def _save(path: str | Path, contents: str) -> None:
44
66
  with open(path, "w", encoding="utf8", newline="") as f:
45
67
  f.write(contents)
@@ -66,6 +88,12 @@ if __name__ == "__main__":
66
88
  )
67
89
  parser.add_argument("url", help="URL of archive page")
68
90
  parser.add_argument("save_path", help="Local file path")
91
+ parser.add_argument(
92
+ "--as-json",
93
+ action="store_true",
94
+ default=False,
95
+ help="Save as JSON format instead of bullet list",
96
+ )
69
97
  args = parser.parse_args()
70
98
 
71
- _main(args.url, args.save_path)
99
+ _main(args.url, args.save_path, args.as_json)
@@ -25,17 +25,19 @@ def _build_prompts(titles: str):
25
25
 
26
26
  def _build_summarize_prompt_text(titles_as_list: str) -> str:
27
27
  return f"""\
28
- 以下は同一人物が最近書いたブログ記事のタイトルの一覧です。
28
+ 3つのバッククォートで囲まれた以下は、同一人物が最近書いたブログ記事のタイトルの一覧です。
29
29
  それを読み、この人物が最近何をやっているかを詳しく教えてください。
30
30
  応答は文ごとに改行して区切ってください。
31
31
 
32
+ ```
32
33
  {titles_as_list}
34
+ ```
33
35
  """
34
36
 
35
37
 
36
- def _complete_chat(prompts):
38
+ def _complete_chat(prompts, temperature=0.0):
37
39
  return openai.ChatCompletion.create(
38
- model=MODEL, messages=prompts, temperature=0.8
40
+ model=MODEL, messages=prompts, temperature=temperature
39
41
  )
40
42
 
41
43
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: recent-state-summarizer
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: Summarize a list of entry titles using LLM
5
5
  Author-email: nikkie <takuyafjp+develop@gmail.com>
6
6
  License: MIT
@@ -8,25 +8,28 @@ Classifier: Development Status :: 1 - Planning
8
8
  Classifier: License :: OSI Approved :: MIT License
9
9
  Classifier: Programming Language :: Python
10
10
  Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.8
12
11
  Classifier: Programming Language :: Python :: 3.9
13
12
  Classifier: Programming Language :: Python :: 3.10
14
13
  Classifier: Programming Language :: Python :: 3.11
15
- Requires-Python: >=3.8
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Python: >=3.9
16
17
  Description-Content-Type: text/markdown
17
18
  License-File: LICENSE
18
19
  Requires-Dist: beautifulsoup4
19
- Requires-Dist: openai
20
- Provides-Extra: dev
21
- Requires-Dist: wheel ; extra == 'dev'
22
- Requires-Dist: build ; extra == 'dev'
23
- Requires-Dist: twine ; extra == 'dev'
24
- Provides-Extra: lint
25
- Requires-Dist: flake8 ; extra == 'lint'
26
- Requires-Dist: black ; extra == 'lint'
27
- Requires-Dist: isort ; extra == 'lint'
20
+ Requires-Dist: openai<1
28
21
  Provides-Extra: testing
29
- Requires-Dist: pytest ; extra == 'testing'
22
+ Requires-Dist: pytest; extra == "testing"
23
+ Requires-Dist: pytest_httpserver; extra == "testing"
24
+ Provides-Extra: lint
25
+ Requires-Dist: flake8; extra == "lint"
26
+ Requires-Dist: black; extra == "lint"
27
+ Requires-Dist: isort; extra == "lint"
28
+ Provides-Extra: dev
29
+ Requires-Dist: wheel; extra == "dev"
30
+ Requires-Dist: build; extra == "dev"
31
+ Requires-Dist: twine; extra == "dev"
32
+ Dynamic: license-file
30
33
 
31
34
  # recent-state-summarizer
32
35
 
@@ -0,0 +1,10 @@
1
+ recent_state_summarizer/__init__.py,sha256=4GZKi13lDTD25YBkGakhZyEQZWTER_OWQMNPoH_UM2c,22
2
+ recent_state_summarizer/__main__.py,sha256=mpPX-XZ6Ggrs41Lilc657by7ori5JwjRU4ek_eKK49k,966
3
+ recent_state_summarizer/fetch.py,sha256=GtlmJvHsxgqtgqi258nOxdzrc7zl1xIYYBzY8CEVcUE,2808
4
+ recent_state_summarizer/summarize.py,sha256=0CgVNkY2m8qN0n1G9V2zvqghO3abUYlLIYEpJErdUTw,1989
5
+ recent_state_summarizer-0.0.3.dist-info/licenses/LICENSE,sha256=kB7ZyyVGuCxC6lFG2Yts8vznxcztjtI_0dsKVa08Tg8,1063
6
+ recent_state_summarizer-0.0.3.dist-info/METADATA,sha256=NvQqKKvZySn0uWGluPad-JcVvOdl3FyCSL5IfoU0h9o,2262
7
+ recent_state_summarizer-0.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
+ recent_state_summarizer-0.0.3.dist-info/entry_points.txt,sha256=DSCCRVear3M5V-kOSQi0O08zsyb4MRm8lft6yPUz5Yc,69
9
+ recent_state_summarizer-0.0.3.dist-info/top_level.txt,sha256=ZDx-fDfnpDSCeaxWI4rJq0otLE56owkWHOHKmj7zfZg,24
10
+ recent_state_summarizer-0.0.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.40.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- recent_state_summarizer/__init__.py,sha256=sXLh7g3KC4QCFxcZGBTpG2scR7hmmBsMjq6LqRptkRg,22
2
- recent_state_summarizer/__main__.py,sha256=mpPX-XZ6Ggrs41Lilc657by7ori5JwjRU4ek_eKK49k,966
3
- recent_state_summarizer/fetch.py,sha256=g4kQN9x3qXF8_p4y-nJNNiu3mcuuDJ4WbsV8b-D02KQ,1942
4
- recent_state_summarizer/summarize.py,sha256=8Adyo_qhJLrOxkYIWZWlEVJ1WACpBGf-37rvhqu_BJk,1910
5
- recent_state_summarizer-0.0.1.dist-info/LICENSE,sha256=kB7ZyyVGuCxC6lFG2Yts8vznxcztjtI_0dsKVa08Tg8,1063
6
- recent_state_summarizer-0.0.1.dist-info/METADATA,sha256=kDsfcAiVp-y_uRNkyUe1zXh8HFM9M6aetSC4QsizlYI,2140
7
- recent_state_summarizer-0.0.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
8
- recent_state_summarizer-0.0.1.dist-info/entry_points.txt,sha256=DSCCRVear3M5V-kOSQi0O08zsyb4MRm8lft6yPUz5Yc,69
9
- recent_state_summarizer-0.0.1.dist-info/top_level.txt,sha256=ZDx-fDfnpDSCeaxWI4rJq0otLE56owkWHOHKmj7zfZg,24
10
- recent_state_summarizer-0.0.1.dist-info/RECORD,,