recent-state-summarizer 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recent-state-summarizer might be problematic. Click here for more details.

@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 nikkie
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,82 @@
1
+ Metadata-Version: 2.1
2
+ Name: recent-state-summarizer
3
+ Version: 0.0.1
4
+ Summary: Summarize a list of entry titles using LLM
5
+ Author-email: nikkie <takuyafjp+develop@gmail.com>
6
+ License: MIT
7
+ Classifier: Development Status :: 1 - Planning
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.8
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Requires-Python: >=3.8
16
+ Description-Content-Type: text/markdown
17
+ Provides-Extra: testing
18
+ Provides-Extra: lint
19
+ Provides-Extra: dev
20
+ License-File: LICENSE
21
+
22
+ # recent-state-summarizer
23
+
24
+ Summarize blog article titles with the OpenAI API
25
+
26
+ a.k.a. _RSS_ 😃
27
+
28
+ ## Setup
29
+
30
+ ```
31
+ $ pip install recent-state-summarizer
32
+ ```
33
+
34
+ ⚠️ Set `OPENAI_API_KEY` environment variable.
35
+ ref: https://platform.openai.com/account/api-keys
36
+
37
+ ## Usage
38
+
39
+ ```
40
+ $ omae-douyo https://nikkie-ftnext.hatenablog.com/archive/2023/4
41
+
42
+ この人物は最近、プログラミングに関することを中心にして活動しています。
43
+
44
+ (略)
45
+
46
+ 最近は、株式会社はてなに入社したようです。
47
+ ```
48
+
49
+ Currently supported:
50
+
51
+ - はてなブログ(Hatena blog)
52
+
53
+ To see help, type `omae-douyo -h`.
54
+
55
+ ## Development
56
+
57
+ ### Sub commands
58
+
59
+ Fetch only:
60
+
61
+ ```
62
+ python -m recent_state_summarizer.fetch -h
63
+ ```
64
+
65
+ Summarize only:
66
+ It's convenient to skip fetching when tuning the prompt.
67
+
68
+ ```
69
+ python -m recent_state_summarizer.summarize -h
70
+ ```
71
+
72
+ ### Environment
73
+
74
+ ```
75
+ $ git clone https://github.com/ftnext/recent-state-summarizer.git
76
+ $ cd recent-state-summarizer
77
+
78
+ $ python -m venv venv
79
+ $ source venv/bin/activate
80
+ (venv) $ pip install -r requirements.lock
81
+ (venv) $ pip install -e '.'
82
+ ```
@@ -0,0 +1,61 @@
1
+ # recent-state-summarizer
2
+
3
+ Summarize blog article titles with the OpenAI API
4
+
5
+ a.k.a. _RSS_ 😃
6
+
7
+ ## Setup
8
+
9
+ ```
10
+ $ pip install recent-state-summarizer
11
+ ```
12
+
13
+ ⚠️ Set `OPENAI_API_KEY` environment variable.
14
+ ref: https://platform.openai.com/account/api-keys
15
+
16
+ ## Usage
17
+
18
+ ```
19
+ $ omae-douyo https://nikkie-ftnext.hatenablog.com/archive/2023/4
20
+
21
+ この人物は最近、プログラミングに関することを中心にして活動しています。
22
+
23
+ (略)
24
+
25
+ 最近は、株式会社はてなに入社したようです。
26
+ ```
27
+
28
+ Currently supported:
29
+
30
+ - はてなブログ(Hatena blog)
31
+
32
+ To see help, type `omae-douyo -h`.
33
+
34
+ ## Development
35
+
36
+ ### Sub commands
37
+
38
+ Fetch only:
39
+
40
+ ```
41
+ python -m recent_state_summarizer.fetch -h
42
+ ```
43
+
44
+ Summarize only:
45
+ It's convenient to skip fetching when tuning the prompt.
46
+
47
+ ```
48
+ python -m recent_state_summarizer.summarize -h
49
+ ```
50
+
51
+ ### Environment
52
+
53
+ ```
54
+ $ git clone https://github.com/ftnext/recent-state-summarizer.git
55
+ $ cd recent-state-summarizer
56
+
57
+ $ python -m venv venv
58
+ $ source venv/bin/activate
59
+ (venv) $ pip install -r requirements.lock
60
+ (venv) $ pip install -e '.'
61
+ ```
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "recent-state-summarizer"
7
+ authors = [
8
+ {name = "nikkie", email = "takuyafjp+develop@gmail.com"},
9
+ ]
10
+ description = "Summarize a list of entry titles using LLM"
11
+ readme = {file = "README.md", content-type = "text/markdown"}
12
+ requires-python = ">=3.8"
13
+ license = {text = "MIT"}
14
+ classifiers = [
15
+ "Development Status :: 1 - Planning",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Programming Language :: Python",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.8",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ ]
24
dependencies = [
    "beautifulsoup4",
    # recent_state_summarizer.summarize calls openai.ChatCompletion.create,
    # an entry point that only exists in the pre-1.0 openai SDK.
    "openai<1",
]
28
+ dynamic = ["version"]
29
+
30
+ [project.optional-dependencies]
31
+ testing = ["pytest"]
32
+ lint = ["flake8", "black", "isort"]
33
+ dev = ["wheel", "build", "twine"]
34
+
35
+ [project.scripts]
36
+ omae-douyo = "recent_state_summarizer.__main__:main"
37
+
38
+ [tool.setuptools.packages.find]
39
+ exclude = ["tests"]
40
+
41
+ [tool.setuptools.dynamic]
42
+ version = {attr = "recent_state_summarizer.__version__"}
@@ -0,0 +1 @@
1
+ __version__ = "0.0.1"
@@ -0,0 +1,36 @@
1
+ import argparse
2
+ from textwrap import dedent
3
+
4
+ from recent_state_summarizer.fetch import fetch_titles_as_bullet_list
5
+ from recent_state_summarizer.summarize import summarize_titles
6
+
7
+
8
def parse_args():
    """Parse the command line of the ``omae-douyo`` entry point.

    Returns:
        The ``argparse.Namespace`` produced by the parser; it carries the
        single positional argument ``url``.
    """
    description = dedent(
        """
        Summarize blog article titles with the OpenAI API.

        ⚠️ Set `OPENAI_API_KEY` environment variable.

        Example:
        omae-douyo https://awesome.hatenablog.com/archive/2023

        Retrieve the titles of articles from a specified URL.
        After summarization, prints the summary.

        Support:
        - はてなブログ(Hatena blog)
        """
    )
    # RawDescriptionHelpFormatter keeps the line breaks of the description
    # instead of re-wrapping it.
    cli = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument("url", help="URL of archive page")
    return cli.parse_args()
29
+
30
+
31
def main():
    """Fetch the titles at the URL given on the command line and print their summary."""
    url = parse_args().url
    bullet_list = fetch_titles_as_bullet_list(url)
    print(summarize_titles(bullet_list))
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Generator, Iterable
4
+ from pathlib import Path
5
+ from urllib.request import urlopen
6
+
7
+ from bs4 import BeautifulSoup
8
+
9
+ PARSE_HATENABLOG_KWARGS = {"name": "a", "attrs": {"class": "entry-title-link"}}
10
+
11
+
12
def _main(url: str, save_path: str | Path) -> None:
    """Fetch the titles found at ``url`` and write the bullet list to ``save_path``."""
    _save(save_path, fetch_titles_as_bullet_list(url))
15
+
16
+
17
def fetch_titles_as_bullet_list(url: str) -> str:
    """Return the entry titles found at ``url`` as one ``- ``-prefixed line per title."""
    titles = _fetch_titles(url)
    return _as_bullet_list(titles)
19
+
20
+
21
def _fetch_titles(url: str) -> Generator[str, None, None]:
    """Yield each entry title parsed from the page at ``url``.

    This is a generator, so the page is only downloaded once iteration starts.
    """
    for title in _parse_titles(_fetch(url)):
        yield title
24
+
25
+
26
+ def _fetch(url: str) -> str:
27
+ with urlopen(url) as res:
28
+ return res.read()
29
+
30
+
31
def _parse_titles(raw_html: str | bytes) -> Generator[str, None, None]:
    """Yield the text of every entry-title link found in ``raw_html``.

    Args:
        raw_html: Markup of an archive page, as text or as the undecoded
            bytes returned by ``_fetch``.

    Yields:
        The ``.text`` of each tag matching ``PARSE_HATENABLOG_KWARGS``
        (``<a class="entry-title-link">``), in document order.
    """
    soup = BeautifulSoup(raw_html, "html.parser")
    # soup.body is None when the markup has no <body> element (empty response,
    # HTML fragment); search the whole document then instead of failing with
    # AttributeError on None.
    root = soup.body if soup.body is not None else soup
    for title_tag in root.find_all(**PARSE_HATENABLOG_KWARGS):
        yield title_tag.text
37
+
38
+
39
+ def _as_bullet_list(titles: Iterable[str]) -> str:
40
+ return "\n".join(f"- {title}" for title in titles)
41
+
42
+
43
+ def _save(path: str | Path, contents: str) -> None:
44
+ with open(path, "w", encoding="utf8", newline="") as f:
45
+ f.write(contents)
46
+
47
+
48
if __name__ == "__main__":
    # Sub command: fetch the titles only and save them to a local file.
    import argparse
    import textwrap

    description = textwrap.dedent(
        """
        Retrieve the titles of articles from a specified URL page
        and save them as a list.

        Support:
        - はてなブログ(Hatena blog)

        Example:
        python -m recent_state_summarizer.fetch \\
        https://awesome.hatenablog.com/archive/2023 awesome_titles.txt
        """
    )
    cli = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument("url", help="URL of archive page")
    cli.add_argument("save_path", help="Local file path")
    options = cli.parse_args()

    _main(options.url, options.save_path)
@@ -0,0 +1,74 @@
1
+ from pathlib import Path
2
+
3
+ import openai
4
+
5
+ MODEL = "gpt-3.5-turbo"
6
+
7
+
8
+ def _main(titles_path: str | Path) -> str:
9
+ titles = _read_titles(titles_path)
10
+ return summarize_titles(titles)
11
+
12
+
13
def summarize_titles(titles: str) -> str:
    """Return the model's summary for the given list of titles.

    Builds the chat messages, sends them for completion, and extracts the
    reply text from the response.
    """
    return _parse_response(_complete_chat(_build_prompts(titles)))
17
+
18
+
19
def _build_prompts(titles: str):
    """Return the chat messages: a one-element list with a single ``user`` message."""
    user_message = {
        "role": "user",
        "content": _build_summarize_prompt_text(titles),
    }
    return [user_message]
24
+
25
+
26
+ def _build_summarize_prompt_text(titles_as_list: str) -> str:
27
+ return f"""\
28
+ 以下は同一人物が最近書いたブログ記事のタイトルの一覧です。
29
+ それを読み、この人物が最近何をやっているかを詳しく教えてください。
30
+ 応答は文ごとに改行して区切ってください。
31
+
32
+ {titles_as_list}
33
+ """
34
+
35
+
36
def _complete_chat(prompts):
    """Send ``prompts`` to ``openai.ChatCompletion.create`` and return its response.

    Uses the module-level ``MODEL`` and a temperature of 0.8.
    """
    request = {"model": MODEL, "messages": prompts, "temperature": 0.8}
    return openai.ChatCompletion.create(**request)
40
+
41
+
42
+ def _parse_response(response) -> str:
43
+ return response["choices"][0]["message"]["content"]
44
+
45
+
46
+ def _read_titles(titles_path: str | Path) -> str:
47
+ with open(titles_path, encoding="utf8", newline="") as f:
48
+ return f.read()
49
+
50
+
51
if __name__ == "__main__":
    # Sub command: summarize an already saved list of titles and print the result.
    import argparse
    import textwrap

    description = textwrap.dedent(
        f"""
        Summarize a list of blog article titles using the OpenAI API ({MODEL}).
        This command prints the summary.

        ⚠️ Set `OPENAI_API_KEY` environment variable.

        Example:
        python -m recent_state_summarizer.summarize awesome_titles.txt
        """
    )
    cli = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        "titles_path",
        help="Local file path where the list of titles is saved",
    )
    options = cli.parse_args()

    print(_main(options.titles_path))
@@ -0,0 +1,82 @@
1
+ Metadata-Version: 2.1
2
+ Name: recent-state-summarizer
3
+ Version: 0.0.1
4
+ Summary: Summarize a list of entry titles using LLM
5
+ Author-email: nikkie <takuyafjp+develop@gmail.com>
6
+ License: MIT
7
+ Classifier: Development Status :: 1 - Planning
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.8
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Requires-Python: >=3.8
16
+ Description-Content-Type: text/markdown
17
+ Provides-Extra: testing
18
+ Provides-Extra: lint
19
+ Provides-Extra: dev
20
+ License-File: LICENSE
21
+
22
+ # recent-state-summarizer
23
+
24
+ Summarize blog article titles with the OpenAI API
25
+
26
+ a.k.a. _RSS_ 😃
27
+
28
+ ## Setup
29
+
30
+ ```
31
+ $ pip install recent-state-summarizer
32
+ ```
33
+
34
+ ⚠️ Set `OPENAI_API_KEY` environment variable.
35
+ ref: https://platform.openai.com/account/api-keys
36
+
37
+ ## Usage
38
+
39
+ ```
40
+ $ omae-douyo https://nikkie-ftnext.hatenablog.com/archive/2023/4
41
+
42
+ この人物は最近、プログラミングに関することを中心にして活動しています。
43
+
44
+ (略)
45
+
46
+ 最近は、株式会社はてなに入社したようです。
47
+ ```
48
+
49
+ Currently supported:
50
+
51
+ - はてなブログ(Hatena blog)
52
+
53
+ To see help, type `omae-douyo -h`.
54
+
55
+ ## Development
56
+
57
+ ### Sub commands
58
+
59
+ Fetch only:
60
+
61
+ ```
62
+ python -m recent_state_summarizer.fetch -h
63
+ ```
64
+
65
+ Summarize only:
66
+ It's convenient to skip fetching when tuning the prompt.
67
+
68
+ ```
69
+ python -m recent_state_summarizer.summarize -h
70
+ ```
71
+
72
+ ### Environment
73
+
74
+ ```
75
+ $ git clone https://github.com/ftnext/recent-state-summarizer.git
76
+ $ cd recent-state-summarizer
77
+
78
+ $ python -m venv venv
79
+ $ source venv/bin/activate
80
+ (venv) $ pip install -r requirements.lock
81
+ (venv) $ pip install -e '.'
82
+ ```
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ recent_state_summarizer/__init__.py
6
+ recent_state_summarizer/__main__.py
7
+ recent_state_summarizer/fetch.py
8
+ recent_state_summarizer/summarize.py
9
+ recent_state_summarizer.egg-info/PKG-INFO
10
+ recent_state_summarizer.egg-info/SOURCES.txt
11
+ recent_state_summarizer.egg-info/dependency_links.txt
12
+ recent_state_summarizer.egg-info/entry_points.txt
13
+ recent_state_summarizer.egg-info/requires.txt
14
+ recent_state_summarizer.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ omae-douyo = recent_state_summarizer.__main__:main
@@ -0,0 +1,15 @@
1
+ beautifulsoup4
2
+ openai
3
+
4
+ [dev]
5
+ wheel
6
+ build
7
+ twine
8
+
9
+ [lint]
10
+ flake8
11
+ black
12
+ isort
13
+
14
+ [testing]
15
+ pytest
@@ -0,0 +1,3 @@
1
+ build
2
+ dist
3
+ recent_state_summarizer
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ from setuptools import setup
2
+
3
+ setup()