kabukit 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kabukit/__init__.py +6 -4
- kabukit/analysis/__init__.py +0 -0
- kabukit/analysis/indicators.py +0 -0
- kabukit/analysis/preprocess.py +0 -0
- kabukit/analysis/screener.py +0 -0
- kabukit/analysis/visualization.py +57 -0
- kabukit/cli/__init__.py +0 -0
- kabukit/cli/app.py +22 -0
- kabukit/cli/auth.py +86 -0
- kabukit/core/__init__.py +0 -0
- kabukit/core/base.py +45 -0
- kabukit/core/client.py +25 -0
- kabukit/core/info.py +12 -0
- kabukit/core/prices.py +30 -0
- kabukit/core/statements.py +7 -0
- kabukit/edinet/__init__.py +3 -0
- kabukit/edinet/client.py +113 -0
- kabukit/edinet/concurrent.py +153 -0
- kabukit/edinet/doc.py +32 -0
- kabukit/jquants/__init__.py +3 -0
- kabukit/jquants/client.py +197 -197
- kabukit/jquants/concurrent.py +91 -0
- kabukit/jquants/info.py +31 -0
- kabukit/jquants/prices.py +29 -0
- kabukit/jquants/schema.py +180 -0
- kabukit/jquants/statements.py +102 -0
- kabukit/py.typed +0 -0
- kabukit/utils/__init__.py +0 -0
- kabukit/utils/concurrent.py +148 -0
- kabukit/utils/config.py +26 -0
- kabukit/utils/params.py +47 -0
- kabukit-0.2.0.dist-info/METADATA +64 -0
- kabukit-0.2.0.dist-info/RECORD +35 -0
- {kabukit-0.1.0.dist-info → kabukit-0.2.0.dist-info}/WHEEL +1 -1
- kabukit-0.2.0.dist-info/entry_points.txt +3 -0
- kabukit/cli.py +0 -40
- kabukit-0.1.0.dist-info/METADATA +0 -33
- kabukit-0.1.0.dist-info/RECORD +0 -8
- kabukit-0.1.0.dist-info/entry_points.txt +0 -3
kabukit/__init__.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
-
from .
|
1
|
+
from .core.info import Info
|
2
|
+
from .core.prices import Prices
|
3
|
+
from .core.statements import Statements
|
4
|
+
from .edinet.client import EdinetClient
|
5
|
+
from .jquants.client import JQuantsClient
|
2
6
|
|
3
|
-
|
4
|
-
def main() -> None:
|
5
|
-
app()
|
7
|
+
# Names re-exported at package level (imported above from the core, edinet
# and jquants subpackages); this list is what `from kabukit import *` exposes.
__all__ = ["EdinetClient", "Info", "JQuantsClient", "Prices", "Statements"]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,57 @@
|
|
1
|
+
"""チャート作成のためのモジュール"""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING, Literal
|
6
|
+
|
7
|
+
import altair as alt
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from kabukit.core.prices import Prices
|
11
|
+
|
12
|
+
|
13
|
+
def plot_prices(
    prices: Prices,
    kind: Literal["candlestick"] = "candlestick",
) -> alt.VConcatChart:
    """Build a price chart stacked above a volume chart.

    Args:
        prices: Price container whose ``data`` frame is handed to Altair.
        kind: Chart style for the price panel. Only ``"candlestick"`` is
            implemented.

    Returns:
        The price chart and the volume chart concatenated vertically.

    Raises:
        NotImplementedError: If ``kind`` is anything other than
            ``"candlestick"``.
    """
    # Reject unsupported kinds up front; the type checker considers this
    # branch unreachable because of the Literal annotation.
    if kind != "candlestick":
        raise NotImplementedError  # pyright: ignore[reportUnreachable]

    return alt.vconcat(
        plot_prices_candlestick(prices),
        plot_prices_volume(prices),
    )
|
23
|
+
|
24
|
+
|
25
|
+
def plot_prices_candlestick(prices: Prices) -> alt.LayerChart:
    """Draw a candlestick chart (high/low rule layered with an open/close bar).

    Args:
        prices: Price container; ``prices.data`` must provide the ``Date``,
            ``Open``, ``High``, ``Low`` and ``Close`` fields referenced by
            the encodings below.

    Returns:
        A 200-pixel-high layered chart with shared x/y/color/tooltip encodings.
    """
    source = prices.data

    # Thin rule spanning low..high, thick bar spanning open..close.
    wick = alt.Chart(source, mark="rule").encode(y="Low:Q", y2="High:Q")
    body = alt.Chart(source, mark="bar").encode(y="Open:Q", y2="Close:Q")

    # Red when the close is above the open, blue otherwise.
    candle_color = alt.condition(
        "datum.Open < datum.Close",
        alt.value("#ff3030"),
        alt.value("#3030ff"),
    )

    tooltip_fields = [
        alt.Tooltip("Date:T", title="日付"),
        alt.Tooltip("Open:Q", title="始値"),
        alt.Tooltip("High:Q", title="高値"),
        alt.Tooltip("Low:Q", title="安値"),
        alt.Tooltip("Close:Q", title="終値"),
    ]

    layered = alt.layer(wick, body, height=200)
    return layered.encode(
        x=alt.X("Date:T", axis=alt.Axis(title="日付", format="%Y-%m-%d")),
        y=alt.Y(title="株価", scale=alt.Scale(zero=False)),
        color=candle_color,
        tooltip=tooltip_fields,
    )
|
47
|
+
|
48
|
+
|
49
|
+
def plot_prices_volume(prices: Prices) -> alt.Chart:
    """Draw the traded volume as a 50-pixel-high bar chart over time.

    Args:
        prices: Price container; ``prices.data`` must provide the ``Date``
            and ``Volume`` fields referenced by the encodings below.

    Returns:
        The bar chart with date on the x axis and volume on the y axis.
    """
    date_axis = alt.X("Date:T", axis=alt.Axis(title="日付", format="%Y-%m-%d"))
    volume_axis = alt.Y("Volume:Q", title="出来高")
    tooltip_fields = [
        alt.Tooltip("Date:T", title="日付"),
        alt.Tooltip("Volume:Q", title="出来高"),
    ]

    bars = alt.Chart(prices.data, mark="bar", height=50)
    return bars.encode(x=date_axis, y=volume_axis, tooltip=tooltip_fields)
|
kabukit/cli/__init__.py
ADDED
File without changes
|
kabukit/cli/app.py
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
"""kabukit CLI."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
import typer
|
6
|
+
from async_typer import AsyncTyper # pyright: ignore[reportMissingTypeStubs]
|
7
|
+
|
8
|
+
from . import auth
|
9
|
+
|
10
|
+
# Root command group of the kabukit CLI. Shell-completion options are
# switched off; the help string is user-facing text and is kept as written.
app = AsyncTyper(
    add_completion=False,
    help="J-Quants/EDINETデータツール",
)
# Mount the authentication commands from the auth module as the "auth" subcommand.
app.add_typer(auth.app, name="auth")
|
15
|
+
|
16
|
+
|
17
|
+
@app.command()
def version() -> None:
    """バージョン情報を表示します。"""
    # NOTE: the docstring above doubles as the command's CLI help text, so it
    # is kept verbatim. The command prints the installed kabukit version.
    # Importing the module (rather than its `version` function) avoids
    # shadowing this command's own name inside the body.
    from importlib import metadata

    typer.echo(f"kabukit version: {metadata.version('kabukit')}")
|
kabukit/cli/auth.py
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Annotated
|
4
|
+
|
5
|
+
import typer
|
6
|
+
from async_typer import AsyncTyper # pyright: ignore[reportMissingTypeStubs]
|
7
|
+
from httpx import HTTPStatusError
|
8
|
+
from typer import Exit, Option
|
9
|
+
|
10
|
+
# Command group for the authentication commands defined in this module
# (jquants / j, edinet / e, show). Shell-completion options are switched off;
# the help string is user-facing text and is kept as written.
app = AsyncTyper(
    add_completion=False,
    help="J-QuantsまたはEDINETの認証トークンを保存します。",
)
|
14
|
+
|
15
|
+
|
16
|
+
async def auth_jquants(mailaddress: str, password: str) -> None:
    """Authenticate against the J-Quants API and save the resulting tokens.

    Args:
        mailaddress: E-mail address registered with J-Quants.
        password: J-Quants password.

    Raises:
        typer.Exit: With exit code 1 when the authentication request fails
            with an HTTP error status (the error is echoed first).
    """
    # Imported lazily so the client module is only loaded when this runs.
    from kabukit.jquants.client import JQuantsClient

    async with JQuantsClient() as client:
        try:
            # save=True asks the client to persist the tokens it obtains.
            await client.auth(mailaddress, password, save=True)
        except HTTPStatusError as e:
            failure = f"認証に失敗しました: {e}"
            typer.echo(failure)
            # Suppress the traceback chain; the message above is enough.
            raise Exit(1) from None

    typer.echo("J-Quantsのリフレッシュトークン・IDトークンを保存しました。")
|
28
|
+
|
29
|
+
|
30
|
+
# CLI option types for the J-Quants credentials. Both options are configured
# with prompt=True; the password option additionally sets hide_input=True so
# the typed value is not displayed. The help strings are user-facing text.
Mailaddress = Annotated[
    str,
    Option(prompt=True, help="J-Quantsに登録したメールアドレス。"),
]
Password = Annotated[
    str,
    Option(prompt=True, hide_input=True, help="J-Quantsのパスワード。"),
]
|
38
|
+
|
39
|
+
|
40
|
+
# "jquants" command: takes the credentials as options and delegates to
# auth_jquants. The docstring is the command's CLI help text, so it is left
# in its original wording.
@app.async_command()  # pyright: ignore[reportUnknownMemberType]
async def jquants(mailaddress: Mailaddress, password: Password) -> None:
    """J-Quants APIの認証を行い、トークンを設定ファイルに保存します。(エイリアス: j)"""
    await auth_jquants(mailaddress, password)
|
44
|
+
|
45
|
+
|
46
|
+
# Hidden short alias "j" for the "jquants" command; same options, same
# delegation to auth_jquants. Intentionally has no docstring (a docstring
# would become help text for the alias).
@app.async_command(name="j", hidden=True)  # pyright: ignore[reportUnknownMemberType]
async def jquants_alias(mailaddress: Mailaddress, password: Password) -> None:
    await auth_jquants(mailaddress, password)
|
49
|
+
|
50
|
+
|
51
|
+
def auth_edinet(api_key: str) -> None:
    """Store the EDINET API key in the configuration file and confirm it.

    Args:
        api_key: The EDINET API key to save under ``EDINET_API_KEY``.
    """
    # Imported lazily so the config helpers are only loaded when needed.
    from kabukit.utils import config

    config.set_key("EDINET_API_KEY", api_key)
    typer.echo("EDINETのAPIキーを保存しました。")
|
57
|
+
|
58
|
+
|
59
|
+
# CLI option type for the EDINET API key; configured with prompt=True.
# The help string is user-facing text.
ApiKey = Annotated[str, Option(prompt=True, help="取得したEDINET APIキー。")]
|
60
|
+
|
61
|
+
|
62
|
+
# "edinet" command: takes the API key as an option and delegates to
# auth_edinet. The docstring is the command's CLI help text, so it is left
# in its original wording.
@app.command()
def edinet(api_key: ApiKey) -> None:
    """EDINET APIのAPIキーを設定ファイルに保存します。(エイリアス: e)"""
    auth_edinet(api_key)
|
66
|
+
|
67
|
+
|
68
|
+
# Hidden short alias "e" for the "edinet" command; same option, same
# delegation to auth_edinet. Intentionally has no docstring (a docstring
# would become help text for the alias).
@app.command(name="e", hidden=True)
def edinet_alias(api_key: ApiKey) -> None:
    auth_edinet(api_key)
|
71
|
+
|
72
|
+
|
73
|
+
@app.command()
def show() -> None:
    """設定ファイルに保存したトークン・APIキーを表示します。"""
    # NOTE: the docstring above doubles as the command's CLI help text, so it
    # is kept verbatim. The command prints the configuration file path and,
    # when that file exists, every key/value pair stored in it.
    from dotenv import dotenv_values

    from kabukit.utils.config import get_dotenv_path

    dotenv_path = get_dotenv_path()
    typer.echo(f"Configuration file: {dotenv_path}")

    # Nothing more to show when no configuration has been saved yet.
    if not dotenv_path.exists():
        return

    for name, stored in dotenv_values(dotenv_path).items():
        typer.echo(f"{name}: {stored}")
|
kabukit/core/__init__.py
ADDED
File without changes
|
kabukit/core/base.py
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import datetime
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
|
7
|
+
import polars as pl
|
8
|
+
from platformdirs import user_cache_dir
|
9
|
+
|
10
|
+
if TYPE_CHECKING:
|
11
|
+
from typing import Self
|
12
|
+
|
13
|
+
from polars import DataFrame
|
14
|
+
|
15
|
+
|
16
|
+
class Base:
    """Container for a polars DataFrame with a per-class parquet cache.

    Each subclass reads and writes files in its own directory,
    ``user_cache_dir("kabukit") / <lower-cased class name>``.
    """

    data: DataFrame

    def __init__(self, data: DataFrame) -> None:
        self.data = data

    @classmethod
    def data_dir(cls) -> Path:
        """Return the cache directory for this class (it is not created here)."""
        clsname = cls.__name__.lower()
        return Path(user_cache_dir("kabukit")) / clsname

    def write(self) -> Path:
        """Write ``data`` to ``<data_dir>/<YYYYMMDD>.parquet``.

        The file name is today's local date, so two writes on the same day
        target the same path.

        Returns:
            The path of the written parquet file.
        """
        data_dir = self.data_dir()
        data_dir.mkdir(parents=True, exist_ok=True)
        path = datetime.datetime.today().strftime("%Y%m%d")  # noqa: DTZ002
        filename = data_dir / f"{path}.parquet"
        self.data.write_parquet(filename)
        return filename

    @classmethod
    def read(cls, path: str | None = None) -> Self:
        """Load a parquet file from the cache directory into a new instance.

        Args:
            path: File name relative to ``data_dir()``. When omitted, the
                file whose name sorts last is used; with the ``YYYYMMDD``
                names produced by ``write`` that is the most recent one.

        Returns:
            A new instance of ``cls`` wrapping the loaded DataFrame.

        Raises:
            FileNotFoundError: If ``path`` is omitted and the cache directory
                contains no ``*.parquet`` file.
        """
        data_dir = cls.data_dir()

        if path is None:
            candidates = sorted(data_dir.glob("*.parquet"))
            # An empty (or missing) cache directory used to surface as a bare
            # IndexError from [-1]; report what is actually wrong instead.
            if not candidates:
                msg = f"No parquet files found in {data_dir}"
                raise FileNotFoundError(msg)
            filename = candidates[-1]
        else:
            filename = data_dir / path

        data = pl.read_parquet(filename)
        return cls(data)
|
kabukit/core/client.py
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
from httpx import AsyncClient
|
6
|
+
|
7
|
+
if TYPE_CHECKING:
|
8
|
+
from typing import Self
|
9
|
+
|
10
|
+
|
11
|
+
class Client:
    """Base API client that owns an ``httpx.AsyncClient``.

    Instances can be used with ``async with``; leaving the block closes the
    underlying HTTP client.
    """

    # The underlying HTTP client, created in __init__ with the given base URL.
    client: AsyncClient

    def __init__(self, base_url: str = "") -> None:
        """Create the HTTP client.

        Args:
            base_url: Base URL passed to ``AsyncClient``.
        """
        self.client = AsyncClient(base_url=base_url)

    async def aclose(self) -> None:
        """Close the HTTP client."""
        await self.client.aclose()

    async def __aenter__(self) -> Self:
        """Enter the async context and return this client itself."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:  # pyright: ignore[reportMissingParameterType, reportUnknownParameterType]  # noqa: ANN001
        """Close the HTTP client on exit; exceptions are not suppressed."""
        await self.aclose()
|
kabukit/core/info.py
ADDED
kabukit/core/prices.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
import polars as pl
|
6
|
+
|
7
|
+
from .base import Base
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from datetime import timedelta
|
11
|
+
from typing import Self
|
12
|
+
|
13
|
+
from polars import Expr
|
14
|
+
|
15
|
+
|
16
|
+
class Prices(Base):
    """Price data container with OHLCV resampling."""

    def truncate(self, every: str | timedelta | Expr) -> Self:
        """Aggregate the rows into coarser time buckets per ``Code``.

        ``Date`` is truncated to ``every`` and rows are grouped by the
        truncated date and ``Code``. Within each group: first non-null
        ``Open``, maximum ``High``, minimum ``Low``, last non-null ``Close``,
        and the sums of ``Volume`` and ``TurnoverValue``.

        Args:
            every: Bucket size passed to polars' ``dt.truncate``.

        Returns:
            A new instance of the same class holding the aggregated frame,
            sorted by ``Code`` and then ``Date``.
        """
        bucket = pl.col("Date").dt.truncate(every)
        aggregations = (
            pl.col("Open").drop_nulls().first(),
            pl.col("High").max(),
            pl.col("Low").min(),
            pl.col("Close").drop_nulls().last(),
            pl.col("Volume").sum(),
            pl.col("TurnoverValue").sum(),
        )

        resampled = self.data.group_by(bucket, "Code").agg(*aggregations)
        return self.__class__(resampled.sort("Code", "Date"))
|
kabukit/edinet/client.py
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import io
|
4
|
+
import os
|
5
|
+
import zipfile
|
6
|
+
from enum import StrEnum
|
7
|
+
from typing import TYPE_CHECKING
|
8
|
+
|
9
|
+
import polars as pl
|
10
|
+
from polars import DataFrame
|
11
|
+
|
12
|
+
from kabukit.core.client import Client
|
13
|
+
from kabukit.utils.config import load_dotenv
|
14
|
+
from kabukit.utils.params import get_params
|
15
|
+
|
16
|
+
from .doc import clean_csv, clean_list
|
17
|
+
|
18
|
+
if TYPE_CHECKING:
|
19
|
+
import datetime
|
20
|
+
|
21
|
+
from httpx import Response
|
22
|
+
from httpx._types import QueryParamTypes
|
23
|
+
|
24
|
+
# EDINET API version segment and the base URL built from it; EdinetClient
# passes BASE_URL to its parent constructor.
API_VERSION = "v2"
BASE_URL = f"https://api.edinet-fsa.go.jp/api/{API_VERSION}"
|
26
|
+
|
27
|
+
|
28
|
+
class AuthKey(StrEnum):
    """Environment variable keys for EDINET authentication."""

    # Name of the environment variable EdinetClient.set_api_key reads the
    # API key from when no key is passed explicitly.
    API_KEY = "EDINET_API_KEY"
|
32
|
+
|
33
|
+
|
34
|
+
class EdinetClient(Client):
    """Asynchronous client for the EDINET API rooted at ``BASE_URL``."""

    def __init__(self, api_key: str | None = None) -> None:
        """Create the client and install the API key.

        Args:
            api_key: EDINET API key. When omitted it is looked up as
                described in ``set_api_key``.
        """
        super().__init__(BASE_URL)
        self.set_api_key(api_key)

    def set_api_key(self, api_key: str | None = None) -> None:
        """Install the API key as the ``Subscription-Key`` client parameter.

        Args:
            api_key: Key to use. When ``None``, ``load_dotenv()`` is called
                and the key is read from the ``EDINET_API_KEY`` environment
                variable. If no non-empty key is found, the client's
                parameters are left untouched.
        """
        key = api_key
        if key is None:
            load_dotenv()
            key = os.environ.get(AuthKey.API_KEY)

        if not key:
            return

        # Replaces the client's default query parameters as a whole.
        self.client.params = {"Subscription-Key": key}

    async def get(self, url: str, params: QueryParamTypes) -> Response:
        """Send a GET request and return the response.

        Args:
            url: Request path or URL.
            params: Query parameters for this request.

        Raises:
            httpx.HTTPStatusError: Raised by ``raise_for_status`` for an
                error status code.
        """
        response = await self.client.get(url, params=params)
        response.raise_for_status()
        return response

    async def get_count(self, date: str | datetime.date) -> int:
        """Return the document count reported for ``date``.

        Requests ``/documents.json`` with ``type=1`` and reads the count from
        the response metadata. Returns 0 when the metadata status is not
        ``"200"``.
        """
        response = await self.get("/documents.json", get_params(date=date, type=1))
        metadata = response.json()["metadata"]
        return metadata["resultset"]["count"] if metadata["status"] == "200" else 0

    async def get_list(self, date: str | datetime.date) -> DataFrame:
        """Return the document list for ``date`` as a DataFrame.

        Requests ``/documents.json`` with ``type=2``. An empty DataFrame is
        returned when the response has no ``"results"`` key or the results
        are empty; otherwise the frame is passed through ``clean_list``.
        """
        response = await self.get("/documents.json", get_params(date=date, type=2))
        payload = response.json()

        if "results" not in payload:
            return DataFrame()

        # infer_schema_length=None lets polars scan every row for the schema.
        documents = DataFrame(payload["results"], infer_schema_length=None)
        return documents if documents.is_empty() else clean_list(documents, date)

    async def get_document(self, doc_id: str, doc_type: int) -> Response:
        """Fetch ``/documents/{doc_id}`` with ``type=doc_type``; return the raw response."""
        return await self.get(f"/documents/{doc_id}", get_params(type=doc_type))

    async def get_pdf(self, doc_id: str) -> bytes:
        """Return the document as PDF bytes (document type 2).

        Raises:
            ValueError: If the response content type is not ``application/pdf``.
        """
        response = await self.get_document(doc_id, doc_type=2)
        if response.headers["content-type"] != "application/pdf":
            msg = "PDF is not available."
            raise ValueError(msg)

        return response.content

    async def get_zip(self, doc_id: str, doc_type: int) -> bytes:
        """Return the document of the given type as ZIP archive bytes.

        Raises:
            ValueError: If the response content type is not
                ``application/octet-stream``.
        """
        response = await self.get_document(doc_id, doc_type=doc_type)
        if response.headers["content-type"] != "application/octet-stream":
            msg = "ZIP is not available."
            raise ValueError(msg)

        return response.content

    async def get_csv(self, doc_id: str) -> DataFrame:
        """Return the first CSV file of the document's ZIP (type 5) as a DataFrame.

        The CSV is read as tab-separated UTF-16-LE text and passed through
        ``clean_csv`` together with ``doc_id``.

        Raises:
            ValueError: If the archive holds no file ending in ``.csv``, or
                (from ``get_zip``) if no ZIP is available.
        """
        archive_bytes = await self.get_zip(doc_id, doc_type=5)

        with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
            for member in archive.infolist():
                if not member.filename.endswith(".csv"):
                    continue

                # Only the first matching member is used.
                table = pl.read_csv(
                    archive.read(member),
                    separator="\t",
                    encoding="utf-16-le",
                )
                return clean_csv(table, doc_id)

        msg = "CSV is not available."
        raise ValueError(msg)
|
@@ -0,0 +1,153 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import datetime
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
from kabukit.utils import concurrent
|
7
|
+
|
8
|
+
from .client import EdinetClient
|
9
|
+
|
10
|
+
if TYPE_CHECKING:
|
11
|
+
from collections.abc import Iterable
|
12
|
+
|
13
|
+
from polars import DataFrame
|
14
|
+
|
15
|
+
from kabukit.utils.concurrent import Callback, Progress
|
16
|
+
|
17
|
+
|
18
|
+
def _years_before(date: datetime.date, years: int) -> datetime.date:
    """Return the same calendar day ``years`` earlier; Feb 29 maps to Feb 28."""
    target_year = date.year - years
    try:
        return date.replace(year=target_year)
    except ValueError:
        # Only Feb 29 landing in a non-leap year is recoverable; anything
        # else (e.g. a year out of range) is a genuine error.
        if (date.month, date.day) != (2, 29):
            raise
        return date.replace(year=target_year, day=28)


def get_dates(days: int | None = None, years: int | None = None) -> list[datetime.date]:
    """Return the list of dates covering the past ``days`` days or ``years`` years.

    The list is ascending, ends with today (local date) and starts the day
    after the computed start date, so ``days=n`` yields exactly ``n`` dates.

    Args:
        days (int | None): Number of past days to cover.
        years (int | None): Number of past years to cover. Ignored when
            ``days`` is given.

    Returns:
        list[datetime.date]: The dates in ascending order.

    Raises:
        ValueError: If neither ``days`` nor ``years`` is given.
    """
    end_date = datetime.date.today()  # noqa: DTZ011

    if days is not None:
        start_date = end_date - datetime.timedelta(days=days)
    elif years is not None:
        # A plain date.replace(year=...) raises ValueError on Feb 29 when the
        # target year is not a leap year.
        start_date = _years_before(end_date, years)
    else:
        msg = "daysまたはyearsのいずれかを指定してください。"
        raise ValueError(msg)

    return [
        start_date + datetime.timedelta(days=i)
        for i in range(1, (end_date - start_date).days + 1)
    ]
|
40
|
+
|
41
|
+
|
42
|
+
async def fetch(
    resource: str,
    args: Iterable[str | datetime.date],
    /,
    max_concurrency: int | None = None,
    progress: Progress | None = None,
    callback: Callback | None = None,
) -> DataFrame:
    """Fetch one EDINET resource for each argument and return a single DataFrame.

    Thin wrapper that forwards to ``kabukit.utils.concurrent.fetch`` with
    ``EdinetClient`` as the client class.

    Args:
        resource (str): Kind of data to fetch: the name of an
            ``EdinetClient`` method without its ``get_`` prefix.
        args (Iterable[str | datetime.date]): The arguments to fetch for.
        max_concurrency (int | None, optional): Maximum number of concurrent
            requests. The default is used when omitted.
        progress (Progress | None, optional): Progress-reporting function;
            libraries such as tqdm or marimo can be used. No progress is
            shown when omitted.
        callback (Callback | None, optional): Callback applied to each
            DataFrame. DataFrames are used unchanged when omitted.

    Returns:
        DataFrame:
            A single DataFrame combining the data fetched for all arguments.
    """
    options = {
        "max_concurrency": max_concurrency,
        "progress": progress,
        "callback": callback,
    }
    return await concurrent.fetch(EdinetClient, resource, args, **options)
|
76
|
+
|
77
|
+
|
78
|
+
async def fetch_list(
    days: int | None = None,
    years: int | None = None,
    limit: int | None = None,
    max_concurrency: int | None = None,
    progress: Progress | None = None,
    callback: Callback | None = None,
) -> DataFrame:
    """Fetch the document lists of the past ``days`` days or ``years`` years.

    Args:
        days (int | None): Number of past days to cover.
        years (int | None): Number of past years to cover. Ignored when
            ``days`` is given.
        limit (int | None, optional): When given, only the first ``limit``
            dates of the generated (ascending) date list are fetched.
        max_concurrency (int | None, optional): Maximum number of concurrent
            requests. The default is used when omitted.
        progress (Progress | None, optional): Progress-reporting function;
            libraries such as tqdm or marimo can be used. No progress is
            shown when omitted.
        callback (Callback | None, optional): Callback applied to each
            DataFrame. DataFrames are used unchanged when omitted.

    Returns:
        DataFrame:
            A single DataFrame containing the document lists.
    """
    all_dates = get_dates(days=days, years=years)
    targets = all_dates if limit is None else all_dates[:limit]

    return await fetch(
        "list",
        targets,
        max_concurrency=max_concurrency,
        progress=progress,
        callback=callback,
    )
|
116
|
+
|
117
|
+
|
118
|
+
async def fetch_csv(
    doc_ids: Iterable[str],
    /,
    limit: int | None = None,
    max_concurrency: int | None = None,
    progress: Progress | None = None,
    callback: Callback | None = None,
) -> DataFrame:
    """Fetch documents in CSV form and return them as a single DataFrame.

    Args:
        doc_ids (Iterable[str]): IDs of the documents to fetch.
        limit (int | None, optional): When given, only the first ``limit``
            document IDs are fetched.
        max_concurrency (int | None, optional): Maximum number of concurrent
            requests. The default is used when omitted.
        progress (Progress | None, optional): Progress-reporting function;
            libraries such as tqdm or marimo can be used. No progress is
            shown when omitted.
        callback (Callback | None, optional): Callback applied to each
            DataFrame. DataFrames are used unchanged when omitted.

    Returns:
        DataFrame:
            A single DataFrame containing the documents.
    """
    # Materialize first so that any iterable can be sliced.
    targets = list(doc_ids)
    if limit is not None:
        targets = targets[:limit]

    return await fetch(
        "csv",
        targets,
        max_concurrency=max_concurrency,
        progress=progress,
        callback=callback,
    )
|
kabukit/edinet/doc.py
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import datetime
|
4
|
+
from typing import TYPE_CHECKING
|
5
|
+
|
6
|
+
import polars as pl
|
7
|
+
|
8
|
+
if TYPE_CHECKING:
|
9
|
+
from polars import DataFrame
|
10
|
+
|
11
|
+
|
12
|
+
def clean_list(df: DataFrame, date: str | datetime.date) -> DataFrame:
    """Normalize the column types of an EDINET document list.

    Adds a leading ``Date`` column holding ``date`` and converts, all with
    non-strict parsing: ``submitDateTime`` and ``opeDateTime`` to datetimes,
    columns matching ``^period.+$`` to dates, columns matching ``^.+Flag$``
    to booleans (via Int8), and columns matching ``^.+Code$`` to strings.

    Args:
        df: The raw document list.
        date: The date the list was requested for; a string is parsed as
            ``%Y-%m-%d``.

    Returns:
        The converted frame with ``Date`` as the first column.
    """
    list_date = (
        datetime.datetime.strptime(date, "%Y-%m-%d").date()  # noqa: DTZ007
        if isinstance(date, str)
        else date
    )

    converted = df.with_columns(
        pl.lit(list_date).alias("Date"),
        pl.col("submitDateTime").str.to_datetime("%Y-%m-%d %H:%M", strict=False),
        pl.col("^period.+$").str.to_date("%Y-%m-%d", strict=False),
        pl.col("^.+Flag$").cast(pl.Int8).cast(pl.Boolean),
        pl.col("^.+Code$").cast(pl.String),
        # Cast to String first, then parse with the same format as above.
        pl.col("opeDateTime")
        .cast(pl.String)
        .str.to_datetime("%Y-%m-%d %H:%M", strict=False),
    )

    # Move the new Date column to the front.
    return converted.select("Date", pl.exclude("Date"))
|
26
|
+
|
27
|
+
|
28
|
+
def clean_csv(df: DataFrame, doc_id: str) -> DataFrame:
    """Prepend a constant ``docID`` column holding ``doc_id`` to ``df``.

    Args:
        df: The frame read from a document's CSV file.
        doc_id: The document ID to record in every row.

    Returns:
        A frame with ``docID`` first, followed by all original columns.
    """
    doc_id_column = pl.lit(doc_id).alias("docID")
    return df.select(doc_id_column, pl.all())
|
kabukit/jquants/__init__.py
CHANGED