adaptive-memory-engine 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adaptive_memory_engine-0.1.6.dist-info/METADATA +228 -0
- adaptive_memory_engine-0.1.6.dist-info/RECORD +72 -0
- adaptive_memory_engine-0.1.6.dist-info/WHEEL +4 -0
- adaptive_memory_engine-0.1.6.dist-info/entry_points.txt +3 -0
- adaptive_memory_engine-0.1.6.dist-info/licenses/LICENSE +21 -0
- ame/__init__.py +1 -0
- ame/agent/__init__.py +1 -0
- ame/agent/mcp.py +474 -0
- ame/agent/memory_api.py +141 -0
- ame/agent/results.py +30 -0
- ame/bronze/schema.py +17 -0
- ame/bronze/store.py +38 -0
- ame/cli/__init__.py +1 -0
- ame/cli/main.py +903 -0
- ame/connectors/base.py +30 -0
- ame/connectors/contract.py +199 -0
- ame/connectors/github.py +66 -0
- ame/connectors/google.py +464 -0
- ame/connectors/google_oauth.py +156 -0
- ame/connectors/jira.py +66 -0
- ame/connectors/json_helpers.py +43 -0
- ame/connectors/markdown.py +116 -0
- ame/connectors/notion.py +59 -0
- ame/connectors/oauth_callback.py +102 -0
- ame/connectors/oauth_provider.py +250 -0
- ame/connectors/obsidian.py +19 -0
- ame/connectors/router.py +155 -0
- ame/connectors/slack.py +66 -0
- ame/connectors/slack_oauth.py +417 -0
- ame/connectors/sync_history.py +73 -0
- ame/context_budget.py +106 -0
- ame/core/config.py +77 -0
- ame/core/corpus.py +17 -0
- ame/core/errors.py +18 -0
- ame/core/paths.py +111 -0
- ame/core/state.py +57 -0
- ame/export/obsidian.py +123 -0
- ame/gold/builder.py +300 -0
- ame/gold/ontology.py +80 -0
- ame/gold/resolver.py +91 -0
- ame/gold/schema.py +40 -0
- ame/gold/store.py +45 -0
- ame/hardware/profiler.py +85 -0
- ame/hardware/tier.py +27 -0
- ame/hermes/__init__.py +3 -0
- ame/hermes/memory.py +209 -0
- ame/models/download.py +243 -0
- ame/models/ollama.py +60 -0
- ame/models/registry.py +101 -0
- ame/models/router.py +22 -0
- ame/pipeline.py +155 -0
- ame/query/diff.py +40 -0
- ame/query/engine.py +919 -0
- ame/query/memory_os.py +313 -0
- ame/query/mql.py +84 -0
- ame/query/multihop.py +264 -0
- ame/query/result.py +20 -0
- ame/sdk.py +52 -0
- ame/security.py +145 -0
- ame/silver/extractor.py +414 -0
- ame/silver/llm_extractor.py +181 -0
- ame/silver/prompts.py +56 -0
- ame/silver/rationale.py +140 -0
- ame/silver/schema.py +51 -0
- ame/silver/store.py +59 -0
- ame/storage/custom_kg.py +33 -0
- ame/storage/lightrag_adapter.py +362 -0
- ame/validation/confidence.py +5 -0
- ame/validation/grounding.py +10 -0
- ame/validation/type_gate.py +22 -0
- ame/writeback.py +173 -0
- memory/__init__.py +3 -0
ame/connectors/router.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from ame.connectors.base import Connector
|
|
6
|
+
from ame.connectors.contract import ConnectorProfile, ExportConnectorRuntime
|
|
7
|
+
from ame.connectors.github import GitHubConnector
|
|
8
|
+
from ame.connectors.google import GmailConnector, GoogleCalendarConnector, GoogleDriveConnector, GoogleSheetsConnector
|
|
9
|
+
from ame.connectors.jira import JiraConnector
|
|
10
|
+
from ame.connectors.markdown import MarkdownConnector
|
|
11
|
+
from ame.connectors.notion import NotionConnector
|
|
12
|
+
from ame.connectors.obsidian import ObsidianConnector
|
|
13
|
+
from ame.connectors.slack import SlackExportConnector
|
|
14
|
+
from ame.core.config import load_config
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ConnectorRouter:
    """Map connector profile names to profile metadata and connector objects.

    Names are compared case-insensitively and ``_`` is treated as ``-``.
    """

    # Short spellings accepted in place of a canonical profile name. Every
    # short name that ``resolve`` understands must also appear here, otherwise
    # ``runtime`` (which validates through ``profile`` first) rejects a name
    # that ``resolve`` would have handled. "notion" was previously missing.
    _ALIASES = {
        "slack": "slack-export",
        "jira": "jira-json",
        "github": "github-json",
        "notion": "notion-json",
        "markdown-files": "markdown",
        "drive": "google-drive-json",
        "google-drive": "google-drive-json",
        "gmail": "gmail-json",
        "google-email": "gmail-json",
        "google-mail": "gmail-json",
        "calendar": "google-calendar-json",
        "google-calendar": "google-calendar-json",
        "sheets": "google-sheets-json",
        "google-sheets": "google-sheets-json",
    }

    @staticmethod
    def _normalize(name: str | None) -> str:
        """Return ``name`` case-folded with ``_`` replaced by ``-`` ("" for None)."""
        return (name or "").casefold().replace("_", "-")

    def profiles(self) -> list[ConnectorProfile]:
        """Return a fresh list of every known connector profile.

        Profiles declared with ``mode="live"`` are rejected by ``runtime``;
        the remaining ones rely on ``ConnectorProfile``'s default mode.
        """
        return [
            ConnectorProfile(
                name="markdown",
                source_type="markdown",
                features=["file", "section", "frontmatter", "headings"],
            ),
            ConnectorProfile(
                name="obsidian",
                source_type="obsidian",
                features=["vault_import", "vault_export", "wikilink", "tag"],
            ),
            ConnectorProfile(
                name="slack-export",
                source_type="slack",
                features=["workspace", "channel", "thread", "message", "export_json"],
            ),
            ConnectorProfile(
                name="jira-json",
                source_type="jira",
                features=["issue", "comment", "status", "export_json"],
            ),
            ConnectorProfile(
                name="github-json",
                source_type="github",
                features=["issue", "pull_request", "discussion", "comment", "export_json"],
            ),
            ConnectorProfile(
                name="notion-json",
                source_type="notion",
                features=["page", "database", "block", "comment", "export_json"],
            ),
            ConnectorProfile(
                name="google-drive-json",
                source_type="google_drive",
                features=["file", "document", "owner", "modified_time", "original_url", "export_json"],
            ),
            ConnectorProfile(
                name="gmail-json",
                source_type="gmail",
                features=["thread", "message", "participants", "original_url", "export_json"],
            ),
            ConnectorProfile(
                name="google-calendar-json",
                source_type="google_calendar",
                features=["event", "meeting", "attendees", "time_range", "original_url", "export_json"],
            ),
            ConnectorProfile(
                name="google-sheets-json",
                source_type="google_sheets",
                features=["spreadsheet", "sheet", "row", "modified_time", "original_url", "export_json"],
            ),
            ConnectorProfile(
                name="slack-oauth",
                source_type="slack",
                mode="live",
                features=["workspace", "channel", "thread", "message", "incremental_sync"],
            ),
            ConnectorProfile(
                name="google-oauth",
                source_type="google",
                mode="live",
                features=["drive", "gmail", "calendar", "sheets", "shared_token_store"],
            ),
            ConnectorProfile(
                name="github-oauth",
                source_type="github",
                mode="live",
                features=["oauth_login", "token_bootstrap", "issue_pr_future_sync"],
            ),
            ConnectorProfile(
                name="notion-oauth",
                source_type="notion",
                mode="live",
                features=["oauth_login", "token_bootstrap", "document_future_sync"],
            ),
            ConnectorProfile(
                name="jira-oauth",
                source_type="jira",
                mode="live",
                features=["oauth_login", "token_bootstrap", "issue_comment_future_sync"],
            ),
        ]

    def resolve(self, path: Path, profile: str | None = None) -> Connector:
        """Return a connector instance for ``profile``.

        ``path`` is accepted but not used by this method. Any name that is not
        recognised here (including ``None``, "obsidian" and the live
        ``*-oauth`` profiles) falls back to ``ObsidianConnector``; no error is
        raised. The Google connectors are the only ones that receive the
        configured PII redaction setting.
        """
        normalized = self._normalize(profile)
        if normalized in {"markdown", "markdown-files"}:
            return MarkdownConnector()
        if normalized in {"slack", "slack-export"}:
            return SlackExportConnector()
        if normalized in {"jira", "jira-json"}:
            return JiraConnector()
        if normalized in {"github", "github-json"}:
            return GitHubConnector()
        if normalized in {"notion", "notion-json"}:
            return NotionConnector()
        if normalized in {"google-drive", "google-drive-json", "drive"}:
            return GoogleDriveConnector(pii_redaction=self._pii_redaction())
        if normalized in {"gmail", "gmail-json", "google-email", "google-mail"}:
            return GmailConnector(pii_redaction=self._pii_redaction())
        if normalized in {"google-calendar", "google-calendar-json", "calendar"}:
            return GoogleCalendarConnector(pii_redaction=self._pii_redaction())
        if normalized in {"google-sheets", "google-sheets-json", "sheets"}:
            return GoogleSheetsConnector(pii_redaction=self._pii_redaction())
        return ObsidianConnector()

    def runtime(self, path: Path, profile: str | None = None) -> ExportConnectorRuntime:
        """Build the path-based export runtime for ``profile``.

        A missing or empty profile selects "obsidian".

        Raises:
            ValueError: if the profile is unknown, or if it is a live profile
                (those do not read from an export path).
        """
        normalized = self._normalize(profile)
        selected_profile = self.profile(normalized or "obsidian")
        if selected_profile.mode == "live":
            raise ValueError(f"Live connector profile does not use path-based export runtime: {selected_profile.name}")
        return ExportConnectorRuntime(self.resolve(path, selected_profile.name), selected_profile)

    def profile(self, name: str) -> ConnectorProfile:
        """Look up a profile by canonical name or alias.

        Raises:
            ValueError: if no profile matches ``name``.
        """
        normalized = self._normalize(name)
        normalized = self._ALIASES.get(normalized, normalized)
        for profile in self.profiles():
            if profile.name == normalized:
                return profile
        raise ValueError(f"Unknown connector profile: {name}")

    def _pii_redaction(self) -> str:
        """Read the PII redaction setting from the loaded configuration."""
        return load_config().security.pii_redaction
|
ame/connectors/slack.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from ame.bronze.schema import BronzeDocument
|
|
8
|
+
from ame.connectors.base import SourceRef
|
|
9
|
+
from ame.connectors.json_helpers import as_list, first_present, read_json
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SlackExportConnector:
    """Turn Slack export JSON files into one source reference per message."""

    source_type = "slack"

    def scan(self, path: Path) -> list[SourceRef]:
        """Collect a ``SourceRef`` for every dict entry in the export.

        ``path`` may be a single JSON file or a directory that is searched
        recursively for ``*.json``. A file sitting directly in the scanned
        directory uses its stem as the channel name; otherwise the name of
        the file's parent directory is used.
        """
        root = path.expanduser().resolve()
        if root.is_file():
            candidates = [root]
        else:
            candidates = sorted(root.rglob("*.json"))

        found: list[SourceRef] = []
        for json_file in candidates:
            entries = as_list(read_json(json_file))
            if json_file.parent != root:
                channel = json_file.parent.name
            else:
                channel = json_file.stem
            for position, entry in enumerate(entries):
                if not isinstance(entry, dict):
                    continue
                # Fall back to the position in the file when no timestamp-like
                # key yields a truthy value.
                stamp = str(first_present(entry, "ts", "timestamp", "created_at") or position)
                found.append(
                    SourceRef(
                        path=json_file,
                        source_id=f"slack:{channel}:{stamp}",
                        content=self._message_content(channel, entry, stamp),
                    )
                )
        return found

    def load(self, corpus_id: str, ref: SourceRef) -> BronzeDocument:
        """Build the bronze document for ``ref``.

        The document id and content hash are derived from the SHA-256 of the
        content. When ``ref.content`` is falsy the file at ``ref.path`` is
        read instead.
        """
        body = ref.content or ref.path.read_text(encoding="utf-8")
        sha = hashlib.sha256(body.encode("utf-8")).hexdigest()
        pieces = ref.source_id.split(":")
        if len(pieces) > 1:
            channel = pieces[1]
        else:
            channel = ref.path.parent.name
        metadata = {
            "path": str(ref.path),
            "channel": channel,
            "title": f"Slack #{channel}",
            "connector": "slack-export",
        }
        return BronzeDocument(
            id=f"bronze_{sha[:16]}",
            corpus_id=corpus_id,
            source_type=self.source_type,
            source_id=ref.source_id,
            content=body,
            metadata=metadata,
            content_hash=f"sha256:{sha}",
        )

    def _message_content(self, channel: str, row: dict[str, Any], ts: str) -> str:
        """Render one message as front matter followed by a short body."""
        author = first_present(row, "user", "username", "author") or "unknown"
        body = str(first_present(row, "text", "message", "content") or "")
        parent = first_present(row, "thread_ts", "parent_ts")
        thread_line = f"Thread: {parent}" if parent else ""

        front_matter = [
            "---",
            f"title: Slack #{channel} {ts}",
            f"channel: {channel}",
            f"user: {author}",
            f"timestamp: {ts}",
            "---",
        ]
        document = [
            "",
            f"# Slack #{channel}",
            "",
            f"User: {author}",
            "",
            body,
            thread_line,
            "",
        ]
        return "\n".join(front_matter + document)
|
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
import urllib.parse
|
|
6
|
+
import urllib.request
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Protocol
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
|
|
13
|
+
from ame.connectors.sync_history import ConnectorSyncStore
|
|
14
|
+
from ame.core.paths import ame_home, ensure_corpus_layout
|
|
15
|
+
from ame.pipeline import MemoryPipeline
|
|
16
|
+
from ame.security import token_vault
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Page the user is sent to in order to approve the app (see
# SlackOAuthClient.authorization_url).
SLACK_AUTHORIZE_URL = "https://slack.com/oauth/v2/authorize"
# Endpoint that SlackOAuthClient.exchange_code posts the authorization code to.
SLACK_OAUTH_ACCESS_URL = "https://slack.com/api/oauth.v2.access"
# Base URL; SlackApiClient._get appends "/<method>" to it.
SLACK_API_URL = "https://slack.com/api"
# Scopes requested when SlackOAuthConfig.scopes is not overridden.
DEFAULT_SLACK_SCOPES = [
    "channels:read",
    "channels:history",
    "groups:read",
    "groups:history",
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SlackOAuthError(RuntimeError):
    """Raised for failed Slack OAuth exchanges, API calls and token lookups."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class SlackOAuthConfig(BaseModel):
    """OAuth application settings used to build the authorize URL and exchange codes."""

    # App credentials; both default to empty strings.
    client_id: str = ""
    client_secret: str = ""
    # Callback address sent with both the authorize and the exchange request.
    redirect_uri: str = "http://localhost:8765/slack/oauth/callback"
    # Each instance gets its own copy of DEFAULT_SLACK_SCOPES.
    scopes: list[str] = Field(default_factory=lambda: list(DEFAULT_SLACK_SCOPES))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class SlackToken(BaseModel):
    """Access token and identifying details for one Slack workspace."""

    # Workspace id; SlackTokenStore uses it as the storage key.
    team_id: str
    # Sent as the Bearer credential by SlackApiClient.
    access_token: str
    team_name: str | None = None
    bot_user_id: str | None = None
    authed_user_id: str | None = None
    scopes: list[str] = Field(default_factory=list)
    # Timezone-aware UTC timestamp taken when the model is instantiated.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class SlackSyncState(BaseModel):
    """Incremental sync bookmark for one Slack workspace."""

    team_id: str
    # Channel id -> newest message ts seen so far; passed as `oldest` on the
    # next history request for that channel.
    last_ts_by_channel: dict[str, str] = Field(default_factory=dict)
    # Set by SlackSyncStateStore.save; None until the state is first saved.
    updated_at: datetime | None = None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class SlackHttpClient(Protocol):
    """Structural interface for the HTTP layer used by the Slack clients."""

    def post_json(self, url: str, data: dict[str, Any], headers: dict[str, str] | None = None) -> dict[str, Any]:
        """Send ``data`` to ``url`` with POST and return the decoded JSON object."""

    def get_json(self, url: str, params: dict[str, Any], headers: dict[str, str] | None = None) -> dict[str, Any]:
        """Request ``url`` with ``params`` via GET and return the decoded JSON object."""
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class UrlLibSlackHttpClient:
    """HTTP client for Slack built on ``urllib.request``."""

    def post_json(self, url: str, data: dict[str, Any], headers: dict[str, str] | None = None) -> dict[str, Any]:
        """POST ``data`` as a URL-encoded form body and decode the JSON reply."""
        form_body = urllib.parse.urlencode(data).encode("utf-8")
        return self._send(urllib.request.Request(url, data=form_body, headers=headers or {}, method="POST"))

    def get_json(self, url: str, params: dict[str, Any], headers: dict[str, str] | None = None) -> dict[str, Any]:
        """GET ``url`` with the non-``None`` entries of ``params`` as the query string."""
        present = {name: item for name, item in params.items() if item is not None}
        query = urllib.parse.urlencode(present)
        target = url if not query else f"{url}?{query}"
        return self._send(urllib.request.Request(target, headers=headers or {}, method="GET"))

    @staticmethod
    def _send(request: urllib.request.Request) -> dict[str, Any]:
        """Open ``request`` with a 30 second timeout and parse the UTF-8 JSON body."""
        with urllib.request.urlopen(request, timeout=30) as response:
            raw = response.read()
        return json.loads(raw.decode("utf-8"))
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class SlackTokenStore:
    """Keeps Slack tokens in a vault, keyed by workspace id."""

    def __init__(self, path: Path | None = None, backend: str = "file"):
        """Open the vault at ``path`` (default: ``<ame_home>/tokens/slack.json``)."""
        if path:
            self.path = path
        else:
            self.path = ame_home() / "tokens" / "slack.json"
        self.vault = token_vault("slack", self.path, backend=backend)  # type: ignore[arg-type]

    def save(self, token: SlackToken) -> SlackToken:
        """Insert or replace the entry for ``token.team_id`` and return ``token``."""
        entries = self._read()
        entries[token.team_id] = token.model_dump(mode="json")
        self.vault.save(entries)
        return token

    def load(self, team_id: str) -> SlackToken:
        """Return the stored token for ``team_id``.

        Raises:
            SlackOAuthError: if there is no dict entry for ``team_id``.
        """
        entry = self._read().get(team_id)
        if isinstance(entry, dict):
            return SlackToken.model_validate(entry)
        raise SlackOAuthError(f"Slack token not found for team_id={team_id}")

    def revoke(self, team_id: str) -> bool:
        """Remove the entry for ``team_id``; return whether it was present.

        The vault is deleted outright once no entries remain.
        """
        entries = self._read()
        was_present = team_id in entries
        entries.pop(team_id, None)
        if not entries:
            self.vault.delete()
        else:
            self.vault.save(entries)
        return was_present

    def _read(self) -> dict[str, Any]:
        """Return whatever the vault currently holds."""
        return self.vault.load()
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class SlackSyncStateStore:
    """Reads and writes the sync bookmark file inside a corpus directory."""

    def __init__(self, corpus_root: Path):
        """Point the store at ``<corpus_root>/connectors/slack_oauth_state.json``."""
        self.path = corpus_root / "connectors" / "slack_oauth_state.json"

    def load(self, team_id: str) -> SlackSyncState:
        """Return the saved state for ``team_id``.

        A fresh, empty state is returned when the file is missing or when it
        belongs to a different workspace.
        """
        if self.path.exists():
            raw = self.path.read_text(encoding="utf-8")
            stored = SlackSyncState.model_validate(json.loads(raw))
            if stored.team_id == team_id:
                return stored
        return SlackSyncState(team_id=team_id)

    def save(self, state: SlackSyncState) -> SlackSyncState:
        """Stamp ``state.updated_at`` with the current UTC time and write it out."""
        state.updated_at = datetime.now(timezone.utc)
        folder = self.path.parent
        folder.mkdir(parents=True, exist_ok=True)
        self.path.write_text(state.model_dump_json(indent=2), encoding="utf-8")
        return state
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class SlackOAuthClient:
    """Runs the two OAuth steps: building the consent URL and trading the code."""

    def __init__(self, config: SlackOAuthConfig, http: SlackHttpClient | None = None):
        """Store ``config``; use ``UrlLibSlackHttpClient`` when ``http`` is not given."""
        self.config = config
        self.http = http or UrlLibSlackHttpClient()

    def authorization_url(self, state: str) -> str:
        """Return the consent URL carrying ``state`` and comma-joined scopes."""
        query = urllib.parse.urlencode(
            {
                "client_id": self.config.client_id,
                "redirect_uri": self.config.redirect_uri,
                "scope": ",".join(self.config.scopes),
                "state": state,
            }
        )
        return f"{SLACK_AUTHORIZE_URL}?{query}"

    def exchange_code(self, code: str) -> SlackToken:
        """Trade an authorization ``code`` for a ``SlackToken``.

        Raises:
            SlackOAuthError: if the response is not ``ok`` or lacks a team id
                or access token.
        """
        form = {
            "code": code,
            "client_id": self.config.client_id,
            "client_secret": self.config.client_secret,
            "redirect_uri": self.config.redirect_uri,
        }
        payload = self.http.post_json(SLACK_OAUTH_ACCESS_URL, form)
        if not payload.get("ok"):
            raise SlackOAuthError(f"Slack OAuth exchange failed: {payload.get('error', 'unknown_error')}")

        team = payload.get("team") or {}
        authed_user = payload.get("authed_user") or {}
        # The team id is taken from the nested "team" object first, then from
        # a top-level "team_id" key.
        team_id = str(team.get("id") or payload.get("team_id") or "")
        access_token = str(payload.get("access_token") or "")
        if not (team_id and access_token):
            raise SlackOAuthError("Slack OAuth response did not include team_id or access_token")

        return SlackToken(
            team_id=team_id,
            team_name=team.get("name"),
            access_token=access_token,
            bot_user_id=payload.get("bot_user_id"),
            authed_user_id=authed_user.get("id"),
            scopes=_split_scopes(payload.get("scope")),
        )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class SlackApiClient:
    """Authenticated reader for Slack conversation endpoints."""

    def __init__(self, token: SlackToken, http: SlackHttpClient | None = None):
        """Store ``token``; use ``UrlLibSlackHttpClient`` when ``http`` is not given."""
        self.token = token
        self.http = http or UrlLibSlackHttpClient()

    def conversations_list(self, *, types: str = "public_channel,private_channel", limit: int = 200) -> list[dict[str, Any]]:
        """Return every channel dict from ``conversations.list``, following cursors."""
        return self._paginate("conversations.list", {"types": types, "limit": limit}, "channels")

    def conversations_history(
        self,
        channel_id: str,
        *,
        oldest: str | None = None,
        limit: int = 200,
    ) -> list[dict[str, Any]]:
        """Return every message dict from ``conversations.history`` for a channel.

        ``oldest`` is forwarded as given together with ``inclusive=False``.
        """
        query = {"channel": channel_id, "oldest": oldest, "inclusive": False, "limit": limit}
        return self._paginate("conversations.history", query, "messages")

    def conversations_replies(self, channel_id: str, ts: str, *, limit: int = 200) -> list[dict[str, Any]]:
        """Return every message dict from ``conversations.replies`` for thread ``ts``."""
        query = {"channel": channel_id, "ts": ts, "limit": limit}
        return self._paginate("conversations.replies", query, "messages")

    def _paginate(self, method: str, base_params: dict[str, Any], rows_key: str) -> list[dict[str, Any]]:
        """Call ``method`` repeatedly until no next cursor is reported.

        Dict items found under ``rows_key`` in each page are accumulated.
        """
        collected: list[dict[str, Any]] = []
        cursor: str | None = None
        while True:
            page = self._get(method, {**base_params, "cursor": cursor})
            collected.extend(_dict_rows(page.get(rows_key)))
            cursor = _next_cursor(page)
            if not cursor:
                return collected

    def _get(self, method: str, params: dict[str, Any]) -> dict[str, Any]:
        """Issue one authenticated GET and return the payload.

        Raises:
            SlackOAuthError: if the payload's ``ok`` flag is falsy.
        """
        bearer = {"Authorization": f"Bearer {self.token.access_token}"}
        payload = self.http.get_json(f"{SLACK_API_URL}/{method}", params, headers=bearer)
        if payload.get("ok"):
            return payload
        raise SlackOAuthError(f"Slack API {method} failed: {payload.get('error', 'unknown_error')}")
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class SlackOAuthSyncReport(BaseModel):
    """Summary returned by SlackOAuthTransport.sync."""

    corpus_id: str
    team_id: str
    # Directory the per-channel JSON files were written under.
    export_path: Path
    # Number of channels that produced at least one row in this run.
    channels: int
    # Total rows (messages and replies) written in this run.
    messages: int
    # Document count reported by the ingest step; stays 0 when ingest is skipped.
    ingested_documents: int = 0
    # Id of the run recorded in the connector sync store.
    sync_run_id: str | None = None
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class SlackOAuthTransport:
    """Pulls messages through a SlackApiClient into a corpus and optionally ingests them."""

    def __init__(self, api: SlackApiClient):
        """Keep the API client; its token supplies the team id used throughout."""
        self.api = api

    def sync(
        self,
        corpus_id: str,
        *,
        channels: list[str] | None = None,
        ingest: bool = True,
    ) -> SlackOAuthSyncReport:
        """Fetch new messages per channel, write them to disk and record the run.

        Args:
            corpus_id: Corpus whose layout, sync history and state file are used.
            channels: Optional filter; a channel is kept when either its id or
                its name is listed. Empty or None keeps all channels.
            ingest: When true and at least one message was written, the export
                directory is ingested with the "slack-export" profile.

        Returns:
            A report with channel/message/ingested counts and the sync run id.

        Raises:
            Exception: anything raised inside the sync is recorded as a
                "failed" run and then re-raised unchanged.
        """
        started_at = datetime.now(timezone.utc)
        corpus_root = ensure_corpus_layout(corpus_id)
        sync_store = ConnectorSyncStore(corpus_root)
        state_store = SlackSyncStateStore(corpus_root)
        state = state_store.load(self.api.token.team_id)
        selected = set(channels or [])
        export_root = corpus_root / "imports" / "slack-oauth" / self.api.token.team_id
        export_root.mkdir(parents=True, exist_ok=True)

        channel_count = 0
        message_count = 0
        ingested = 0
        try:
            for channel in self.api.conversations_list():
                channel_id = str(channel.get("id") or "")
                # Fall back to the id when the channel has no name.
                channel_name = str(channel.get("name") or channel_id)
                if not channel_id:
                    continue
                if selected and channel_id not in selected and channel_name not in selected:
                    continue
                rows, latest_ts = self._sync_channel(channel_id, channel_name, state)
                # Channels without new rows are neither counted nor rewritten.
                if not rows:
                    continue
                channel_count += 1
                message_count += len(rows)
                # NOTE(review): the file is overwritten with only this run's
                # rows, and two channel names that sanitise to the same path
                # part would share one file - confirm this is intended.
                target = export_root / _safe_path_part(channel_name) / "oauth.json"
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_text(json.dumps(rows, ensure_ascii=False, indent=2), encoding="utf-8")
                if latest_ts:
                    state.last_ts_by_channel[channel_id] = latest_ts

            # NOTE(review): the bookmark is persisted before ingest runs, so a
            # failing ingest still advances the per-channel timestamps.
            state_store.save(state)
            if ingest and message_count:
                # The whole export directory for this team is ingested, not
                # just the files written in this run.
                report = MemoryPipeline().ingest(corpus_id, export_root, profile="slack-export")
                ingested = report.documents
            run = sync_store.record(
                connector="slack-oauth",
                status="success",
                started_at=started_at,
                source=self.api.token.team_id,
                counts={"channels": channel_count, "messages": message_count, "ingested_documents": ingested},
                metadata={"team_id": self.api.token.team_id, "channels_filter": ",".join(channels or [])},
            )
            return SlackOAuthSyncReport(
                corpus_id=corpus_id,
                team_id=self.api.token.team_id,
                export_path=export_root,
                channels=channel_count,
                messages=message_count,
                ingested_documents=ingested,
                sync_run_id=run.id,
            )
        except Exception as exc:
            # Record the counts reached so far together with the error text,
            # then let the original exception propagate.
            sync_store.record(
                connector="slack-oauth",
                status="failed",
                started_at=started_at,
                source=self.api.token.team_id,
                counts={"channels": channel_count, "messages": message_count, "ingested_documents": ingested},
                metadata={"team_id": self.api.token.team_id, "channels_filter": ",".join(channels or [])},
                error=str(exc),
            )
            raise

    def _sync_channel(
        self,
        channel_id: str,
        channel_name: str,
        state: SlackSyncState,
    ) -> tuple[list[dict[str, Any]], str | None]:
        """Collect rows for one channel newer than its stored bookmark.

        Returns:
            The rows sorted by timestamp, and the largest timestamp seen
            (the previous bookmark when nothing newer was found, possibly None).
        """
        oldest = state.last_ts_by_channel.get(channel_id)
        rows: list[dict[str, Any]] = []
        latest_ts: str | None = oldest
        for message in self.api.conversations_history(channel_id, oldest=oldest):
            row = self._message_row(message, channel_id, channel_name)
            if row:
                rows.append(row)
                latest_ts = _max_ts(latest_ts, row["ts"])
            # Threads are expanded even when the parent itself produced no row.
            if message.get("reply_count") or message.get("thread_ts"):
                for reply in self.api.conversations_replies(channel_id, str(message.get("ts"))):
                    # Skip the entry that repeats the parent message.
                    if str(reply.get("ts")) == str(message.get("ts")):
                        continue
                    reply_row = self._message_row(reply, channel_id, channel_name, parent_ts=str(message.get("ts")))
                    if reply_row:
                        rows.append(reply_row)
                        latest_ts = _max_ts(latest_ts, reply_row["ts"])
        rows.sort(key=lambda row: _ts_as_float(row["ts"]))
        return rows, latest_ts

    def _message_row(
        self,
        message: dict[str, Any],
        channel_id: str,
        channel_name: str,
        parent_ts: str | None = None,
    ) -> dict[str, Any] | None:
        """Convert a message payload into the row dict written to disk.

        Returns None when the message has no text (after stripping) or no ts.
        An explicit ``parent_ts`` takes precedence over the message's own
        ``thread_ts``.
        """
        text = str(message.get("text") or "").strip()
        ts = str(message.get("ts") or "")
        if not text or not ts:
            return None
        return {
            "ts": ts,
            "user": message.get("user") or message.get("username") or message.get("bot_id") or "unknown",
            "text": text,
            "thread_ts": parent_ts or message.get("thread_ts"),
            "channel_id": channel_id,
            "channel_name": channel_name,
            "source": "slack-oauth",
        }
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def load_slack_token(team_id: str, store_path: Path | None = None) -> SlackToken:
    """Return the stored token for ``team_id`` from the store at ``store_path``."""
    store = SlackTokenStore(store_path)
    return store.load(team_id)
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def exchange_and_save_slack_token(
    code: str,
    config: SlackOAuthConfig,
    *,
    store_path: Path | None = None,
    token_backend: str = "file",
    http: SlackHttpClient | None = None,
) -> SlackToken:
    """Exchange ``code`` for a token, persist it, and return it."""
    client = SlackOAuthClient(config, http=http)
    fresh_token = client.exchange_code(code)
    # The store is opened only after the exchange has succeeded.
    store = SlackTokenStore(store_path, backend=token_backend)
    return store.save(fresh_token)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _split_scopes(value: Any) -> list[str]:
|
|
381
|
+
if isinstance(value, list):
|
|
382
|
+
return [str(item) for item in value if str(item).strip()]
|
|
383
|
+
if isinstance(value, str):
|
|
384
|
+
return [item.strip() for item in value.split(",") if item.strip()]
|
|
385
|
+
return []
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def _dict_rows(value: Any) -> list[dict[str, Any]]:
|
|
389
|
+
if not isinstance(value, list):
|
|
390
|
+
return []
|
|
391
|
+
return [item for item in value if isinstance(item, dict)]
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def _next_cursor(payload: dict[str, Any]) -> str | None:
|
|
395
|
+
metadata = payload.get("response_metadata")
|
|
396
|
+
if not isinstance(metadata, dict):
|
|
397
|
+
return None
|
|
398
|
+
cursor = metadata.get("next_cursor")
|
|
399
|
+
return str(cursor) if cursor else None
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _safe_path_part(value: str) -> str:
|
|
403
|
+
clean = re.sub(r"[^A-Za-z0-9._-]+", "-", value.strip()).strip("-")
|
|
404
|
+
return clean or "channel"
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _ts_as_float(value: str) -> float:
|
|
408
|
+
try:
|
|
409
|
+
return float(value)
|
|
410
|
+
except ValueError:
|
|
411
|
+
return 0.0
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def _max_ts(left: str | None, right: str) -> str:
    """Return whichever timestamp string is numerically larger.

    ``right`` wins when ``left`` is None; ``left`` wins ties.
    """
    if left is not None and not (_ts_as_float(right) > _ts_as_float(left)):
        return left
    return right
|