adaptive-memory-engine 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adaptive_memory_engine-0.1.6.dist-info/METADATA +228 -0
- adaptive_memory_engine-0.1.6.dist-info/RECORD +72 -0
- adaptive_memory_engine-0.1.6.dist-info/WHEEL +4 -0
- adaptive_memory_engine-0.1.6.dist-info/entry_points.txt +3 -0
- adaptive_memory_engine-0.1.6.dist-info/licenses/LICENSE +21 -0
- ame/__init__.py +1 -0
- ame/agent/__init__.py +1 -0
- ame/agent/mcp.py +474 -0
- ame/agent/memory_api.py +141 -0
- ame/agent/results.py +30 -0
- ame/bronze/schema.py +17 -0
- ame/bronze/store.py +38 -0
- ame/cli/__init__.py +1 -0
- ame/cli/main.py +903 -0
- ame/connectors/base.py +30 -0
- ame/connectors/contract.py +199 -0
- ame/connectors/github.py +66 -0
- ame/connectors/google.py +464 -0
- ame/connectors/google_oauth.py +156 -0
- ame/connectors/jira.py +66 -0
- ame/connectors/json_helpers.py +43 -0
- ame/connectors/markdown.py +116 -0
- ame/connectors/notion.py +59 -0
- ame/connectors/oauth_callback.py +102 -0
- ame/connectors/oauth_provider.py +250 -0
- ame/connectors/obsidian.py +19 -0
- ame/connectors/router.py +155 -0
- ame/connectors/slack.py +66 -0
- ame/connectors/slack_oauth.py +417 -0
- ame/connectors/sync_history.py +73 -0
- ame/context_budget.py +106 -0
- ame/core/config.py +77 -0
- ame/core/corpus.py +17 -0
- ame/core/errors.py +18 -0
- ame/core/paths.py +111 -0
- ame/core/state.py +57 -0
- ame/export/obsidian.py +123 -0
- ame/gold/builder.py +300 -0
- ame/gold/ontology.py +80 -0
- ame/gold/resolver.py +91 -0
- ame/gold/schema.py +40 -0
- ame/gold/store.py +45 -0
- ame/hardware/profiler.py +85 -0
- ame/hardware/tier.py +27 -0
- ame/hermes/__init__.py +3 -0
- ame/hermes/memory.py +209 -0
- ame/models/download.py +243 -0
- ame/models/ollama.py +60 -0
- ame/models/registry.py +101 -0
- ame/models/router.py +22 -0
- ame/pipeline.py +155 -0
- ame/query/diff.py +40 -0
- ame/query/engine.py +919 -0
- ame/query/memory_os.py +313 -0
- ame/query/mql.py +84 -0
- ame/query/multihop.py +264 -0
- ame/query/result.py +20 -0
- ame/sdk.py +52 -0
- ame/security.py +145 -0
- ame/silver/extractor.py +414 -0
- ame/silver/llm_extractor.py +181 -0
- ame/silver/prompts.py +56 -0
- ame/silver/rationale.py +140 -0
- ame/silver/schema.py +51 -0
- ame/silver/store.py +59 -0
- ame/storage/custom_kg.py +33 -0
- ame/storage/lightrag_adapter.py +362 -0
- ame/validation/confidence.py +5 -0
- ame/validation/grounding.py +10 -0
- ame/validation/type_gate.py +22 -0
- ame/writeback.py +173 -0
- memory/__init__.py +3 -0
ame/connectors/google.py
ADDED
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from ame.bronze.schema import BronzeDocument
|
|
8
|
+
from ame.connectors.base import SourceRef
|
|
9
|
+
from ame.connectors.json_helpers import first_present, read_json
|
|
10
|
+
from ame.security import PiiRedactionMode, redact_pii
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class GoogleDriveConnector:
    """Build ``SourceRef`` entries from JSON exports of Google Drive files.

    Every dict row found in the scanned JSON becomes one reference whose
    content is a front-matter document and whose metadata is tagged as a
    private ``Document`` memory.
    """

    source_type = "google_drive"
    profile_name = "google-drive-json"

    def __init__(self, pii_redaction: PiiRedactionMode = "off"):
        # Redaction mode handed to _safe_content / _safe_metadata for each row.
        self.pii_redaction = pii_redaction

    def scan(self, path: Path) -> list[SourceRef]:
        """Return one ``SourceRef`` per dict row in the JSON file(s) at *path*.

        Rows are looked up via ``_rows`` under the keys ``files``,
        ``documents`` and ``items``; rows that are not dicts are skipped.
        """
        collected: list[SourceRef] = []
        for json_file in _json_files(path):
            entries = _rows(read_json(json_file), "files", "documents", "items")
            for position, entry in enumerate(entries):
                if isinstance(entry, dict):
                    collected.append(self._drive_ref(json_file, position, entry))
        return collected

    def _drive_ref(self, json_file: Path, position: int, entry: dict[str, Any]) -> SourceRef:
        """Translate a single Drive row into a ``SourceRef``."""
        # Rows that carry no id fall back to "<file stem>-<row position>".
        doc_id = _text(first_present(entry, "id", "file_id", "document_id") or f"{json_file.stem}-{position}")
        title = _text(first_present(entry, "name", "title") or doc_id)
        body = _text(first_present(entry, "text", "body", "content", "description"))
        url = _text(first_present(entry, "webViewLink", "web_url", "url", "alternateLink"))
        modified_at = _text(first_present(entry, "modifiedTime", "modified_at", "updated_at"))
        folder = _text(first_present(entry, "folder", "folder_path", "path", "parents"))
        owners = _people(first_present(entry, "owners", "owner", "lastModifyingUser"))

        front_values = {
            "google_service": "drive",
            "document_id": doc_id,
            "folder": folder,
            "modified_at": modified_at,
            "occurred_at": modified_at,
            "original_url": url,
        }
        body_lines = ["# " + title, "", f"Folder: {folder}", "", body]
        document = _frontmatter(title, front_values, body_lines)

        # The modification time doubles as the "occurred_at" timestamp.
        metadata = {
            "title": title,
            "connector": self.profile_name,
            "google_service": "drive",
            "document_id": doc_id,
            "folder": folder,
            "original_url": url,
            "modified_at": modified_at,
            "occurred_at": modified_at,
            "owners": owners,
            "memory_type": "Document",
            "privacy_level": "private",
        }
        return SourceRef(
            path=json_file,
            source_id=f"google_drive:{doc_id}",
            content=_safe_content(document, self.pii_redaction),
            metadata=_safe_metadata(metadata, self.pii_redaction),
        )

    def load(self, corpus_id: str, ref: SourceRef) -> BronzeDocument:
        """Wrap *ref* in a ``BronzeDocument`` for *corpus_id* via ``_bronze``."""
        return _bronze(corpus_id, self.source_type, ref, self.profile_name)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class GmailConnector:
    """Build ``SourceRef`` entries from JSON exports of Gmail threads/messages.

    Each dict row is treated as a thread; its messages (or the row itself
    when it has no ``messages`` list) are rendered into one front-matter
    document tagged as a private ``Email`` memory.
    """

    source_type = "gmail"
    profile_name = "gmail-json"

    def __init__(self, pii_redaction: PiiRedactionMode = "off"):
        # Redaction mode handed to _safe_content / _safe_metadata for each row.
        self.pii_redaction = pii_redaction

    def scan(self, path: Path) -> list[SourceRef]:
        """Return one ``SourceRef`` per dict row in the JSON file(s) at *path*.

        Rows are looked up via ``_rows`` under the keys ``threads``,
        ``messages`` and ``items``; rows that are not dicts are skipped.
        """
        collected: list[SourceRef] = []
        for json_file in _json_files(path):
            entries = _rows(read_json(json_file), "threads", "messages", "items")
            for position, entry in enumerate(entries):
                if isinstance(entry, dict):
                    collected.append(self._thread_ref(json_file, position, entry))
        return collected

    def _thread_ref(self, json_file: Path, position: int, entry: dict[str, Any]) -> SourceRef:
        """Translate a single thread row into a ``SourceRef``."""
        messages = _messages(entry)
        # Rows that carry no id fall back to "<file stem>-<row position>".
        thread_id = _text(first_present(entry, "threadId", "thread_id", "id") or f"{json_file.stem}-{position}")
        subject = _text(first_present(entry, "subject", "title") or _first_message_value(messages, "subject") or thread_id)
        # Without an explicit URL, link to the thread in the Gmail web UI.
        url = _text(first_present(entry, "url", "web_url") or f"https://mail.google.com/mail/u/0/#all/{thread_id}")
        participants = sorted(set(_message_people(messages)))
        message_ids = _message_ids(messages)
        labels = _message_labels(entry, messages)
        occurred_at = _text(
            first_present(entry, "date", "internalDate", "created_at", "updated_at")
            or _first_message_value(messages, "date")
            or _first_message_value(messages, "internalDate")
        )

        body_lines = ["# " + subject, "", f"Thread: {thread_id}", ""]
        for message in messages:
            body_lines.extend(self._message_lines(message))

        front_values = {
            "google_service": "gmail",
            "thread_id": thread_id,
            "message_ids": ", ".join(message_ids),
            "labels": ", ".join(labels),
            "occurred_at": occurred_at,
            "original_url": url,
        }
        document = _frontmatter(subject, front_values, body_lines)

        metadata = {
            "title": subject,
            "connector": self.profile_name,
            "google_service": "gmail",
            "thread_id": thread_id,
            "message_ids": message_ids,
            "labels": labels,
            "original_url": url,
            "occurred_at": occurred_at,
            "participants": participants,
            "memory_type": "Email",
            "privacy_level": "private",
        }
        return SourceRef(
            path=json_file,
            source_id=f"gmail:{thread_id}",
            content=_safe_content(document, self.pii_redaction),
            metadata=_safe_metadata(metadata, self.pii_redaction),
        )

    @staticmethod
    def _message_lines(message: dict[str, Any]) -> list[str]:
        """Render one message as header lines, a blank line, its body, and a blank line."""
        message_id = _text(first_present(message, "id", "message_id", "messageId"))
        sender = _text(first_present(message, "from", "sender", "author"))
        to = _text(first_present(message, "to", "recipients"))
        cc = _text(first_present(message, "cc"))
        date = _text(first_present(message, "date", "internalDate", "created_at"))
        # Only this message's own labels are listed here, hence the empty list.
        message_labels = ", ".join(_message_labels(message, []))
        body = _text(first_present(message, "body", "text", "content", "snippet"))
        return [
            f"Message ID: {message_id}",
            f"From: {sender}",
            f"To: {to}",
            f"Cc: {cc}",
            f"Date: {date}",
            f"Labels: {message_labels}",
            "",
            body,
            "",
        ]

    def load(self, corpus_id: str, ref: SourceRef) -> BronzeDocument:
        """Wrap *ref* in a ``BronzeDocument`` for *corpus_id* via ``_bronze``."""
        return _bronze(corpus_id, self.source_type, ref, self.profile_name)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class GoogleCalendarConnector:
    """Build ``SourceRef`` entries from JSON exports of Google Calendar events.

    Every dict row becomes one front-matter document tagged as a private
    ``Meeting`` memory, keyed by calendar id and event id.
    """

    source_type = "google_calendar"
    profile_name = "google-calendar-json"

    def __init__(self, pii_redaction: PiiRedactionMode = "off"):
        # Redaction mode handed to _safe_content / _safe_metadata for each row.
        self.pii_redaction = pii_redaction

    def scan(self, path: Path) -> list[SourceRef]:
        """Return one ``SourceRef`` per dict row in the JSON file(s) at *path*.

        Rows are looked up via ``_rows`` under the keys ``events`` and
        ``items``; rows that are not dicts are skipped.
        """
        collected: list[SourceRef] = []
        for json_file in _json_files(path):
            entries = _rows(read_json(json_file), "events", "items")
            for position, entry in enumerate(entries):
                if isinstance(entry, dict):
                    collected.append(self._event_ref(json_file, position, entry))
        return collected

    def _event_ref(self, json_file: Path, position: int, entry: dict[str, Any]) -> SourceRef:
        """Translate a single calendar event row into a ``SourceRef``."""
        # Rows that carry no id fall back to "<file stem>-<row position>".
        event_id = _text(first_present(entry, "id", "event_id", "iCalUID") or f"{json_file.stem}-{position}")
        calendar_id = _text(first_present(entry, "calendar_id", "calendarId") or "primary")
        title = _text(first_present(entry, "summary", "title", "name") or event_id)
        start = _date_value(first_present(entry, "start", "start_time", "starts_at"))
        end = _date_value(first_present(entry, "end", "end_time", "ends_at"))
        attendees = _people(first_present(entry, "attendees", "participants"))
        url = _text(first_present(entry, "htmlLink", "url", "web_url"))
        location = _text(first_present(entry, "location", "meeting_location"))
        description = _text(first_present(entry, "description", "body", "content"))

        # The start time doubles as the "occurred_at" timestamp.
        front_values = {
            "google_service": "calendar",
            "calendar_id": calendar_id,
            "event_id": event_id,
            "start": start,
            "end": end,
            "location": location,
            "occurred_at": start,
            "original_url": url,
        }
        body_lines = [
            "# " + title,
            "",
            f"Start: {start}",
            f"End: {end}",
            f"Location: {location}",
            f"Participants: {', '.join(attendees)}",
            "",
            description,
        ]
        document = _frontmatter(title, front_values, body_lines)

        metadata = {
            "title": title,
            "connector": self.profile_name,
            "google_service": "calendar",
            "calendar_id": calendar_id,
            "event_id": event_id,
            "original_url": url,
            "start": start,
            "end": end,
            "location": location,
            "occurred_at": start,
            "participants": attendees,
            "memory_type": "Meeting",
            "privacy_level": "private",
        }
        return SourceRef(
            path=json_file,
            source_id=f"google_calendar:{calendar_id}:{event_id}",
            content=_safe_content(document, self.pii_redaction),
            metadata=_safe_metadata(metadata, self.pii_redaction),
        )

    def load(self, corpus_id: str, ref: SourceRef) -> BronzeDocument:
        """Wrap *ref* in a ``BronzeDocument`` for *corpus_id* via ``_bronze``."""
        return _bronze(corpus_id, self.source_type, ref, self.profile_name)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class GoogleSheetsConnector:
    """Build ``SourceRef`` entries from JSON exports of Google Sheets rows.

    Each row produced by ``_sheet_rows`` becomes one front-matter document
    tagged as a private ``Document`` memory. Values missing on a row are
    looked up on the enclosing JSON object when that object is a dict.
    """

    source_type = "google_sheets"
    profile_name = "google-sheets-json"

    def __init__(self, pii_redaction: PiiRedactionMode = "off"):
        # Redaction mode handed to _safe_content / _safe_metadata for each row.
        self.pii_redaction = pii_redaction

    def scan(self, path: Path) -> list[SourceRef]:
        """Return one ``SourceRef`` per sheet row in the JSON file(s) at *path*."""
        collected: list[SourceRef] = []
        for json_file in _json_files(path):
            data = read_json(json_file)
            # Spreadsheet-level fields live on the top-level object, if any.
            parent = data if isinstance(data, dict) else {}
            for position, entry in enumerate(_sheet_rows(data)):
                collected.append(self._row_ref(json_file, parent, position, entry))
        return collected

    def _row_ref(
        self,
        json_file: Path,
        parent: dict[str, Any],
        position: int,
        entry: dict[str, Any],
    ) -> SourceRef:
        """Translate a single sheet row into a ``SourceRef``."""
        spreadsheet_id = _text(
            first_present(entry, "spreadsheet_id", "spreadsheetId")
            or first_present(parent, "spreadsheet_id", "spreadsheetId", "id")
            or json_file.stem
        )
        sheet_name = _text(
            first_present(entry, "sheet_name", "sheetName")
            or first_present(parent, "sheet_name", "sheetName")
            or "Sheet1"
        )
        # Row numbers are 1-based when derived from the position.
        row_index = _text(first_present(entry, "row_index", "rowIndex") or position + 1)
        title = _text(first_present(entry, "title", "summary") or f"{sheet_name} row {row_index}")
        values = _sheet_values(entry)
        url = _text(first_present(entry, "url", "web_url") or first_present(parent, "url", "web_url"))
        modified_at = _text(
            first_present(entry, "modified_at", "updated_at")
            or first_present(parent, "modified_at", "updated_at")
        )
        # The modification time doubles as the "occurred_at" timestamp.
        occurred_at = modified_at

        front_values = {
            "google_service": "sheets",
            "spreadsheet_id": spreadsheet_id,
            "sheet_name": sheet_name,
            "row_index": row_index,
            "occurred_at": occurred_at,
            "original_url": url,
        }
        document = _frontmatter(title, front_values, ["# " + title, "", values])

        metadata = {
            "title": title,
            "connector": self.profile_name,
            "google_service": "sheets",
            "spreadsheet_id": spreadsheet_id,
            "sheet_name": sheet_name,
            "row_index": row_index,
            "original_url": url,
            "modified_at": modified_at,
            "occurred_at": occurred_at,
            "memory_type": "Document",
            "privacy_level": "private",
        }
        return SourceRef(
            path=json_file,
            source_id=f"google_sheets:{spreadsheet_id}:{sheet_name}:{row_index}",
            content=_safe_content(document, self.pii_redaction),
            metadata=_safe_metadata(metadata, self.pii_redaction),
        )

    def load(self, corpus_id: str, ref: SourceRef) -> BronzeDocument:
        """Wrap *ref* in a ``BronzeDocument`` for *corpus_id* via ``_bronze``."""
        return _bronze(corpus_id, self.source_type, ref, self.profile_name)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _json_files(path: Path) -> list[Path]:
    """Return the JSON files designated by *path*.

    *path* is user-expanded and resolved first. A regular file is returned
    as a single-element list (whatever its suffix); anything else is
    searched recursively for ``*.json`` files, returned in sorted order.
    """
    resolved = path.expanduser().resolve()
    if resolved.is_file():
        return [resolved]
    return sorted(resolved.rglob("*.json"))
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _rows(data: Any, *keys: str) -> list[Any]:
    """Extract the list of rows from a parsed JSON payload.

    A list payload is returned unchanged. For a dict payload the first of
    *keys* whose value is a list wins; when none matches, the dict itself
    is treated as the only row. Any other payload yields no rows.
    """
    if isinstance(data, list):
        return data
    if isinstance(data, dict):
        candidates = (data.get(key) for key in keys)
        return next((found for found in candidates if isinstance(found, list)), [data])
    return []
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _messages(row: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the dict messages nested under ``row["messages"]``.

    When *row* has no ``messages`` list, the row itself is treated as the
    single message.
    """
    nested = row.get("messages")
    if not isinstance(nested, list):
        return [row]
    return list(filter(lambda candidate: isinstance(candidate, dict), nested))
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _first_message_value(messages: list[dict[str, Any]], key: str) -> Any:
    """Return the first truthy ``message[key]`` across *messages*, else ``None``."""
    values = (message.get(key) for message in messages)
    # filter(None, ...) keeps only truthy values, matching an ``if value`` test.
    return next(filter(None, values), None)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _message_people(messages: list[dict[str, Any]]) -> list[str]:
    """Collect every person named in the sender/recipient fields of *messages*.

    Duplicates are kept and the order follows message order, then field order.
    """
    address_fields = ("from", "sender", "author", "to", "cc", "bcc", "recipients")
    return [
        person
        for message in messages
        for field in address_fields
        for person in _people(message.get(field))
    ]
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def _message_ids(messages: list[dict[str, Any]]) -> list[str]:
    """Return the distinct, non-empty message ids of *messages* in first-seen order."""
    rendered = (
        _text(first_present(message, "id", "message_id", "messageId"))
        for message in messages
    )
    return _unique([message_id for message_id in rendered if message_id])
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def _message_labels(row: dict[str, Any], messages: list[dict[str, Any]]) -> list[str]:
    """Return the distinct labels found on *row* and then on each of *messages*.

    Labels are read from ``labelIds``, ``label_ids`` and ``labels`` and are
    split/normalised by ``_people``.
    """
    label_keys = ("labelIds", "label_ids", "labels")
    gathered: list[str] = []
    # The thread row comes first so its labels lead the result.
    for holder in [row, *messages]:
        for key in label_keys:
            gathered.extend(_people(holder.get(key)))
    return _unique(gathered)
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _unique(values: list[str]) -> list[str]:
    """Drop falsy entries and duplicates from *values*, keeping first-seen order."""
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(value for value in values if value))
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _sheet_rows(data: Any) -> list[dict[str, Any]]:
    """Normalise a parsed Sheets payload into a list of row dicts.

    A list payload is used as the rows directly. For a dict payload the
    ``rows`` list is preferred, then the ``values`` list; without either,
    the dict itself is the single row. Any other payload yields no rows.
    Every row is passed through ``_sheet_row`` with its 0-based position.
    """
    if isinstance(data, list):
        raw_rows = data
    elif isinstance(data, dict):
        listed = (data.get(key) for key in ("rows", "values"))
        raw_rows = next((found for found in listed if isinstance(found, list)), None)
        if raw_rows is None:
            return [_sheet_row(data, 0)]
    else:
        return []
    return [_sheet_row(raw, position) for position, raw in enumerate(raw_rows)]
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _sheet_row(row: Any, index: int) -> dict[str, Any]:
    """Return *row* as a dict that carries a row number.

    Args:
        row: One parsed row; either a dict of fields or any other value
            (typically a list of cell values).
        index: 0-based position of the row within its sheet.

    Returns:
        For a dict row, a shallow copy with ``row_index`` set to
        ``index + 1`` unless the row already has a ``row_index`` or
        ``rowIndex`` key. For any other row,
        ``{"row_index": index + 1, "values": row}``.
    """
    position = index + 1
    if not isinstance(row, dict):
        return {"row_index": position, "values": row}
    # Copy so the caller's parsed JSON is never mutated as a side effect.
    normalized = dict(row)
    # Leave an explicit camelCase "rowIndex" alone: injecting "row_index"
    # here would shadow it for callers that look up "row_index" first.
    if "row_index" not in normalized and "rowIndex" not in normalized:
        normalized["row_index"] = position
    return normalized
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _sheet_values(row: dict[str, Any]) -> str:
    """Render the cell values of *row* as text.

    The cells are read from ``values``, ``cells`` or ``data``. A dict is
    rendered as ``key: value`` lines and a list as ``- value`` bullet
    lines. Otherwise the row's ``body``/``content``/``text``/``description``
    field is rendered, falling back to the whole row.
    """
    cells = first_present(row, "values", "cells", "data")
    if isinstance(cells, dict):
        lines = [f"{key}: {_text(value)}" for key, value in cells.items()]
    elif isinstance(cells, list):
        lines = [f"- {_text(value)}" for value in cells]
    else:
        return _text(first_present(row, "body", "content", "text", "description") or row)
    return "\n".join(lines)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def _frontmatter(title: str, values: dict[str, str], body_lines: list[str]) -> str:
    """Assemble a front-matter document as a single newline-joined string.

    The header is delimited by ``---`` lines and lists ``title`` followed by
    each ``key: value`` pair of *values* in order. A blank line separates
    it from *body_lines*, and the result ends with a trailing newline.
    Values are interpolated verbatim; nothing is escaped.
    """
    header = [
        "---",
        f"title: {title}",
        *(f"{key}: {value}" for key, value in values.items()),
        "---",
        "",
    ]
    return "\n".join([*header, *body_lines, ""])
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def _bronze(corpus_id: str, source_type: str, ref: SourceRef, connector: str) -> BronzeDocument:
    """Create the ``BronzeDocument`` for *ref*.

    The content is ``ref.content`` when it is non-empty, otherwise the
    UTF-8 text of ``ref.path``. Its SHA-256 digest provides both the
    document id (first 16 hex characters) and the content hash.
    ``ref.metadata`` is merged over the ``path``/``connector`` defaults.
    """
    text = ref.content or ref.path.read_text(encoding="utf-8")
    sha = hashlib.sha256(text.encode("utf-8")).hexdigest()

    # Reference metadata wins over the defaults on key collisions.
    merged = {"path": str(ref.path), "connector": connector}
    merged.update(ref.metadata)

    return BronzeDocument(
        id=f"bronze_{sha[:16]}",
        corpus_id=corpus_id,
        source_type=source_type,
        source_id=ref.source_id,
        content=text,
        metadata=merged,
        content_hash=f"sha256:{sha}",
    )
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def _text(value: Any) -> str:
    """Flatten a JSON-like *value* into display text.

    Args:
        value: ``None``, a scalar, a dict, or a list (arbitrarily nested).

    Returns:
        ``""`` for ``None``. For a dict, the text of the first non-``None``
        entry among the well-known keys ``displayName``, ``emailAddress``,
        ``email``, ``name``, ``dateTime``, ``date``, ``value``; without any
        of them, space-separated ``key=value`` pairs. For a list, the
        non-empty texts of its items joined with ``", "``. Anything else
        is converted with ``str``.
    """
    if value is None:
        return ""
    if isinstance(value, dict):
        for key in ("displayName", "emailAddress", "email", "name", "dateTime", "date", "value"):
            if value.get(key) is not None:
                return _text(value[key])
        return " ".join(f"{key}={_text(item)}" for key, item in value.items())
    if isinstance(value, list):
        # Render each item exactly once: rendering it a second time just to
        # test for emptiness doubles the work at every nesting level.
        rendered = (_text(item) for item in value)
        return ", ".join(part for part in rendered if part)
    return str(value)
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def _date_value(value: Any) -> str:
    """Render a date field as text.

    A dict is reduced to its ``dateTime`` or ``date`` entry (looked up with
    ``first_present``) before rendering; other values are rendered as-is.
    """
    picked = first_present(value, "dateTime", "date") if isinstance(value, dict) else value
    return _text(picked)
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _people(value: Any) -> list[str]:
    """Flatten *value* into a list of person (or label) strings.

    ``None`` gives an empty list and lists are flattened recursively. A
    dict with a nested ``emailAddress`` dict is resolved through that
    dict; other dicts contribute their ``email``/``address``/
    ``displayName``/``name`` entry. Any other value is rendered as text
    and split on commas and semicolons, dropping blank parts.
    """
    if value is None:
        return []
    if isinstance(value, list):
        return [name for item in value for name in _people(item)]
    if isinstance(value, dict):
        nested = value.get("emailAddress")
        if isinstance(nested, dict):
            return _people(nested)
        label = _text(first_present(value, "email", "address", "displayName", "name"))
        return [label] if label else []
    # Treat ";" as just another separator before splitting.
    pieces = _text(value).replace(";", ",").split(",")
    return [piece.strip() for piece in pieces if piece.strip()]
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def _safe_metadata(metadata: dict[str, Any], mode: PiiRedactionMode) -> dict[str, Any]:
    """Return a copy of *metadata* stamped with the redaction *mode*.

    The copy is passed through ``redact_pii`` when *mode* is ``"metadata"``
    or ``"content"``; the input dict is never modified.
    """
    stamped = {**metadata, "pii_redaction": mode}
    if mode not in {"metadata", "content"}:
        return stamped
    return redact_pii(stamped)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def _safe_content(content: str, mode: PiiRedactionMode) -> str:
    """Return *content*, passed through ``redact_pii`` only in ``"content"`` mode."""
    if mode == "content":
        return redact_pii(content)
    return content
|
ame/connectors/google_oauth.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import urllib.parse
|
|
5
|
+
import urllib.request
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Protocol
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
from ame.core.paths import ame_home
|
|
13
|
+
from ame.security import token_vault
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Endpoint the user's browser is sent to for the consent step.
GOOGLE_AUTHORIZE_URL = "https://accounts.google.com/o/oauth2/v2/auth"

# Endpoint that exchanges an authorization code for tokens.
GOOGLE_TOKEN_URL = "https://oauth2.googleapis.com/token"

# Scopes requested by default: read-only Drive, Gmail, Calendar and Sheets.
DEFAULT_GOOGLE_SCOPES = [
    "https://www.googleapis.com/auth/drive.readonly",
    "https://www.googleapis.com/auth/gmail.readonly",
    "https://www.googleapis.com/auth/calendar.readonly",
    "https://www.googleapis.com/auth/spreadsheets.readonly",
]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class GoogleOAuthError(RuntimeError):
    """Raised when a Google OAuth exchange fails or a stored token is missing."""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class GoogleOAuthConfig(BaseModel):
    """Client settings for the Google OAuth authorization-code flow."""

    # Credentials of the OAuth client; both default to empty strings.
    client_id: str = ""
    client_secret: str = ""

    # Where Google redirects the browser after consent.
    redirect_uri: str = "http://localhost:8765/google/oauth/callback"

    # Each instance gets its own copy of the default scope list.
    scopes: list[str] = Field(default_factory=lambda: list(DEFAULT_GOOGLE_SCOPES))

    # Sent as the "access_type" and "prompt" query parameters of the
    # authorization URL.
    access_type: str = "offline"
    prompt: str = "consent"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class GoogleToken(BaseModel):
    """Token set obtained for one Google account."""

    # Key under which the token is stored by GoogleTokenStore.
    account_id: str

    access_token: str
    refresh_token: str | None = None
    token_type: str = "Bearer"

    # Copied as-is from the token response's "expires_in" field.
    expires_in: int | None = None

    scopes: list[str] = Field(default_factory=list)

    # Timezone-aware UTC timestamp taken when the model is instantiated.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class GoogleHttpClient(Protocol):
    """Structural interface for the HTTP transport used by ``GoogleOAuthClient``."""

    def post_json(
        self,
        url: str,
        data: dict[str, Any],
        headers: dict[str, str] | None = None,
    ) -> dict[str, Any]:
        """POST *data* to *url* and return the response decoded as a dict."""
        ...
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class UrlLibGoogleHttpClient:
    """``GoogleHttpClient`` implementation built on ``urllib.request``."""

    def post_json(self, url: str, data: dict[str, Any], headers: dict[str, str] | None = None) -> dict[str, Any]:
        """POST *data* form-encoded to *url* and return the decoded JSON body.

        Args:
            url: Endpoint to call.
            data: Form fields; URL-encoded and sent as the request body.
            headers: Optional extra request headers.

        Returns:
            The JSON-decoded response. When the server answers with an HTTP
            error status whose body is a JSON object containing an
            ``error`` field, that object is returned instead of raising, so
            callers can inspect ``payload["error"]``.

        Raises:
            urllib.error.HTTPError: For HTTP error responses whose body is
                not such a JSON error object.
            urllib.error.URLError: For connection-level failures.
        """
        # Imported locally so this block does not depend on an extra
        # module-level import.
        from urllib.error import HTTPError

        encoded = urllib.parse.urlencode(data).encode("utf-8")
        request = urllib.request.Request(url, data=encoded, headers=headers or {}, method="POST")
        try:
            with urllib.request.urlopen(request, timeout=30) as response:
                raw = response.read()
        except HTTPError as exc:
            # urlopen raises on 4xx/5xx, which would hide a JSON body such as
            # {"error": "invalid_grant"} from callers that check for it.
            try:
                payload = json.loads(exc.read().decode("utf-8"))
            except ValueError:  # body is not decodable JSON
                payload = None
            if isinstance(payload, dict) and payload.get("error"):
                return payload
            raise
        return json.loads(raw.decode("utf-8"))
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class GoogleTokenStore:
    """Keeps ``GoogleToken`` records, keyed by account id, in a token vault."""

    def __init__(self, path: Path | None = None, backend: str = "file"):
        # Default location: <ame_home>/tokens/google.json
        self.path = path or ame_home() / "tokens" / "google.json"
        self.vault = token_vault("google", self.path, backend=backend)  # type: ignore[arg-type]

    def save(self, token: GoogleToken) -> GoogleToken:
        """Store *token* under its ``account_id`` and return it unchanged."""
        tokens = self._read()
        tokens[token.account_id] = token.model_dump(mode="json")
        self.vault.save(tokens)
        return token

    def load(self, account_id: str) -> GoogleToken:
        """Return the stored token for *account_id*.

        Raises:
            GoogleOAuthError: If no dict entry exists for *account_id*.
        """
        entry = self._read().get(account_id)
        if isinstance(entry, dict):
            return GoogleToken.model_validate(entry)
        raise GoogleOAuthError(f"Google token not found for account_id={account_id}")

    def revoke(self, account_id: str) -> bool:
        """Remove the token for *account_id*; return whether one was stored.

        The vault is deleted when no tokens remain, otherwise it is saved
        with the remaining tokens.
        """
        tokens = self._read()
        missing = object()
        removed = tokens.pop(account_id, missing)
        if tokens:
            self.vault.save(tokens)
        else:
            self.vault.delete()
        return removed is not missing

    def _read(self) -> dict[str, Any]:
        """Load the whole account-id -> token mapping from the vault."""
        return self.vault.load()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class GoogleOAuthClient:
    """Drives the Google OAuth authorization-code flow for one client config."""

    def __init__(self, config: GoogleOAuthConfig, http: GoogleHttpClient | None = None):
        self.config = config
        # Fall back to the urllib-based transport when none is injected.
        self.http = http or UrlLibGoogleHttpClient()

    def authorization_url(self, state: str) -> str:
        """Return the consent URL to open in the browser, carrying *state*."""
        cfg = self.config
        query = urllib.parse.urlencode(
            {
                "client_id": cfg.client_id,
                "redirect_uri": cfg.redirect_uri,
                "response_type": "code",
                "scope": " ".join(cfg.scopes),
                "state": state,
                "access_type": cfg.access_type,
                "prompt": cfg.prompt,
                "include_granted_scopes": "true",
            }
        )
        return f"{GOOGLE_AUTHORIZE_URL}?{query}"

    def exchange_code(self, code: str, account_id: str = "default") -> GoogleToken:
        """Exchange the authorization *code* for a ``GoogleToken``.

        Raises:
            GoogleOAuthError: If the response contains an ``error`` field or
                has no ``access_token``.
        """
        cfg = self.config
        form = {
            "code": code,
            "client_id": cfg.client_id,
            "client_secret": cfg.client_secret,
            "redirect_uri": cfg.redirect_uri,
            "grant_type": "authorization_code",
        }
        payload = self.http.post_json(GOOGLE_TOKEN_URL, form)

        error = payload.get("error")
        if error:
            raise GoogleOAuthError(f"Google OAuth exchange failed: {error}")

        access_token = str(payload.get("access_token") or "")
        if not access_token:
            raise GoogleOAuthError("Google OAuth response did not include access_token")

        # Use the requested scopes when the response does not list any.
        granted = _split_scopes(payload.get("scope")) or list(cfg.scopes)
        return GoogleToken(
            account_id=account_id,
            access_token=access_token,
            refresh_token=payload.get("refresh_token"),
            token_type=str(payload.get("token_type") or "Bearer"),
            expires_in=payload.get("expires_in"),
            scopes=granted,
        )
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def exchange_and_save_google_token(
    code: str,
    config: GoogleOAuthConfig,
    *,
    account_id: str = "default",
    store_path: Path | None = None,
    token_backend: str = "file",
    http: GoogleHttpClient | None = None,
) -> GoogleToken:
    """Exchange *code* for a token and persist it in one step.

    The exchange goes through ``GoogleOAuthClient`` (using *http* when
    given); the resulting token is saved with a ``GoogleTokenStore`` built
    from *store_path* and *token_backend*, and then returned.
    """
    client = GoogleOAuthClient(config, http=http)
    token = client.exchange_code(code, account_id=account_id)
    store = GoogleTokenStore(store_path, backend=token_backend)
    return store.save(token)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _split_scopes(value: Any) -> list[str]:
    """Normalise a scope value into a list of scope strings.

    Falsy input gives an empty list, a list has each item converted with
    ``str``, and anything else is stringified and split on whitespace
    and commas.
    """
    if not value:
        return []
    if isinstance(value, list):
        return list(map(str, value))
    # str.split() with no separator never yields empty pieces.
    return str(value).replace(",", " ").split()
|
ame/connectors/jira.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from ame.bronze.schema import BronzeDocument
|
|
8
|
+
from ame.connectors.base import SourceRef
|
|
9
|
+
from ame.connectors.json_helpers import as_list, first_present, read_json
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class JiraConnector:
    """Build ``SourceRef`` entries from JSON exports of Jira issues."""

    source_type = "jira"

    def scan(self, path: Path) -> list[SourceRef]:
        """Return one ``SourceRef`` per dict row in the JSON file(s) at *path*.

        *path* may be a single file or a directory that is searched
        recursively for ``*.json``. Rows without ``key``/``id``/``issue_key``
        use the file stem as their issue key.
        """
        root = path.expanduser().resolve()
        files = [root] if root.is_file() else sorted(root.rglob("*.json"))
        refs: list[SourceRef] = []
        for file in files:
            for row in as_list(read_json(file)):
                if not isinstance(row, dict):
                    continue
                key = str(first_present(row, "key", "id", "issue_key") or file.stem)
                refs.append(SourceRef(path=file, source_id=f"jira:{key}", content=self._issue_content(row, key)))
        return refs

    def load(self, corpus_id: str, ref: SourceRef) -> BronzeDocument:
        """Wrap *ref* in a ``BronzeDocument`` for *corpus_id*.

        The content is ``ref.content`` when non-empty, otherwise the UTF-8
        text of ``ref.path``; its SHA-256 digest provides the document id
        and the content hash.
        """
        content = ref.content or ref.path.read_text(encoding="utf-8")
        digest = hashlib.sha256(content.encode("utf-8")).hexdigest()
        # source_id has the form "jira:<key>"; keep everything after the prefix.
        key = ref.source_id.split(":", 1)[-1]
        return BronzeDocument(
            id=f"bronze_{digest[:16]}",
            corpus_id=corpus_id,
            source_type=self.source_type,
            source_id=ref.source_id,
            content=content,
            metadata={"path": str(ref.path), "issue_key": key, "title": f"Jira {key}", "connector": "jira"},
            content_hash=f"sha256:{digest}",
        )

    def _issue_content(self, row: dict[str, Any], key: str) -> str:
        """Render one issue row as a front-matter document.

        Fields are read from ``row["fields"]`` when that is a dict,
        otherwise from *row* itself.
        """
        fields = row.get("fields") if isinstance(row.get("fields"), dict) else row
        summary = str(first_present(fields, "summary", "title", "name") or key)
        description = str(first_present(fields, "description", "body", "content") or "")

        status = first_present(fields, "status", "state")
        if isinstance(status, dict):
            # An object-valued status is shown by its "name" rather than as a
            # dict repr; without a name the previous rendering is kept.
            status = status.get("name") or status

        comments = first_present(fields, "comments", "comment") or []
        if isinstance(comments, dict):
            # The comment list may be wrapped in an object under "comments".
            comments = comments.get("comments", [])
        if not isinstance(comments, list):
            # Anything that is not a list carries no renderable comments.
            comments = []
        bodies = (
            first_present(comment, "body", "text", "content")
            for comment in comments
            if isinstance(comment, dict)
        )
        # Skip comments without a body instead of emitting "- None".
        comment_text = "\n".join(f"- {body}" for body in bodies if body is not None)

        return "\n".join(
            [
                "---",
                f"title: {summary}",
                f"issue_key: {key}",
                f"status: {status or ''}",
                "---",
                "",
                f"# {summary}",
                "",
                description,
                "",
                "## Comments",
                comment_text,
                "",
            ]
        )
|