@oriro/orirocli 0.1.9 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -18
- package/dist/cli.js +4776 -2964
- package/package.json +2 -2
- package/skills/craft/ai-engineering/SKILL.md +2 -2
- package/skills/graphify/SKILL.md +0 -619
- package/skills/graphify/__init__.py +0 -28
- package/skills/graphify/__main__.py +0 -4582
- package/skills/graphify/affected.py +0 -154
- package/skills/graphify/always_on/agents-md.md +0 -12
- package/skills/graphify/always_on/antigravity-rules.md +0 -14
- package/skills/graphify/always_on/claude-md.md +0 -9
- package/skills/graphify/always_on/gemini-md.md +0 -9
- package/skills/graphify/always_on/kiro-steering.md +0 -5
- package/skills/graphify/always_on/vscode-instructions.md +0 -17
- package/skills/graphify/analyze.py +0 -724
- package/skills/graphify/benchmark.py +0 -155
- package/skills/graphify/build.py +0 -487
- package/skills/graphify/cache.py +0 -417
- package/skills/graphify/callflow_html.py +0 -2020
- package/skills/graphify/cluster.py +0 -272
- package/skills/graphify/command-kilo.md +0 -15
- package/skills/graphify/dedup.py +0 -429
- package/skills/graphify/detect.py +0 -1379
- package/skills/graphify/diagnostics.py +0 -390
- package/skills/graphify/export.py +0 -1408
- package/skills/graphify/extract.py +0 -11570
- package/skills/graphify/global_graph.py +0 -159
- package/skills/graphify/google_workspace.py +0 -223
- package/skills/graphify/hooks.py +0 -457
- package/skills/graphify/ingest.py +0 -331
- package/skills/graphify/llm.py +0 -1896
- package/skills/graphify/manifest.py +0 -4
- package/skills/graphify/mcp_ingest.py +0 -392
- package/skills/graphify/multigraph_compat.py +0 -212
- package/skills/graphify/pg_introspect.py +0 -142
- package/skills/graphify/prs.py +0 -748
- package/skills/graphify/querylog.py +0 -70
- package/skills/graphify/report.py +0 -218
- package/skills/graphify/scip_ingest.py +0 -363
- package/skills/graphify/security.py +0 -336
- package/skills/graphify/semantic_cleanup.py +0 -319
- package/skills/graphify/serve.py +0 -1309
- package/skills/graphify/skill-aider.md +0 -1246
- package/skills/graphify/skill-amp.md +0 -613
- package/skills/graphify/skill-claw.md +0 -616
- package/skills/graphify/skill-codex.md +0 -613
- package/skills/graphify/skill-copilot.md +0 -616
- package/skills/graphify/skill-devin.md +0 -1372
- package/skills/graphify/skill-droid.md +0 -613
- package/skills/graphify/skill-kilo.md +0 -625
- package/skills/graphify/skill-kiro.md +0 -615
- package/skills/graphify/skill-opencode.md +0 -608
- package/skills/graphify/skill-pi.md +0 -615
- package/skills/graphify/skill-trae.md +0 -614
- package/skills/graphify/skill-vscode.md +0 -612
- package/skills/graphify/skill-windows.md +0 -651
- package/skills/graphify/skills/amp/references/add-watch.md +0 -56
- package/skills/graphify/skills/amp/references/exports.md +0 -71
- package/skills/graphify/skills/amp/references/extraction-spec.md +0 -68
- package/skills/graphify/skills/amp/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/amp/references/hooks.md +0 -33
- package/skills/graphify/skills/amp/references/query.md +0 -249
- package/skills/graphify/skills/amp/references/transcribe.md +0 -48
- package/skills/graphify/skills/amp/references/update.md +0 -179
- package/skills/graphify/skills/claude/references/add-watch.md +0 -56
- package/skills/graphify/skills/claude/references/exports.md +0 -71
- package/skills/graphify/skills/claude/references/extraction-spec.md +0 -68
- package/skills/graphify/skills/claude/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/claude/references/hooks.md +0 -33
- package/skills/graphify/skills/claude/references/query.md +0 -103
- package/skills/graphify/skills/claude/references/transcribe.md +0 -48
- package/skills/graphify/skills/claude/references/update.md +0 -179
- package/skills/graphify/skills/claw/references/add-watch.md +0 -56
- package/skills/graphify/skills/claw/references/exports.md +0 -71
- package/skills/graphify/skills/claw/references/extraction-spec.md +0 -29
- package/skills/graphify/skills/claw/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/claw/references/hooks.md +0 -33
- package/skills/graphify/skills/claw/references/query.md +0 -249
- package/skills/graphify/skills/claw/references/transcribe.md +0 -48
- package/skills/graphify/skills/claw/references/update.md +0 -179
- package/skills/graphify/skills/codex/references/add-watch.md +0 -56
- package/skills/graphify/skills/codex/references/exports.md +0 -71
- package/skills/graphify/skills/codex/references/extraction-spec.md +0 -29
- package/skills/graphify/skills/codex/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/codex/references/hooks.md +0 -33
- package/skills/graphify/skills/codex/references/query.md +0 -249
- package/skills/graphify/skills/codex/references/transcribe.md +0 -48
- package/skills/graphify/skills/codex/references/update.md +0 -179
- package/skills/graphify/skills/copilot/references/add-watch.md +0 -56
- package/skills/graphify/skills/copilot/references/exports.md +0 -71
- package/skills/graphify/skills/copilot/references/extraction-spec.md +0 -68
- package/skills/graphify/skills/copilot/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/copilot/references/hooks.md +0 -33
- package/skills/graphify/skills/copilot/references/query.md +0 -249
- package/skills/graphify/skills/copilot/references/transcribe.md +0 -48
- package/skills/graphify/skills/copilot/references/update.md +0 -179
- package/skills/graphify/skills/droid/references/add-watch.md +0 -56
- package/skills/graphify/skills/droid/references/exports.md +0 -71
- package/skills/graphify/skills/droid/references/extraction-spec.md +0 -68
- package/skills/graphify/skills/droid/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/droid/references/hooks.md +0 -33
- package/skills/graphify/skills/droid/references/query.md +0 -249
- package/skills/graphify/skills/droid/references/transcribe.md +0 -48
- package/skills/graphify/skills/droid/references/update.md +0 -179
- package/skills/graphify/skills/kilo/references/add-watch.md +0 -56
- package/skills/graphify/skills/kilo/references/exports.md +0 -71
- package/skills/graphify/skills/kilo/references/extraction-spec.md +0 -68
- package/skills/graphify/skills/kilo/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/kilo/references/hooks.md +0 -33
- package/skills/graphify/skills/kilo/references/query.md +0 -249
- package/skills/graphify/skills/kilo/references/transcribe.md +0 -48
- package/skills/graphify/skills/kilo/references/update.md +0 -179
- package/skills/graphify/skills/kiro/references/add-watch.md +0 -56
- package/skills/graphify/skills/kiro/references/exports.md +0 -71
- package/skills/graphify/skills/kiro/references/extraction-spec.md +0 -29
- package/skills/graphify/skills/kiro/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/kiro/references/hooks.md +0 -33
- package/skills/graphify/skills/kiro/references/query.md +0 -249
- package/skills/graphify/skills/kiro/references/transcribe.md +0 -48
- package/skills/graphify/skills/kiro/references/update.md +0 -179
- package/skills/graphify/skills/opencode/references/add-watch.md +0 -56
- package/skills/graphify/skills/opencode/references/exports.md +0 -71
- package/skills/graphify/skills/opencode/references/extraction-spec.md +0 -68
- package/skills/graphify/skills/opencode/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/opencode/references/hooks.md +0 -33
- package/skills/graphify/skills/opencode/references/query.md +0 -249
- package/skills/graphify/skills/opencode/references/transcribe.md +0 -48
- package/skills/graphify/skills/opencode/references/update.md +0 -179
- package/skills/graphify/skills/pi/references/add-watch.md +0 -56
- package/skills/graphify/skills/pi/references/exports.md +0 -71
- package/skills/graphify/skills/pi/references/extraction-spec.md +0 -29
- package/skills/graphify/skills/pi/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/pi/references/hooks.md +0 -33
- package/skills/graphify/skills/pi/references/query.md +0 -249
- package/skills/graphify/skills/pi/references/transcribe.md +0 -48
- package/skills/graphify/skills/pi/references/update.md +0 -179
- package/skills/graphify/skills/trae/references/add-watch.md +0 -56
- package/skills/graphify/skills/trae/references/exports.md +0 -71
- package/skills/graphify/skills/trae/references/extraction-spec.md +0 -68
- package/skills/graphify/skills/trae/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/trae/references/hooks.md +0 -35
- package/skills/graphify/skills/trae/references/query.md +0 -249
- package/skills/graphify/skills/trae/references/transcribe.md +0 -48
- package/skills/graphify/skills/trae/references/update.md +0 -179
- package/skills/graphify/skills/vscode/references/add-watch.md +0 -56
- package/skills/graphify/skills/vscode/references/exports.md +0 -71
- package/skills/graphify/skills/vscode/references/extraction-spec.md +0 -68
- package/skills/graphify/skills/vscode/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/vscode/references/hooks.md +0 -33
- package/skills/graphify/skills/vscode/references/query.md +0 -249
- package/skills/graphify/skills/vscode/references/transcribe.md +0 -48
- package/skills/graphify/skills/vscode/references/update.md +0 -179
- package/skills/graphify/skills/windows/references/add-watch.md +0 -56
- package/skills/graphify/skills/windows/references/exports.md +0 -71
- package/skills/graphify/skills/windows/references/extraction-spec.md +0 -68
- package/skills/graphify/skills/windows/references/github-and-merge.md +0 -46
- package/skills/graphify/skills/windows/references/hooks.md +0 -33
- package/skills/graphify/skills/windows/references/query.md +0 -249
- package/skills/graphify/skills/windows/references/transcribe.md +0 -48
- package/skills/graphify/skills/windows/references/update.md +0 -179
- package/skills/graphify/symbol_resolution.py +0 -538
- package/skills/graphify/transcribe.py +0 -184
- package/skills/graphify/tree_html.py +0 -582
- package/skills/graphify/validate.py +0 -72
- package/skills/graphify/watch.py +0 -898
- package/skills/graphify/wiki.py +0 -282
|
@@ -1,331 +0,0 @@
|
|
|
1
|
-
# fetch URLs (tweet/arxiv/pdf/web) and save as annotated markdown
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
import json
|
|
4
|
-
import re
|
|
5
|
-
import urllib.error
|
|
6
|
-
import urllib.parse
|
|
7
|
-
from datetime import datetime, timezone
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
|
|
10
|
-
from graphify.security import safe_fetch, safe_fetch_text, validate_url
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def _yaml_str(s: str) -> str:
|
|
14
|
-
"""Escape a string for embedding in a YAML double-quoted scalar.
|
|
15
|
-
|
|
16
|
-
Handles every YAML 1.1/1.2 line-break and control character that could
|
|
17
|
-
let a hostile value (e.g. a fetched page title) break out of the quoted
|
|
18
|
-
scalar and inject sibling YAML keys (F-009 / F-019). The previous
|
|
19
|
-
implementation missed `\\t`, `\\0`, the unicode line-separator U+2028 and
|
|
20
|
-
paragraph-separator U+2029 — all of which YAML treats as line breaks.
|
|
21
|
-
|
|
22
|
-
We intentionally do not depend on PyYAML (not in pyproject deps) and
|
|
23
|
-
instead emit safely-escaped double-quoted scalars by hand: the YAML
|
|
24
|
-
double-quoted form recognises `\\\\`, `\\"`, `\\n`, `\\r`, `\\t`, `\\0`,
|
|
25
|
-
`\\L` (U+2028), `\\P` (U+2029), and `\\xNN`/`\\uNNNN` numeric escapes.
|
|
26
|
-
"""
|
|
27
|
-
if s is None:
|
|
28
|
-
return ""
|
|
29
|
-
out: list[str] = []
|
|
30
|
-
for ch in str(s):
|
|
31
|
-
cp = ord(ch)
|
|
32
|
-
if ch == "\\":
|
|
33
|
-
out.append("\\\\")
|
|
34
|
-
elif ch == '"':
|
|
35
|
-
out.append('\\"')
|
|
36
|
-
elif ch == "\n":
|
|
37
|
-
out.append("\\n")
|
|
38
|
-
elif ch == "\r":
|
|
39
|
-
out.append("\\r")
|
|
40
|
-
elif ch == "\t":
|
|
41
|
-
out.append("\\t")
|
|
42
|
-
elif ch == "\0":
|
|
43
|
-
out.append("\\0")
|
|
44
|
-
elif cp == 0x2028:
|
|
45
|
-
out.append("\\L")
|
|
46
|
-
elif cp == 0x2029:
|
|
47
|
-
out.append("\\P")
|
|
48
|
-
elif cp < 0x20 or cp == 0x7F:
|
|
49
|
-
out.append(f"\\x{cp:02x}")
|
|
50
|
-
else:
|
|
51
|
-
out.append(ch)
|
|
52
|
-
return "".join(out)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def _safe_filename(url: str, suffix: str) -> str:
|
|
56
|
-
"""Turn a URL into a safe filename."""
|
|
57
|
-
parsed = urllib.parse.urlparse(url)
|
|
58
|
-
name = parsed.netloc + parsed.path
|
|
59
|
-
name = re.sub(r"[^\w\-]", "_", name).strip("_")
|
|
60
|
-
name = re.sub(r"_+", "_", name)[:80]
|
|
61
|
-
return name + suffix
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def _detect_url_type(url: str) -> str:
|
|
65
|
-
"""Classify the URL for targeted extraction."""
|
|
66
|
-
lower = url.lower()
|
|
67
|
-
if "twitter.com" in lower or "x.com" in lower:
|
|
68
|
-
return "tweet"
|
|
69
|
-
if "arxiv.org" in lower:
|
|
70
|
-
return "arxiv"
|
|
71
|
-
if "github.com" in lower:
|
|
72
|
-
return "github"
|
|
73
|
-
if "youtube.com" in lower or "youtu.be" in lower:
|
|
74
|
-
return "youtube"
|
|
75
|
-
parsed = urllib.parse.urlparse(url)
|
|
76
|
-
path = parsed.path.lower()
|
|
77
|
-
if path.endswith(".pdf"):
|
|
78
|
-
return "pdf"
|
|
79
|
-
if any(path.endswith(ext) for ext in (".png", ".jpg", ".jpeg", ".webp", ".gif")):
|
|
80
|
-
return "image"
|
|
81
|
-
return "webpage"
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
def _fetch_html(url: str) -> str:
    """Fetch *url* through ``safe_fetch_text`` and return the result unchanged."""
    page_text = safe_fetch_text(url)
    return page_text
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def _html_to_markdown(html: str, url: str) -> str:
|
|
89
|
-
"""Convert HTML to clean markdown. Uses markdownify if available, else basic strip."""
|
|
90
|
-
# Always pre-strip script/style so their text content never leaks into output
|
|
91
|
-
html = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL | re.IGNORECASE)
|
|
92
|
-
html = re.sub(r"<style[^>]*>.*?</style>", "", html, flags=re.DOTALL | re.IGNORECASE)
|
|
93
|
-
try:
|
|
94
|
-
from markdownify import markdownify
|
|
95
|
-
return markdownify(html, heading_style="ATX", bullets="-", strip=["img"])
|
|
96
|
-
except ImportError:
|
|
97
|
-
# Fallback: basic tag strip
|
|
98
|
-
text = re.sub(r"<[^>]+>", " ", html)
|
|
99
|
-
text = re.sub(r"\s+", " ", text).strip()
|
|
100
|
-
return text[:8000]
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def _fetch_tweet(url: str, author: str | None, contributor: str | None) -> tuple[str, str]:
    """Fetch a tweet via the oEmbed endpoint and render it as annotated markdown.

    Args:
        url: Tweet URL on ``twitter.com`` or ``x.com``.
        author: Fallback contributor name, used when *contributor* is not given.
        contributor: Contributor name recorded in the front matter.

    Returns:
        ``(content, filename)`` where *content* is markdown with YAML front
        matter and *filename* is derived from *url*.

    If the oEmbed request or its parsing fails for any reason, a stub
    containing only the URL is produced instead of raising.
    """
    from html import unescape

    # Normalize to twitter.com for oEmbed. Only the host component is
    # rewritten; a blanket str.replace would also alter "x.com" occurring
    # elsewhere in the URL (path, query, or inside another host name).
    oembed_url = re.sub(
        r"^((?:https?://)?(?:[\w-]+\.)*)x\.com(?=[/:?#]|$)",
        r"\1twitter.com",
        url,
        count=1,
        flags=re.IGNORECASE,
    )
    oembed_api = f"https://publish.twitter.com/oembed?url={urllib.parse.quote(oembed_url)}&omit_script=true"
    try:
        data = json.loads(safe_fetch_text(oembed_api))
        # Strip the embed markup, then decode entities such as &amp; / &mdash;.
        tweet_text = unescape(re.sub(r"<[^>]+>", "", data.get("html", ""))).strip()
        tweet_author = data.get("author_name", "unknown")
    except Exception:
        # Deliberate best-effort: oEmbed failed - save URL stub
        tweet_text = f"Tweet at {url} (could not fetch content)"
        tweet_author = "unknown"

    now = datetime.now(timezone.utc).isoformat()
    content = f"""---
source_url: "{_yaml_str(url)}"
type: tweet
author: "{_yaml_str(tweet_author)}"
captured_at: {now}
contributor: "{_yaml_str(contributor or author or 'unknown')}"
---

# Tweet by @{tweet_author}

{tweet_text}

Source: {url}
"""
    filename = _safe_filename(url, ".md")
    return content, filename
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
def _fetch_webpage(url: str, author: str | None, contributor: str | None) -> tuple[str, str]:
    """Fetch a generic webpage and convert it to annotated markdown.

    Args:
        url: Page URL.
        author: Fallback contributor name, used when *contributor* is not given.
        contributor: Contributor name recorded in the front matter.

    Returns:
        ``(content, filename)`` where *content* is YAML front matter, a
        heading with the page title and at most 12000 characters of converted
        body, and *filename* is derived from *url*.

    Errors raised while fetching the page propagate to the caller.
    """
    from html import unescape

    page_html = _fetch_html(url)

    # Extract the title, decoding entities (&amp; -> &) and collapsing
    # whitespace. Fall back to the URL when the tag is missing or empty so the
    # heading and the front-matter title are never blank.
    title_match = re.search(r"<title[^>]*>(.*?)</title>", page_html, re.IGNORECASE | re.DOTALL)
    title = ""
    if title_match:
        title = re.sub(r"\s+", " ", unescape(title_match.group(1))).strip()
    if not title:
        title = url

    markdown = _html_to_markdown(page_html, url)
    now = datetime.now(timezone.utc).isoformat()
    content = f"""---
source_url: "{_yaml_str(url)}"
type: webpage
title: "{_yaml_str(title)}"
captured_at: {now}
contributor: "{_yaml_str(contributor or author or 'unknown')}"
---

# {title}

Source: {url}

---

{markdown[:12000]}
"""
    filename = _safe_filename(url, ".md")
    return content, filename
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
def _fetch_arxiv(url: str, author: str | None, contributor: str | None) -> tuple[str, str]:
    """Fetch an arXiv abstract page and render it as annotated markdown.

    A new-style identifier (``NNNN.NNNNN``) is extracted from *url*; both
    ``/abs/`` and ``/pdf/`` links therefore resolve to the abstract page. When
    no such identifier is present the URL is handled as a generic webpage.

    Args:
        url: arXiv URL.
        author: Fallback contributor name, used when *contributor* is not given.
        contributor: Contributor name recorded in the front matter.

    Returns:
        ``(content, filename)``; the filename is ``arxiv_<id>.md`` with the
        dot in the identifier replaced by ``_``.

    If fetching or parsing the abstract page fails for any reason, a stub
    with the identifier as title and empty abstract/authors is produced
    instead of raising.
    """
    from html import unescape

    id_match = re.search(r"(\d{4}\.\d{4,5})", url)
    if not id_match:
        return _fetch_webpage(url, author, contributor)

    # From here on an identifier is always available, so no further
    # "if arxiv_id else ..." fallbacks are needed.
    arxiv_id = id_match.group(1)
    api_url = f"https://export.arxiv.org/abs/{arxiv_id}"
    try:
        html = _fetch_html(api_url)
        abstract_match = re.search(r'class="abstract[^"]*"[^>]*>(.*?)</blockquote>', html, re.DOTALL | re.IGNORECASE)
        abstract = unescape(re.sub(r"<[^>]+>", "", abstract_match.group(1))).strip() if abstract_match else ""
        title_match = re.search(r'class="title[^"]*"[^>]*>(.*?)</h1>', html, re.DOTALL | re.IGNORECASE)
        title = unescape(re.sub(r"<[^>]+>", " ", title_match.group(1))).strip() if title_match else arxiv_id
        authors_match = re.search(r'class="authors"[^>]*>(.*?)</div>', html, re.DOTALL | re.IGNORECASE)
        paper_authors = unescape(re.sub(r"<[^>]+>", "", authors_match.group(1))).strip() if authors_match else ""
    except Exception:
        # Deliberate best-effort: keep a stub entry rather than failing.
        title, abstract, paper_authors = arxiv_id, "", ""

    now = datetime.now(timezone.utc).isoformat()
    content = f"""---
source_url: "{_yaml_str(url)}"
arxiv_id: "{_yaml_str(arxiv_id)}"
type: paper
title: "{_yaml_str(title)}"
paper_authors: "{_yaml_str(paper_authors)}"
captured_at: {now}
contributor: "{_yaml_str(contributor or author or 'unknown')}"
---

# {title}

**Authors:** {paper_authors}
**arXiv:** {arxiv_id}

## Abstract

{abstract}

Source: {url}
"""
    filename = f"arxiv_{arxiv_id.replace('.', '_')}.md"
    return content, filename
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
def _download_binary(url: str, suffix: str, target_dir: Path) -> Path:
    """Download *url* as-is (PDF, image) into *target_dir*.

    The filename is derived from the URL with *suffix* appended. The result
    of ``safe_fetch`` is written with ``write_bytes``; an existing file of
    the same name is overwritten.

    Returns:
        Path of the written file.
    """
    destination = target_dir / _safe_filename(url, suffix)
    payload = safe_fetch(url)
    destination.write_bytes(payload)
    return destination
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
def ingest(url: str, target_dir: Path, author: str | None = None, contributor: str | None = None) -> Path:
    """Fetch a URL and save it into target_dir as a graphify-ready file.

    The URL is validated before anything else happens, so an invalid URL
    neither creates *target_dir* nor reaches the classifier. PDFs and images
    are downloaded as-is, YouTube links are handed to
    ``graphify.transcribe.download_audio``, and tweets, arXiv pages and all
    other pages are saved as markdown with YAML front matter.

    Args:
        url: URL to ingest.
        target_dir: Directory to write into; created if missing.
        author: Fallback contributor name, used when *contributor* is not given.
        contributor: Contributor name stored in the front matter.

    Returns:
        The path of the saved file.

    Raises:
        ValueError: If ``validate_url`` rejects *url* (message prefixed with
            ``ingest:``).
        RuntimeError: If fetching fails with an HTTP, URL or OS error.
    """
    try:
        validate_url(url)
    except ValueError as exc:
        raise ValueError(f"ingest: {exc}") from exc

    target_dir.mkdir(parents=True, exist_ok=True)
    url_type = _detect_url_type(url)

    try:
        if url_type == "pdf":
            out = _download_binary(url, ".pdf", target_dir)
            print(f"Downloaded PDF: {out.name}")
            return out

        if url_type == "image":
            suffix = Path(urllib.parse.urlparse(url).path).suffix or ".jpg"
            out = _download_binary(url, suffix, target_dir)
            print(f"Downloaded image: {out.name}")
            return out

        if url_type == "youtube":
            # Imported lazily so the transcribe module is only needed for
            # this branch.
            from graphify.transcribe import download_audio
            out = download_audio(url, target_dir)
            print(f"Downloaded audio: {out.name}")
            return out

        if url_type == "tweet":
            content, filename = _fetch_tweet(url, author, contributor)
        elif url_type == "arxiv":
            content, filename = _fetch_arxiv(url, author, contributor)
        else:
            content, filename = _fetch_webpage(url, author, contributor)
    except (urllib.error.HTTPError, urllib.error.URLError, OSError) as exc:
        raise RuntimeError(f"ingest: failed to fetch {url!r}: {exc}") from exc

    out_path = target_dir / filename
    # Avoid overwriting - append counter if needed
    # NOTE(review): the search stops after counter 999; if every candidate
    # already exists, the last candidate is overwritten.
    stem = Path(filename).stem
    counter = 1
    while out_path.exists() and counter < 1000:
        out_path = target_dir / f"{stem}_{counter}.md"
        counter += 1

    out_path.write_text(content, encoding="utf-8")
    print(f"Saved {url_type}: {out_path.name}")
    return out_path
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
def save_query_result(
    question: str,
    answer: str,
    memory_dir: Path,
    query_type: str = "query",
    source_nodes: list[str] | None = None,
) -> Path:
    """Save a Q&A result as markdown so it gets extracted into the graph on next --update.

    Files are stored in memory_dir (typically graphify-out/memory/) with YAML
    front matter that graphify's extractor reads as node metadata.

    Every caller-supplied value placed in the front matter (``query_type``,
    ``question`` and each entry of ``source_nodes``) is escaped with
    ``_yaml_str`` so that quotes or line breaks in those values cannot break
    out of the quoted scalar.

    Args:
        question: The question that was asked; also used to build the filename slug.
        answer: The answer text, written verbatim into the body.
        memory_dir: Directory to write into; created if missing.
        query_type: Value of the ``type`` front-matter key.
        source_nodes: Optional node identifiers. At most the first ten are
            listed in the front matter; all are listed in the body.

    Returns:
        Path of the written file, named ``query_<UTC timestamp>_<slug>.md``.
    """
    memory_dir = Path(memory_dir)
    memory_dir.mkdir(parents=True, exist_ok=True)

    now = datetime.now(timezone.utc)
    slug = re.sub(r"[^\w]", "_", question.lower())[:50].strip("_")
    filename = f"query_{now.strftime('%Y%m%d_%H%M%S')}_{slug}.md"

    frontmatter_lines = [
        "---",
        f'type: "{_yaml_str(query_type)}"',
        f'date: "{now.isoformat()}"',
        f'question: "{_yaml_str(question)}"',
        'contributor: "graphify"',
    ]
    if source_nodes:
        nodes_str = ", ".join(f'"{_yaml_str(n)}"' for n in source_nodes[:10])
        frontmatter_lines.append(f"source_nodes: [{nodes_str}]")
    frontmatter_lines.append("---")

    body_lines = [
        "",
        f"# Q: {question}",
        "",
        "## Answer",
        "",
        answer,
    ]
    if source_nodes:
        body_lines += ["", "## Source Nodes", ""]
        body_lines += [f"- {n}" for n in source_nodes]

    content = "\n".join(frontmatter_lines + body_lines)
    out_path = memory_dir / filename
    out_path.write_text(content, encoding="utf-8")
    return out_path
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
if __name__ == "__main__":
    # Command-line entry point: fetch one URL into a target directory.
    import argparse

    cli = argparse.ArgumentParser(description="Fetch a URL into a graphify /raw folder")
    cli.add_argument("url", help="URL to fetch")
    cli.add_argument("target_dir", nargs="?", default="./raw", help="Target directory (default: ./raw)")
    cli.add_argument("--author", help="Your name (stored as node metadata)")
    cli.add_argument("--contributor", help="Contributor name for team graphs")
    opts = cli.parse_args()

    saved_path = ingest(
        opts.url,
        Path(opts.target_dir),
        author=opts.author,
        contributor=opts.contributor,
    )
    print(f"Ready for graphify: {saved_path}")
|